Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.29% |
---|
Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.29% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-7919/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 57 - 79 |
-------------------------------------------------------------------------------- |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
64: +density0(j ,k-1)*volume(j ,k-1) & |
65: +density0(j ,k )*volume(j ,k ) & |
66: +density0(j-1,k )*volume(j-1,k )) & |
67: *0.25_8) |
68: |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
76: +yarea(j-1,k )*(viscosity(j-1,k )-viscosity(j-1,k-1))) |
77: ENDDO |
78: ENDDO |
79: !$OMP END DO |
0x427340 PUSH %RBP |
0x427341 MOV %RSP,%RBP |
0x427344 PUSH %R15 |
0x427346 PUSH %R14 |
0x427348 PUSH %R13 |
0x42734a PUSH %R12 |
0x42734c PUSH %RBX |
0x42734d AND $-0x40,%RSP |
0x427351 SUB $0x6c0,%RSP |
0x427358 MOV 0xd0(%RBP),%RAX |
0x42735f MOV %RAX,0x1c0(%RSP) |
0x427367 MOV 0xc8(%RBP),%RAX |
0x42736e MOV %RAX,0x1b8(%RSP) |
0x427376 MOV 0xc0(%RBP),%RAX |
0x42737d MOV %RAX,0x1b0(%RSP) |
0x427385 MOV 0xb8(%RBP),%RAX |
0x42738c MOV %RAX,0x1a8(%RSP) |
0x427394 MOV 0xb0(%RBP),%RAX |
0x42739b MOV %RAX,0x1a0(%RSP) |
0x4273a3 MOV 0xa8(%RBP),%RAX |
0x4273aa MOV %RAX,0x198(%RSP) |
0x4273b2 MOV 0xa0(%RBP),%RAX |
0x4273b9 MOV %RAX,0x190(%RSP) |
0x4273c1 MOV 0x98(%RBP),%RAX |
0x4273c8 MOV %RAX,0x188(%RSP) |
0x4273d0 MOV 0x70(%RBP),%R14D |
0x4273d4 MOV 0x68(%RBP),%EAX |
0x4273d7 SUB %R14D,%EAX |
0x4273da INC %EAX |
0x4273dc MOV 0x90(%RBP),%RCX |
0x4273e3 MOV %RCX,0x180(%RSP) |
0x4273eb MOV 0x88(%RBP),%RCX |
0x4273f2 MOV %RCX,0x178(%RSP) |
0x4273fa MOV 0x80(%RBP),%R13 |
0x427401 MOV 0x78(%RBP),%R12 |
0x427405 MOV 0x60(%RBP),%RCX |
0x427409 MOV %RCX,0xa8(%RSP) |
0x427411 MOV 0x58(%RBP),%RCX |
0x427415 MOV %RCX,0xa0(%RSP) |
0x42741d MOV 0x50(%RBP),%RCX |
0x427421 MOV %RCX,0x98(%RSP) |
0x427429 MOV 0x48(%RBP),%RCX |
0x42742d MOV %RCX,0x90(%RSP) |
0x427435 MOV 0x40(%RBP),%RCX |
0x427439 MOV %RCX,0x88(%RSP) |
0x427441 MOV 0x38(%RBP),%RCX |
0x427445 MOV %RCX,0x80(%RSP) |
0x42744d MOV 0x30(%RBP),%RCX |
0x427451 MOV %RCX,0x78(%RSP) |
0x427456 MOV 0x28(%RBP),%RCX |
0x42745a MOV %RCX,0x70(%RSP) |
0x42745f MOV 0x20(%RBP),%RCX |
0x427463 MOV %RCX,0x68(%RSP) |
0x427468 MOV 0x18(%RBP),%RCX |
0x42746c MOV %RCX,0x60(%RSP) |
0x427471 MOV 0x10(%RBP),%R15 |
0x427475 MOVL $0,0x5c(%RSP) |
0x42747d JS 427506 |
0x427483 MOV %RDX,%RBX |
0x427486 MOV %R8,0x20(%RSP) |
0x42748b MOV %RDI,0xd8(%RSP) |
0x427493 MOV (%RDI),%ESI |
0x427495 MOVL $0,0x34(%RSP) |
0x42749d MOV %EAX,0x30(%RSP) |
0x4274a1 MOVL $0x1,0x58(%RSP) |
0x4274a9 SUB $0x8,%RSP |
0x4274ad LEA 0x60(%RSP),%RAX |
0x4274b2 LEA 0x64(%RSP),%RCX |
0x4274b7 LEA 0x3c(%RSP),%R8 |
0x4274bc LEA 0x38(%RSP),%R9 |
0x4274c1 MOV $0x747290,%EDI |
0x4274c6 MOV %ESI,0x54(%RSP) |
0x4274ca MOV $0x22,%EDX |
0x4274cf PUSH $0x1 |
0x4274d1 PUSH $0x1 |
0x4274d3 PUSH %RAX |
0x4274d4 CALL 4045a0 <__kmpc_for_static_init_4@plt> |
0x4274d9 ADD $0x20,%RSP |
0x4274dd MOV 0x34(%RSP),%EAX |
0x4274e1 MOV 0x30(%RSP),%ECX |
0x4274e5 SUB %EAX,%ECX |
0x4274e7 MOV %ECX,0x54(%RSP) |
0x4274eb JAE 427540 |
0x4274ed MOV $0x7472b0,%EDI |
0x4274f2 MOV 0x4c(%RSP),%ESI |
0x4274f6 VZEROUPPER |
0x4274f9 CALL 404190 <__kmpc_for_static_fini@plt> |
0x4274fe MOV 0xd8(%RSP),%RDI |
0x427506 MOV (%RDI),%ESI |
0x427508 MOV $0x7472d0,%EDI |
0x42750d LEA -0x28(%RBP),%RSP |
0x427511 POP %RBX |
0x427512 POP %R12 |
0x427514 POP %R13 |
0x427516 POP %R14 |
0x427518 POP %R15 |
0x42751a POP %RBP |
0x42751b JMP 404660 |
0x427520 NOPW %CS:(%RAX,%RAX,1) |
0x42752f NOPW %CS:(%RAX,%RAX,1) |
0x42753e XCHG %AX,%AX |
0x427540 MOV %RAX,%RDX |
0x427543 VMOVQ %RBX,%XMM0 |
0x427548 ADD %R14D,%EDX |
0x42754b MOVSXD (%R13),%RCX |
0x42754f MOV (%R12),%EAX |
0x427553 SUB %ECX,%EAX |
0x427555 LEA 0x1(%RAX),%EDI |
0x427558 ADD $0x2,%EAX |
0x42755b CMP $0x2,%EAX |
0x42755e MOV $0x1,%R8D |
0x427564 CMOVGE %EAX,%R8D |
0x427568 MOV %R8D,%EAX |
0x42756b AND $0x7ffffff8,%EAX |
0x427570 MOV %RAX,0x40(%RSP) |
0x427575 MOVSXD 0x20(%RSP),%R10 |
0x42757a MOVSXD %R15D,%R11 |
0x42757d VPBROADCASTQ %XMM0,%ZMM0 |
0x427583 MOV %R8,0x170(%RSP) |
0x42758b VPBROADCASTQ %R8,%ZMM1 |
0x427591 VMOVDQA64 %ZMM1,0x200(%RSP) |
0x427599 MOV %RCX,0x100(%RSP) |
0x4275a1 LEA (,%RCX,8),%RAX |
0x4275a9 LEA (,%R11,8),%RCX |
0x4275b1 SUB %RCX,%RAX |
0x4275b4 MOV 0x80(%RSP),%RCX |
0x4275bc LEA 0x10(%RCX,%RAX,1),%RCX |
0x4275c1 MOV %RCX,0x168(%RSP) |
0x4275c9 MOV $0x1,%ECX |
0x4275ce SUB %R10,%RCX |
0x4275d1 MOV %RCX,0x160(%RSP) |
0x4275d9 MOV $0x2,%ECX |
0x4275de SUB %R10,%RCX |
0x4275e1 MOV %RCX,0x158(%RSP) |
0x4275e9 MOV 0xa0(%RSP),%RCX |
0x4275f1 LEA 0x10(%RCX,%RAX,1),%RCX |
0x4275f6 MOV %RCX,0x150(%RSP) |
0x4275fe MOV 0x88(%RSP),%RCX |
0x427606 LEA 0x10(%RCX,%RAX,1),%RCX |
0x42760b MOV %RCX,0x148(%RSP) |
0x427613 MOV 0x90(%RSP),%RCX |
0x42761b LEA 0x10(%RCX,%RAX,1),%RCX |
0x427620 MOV %RCX,0x140(%RSP) |
0x427628 MOV 0x98(%RSP),%RCX |
0x427630 LEA 0x10(%RCX,%RAX,1),%RCX |
0x427635 MOV %RCX,0x138(%RSP) |
0x42763d MOV 0x60(%RSP),%RCX |
0x427642 LEA 0x10(%RCX,%RAX,1),%RCX |
0x427647 MOV %RCX,0x130(%RSP) |
0x42764f MOV 0x70(%RSP),%RCX |
0x427654 LEA 0x10(%RCX,%RAX,1),%RCX |
0x427659 MOV %RCX,0x128(%RSP) |
0x427661 MOV 0x68(%RSP),%RCX |
0x427666 LEA 0x10(%RCX,%RAX,1),%RCX |
0x42766b MOV %RCX,0x120(%RSP) |
0x427673 MOV 0x78(%RSP),%RCX |
0x427678 LEA 0x10(%RCX,%RAX,1),%RCX |
0x42767d MOV %RCX,0x118(%RSP) |
0x427685 MOV 0xa8(%RSP),%RCX |
0x42768d LEA 0x10(%RCX,%RAX,1),%RAX |
0x427692 MOV %RAX,0x110(%RSP) |
0x42769a VBROADCASTSD 0xdd634(%RIP),%ZMM3 |
0x4276a4 MOV %R10,0xf8(%RSP) |
0x4276ac LEA -0x2(%R10),%RAX |
0x4276b0 MOV %RAX,0xe8(%RSP) |
0x4276b8 MOV %R11,0xf0(%RSP) |
0x4276c0 LEA -0x2(%R11),%RAX |
0x4276c4 MOV %RAX,0xe0(%RSP) |
0x4276cc XOR %R9D,%R9D |
0x4276cf MOV %RDX,0x108(%RSP) |
0x4276d7 MOV %EDX,%R12D |
0x4276da MOV %EDI,0x2c(%RSP) |
0x4276de JMP 427715 |
0x4276e0 NOPW %CS:(%RAX,%RAX,1) |
0x4276ef NOPW %CS:(%RAX,%RAX,1) |
0x4276fe XCHG %AX,%AX |
(126) 0x427700 LEA 0x1(%R9),%EAX |
(126) 0x427704 INC %R12D |
(126) 0x427707 CMP 0x54(%RSP),%R9D |
(126) 0x42770c MOV %EAX,%R9D |
(126) 0x42770f JE 4274ed |
(126) 0x427715 TEST %EDI,%EDI |
(126) 0x427717 JS 427700 |
(126) 0x427719 MOV 0x178(%RSP),%RAX |
(126) 0x427721 MOV (%RAX),%R11 |
(126) 0x427724 MOV 0x180(%RSP),%RAX |
(126) 0x42772c MOV (%RAX),%R10 |
(126) 0x42772f MOV 0x188(%RSP),%RAX |
(126) 0x427737 MOV (%RAX),%RSI |
(126) 0x42773a MOV 0x190(%RSP),%RAX |
(126) 0x427742 MOV (%RAX),%RCX |
(126) 0x427745 MOV 0x198(%RSP),%RAX |
(126) 0x42774d MOV (%RAX),%RDX |
(126) 0x427750 MOV 0x1a0(%RSP),%RAX |
(126) 0x427758 MOV (%RAX),%R15 |
(126) 0x42775b MOV 0x1a8(%RSP),%RAX |
(126) 0x427763 MOV (%RAX),%R14 |
(126) 0x427766 MOV 0x1b0(%RSP),%RAX |
(126) 0x42776e MOV (%RAX),%RBX |
(126) 0x427771 MOV 0x1b8(%RSP),%RAX |
(126) 0x427779 MOV (%RAX),%R8 |
(126) 0x42777c MOV 0x1c0(%RSP),%RAX |
(126) 0x427784 MOV (%RAX),%R13 |
(126) 0x427787 CMPQ $0,0x40(%RSP) |
(126) 0x42778d MOV %R9,0xd0(%RSP) |
(126) 0x427795 MOV %R8,0xc8(%RSP) |
(126) 0x42779d MOV %R14,0x1f0(%RSP) |
(126) 0x4277a5 MOV %RSI,0xc0(%RSP) |
(126) 0x4277ad MOV %R10,0x1e8(%RSP) |
(126) 0x4277b5 JE 427ac0 |
(126) 0x4277bb MOV %R12D,0x50(%RSP) |
(126) 0x4277c0 MOVSXD %R12D,%RDI |
(126) 0x4277c3 MOV 0x160(%RSP),%RAX |
(126) 0x4277cb MOV %R15,0xb8(%RSP) |
(126) 0x4277d3 LEA (%RAX,%RDI,1),%R15 |
(126) 0x4277d7 ADD 0x158(%RSP),%RDI |
(126) 0x4277df MOV %R13,%R12 |
(126) 0x4277e2 IMUL %R15,%R12 |
(126) 0x4277e6 MOV 0x168(%RSP),%RAX |
(126) 0x4277ee ADD %RAX,%R12 |
(126) 0x4277f1 MOV %R13,0x1e0(%RSP) |
(126) 0x4277f9 IMUL %RDI,%R13 |
(126) 0x4277fd ADD %RAX,%R13 |
(126) 0x427800 MOV %RBX,0x1d8(%RSP) |
(126) 0x427808 MOV %RBX,%R9 |
(126) 0x42780b IMUL %RDI,%R9 |
(126) 0x42780f ADD 0x150(%RSP),%R9 |
(126) 0x427817 MOV %RDX,%RAX |
(126) 0x42781a IMUL %R15,%RAX |
(126) 0x42781e MOV %RCX,0xb0(%RSP) |
(126) 0x427826 MOV 0x148(%RSP),%RCX |
(126) 0x42782e ADD %RCX,%RAX |
(126) 0x427831 MOV %RDX,0x1d0(%RSP) |
(126) 0x427839 MOV %RDX,%RBX |
(126) 0x42783c IMUL %RDI,%RBX |
(126) 0x427840 ADD %RCX,%RBX |
(126) 0x427843 MOV %R11,%RSI |
(126) 0x427846 IMUL %RDI,%RSI |
(126) 0x42784a MOV 0x140(%RSP),%RDX |
(126) 0x427852 ADD %RDX,%RSI |
(126) 0x427855 MOV %R10,%RCX |
(126) 0x427858 IMUL %RDI,%RCX |
(126) 0x42785c MOV 0x138(%RSP),%R8 |
(126) 0x427864 ADD %R8,%RCX |
(126) 0x427867 MOV %R11,0x1c8(%RSP) |
(126) 0x42786f IMUL %R15,%R11 |
(126) 0x427873 ADD %RDX,%R11 |
(126) 0x427876 MOV %R10,%RDX |
(126) 0x427879 IMUL %R15,%RDX |
(126) 0x42787d ADD %R8,%RDX |
(126) 0x427880 MOV 0xc8(%RSP),%R10 |
(126) 0x427888 IMUL %RDI,%R10 |
(126) 0x42788c ADD 0x130(%RSP),%R10 |
(126) 0x427894 MOV %R10,0x20(%RSP) |
(126) 0x427899 IMUL %RDI,%R14 |
(126) 0x42789d ADD 0x128(%RSP),%R14 |
(126) 0x4278a5 MOV %R14,0x38(%RSP) |
(126) 0x4278aa MOV 0xb8(%RSP),%R8 |
(126) 0x4278b2 IMUL %RDI,%R8 |
(126) 0x4278b6 ADD 0x120(%RSP),%R8 |
(126) 0x4278be MOV %R8,0x1f8(%RSP) |
(126) 0x4278c6 MOV 0xc0(%RSP),%R14 |
(126) 0x4278ce IMUL %RDI,%R14 |
(126) 0x4278d2 ADD 0x118(%RSP),%R14 |
(126) 0x4278da MOV 0xb0(%RSP),%R10 |
(126) 0x4278e2 IMUL %R10,%R15 |
(126) 0x4278e6 MOV 0x110(%RSP),%R8 |
(126) 0x4278ee ADD %R8,%R15 |
(126) 0x4278f1 IMUL %R10,%RDI |
(126) 0x4278f5 ADD %R8,%RDI |
(126) 0x4278f8 XOR %R8D,%R8D |
(126) 0x4278fb NOPL (%RAX,%RAX,1) |
(127) 0x427900 VMOVUPD -0x8(%RDX,%R8,8),%ZMM1 |
(127) 0x42790b VMOVUPD (%RDX,%R8,8),%ZMM2 |
(127) 0x427912 VMULPD -0x8(%R11,%R8,8),%ZMM1,%ZMM1 |
(127) 0x42791d VFMADD231PD (%R11,%R8,8),%ZMM2,%ZMM1 |
(127) 0x427924 VMOVUPD -0x8(%RCX,%R8,8),%ZMM2 |
(127) 0x42792f VMOVUPD (%RCX,%R8,8),%ZMM4 |
(127) 0x427936 VFMADD132PD (%RSI,%R8,8),%ZMM1,%ZMM4 |
(127) 0x42793d VFMADD231PD -0x8(%RSI,%R8,8),%ZMM2,%ZMM4 |
(127) 0x427948 VMULPD %ZMM3,%ZMM4,%ZMM1 |
(127) 0x42794e VDIVPD %ZMM1,%ZMM0,%ZMM1 |
(127) 0x427954 VMOVUPD (%RDI,%R8,8),%ZMM2 |
(127) 0x42795b VMOVUPD -0x8(%RBX,%R8,8),%ZMM4 |
(127) 0x427966 VMOVUPD (%RBX,%R8,8),%ZMM5 |
(127) 0x42796d VSUBPD %ZMM5,%ZMM4,%ZMM18 |
(127) 0x427973 VMULPD %ZMM2,%ZMM18,%ZMM18 |
(127) 0x427979 VMOVUPD (%R15,%R8,8),%ZMM19 |
(127) 0x427980 VMOVUPD -0x8(%RAX,%R8,8),%ZMM20 |
(127) 0x42798b VMOVUPD (%RAX,%R8,8),%ZMM21 |
(127) 0x427992 VSUBPD %ZMM21,%ZMM20,%ZMM22 |
(127) 0x427998 VFMADD213PD %ZMM18,%ZMM19,%ZMM22 |
(127) 0x42799e VMOVUPD -0x8(%R9,%R8,8),%ZMM18 |
(127) 0x4279a9 VMOVUPD (%R9,%R8,8),%ZMM23 |
(127) 0x4279b0 VSUBPD %ZMM5,%ZMM21,%ZMM5 |
(127) 0x4279b6 VMULPD %ZMM5,%ZMM23,%ZMM5 |
(127) 0x4279bc VSUBPD %ZMM4,%ZMM20,%ZMM4 |
(127) 0x4279c2 VFMADD213PD %ZMM5,%ZMM18,%ZMM4 |
(127) 0x4279c8 VMOVUPD -0x8(%R13,%R8,8),%ZMM5 |
(127) 0x4279d3 VMOVUPD (%R13,%R8,8),%ZMM20 |
(127) 0x4279db VSUBPD %ZMM20,%ZMM5,%ZMM21 |
(127) 0x4279e1 VMOVUPD -0x8(%R12,%R8,8),%ZMM26 |
(127) 0x4279ec VMOVUPD (%R12,%R8,8),%ZMM27 |
(127) 0x4279f3 VSUBPD %ZMM27,%ZMM26,%ZMM28 |
(127) 0x4279f9 VFMADD213PD %ZMM22,%ZMM2,%ZMM21 |
(127) 0x4279ff VFMADD231PD %ZMM28,%ZMM19,%ZMM21 |
(127) 0x427a05 VFMADD213PD (%R14,%R8,8),%ZMM1,%ZMM21 |
(127) 0x427a0c MOV 0x1f8(%RSP),%R10 |
(127) 0x427a14 VMOVUPD %ZMM21,(%R10,%R8,8) |
(127) 0x427a1b VSUBPD %ZMM20,%ZMM27,%ZMM2 |
(127) 0x427a21 VSUBPD %ZMM5,%ZMM26,%ZMM5 |
(127) 0x427a27 VFMADD213PD %ZMM4,%ZMM23,%ZMM2 |
(127) 0x427a2d VFMADD231PD %ZMM5,%ZMM18,%ZMM2 |
(127) 0x427a33 MOV 0x38(%RSP),%R10 |
(127) 0x427a38 VFMADD213PD (%R10,%R8,8),%ZMM1,%ZMM2 |
(127) 0x427a3f MOV 0x20(%RSP),%R10 |
(127) 0x427a44 VMOVUPD %ZMM2,(%R10,%R8,8) |
(127) 0x427a4b ADD $0x8,%R8 |
(127) 0x427a4f CMP 0x40(%RSP),%R8 |
(127) 0x427a54 JB 427900 |
(126) 0x427a5a MOV 0x40(%RSP),%RAX |
(126) 0x427a5f MOV %RAX,%R10 |
(126) 0x427a62 CMP 0x170(%RSP),%RAX |
(126) 0x427a6a MOV 0x2c(%RSP),%EDI |
(126) 0x427a6e MOV 0x50(%RSP),%R12D |
(126) 0x427a73 MOV 0xd0(%RSP),%R9 |
(126) 0x427a7b MOV 0xb8(%RSP),%R15 |
(126) 0x427a83 MOV 0x1e0(%RSP),%R13 |
(126) 0x427a8b MOV 0x1d8(%RSP),%RBX |
(126) 0x427a93 MOV 0x1d0(%RSP),%RDX |
(126) 0x427a9b MOV 0xb0(%RSP),%RCX |
(126) 0x427aa3 MOV 0x1c8(%RSP),%R11 |
(126) 0x427aab JE 427700 |
(126) 0x427ab1 JMP 427ac3 |
0x427ab3 NOPW %CS:(%RAX,%RAX,1) |
(126) 0x427ac0 XOR %R10D,%R10D |
(126) 0x427ac3 VPBROADCASTQ %R10,%ZMM1 |
(126) 0x427ac9 VMOVDQA64 0x200(%RSP),%ZMM2 |
(126) 0x427ad1 VPSUBQ %ZMM1,%ZMM2,%ZMM1 |
(126) 0x427ad7 VPCMPNLEUQ 0xdd89e(%RIP),%ZMM1,%K1 |
(126) 0x427ae2 KORTESTB %K1,%K1 |
(126) 0x427ae6 JE 427700 |
(126) 0x427aec MOV 0x108(%RSP),%RAX |
(126) 0x427af4 ADD %R9D,%EAX |
(126) 0x427af7 MOVSXD %EAX,%RDI |
(126) 0x427afa MOV %RDI,0x38(%RSP) |
(126) 0x427aff MOV 0xf8(%RSP),%RAX |
(126) 0x427b07 NEG %RAX |
(126) 0x427b0a LEA 0x1(%RDI,%RAX,1),%RDI |
(126) 0x427b0f MOV 0x1e8(%RSP),%R9 |
(126) 0x427b17 MOV %R9,%RAX |
(126) 0x427b1a IMUL %RDI,%RAX |
(126) 0x427b1e MOV %RDI,0x20(%RSP) |
(126) 0x427b23 MOV %RCX,%RSI |
(126) 0x427b26 MOV 0x98(%RSP),%RCX |
(126) 0x427b2e ADD %RCX,%RAX |
(126) 0x427b31 ADD 0x100(%RSP),%R10 |
(126) 0x427b39 MOV %R15,%R8 |
(126) 0x427b3c MOV %R10,%R14 |
(126) 0x427b3f SUB 0xf0(%RSP),%R14 |
(126) 0x427b47 VMOVUPD 0x8(%RAX,%R14,8),%ZMM26{%K1}{z} |
(126) 0x427b52 SUB 0xe0(%RSP),%R10 |
(126) 0x427b5a VMOVUPD (%RAX,%R10,8),%ZMM27{%K1}{z} |
(126) 0x427b61 MOV %R11,%RAX |
(126) 0x427b64 IMUL %RDI,%RAX |
(126) 0x427b68 MOV %RDX,%R15 |
(126) 0x427b6b MOV 0x90(%RSP),%RDX |
(126) 0x427b73 ADD %RDX,%RAX |
(126) 0x427b76 VMOVUPD 0x8(%RAX,%R14,8),%ZMM28{%K1}{z} |
(126) 0x427b81 VMOVUPD (%RAX,%R10,8),%ZMM29{%K1}{z} |
(126) 0x427b88 MOV 0x38(%RSP),%RDI |
(126) 0x427b8d SUB 0xe8(%RSP),%RDI |
(126) 0x427b95 IMUL %RDI,%R9 |
(126) 0x427b99 ADD %RCX,%R9 |
(126) 0x427b9c VMOVUPD (%R9,%R10,8),%ZMM30{%K1}{z} |
(126) 0x427ba3 VMOVUPD 0x8(%R9,%R14,8),%ZMM31{%K1}{z} |
(126) 0x427bae MOV 0xd0(%RSP),%R9 |
(126) 0x427bb6 IMUL %RDI,%R11 |
(126) 0x427bba ADD %RDX,%R11 |
(126) 0x427bbd VMOVUPD (%R11,%R10,8),%ZMM2{%K1}{z} |
(126) 0x427bc4 VMOVUPD 0x8(%R11,%R14,8),%ZMM4{%K1}{z} |
(126) 0x427bcf MOV %RSI,%RAX |
(126) 0x427bd2 IMUL %RDI,%RAX |
(126) 0x427bd6 MOV 0xa8(%RSP),%RCX |
(126) 0x427bde ADD %RCX,%RAX |
(126) 0x427be1 VMOVUPD (%RAX,%R10,8),%ZMM18{%K1}{z} |
(126) 0x427be8 MOV %R15,%RAX |
(126) 0x427beb IMUL %RDI,%RAX |
(126) 0x427bef MOV 0x88(%RSP),%RDX |
(126) 0x427bf7 ADD %RDX,%RAX |
(126) 0x427bfa VMOVUPD (%RAX,%R10,8),%ZMM19{%K1}{z} |
(126) 0x427c01 VMOVUPD 0x8(%RAX,%R14,8),%ZMM20{%K1}{z} |
(126) 0x427c0c MOV 0x20(%RSP),%RAX |
(126) 0x427c11 IMUL %RAX,%RSI |
(126) 0x427c15 ADD %RCX,%RSI |
(126) 0x427c18 VMOVUPD (%RSI,%R10,8),%ZMM21{%K1}{z} |
(126) 0x427c1f IMUL %RAX,%R15 |
(126) 0x427c23 MOV %RAX,%RSI |
(126) 0x427c26 ADD %RDX,%R15 |
(126) 0x427c29 VMOVUPD (%R15,%R10,8),%ZMM22{%K1}{z} |
(126) 0x427c30 VMOVUPD 0x8(%R15,%R14,8),%ZMM23{%K1}{z} |
(126) 0x427c3b IMUL %RDI,%RBX |
(126) 0x427c3f ADD 0xa0(%RSP),%RBX |
(126) 0x427c47 VMOVUPD (%RBX,%R10,8),%ZMM1{%K1}{z} |
(126) 0x427c4e VMOVUPD 0x8(%RBX,%R14,8),%ZMM5{%K1}{z} |
(126) 0x427c59 MOV %R13,%RAX |
(126) 0x427c5c IMUL %RDI,%RAX |
(126) 0x427c60 MOV 0x80(%RSP),%RCX |
(126) 0x427c68 ADD %RCX,%RAX |
(126) 0x427c6b VMOVUPD (%RAX,%R10,8),%ZMM24{%K1}{z} |
(126) 0x427c72 VMOVUPD 0x8(%RAX,%R14,8),%ZMM25{%K1}{z} |
(126) 0x427c7d IMUL %RSI,%R13 |
(126) 0x427c81 ADD %RCX,%R13 |
(126) 0x427c84 VMOVUPD 0x8(%R13,%R14,8),%ZMM6{%K1}{z} |
(126) 0x427c8f VMOVUPD (%R13,%R10,8),%ZMM7{%K1}{z} |
(126) 0x427c97 MOV 0xc0(%RSP),%RAX |
(126) 0x427c9f IMUL %RDI,%RAX |
(126) 0x427ca3 ADD 0x78(%RSP),%RAX |
(126) 0x427ca8 VMOVUPD (%RAX,%R10,8),%ZMM8{%K1}{z} |
(126) 0x427caf VMOVAPD 0x280(%RSP),%ZMM12 |
(126) 0x427cb7 VMOVAPD %ZMM26,%ZMM12{%K1} |
(126) 0x427cbd VMOVAPD 0x300(%RSP),%ZMM11 |
(126) 0x427cc5 VMOVAPD %ZMM28,%ZMM11{%K1} |
(126) 0x427ccb VMOVAPD 0x380(%RSP),%ZMM10 |
(126) 0x427cd3 VMOVAPD %ZMM27,%ZMM10{%K1} |
(126) 0x427cd9 VMOVAPD 0x400(%RSP),%ZMM9 |
(126) 0x427ce1 VMOVAPD %ZMM29,%ZMM9{%K1} |
(126) 0x427ce7 VMOVAPD 0x440(%RSP),%ZMM29 |
(126) 0x427cef VMOVAPD %ZMM30,%ZMM29{%K1} |
(126) 0x427cf5 VMOVAPD 0x480(%RSP),%ZMM28 |
(126) 0x427cfd VMOVAPD %ZMM2,%ZMM28{%K1} |
(126) 0x427d03 VMOVAPD 0x4c0(%RSP),%ZMM27 |
(126) 0x427d0b VMOVAPD %ZMM31,%ZMM27{%K1} |
(126) 0x427d11 VMOVAPD 0x500(%RSP),%ZMM26 |
(126) 0x427d19 VMOVAPD %ZMM4,%ZMM26{%K1} |
(126) 0x427d1f VMOVAPD %ZMM18,%ZMM17{%K1} |
(126) 0x427d25 VMOVAPD %ZMM19,%ZMM16{%K1} |
(126) 0x427d2b VMOVAPD %ZMM20,%ZMM15{%K1} |
(126) 0x427d31 VMOVAPD %ZMM21,%ZMM14{%K1} |
(126) 0x427d37 VMOVAPD %ZMM22,%ZMM13{%K1} |
(126) 0x427d3d VSUBPD %ZMM16,%ZMM15,%ZMM2 |
(126) 0x427d43 VMULPD %ZMM17,%ZMM2,%ZMM2 |
(126) 0x427d49 VMOVAPD 0x240(%RSP),%ZMM21 |
(126) 0x427d51 VMOVAPD %ZMM23,%ZMM21{%K1} |
(126) 0x427d57 VSUBPD %ZMM13,%ZMM21,%ZMM4 |
(126) 0x427d5d VFMADD213PD %ZMM2,%ZMM14,%ZMM4 |
(126) 0x427d63 VMOVAPD 0x3c0(%RSP),%ZMM20 |
(126) 0x427d6b VMOVAPD %ZMM24,%ZMM20{%K1} |
(126) 0x427d71 VMOVAPD 0x540(%RSP),%ZMM19 |
(126) 0x427d79 VMOVAPD %ZMM25,%ZMM19{%K1} |
(126) 0x427d7f VMOVAPD 0x580(%RSP),%ZMM18 |
(126) 0x427d87 VMOVAPD %ZMM7,%ZMM18{%K1} |
(126) 0x427d8d VMOVAPD 0x5c0(%RSP),%ZMM7 |
(126) 0x427d95 VMOVAPD %ZMM6,%ZMM7{%K1} |
(126) 0x427d9b VSUBPD %ZMM20,%ZMM19,%ZMM2 |
(126) 0x427da1 VFMADD213PD %ZMM4,%ZMM17,%ZMM2 |
(126) 0x427da7 VSUBPD %ZMM18,%ZMM7,%ZMM4 |
(126) 0x427dad VFMADD231PD %ZMM4,%ZMM14,%ZMM2 |
(126) 0x427db3 VMOVAPD %ZMM11,0x300(%RSP) |
(126) 0x427dbb VMOVAPD %ZMM12,0x280(%RSP) |
(126) 0x427dc3 VMULPD %ZMM11,%ZMM12,%ZMM4 |
(126) 0x427dc9 VMOVAPD %ZMM9,0x400(%RSP) |
(126) 0x427dd1 VMOVAPD %ZMM10,0x380(%RSP) |
(126) 0x427dd9 VFMADD231PD %ZMM9,%ZMM10,%ZMM4 |
(126) 0x427ddf VMOVAPD %ZMM28,0x480(%RSP) |
(126) 0x427de7 VMOVAPD %ZMM29,0x440(%RSP) |
(126) 0x427def VFMADD231PD %ZMM28,%ZMM29,%ZMM4 |
(126) 0x427df5 VMOVAPD %ZMM26,0x500(%RSP) |
(126) 0x427dfd VMOVAPD %ZMM27,0x4c0(%RSP) |
(126) 0x427e05 VFMADD231PD %ZMM26,%ZMM27,%ZMM4 |
(126) 0x427e0b VMULPD %ZMM3,%ZMM4,%ZMM4 |
(126) 0x427e11 VDIVPD %ZMM4,%ZMM0,%ZMM4 |
(126) 0x427e17 VMOVAPD 0x600(%RSP),%ZMM6 |
(126) 0x427e1f VMOVAPD %ZMM8,%ZMM6{%K1} |
(126) 0x427e25 VMOVAPD %ZMM6,0x600(%RSP) |
(126) 0x427e2d VFMADD213PD %ZMM6,%ZMM4,%ZMM2 |
(126) 0x427e33 IMUL %RDI,%R8 |
(126) 0x427e37 ADD 0x68(%RSP),%R8 |
(126) 0x427e3c VMOVUPD %ZMM2,(%R8,%R10,8){%K1} |
(126) 0x427e43 MOV 0x1f0(%RSP),%RAX |
(126) 0x427e4b IMUL %RDI,%RAX |
(126) 0x427e4f ADD 0x70(%RSP),%RAX |
(126) 0x427e54 VMOVUPD (%RAX,%R10,8),%ZMM2{%K1}{z} |
(126) 0x427e5b MOV 0xc8(%RSP),%RAX |
(126) 0x427e63 IMUL %RDI,%RAX |
(126) 0x427e67 MOV 0x2c(%RSP),%EDI |
(126) 0x427e6b VMOVAPD 0x2c0(%RSP),%ZMM9 |
(126) 0x427e73 VMOVAPD %ZMM1,%ZMM9{%K1} |
(126) 0x427e79 VSUBPD %ZMM16,%ZMM13,%ZMM1 |
(126) 0x427e7f VMULPD %ZMM1,%ZMM9,%ZMM1 |
(126) 0x427e85 VMOVAPD 0x340(%RSP),%ZMM8 |
(126) 0x427e8d VMOVAPD %ZMM5,%ZMM8{%K1} |
(126) 0x427e93 VMOVAPD %ZMM21,0x240(%RSP) |
(126) 0x427e9b VSUBPD %ZMM15,%ZMM21,%ZMM5 |
(126) 0x427ea1 VFMADD213PD %ZMM1,%ZMM8,%ZMM5 |
(126) 0x427ea7 VMOVAPD %ZMM18,0x580(%RSP) |
(126) 0x427eaf VMOVAPD %ZMM20,0x3c0(%RSP) |
(126) 0x427eb7 VSUBPD %ZMM20,%ZMM18,%ZMM1 |
(126) 0x427ebd VMOVAPD %ZMM7,0x5c0(%RSP) |
(126) 0x427ec5 VMOVAPD %ZMM19,0x540(%RSP) |
(126) 0x427ecd VSUBPD %ZMM19,%ZMM7,%ZMM6 |
(126) 0x427ed3 VMOVAPD %ZMM9,0x2c0(%RSP) |
(126) 0x427edb VFMADD213PD %ZMM5,%ZMM9,%ZMM1 |
(126) 0x427ee1 VMOVAPD %ZMM8,0x340(%RSP) |
(126) 0x427ee9 VFMADD231PD %ZMM6,%ZMM8,%ZMM1 |
(126) 0x427eef VMOVAPD 0x640(%RSP),%ZMM5 |
(126) 0x427ef7 VMOVAPD %ZMM2,%ZMM5{%K1} |
(126) 0x427efd VMOVAPD %ZMM5,0x640(%RSP) |
(126) 0x427f05 VFMADD213PD %ZMM5,%ZMM4,%ZMM1 |
(126) 0x427f0b ADD 0x60(%RSP),%RAX |
(126) 0x427f10 VMOVUPD %ZMM1,(%RAX,%R10,8){%K1} |
(126) 0x427f17 JMP 427700 |
0x427f1c NOPW %CS:(%RAX,%RAX,1) |
0x427f26 NOPW %CS:(%RAX,%RAX,1) |
0x427f30 NOPW %CS:(%RAX,%RAX,1) |
0x427f3a NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 181 |
nb uops | 184 |
loop length | 1009 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 3 |
nb stack references | 79 |
micro-operation queue | 30.67 cycles |
front end | 30.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.10 | 10.00 | 17.00 | 17.00 | 30.50 | 7.00 | 6.90 | 30.50 | 30.50 | 30.50 | 7.00 | 17.00 |
cycles | 7.10 | 14.53 | 17.00 | 17.00 | 30.50 | 7.00 | 6.90 | 30.50 | 30.50 | 30.50 | 7.00 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.58-30.59 |
Stall cycles | 0.00 |
Front-end | 30.67 |
Dispatch | 30.50 |
Overall L1 | 30.67 |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 7% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 8% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x6c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x188(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 427506 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1c6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x60(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x64(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x747290,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4045a0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x34(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JAE 427540 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x7472b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x4c(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404190 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0xd8(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7472d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 404660 <__kmpc_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %RBX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD (%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %EAX,%R8D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7ffffff8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x20(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R8,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM1,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %RCX,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x80(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x158(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x130(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x128(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x110(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdd634(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV %R10,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2(%R10),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2(%R11),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,0x108(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 427715 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x3d5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 181 |
nb uops | 184 |
loop length | 1009 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 3 |
nb stack references | 79 |
micro-operation queue | 30.67 cycles |
front end | 30.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.10 | 10.00 | 17.00 | 17.00 | 30.50 | 7.00 | 6.90 | 30.50 | 30.50 | 30.50 | 7.00 | 17.00 |
cycles | 7.10 | 14.53 | 17.00 | 17.00 | 30.50 | 7.00 | 6.90 | 30.50 | 30.50 | 30.50 | 7.00 | 17.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 30.58-30.59 |
Stall cycles | 0.00 |
Front-end | 30.67 |
Dispatch | 30.50 |
Overall L1 | 30.67 |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 7% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 8% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x6c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x188(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%R14D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x78(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 427506 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1c6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x60(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x64(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x747290,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4045a0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x34(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JAE 427540 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x200> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x7472b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x4c(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404190 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0xd8(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7472d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 404660 <__kmpc_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %RBX,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
ADD %R14D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD (%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x2,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %EAX,%R8D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R8D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7ffffff8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x20(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R11 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R8,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM1,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %RCX,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x80(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x1,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %R10,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x158(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x130(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x128(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RCX,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x10(%RCX,%RAX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x110(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdd634(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV %R10,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2(%R10),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x2(%R11),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,0x108(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDI,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 427715 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x3d5> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel_.DIR.OMP.PARALLEL.2– | 5.29 | 4.01 |
▼Loop 126 - accelerate_kernel.f90:60-76 - exec– | 0.01 | 0.01 |
○Loop 127 - accelerate_kernel.f90:62-76 - exec | 5.28 | 4 |