Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.3% |
---|
Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.3% |
---|
/scratch_na/users/xoserete/qaas_runs/171-215-0463/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 57 - 79 |
-------------------------------------------------------------------------------- |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
64: +density0(j ,k-1)*volume(j ,k-1) & |
65: +density0(j ,k )*volume(j ,k ) & |
66: +density0(j-1,k )*volume(j-1,k )) & |
67: *0.25_8) |
68: |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
76: +yarea(j-1,k )*(viscosity(j-1,k )-viscosity(j-1,k-1))) |
77: ENDDO |
78: ENDDO |
79: !$OMP END DO |
0x4244a0 PUSH %RBP |
0x4244a1 MOV %RSP,%RBP |
0x4244a4 PUSH %R15 |
0x4244a6 PUSH %R14 |
0x4244a8 PUSH %R13 |
0x4244aa PUSH %R12 |
0x4244ac PUSH %RBX |
0x4244ad AND $-0x20,%RSP |
0x4244b1 SUB $0x480,%RSP |
0x4244b8 MOV %R9,0xa8(%RSP) |
0x4244c0 MOV %RCX,%RSI |
0x4244c3 MOV 0xc0(%RBP),%RAX |
0x4244ca MOV %RAX,0x1e0(%RSP) |
0x4244d2 MOV 0xb8(%RBP),%RAX |
0x4244d9 MOV %RAX,0x1d8(%RSP) |
0x4244e1 MOV 0xb0(%RBP),%RAX |
0x4244e8 MOV %RAX,0x1d0(%RSP) |
0x4244f0 MOV 0xa8(%RBP),%RAX |
0x4244f7 MOV %RAX,0x1c8(%RSP) |
0x4244ff MOV 0xa0(%RBP),%RAX |
0x424506 MOV %RAX,0x1c0(%RSP) |
0x42450e MOV 0x98(%RBP),%RAX |
0x424515 MOV %RAX,0x1b8(%RSP) |
0x42451d MOV 0x60(%RBP),%EBX |
0x424520 MOV 0x58(%RBP),%EAX |
0x424523 SUB %EBX,%EAX |
0x424525 INC %EAX |
0x424527 MOV 0x90(%RBP),%RCX |
0x42452e MOV %RCX,0x1b0(%RSP) |
0x424536 MOV 0x88(%RBP),%RCX |
0x42453d MOV %RCX,0x1a8(%RSP) |
0x424545 MOV 0x80(%RBP),%RCX |
0x42454c MOV %RCX,0x1a0(%RSP) |
0x424554 MOV 0x78(%RBP),%RCX |
0x424558 MOV %RCX,0x198(%RSP) |
0x424560 MOV 0x70(%RBP),%R13 |
0x424564 MOV 0x68(%RBP),%R9 |
0x424568 MOV 0x50(%RBP),%RCX |
0x42456c MOV %RCX,0xa0(%RSP) |
0x424574 MOV 0x48(%RBP),%RCX |
0x424578 MOV %RCX,0x98(%RSP) |
0x424580 MOV 0x40(%RBP),%RCX |
0x424584 MOV %RCX,0x90(%RSP) |
0x42458c MOV 0x38(%RBP),%RCX |
0x424590 MOV %RCX,0x88(%RSP) |
0x424598 MOV 0x30(%RBP),%RCX |
0x42459c MOV %RCX,0x80(%RSP) |
0x4245a4 MOV 0x28(%RBP),%RCX |
0x4245a8 MOV %RCX,0x78(%RSP) |
0x4245ad MOV 0x20(%RBP),%RCX |
0x4245b1 MOV %RCX,0x70(%RSP) |
0x4245b6 MOV 0x18(%RBP),%RCX |
0x4245ba MOV %RCX,0x68(%RSP) |
0x4245bf MOV 0x10(%RBP),%RCX |
0x4245c3 MOV %RCX,0x60(%RSP) |
0x4245c8 MOVL $0,0x5c(%RSP) |
0x4245d0 JS 42465f |
0x4245d6 MOV %R9,0x8(%RSP) |
0x4245db MOV %RDX,%R14 |
0x4245de MOV %R8,%R15 |
0x4245e1 MOV %RSI,0x20(%RSP) |
0x4245e6 MOV %RDI,0x110(%RSP) |
0x4245ee MOV (%RDI),%ESI |
0x4245f0 MOVL $0,0x34(%RSP) |
0x4245f8 MOV %EAX,0x30(%RSP) |
0x4245fc MOVL $0x1,0x58(%RSP) |
0x424604 SUB $0x8,%RSP |
0x424608 LEA 0x60(%RSP),%RAX |
0x42460d LEA 0x64(%RSP),%RCX |
0x424612 LEA 0x3c(%RSP),%R8 |
0x424617 LEA 0x38(%RSP),%R9 |
0x42461c MOV $0x736290,%EDI |
0x424621 MOV %ESI,0x54(%RSP) |
0x424625 MOV $0x22,%EDX |
0x42462a PUSH $0x1 |
0x42462c PUSH $0x1 |
0x42462e PUSH %RAX |
0x42462f CALL 404520 <__kmpc_for_static_init_4@plt> |
0x424634 ADD $0x20,%RSP |
0x424638 MOV 0x34(%RSP),%EAX |
0x42463c MOV 0x30(%RSP),%R12D |
0x424641 SUB %EAX,%R12D |
0x424644 JAE 424679 |
0x424646 MOV $0x7362b0,%EDI |
0x42464b MOV 0x4c(%RSP),%ESI |
0x42464f VZEROUPPER |
0x424652 CALL 404110 <__kmpc_for_static_fini@plt> |
0x424657 MOV 0x110(%RSP),%RDI |
0x42465f MOV (%RDI),%ESI |
0x424661 MOV $0x7362d0,%EDI |
0x424666 LEA -0x28(%RBP),%RSP |
0x42466a POP %RBX |
0x42466b POP %R12 |
0x42466d POP %R13 |
0x42466f POP %R14 |
0x424671 POP %R15 |
0x424673 POP %RBP |
0x424674 JMP 4045e0 |
0x424679 MOV %RAX,%R8 |
0x42467c VMOVQ %R14,%XMM0 |
0x424681 MOV %R15,%R9 |
0x424684 SAL $0x20,%R9 |
0x424688 MOV $-0x200000000,%RAX |
0x424692 LEA (%R9,%RAX,1),%RCX |
0x424696 MOV %RCX,%R10 |
0x424699 SAR $0x20,%R10 |
0x42469d MOV 0x20(%RSP),%RDX |
0x4246a2 SAL $0x20,%RDX |
0x4246a6 ADD %RDX,%RAX |
0x4246a9 MOV %RDX,%R14 |
0x4246ac MOV %RAX,%R11 |
0x4246af SAR $0x20,%R11 |
0x4246b3 ADD %EBX,%R8D |
0x4246b6 MOVSXD (%R13),%RBX |
0x4246ba MOV 0x8(%RSP),%RDX |
0x4246bf MOV (%RDX),%EDX |
0x4246c1 SUB %EBX,%EDX |
0x4246c3 LEA 0x1(%RDX),%ESI |
0x4246c6 MOV %ESI,0x54(%RSP) |
0x4246ca ADD $0x2,%EDX |
0x4246cd CMP $0x2,%EDX |
0x4246d0 MOV $0x1,%R15D |
0x4246d6 CMOVGE %EDX,%R15D |
0x4246da MOV %R15D,%EDX |
0x4246dd AND $0x7ffffffc,%EDX |
0x4246e3 MOV %RDX,0x20(%RSP) |
0x4246e8 TEST %RCX,%RCX |
0x4246eb MOV $-0x1,%RDX |
0x4246f2 CMOVNS %RCX,%RDX |
0x4246f6 TEST %RDX,%RDX |
0x4246f9 MOV $0x1,%ESI |
0x4246fe CMOVG %RSI,%RDX |
0x424702 MOV $0x200000000,%R13 |
0x42470c MOV %R13,%RDI |
0x42470f SUB %R9,%RDI |
0x424712 CMP %RDI,%RCX |
0x424715 CMOVG %RCX,%RDI |
0x424719 MOV %R11,0xb8(%RSP) |
0x424721 NOT %R11 |
0x424724 MOV %R11,0xb0(%RSP) |
0x42472c VPBROADCASTQ %XMM0,%YMM0 |
0x424731 MOV %R15,0x170(%RSP) |
0x424739 VPBROADCASTQ %R15,%YMM1 |
0x42473f VMOVDQA %YMM1,0x220(%RSP) |
0x424748 SHR $0x20,%RDI |
0x42474c IMUL %RDX,%RDI |
0x424750 MOV %RBX,0x180(%RSP) |
0x424758 LEA (,%RBX,8),%R11 |
0x424760 SAL $0x3,%RDI |
0x424764 SUB %RDI,%R11 |
0x424767 MOV $-0x1,%RCX |
0x42476e TEST %RAX,%RAX |
0x424771 CMOVNS %RAX,%RCX |
0x424775 TEST %RCX,%RCX |
0x424778 CMOVG %RSI,%RCX |
0x42477c MOV %R10,0x188(%RSP) |
0x424784 NOT %R10 |
0x424787 MOV %R10,0x178(%RSP) |
0x42478f SUB %R14,%R13 |
0x424792 MOV 0x90(%RSP),%RDX |
0x42479a ADD %R11,%RDX |
0x42479d MOV %RDX,0x158(%RSP) |
0x4247a5 CMP %R13,%RAX |
0x4247a8 CMOVG %RAX,%R13 |
0x4247ac MOV 0x88(%RSP),%RAX |
0x4247b4 ADD %R11,%RAX |
0x4247b7 MOV %RAX,0x150(%RSP) |
0x4247bf SHR $0x20,%R13 |
0x4247c3 IMUL %RCX,%R13 |
0x4247c7 NEG %R13 |
0x4247ca MOV %R13,0x168(%RSP) |
0x4247d2 MOV 0x80(%RSP),%RAX |
0x4247da ADD %R11,%RAX |
0x4247dd MOV %RAX,0x148(%RSP) |
0x4247e5 MOV 0x98(%RSP),%RAX |
0x4247ed LEA (%RAX,%R11,1),%RAX |
0x4247f1 MOV %RAX,0x140(%RSP) |
0x4247f9 MOV 0x78(%RSP),%RAX |
0x4247fe LEA (%RAX,%R11,1),%RAX |
0x424802 MOV %RAX,0x138(%RSP) |
0x42480a MOV 0xa8(%RSP),%RAX |
0x424812 LEA (%RAX,%R11,1),%RAX |
0x424816 MOV %RAX,0x130(%RSP) |
0x42481e MOV 0x68(%RSP),%RAX |
0x424823 ADD %R11,%RAX |
0x424826 MOV %RAX,0x128(%RSP) |
0x42482e MOV 0x60(%RSP),%RAX |
0x424833 ADD %R11,%RAX |
0x424836 MOV %RAX,0x120(%RSP) |
0x42483e MOV 0x70(%RSP),%RAX |
0x424843 ADD %R11,%RAX |
0x424846 MOV %RAX,0x118(%RSP) |
0x42484e ADD 0xa0(%RSP),%R11 |
0x424856 MOV %R11,0x160(%RSP) |
0x42485e VBROADCASTSD 0xd0f51(%RIP),%YMM3 |
0x424867 XOR %R13D,%R13D |
0x42486a MOV %R8,0x190(%RSP) |
0x424872 MOV %R8D,%R15D |
0x424875 MOV %R12D,0x2c(%RSP) |
0x42487a JMP 424d23 |
0x42487f NOP |
(119) 0x424880 MOVSXD %ESI,%RSI |
(119) 0x424883 MOV 0xb0(%RSP),%RDI |
(119) 0x42488b ADD %RSI,%RDI |
(119) 0x42488e MOV %RCX,%R9 |
(119) 0x424891 IMUL %RDI,%R9 |
(119) 0x424895 MOV %R9,0xd0(%RSP) |
(119) 0x42489d MOV %RDX,%R13 |
(119) 0x4248a0 MOV %R8,%RDX |
(119) 0x4248a3 MOV %R8,%R9 |
(119) 0x4248a6 IMUL %RDI,%R9 |
(119) 0x4248aa MOV %R9,0xd8(%RSP) |
(119) 0x4248b2 MOV %R13,%R14 |
(119) 0x4248b5 IMUL %RDI,%R13 |
(119) 0x4248b9 MOV %R10,%R9 |
(119) 0x4248bc IMUL %RDI,%R10 |
(119) 0x4248c0 MOV %R11,%R8 |
(119) 0x4248c3 MOV %R11,%R12 |
(119) 0x4248c6 IMUL %RDI,%R8 |
(119) 0x4248ca SUB 0xb8(%RSP),%RSI |
(119) 0x4248d2 IMUL %RSI,%RCX |
(119) 0x4248d6 IMUL %RSI,%RDX |
(119) 0x4248da IMUL %RSI,%R14 |
(119) 0x4248de MOV %R14,0xe0(%RSP) |
(119) 0x4248e6 IMUL %RSI,%R9 |
(119) 0x4248ea MOV %R9,0xe8(%RSP) |
(119) 0x4248f2 MOV %RBX,%RAX |
(119) 0x4248f5 IMUL %RSI,%RAX |
(119) 0x4248f9 IMUL %RSI,%R12 |
(119) 0x4248fd MOV %R12,0xf8(%RSP) |
(119) 0x424905 MOV 0x2c(%RSP),%R12D |
(119) 0x42490a MOV 0x18(%RSP),%R9 |
(119) 0x42490f IMUL %RSI,%R9 |
(119) 0x424913 MOV 0x10(%RSP),%RDI |
(119) 0x424918 IMUL %RSI,%RDI |
(119) 0x42491c MOV 0x38(%RSP),%RBX |
(119) 0x424921 IMUL %RSI,%RBX |
(119) 0x424925 MOV 0x8(%RSP),%R11 |
(119) 0x42492a IMUL %RSI,%R11 |
(119) 0x42492e MOV %R9,0xf0(%RSP) |
(119) 0x424936 MOV %R8,0x100(%RSP) |
(119) 0x42493e MOV %RAX,%R9 |
(119) 0x424941 MOV %R13,%R14 |
(119) 0x424944 MOV 0x108(%RSP),%R13 |
(119) 0x42494c MOV %RDX,0xc8(%RSP) |
(119) 0x424954 MOV %RCX,0xc0(%RSP) |
(119) 0x42495c XOR %EAX,%EAX |
(119) 0x42495e VPBROADCASTQ %RAX,%YMM1 |
(119) 0x424964 VMOVDQA 0x220(%RSP),%YMM2 |
(119) 0x42496d VPSUBQ %YMM1,%YMM2,%YMM1 |
(119) 0x424971 VPCMPNLEUQ 0xd03e4(%RIP),%YMM1,%K1 |
(119) 0x42497c MOV 0x90(%RSP),%RDX |
(119) 0x424984 MOV 0xd0(%RSP),%RSI |
(119) 0x42498c ADD %RDX,%RSI |
(119) 0x42498f ADD 0x180(%RSP),%RAX |
(119) 0x424997 MOV 0x178(%RSP),%RCX |
(119) 0x42499f ADD %RAX,%RCX |
(119) 0x4249a2 VMOVUPD (%RSI,%RCX,8),%YMM26{%K1}{z} |
(119) 0x4249a9 SUB 0x188(%RSP),%RAX |
(119) 0x4249b1 VMOVUPD (%RSI,%RAX,8),%YMM27{%K1}{z} |
(119) 0x4249b8 MOV 0x88(%RSP),%RSI |
(119) 0x4249c0 MOV 0xd8(%RSP),%R8 |
(119) 0x4249c8 ADD %RSI,%R8 |
(119) 0x4249cb VMOVUPD (%R8,%RCX,8),%YMM28{%K1}{z} |
(119) 0x4249d2 VMOVUPD (%R8,%RAX,8),%YMM29{%K1}{z} |
(119) 0x4249d9 MOV 0xc0(%RSP),%R8 |
(119) 0x4249e1 ADD %RDX,%R8 |
(119) 0x4249e4 VMOVUPD (%R8,%RAX,8),%YMM30{%K1}{z} |
(119) 0x4249eb VMOVUPD (%R8,%RCX,8),%YMM31{%K1}{z} |
(119) 0x4249f2 MOV 0xc8(%RSP),%RDX |
(119) 0x4249fa ADD %RSI,%RDX |
(119) 0x4249fd VMOVUPD (%RDX,%RAX,8),%YMM2{%K1}{z} |
(119) 0x424a04 VMOVUPD (%RDX,%RCX,8),%YMM1{%K1}{z} |
(119) 0x424a0b MOV 0xa0(%RSP),%RDX |
(119) 0x424a13 MOV 0xe0(%RSP),%RSI |
(119) 0x424a1b ADD %RDX,%RSI |
(119) 0x424a1e VMOVUPD (%RSI,%RAX,8),%YMM4{%K1}{z} |
(119) 0x424a25 MOV 0x80(%RSP),%RSI |
(119) 0x424a2d MOV 0xe8(%RSP),%R8 |
(119) 0x424a35 ADD %RSI,%R8 |
(119) 0x424a38 VMOVUPD (%R8,%RAX,8),%YMM5{%K1}{z} |
(119) 0x424a3f VMOVUPD (%R8,%RCX,8),%YMM18{%K1}{z} |
(119) 0x424a46 ADD %RDX,%R14 |
(119) 0x424a49 VMOVUPD (%R14,%RAX,8),%YMM19{%K1}{z} |
(119) 0x424a50 ADD %RSI,%R10 |
(119) 0x424a53 VMOVUPD (%R10,%RAX,8),%YMM20{%K1}{z} |
(119) 0x424a5a VMOVUPD (%R10,%RCX,8),%YMM21{%K1}{z} |
(119) 0x424a61 ADD 0x98(%RSP),%R9 |
(119) 0x424a69 VMOVUPD (%R9,%RAX,8),%YMM22{%K1}{z} |
(119) 0x424a70 VMOVUPD (%R9,%RCX,8),%YMM23{%K1}{z} |
(119) 0x424a77 MOV 0x78(%RSP),%RDX |
(119) 0x424a7c MOV 0xf8(%RSP),%RSI |
(119) 0x424a84 ADD %RDX,%RSI |
(119) 0x424a87 VMOVUPD (%RSI,%RAX,8),%YMM24{%K1}{z} |
(119) 0x424a8e VMOVUPD (%RSI,%RCX,8),%YMM25{%K1}{z} |
(119) 0x424a95 MOV 0x100(%RSP),%RSI |
(119) 0x424a9d ADD %RDX,%RSI |
(119) 0x424aa0 VMOVUPD (%RSI,%RCX,8),%YMM6{%K1}{z} |
(119) 0x424aa7 VMOVUPD (%RSI,%RAX,8),%YMM7{%K1}{z} |
(119) 0x424aae MOV 0xf0(%RSP),%RCX |
(119) 0x424ab6 ADD 0x70(%RSP),%RCX |
(119) 0x424abb VMOVUPD (%RCX,%RAX,8),%YMM8{%K1}{z} |
(119) 0x424ac2 VMOVAPD 0x260(%RSP),%YMM12 |
(119) 0x424acb VMOVAPD %YMM26,%YMM12{%K1} |
(119) 0x424ad1 VMOVAPD 0x2a0(%RSP),%YMM11 |
(119) 0x424ada VMOVAPD %YMM28,%YMM11{%K1} |
(119) 0x424ae0 VMOVAPD 0x2e0(%RSP),%YMM10 |
(119) 0x424ae9 VMOVAPD %YMM27,%YMM10{%K1} |
(119) 0x424aef VMOVAPD 0x320(%RSP),%YMM9 |
(119) 0x424af8 VMOVAPD %YMM29,%YMM9{%K1} |
(119) 0x424afe VMOVAPD 0x340(%RSP),%YMM29 |
(119) 0x424b06 VMOVAPD %YMM30,%YMM29{%K1} |
(119) 0x424b0c VMOVAPD 0x360(%RSP),%YMM28 |
(119) 0x424b14 VMOVAPD %YMM2,%YMM28{%K1} |
(119) 0x424b1a VMOVAPD 0x380(%RSP),%YMM27 |
(119) 0x424b22 VMOVAPD %YMM31,%YMM27{%K1} |
(119) 0x424b28 VMOVAPD 0x3a0(%RSP),%YMM26 |
(119) 0x424b30 VMOVAPD %YMM1,%YMM26{%K1} |
(119) 0x424b36 VMOVAPD %YMM4,%YMM17{%K1} |
(119) 0x424b3c VMOVAPD %YMM5,%YMM16{%K1} |
(119) 0x424b42 VMOVAPD %YMM18,%YMM15{%K1} |
(119) 0x424b48 VMOVAPD %YMM19,%YMM14{%K1} |
(119) 0x424b4e VMOVAPD %YMM20,%YMM13{%K1} |
(119) 0x424b54 VSUBPD %YMM16,%YMM15,%YMM1 |
(119) 0x424b5a VMULPD %YMM17,%YMM1,%YMM1 |
(119) 0x424b60 VMOVAPD 0x240(%RSP),%YMM5 |
(119) 0x424b69 VMOVAPD %YMM21,%YMM5{%K1} |
(119) 0x424b6f VSUBPD %YMM13,%YMM5,%YMM2 |
(119) 0x424b74 VFMADD213PD %YMM1,%YMM14,%YMM2 |
(119) 0x424b79 VMOVAPD 0x300(%RSP),%YMM20 |
(119) 0x424b81 VMOVAPD %YMM24,%YMM20{%K1} |
(119) 0x424b87 VMOVAPD 0x3c0(%RSP),%YMM19 |
(119) 0x424b8f VMOVAPD %YMM25,%YMM19{%K1} |
(119) 0x424b95 VMOVAPD 0x3e0(%RSP),%YMM18 |
(119) 0x424b9d VMOVAPD %YMM7,%YMM18{%K1} |
(119) 0x424ba3 VMOVAPD 0x400(%RSP),%YMM7 |
(119) 0x424bac VMOVAPD %YMM6,%YMM7{%K1} |
(119) 0x424bb2 VSUBPD %YMM20,%YMM19,%YMM1 |
(119) 0x424bb8 VFMADD213PD %YMM2,%YMM17,%YMM1 |
(119) 0x424bbe VSUBPD %YMM18,%YMM7,%YMM2 |
(119) 0x424bc4 VFMADD231PD %YMM2,%YMM14,%YMM1 |
(119) 0x424bc9 VMOVAPD %YMM11,0x2a0(%RSP) |
(119) 0x424bd2 VMOVAPD %YMM12,0x260(%RSP) |
(119) 0x424bdb VMULPD %YMM11,%YMM12,%YMM2 |
(119) 0x424be0 VMOVAPD %YMM9,0x320(%RSP) |
(119) 0x424be9 VMOVAPD %YMM10,0x2e0(%RSP) |
(119) 0x424bf2 VFMADD231PD %YMM9,%YMM10,%YMM2 |
(119) 0x424bf7 VMOVAPD %YMM28,0x360(%RSP) |
(119) 0x424bff VMOVAPD %YMM29,0x340(%RSP) |
(119) 0x424c07 VFMADD231PD %YMM28,%YMM29,%YMM2 |
(119) 0x424c0d VMOVAPD %YMM26,0x3a0(%RSP) |
(119) 0x424c15 VMOVAPD %YMM27,0x380(%RSP) |
(119) 0x424c1d VFMADD231PD %YMM26,%YMM27,%YMM2 |
(119) 0x424c23 VMULPD %YMM3,%YMM2,%YMM2 |
(119) 0x424c27 VDIVPD %YMM2,%YMM0,%YMM2 |
(119) 0x424c2b VMOVAPD 0x420(%RSP),%YMM4 |
(119) 0x424c34 VMOVAPD %YMM8,%YMM4{%K1} |
(119) 0x424c3a VMOVAPD %YMM4,0x420(%RSP) |
(119) 0x424c43 VFMADD213PD %YMM4,%YMM2,%YMM1 |
(119) 0x424c48 ADD 0x60(%RSP),%RDI |
(119) 0x424c4d VMOVUPD %YMM1,(%RDI,%RAX,8){%K1} |
(119) 0x424c54 ADD 0x68(%RSP),%RBX |
(119) 0x424c59 VMOVUPD (%RBX,%RAX,8),%YMM1{%K1}{z} |
(119) 0x424c60 VMOVAPD 0x280(%RSP),%YMM9 |
(119) 0x424c69 VMOVAPD %YMM22,%YMM9{%K1} |
(119) 0x424c6f VSUBPD %YMM16,%YMM13,%YMM4 |
(119) 0x424c75 VMULPD %YMM4,%YMM9,%YMM4 |
(119) 0x424c79 VMOVAPD 0x2c0(%RSP),%YMM8 |
(119) 0x424c82 VMOVAPD %YMM23,%YMM8{%K1} |
(119) 0x424c88 VMOVAPD %YMM5,0x240(%RSP) |
(119) 0x424c91 VSUBPD %YMM15,%YMM5,%YMM5 |
(119) 0x424c96 VFMADD213PD %YMM4,%YMM8,%YMM5 |
(119) 0x424c9b VMOVAPD %YMM18,0x3e0(%RSP) |
(119) 0x424ca3 VMOVAPD %YMM20,0x300(%RSP) |
(119) 0x424cab VSUBPD %YMM20,%YMM18,%YMM4 |
(119) 0x424cb1 VMOVAPD %YMM7,0x400(%RSP) |
(119) 0x424cba VMOVAPD %YMM19,0x3c0(%RSP) |
(119) 0x424cc2 VSUBPD %YMM19,%YMM7,%YMM6 |
(119) 0x424cc8 VMOVAPD %YMM9,0x280(%RSP) |
(119) 0x424cd1 VFMADD213PD %YMM5,%YMM9,%YMM4 |
(119) 0x424cd6 VMOVAPD %YMM8,0x2c0(%RSP) |
(119) 0x424cdf VFMADD231PD %YMM6,%YMM8,%YMM4 |
(119) 0x424ce4 VMOVAPD 0x440(%RSP),%YMM5 |
(119) 0x424ced VMOVAPD %YMM1,%YMM5{%K1} |
(119) 0x424cf3 VMOVAPD %YMM5,0x440(%RSP) |
(119) 0x424cfc VFMADD213PD %YMM5,%YMM2,%YMM4 |
(119) 0x424d01 ADD 0xa8(%RSP),%R11 |
(119) 0x424d09 VMOVUPD %YMM4,(%R11,%RAX,8){%K1} |
(119) 0x424d10 LEA 0x1(%R13),%EAX |
(119) 0x424d14 INC %R15D |
(119) 0x424d17 CMP %R12D,%R13D |
(119) 0x424d1a MOV %EAX,%R13D |
(119) 0x424d1d JE 424646 |
(119) 0x424d23 CMPL $0,0x54(%RSP) |
(119) 0x424d28 JS 424d10 |
(119) 0x424d2a MOV 0x190(%RSP),%RAX |
(119) 0x424d32 LEA (%RAX,%R13,1),%ESI |
(119) 0x424d36 MOV 0x198(%RSP),%RAX |
(119) 0x424d3e MOV (%RAX),%R8 |
(119) 0x424d41 MOV 0x1a0(%RSP),%RAX |
(119) 0x424d49 MOV (%RAX),%RCX |
(119) 0x424d4c MOV 0x1a8(%RSP),%RAX |
(119) 0x424d54 MOV (%RAX),%RAX |
(119) 0x424d57 MOV %RAX,0x18(%RSP) |
(119) 0x424d5c MOV 0x1b0(%RSP),%RAX |
(119) 0x424d64 MOV (%RAX),%RDX |
(119) 0x424d67 MOV 0x1b8(%RSP),%RAX |
(119) 0x424d6f MOV (%RAX),%R10 |
(119) 0x424d72 MOV 0x1c0(%RSP),%RAX |
(119) 0x424d7a MOV (%RAX),%RAX |
(119) 0x424d7d MOV %RAX,0x10(%RSP) |
(119) 0x424d82 MOV 0x1c8(%RSP),%RAX |
(119) 0x424d8a MOV (%RAX),%RAX |
(119) 0x424d8d MOV %RAX,0x38(%RSP) |
(119) 0x424d92 MOV 0x1d0(%RSP),%RAX |
(119) 0x424d9a MOV (%RAX),%RBX |
(119) 0x424d9d MOV 0x1d8(%RSP),%RAX |
(119) 0x424da5 MOV (%RAX),%RAX |
(119) 0x424da8 MOV %RAX,0x8(%RSP) |
(119) 0x424dad MOV 0x1e0(%RSP),%RAX |
(119) 0x424db5 MOV (%RAX),%R11 |
(119) 0x424db8 CMPQ $0,0x20(%RSP) |
(119) 0x424dbe MOV %R13,0x108(%RSP) |
(119) 0x424dc6 JE 424880 |
(119) 0x424dcc MOV %R15D,0x50(%RSP) |
(119) 0x424dd1 MOVSXD %R15D,%R15 |
(119) 0x424dd4 MOV 0xb0(%RSP),%RDI |
(119) 0x424ddc LEA (%RDI,%R15,1),%RAX |
(119) 0x424de0 ADD 0x168(%RSP),%R15 |
(119) 0x424de8 MOVSXD %ESI,%RSI |
(119) 0x424deb ADD %RSI,%RDI |
(119) 0x424dee MOV %RCX,%R12 |
(119) 0x424df1 MOV %RCX,%R9 |
(119) 0x424df4 IMUL %RDI,%R9 |
(119) 0x424df8 MOV %R9,0xd0(%RSP) |
(119) 0x424e00 MOV %R8,%R9 |
(119) 0x424e03 IMUL %RDI,%R9 |
(119) 0x424e07 MOV %R9,0xd8(%RSP) |
(119) 0x424e0f SUB 0xb8(%RSP),%RSI |
(119) 0x424e17 MOV %RCX,%R9 |
(119) 0x424e1a IMUL %RSI,%R9 |
(119) 0x424e1e MOV %R9,0xc0(%RSP) |
(119) 0x424e26 MOV %R8,%R9 |
(119) 0x424e29 IMUL %RSI,%R9 |
(119) 0x424e2d MOV %R9,0xc8(%RSP) |
(119) 0x424e35 MOV %RDX,0x40(%RSP) |
(119) 0x424e3a MOV %RDX,%R9 |
(119) 0x424e3d IMUL %RSI,%R9 |
(119) 0x424e41 MOV %R9,0xe0(%RSP) |
(119) 0x424e49 MOV %R10,%R9 |
(119) 0x424e4c IMUL %RSI,%R9 |
(119) 0x424e50 MOV %R9,0xe8(%RSP) |
(119) 0x424e58 MOV %RDX,%R9 |
(119) 0x424e5b IMUL %RDI,%R9 |
(119) 0x424e5f MOV %R9,0x1f0(%RSP) |
(119) 0x424e67 MOV %R10,%R9 |
(119) 0x424e6a IMUL %RDI,%R9 |
(119) 0x424e6e MOV %R9,0x1f8(%RSP) |
(119) 0x424e76 MOV %RBX,%R9 |
(119) 0x424e79 MOV %RBX,%RDX |
(119) 0x424e7c IMUL %RSI,%R9 |
(119) 0x424e80 MOV %R9,0x200(%RSP) |
(119) 0x424e88 MOV %R11,%R9 |
(119) 0x424e8b IMUL %RSI,%R9 |
(119) 0x424e8f MOV %R9,0xf8(%RSP) |
(119) 0x424e97 IMUL %R11,%RDI |
(119) 0x424e9b MOV %RDI,0x100(%RSP) |
(119) 0x424ea3 MOV 0x18(%RSP),%RDI |
(119) 0x424ea8 IMUL %RSI,%RDI |
(119) 0x424eac MOV %RDI,0xf0(%RSP) |
(119) 0x424eb4 MOV 0x10(%RSP),%RDI |
(119) 0x424eb9 IMUL %RSI,%RDI |
(119) 0x424ebd MOV %RDI,0x208(%RSP) |
(119) 0x424ec5 MOV 0x38(%RSP),%RBX |
(119) 0x424eca MOV %RBX,%RDI |
(119) 0x424ecd IMUL %RSI,%RDI |
(119) 0x424ed1 MOV %RDI,0x210(%RSP) |
(119) 0x424ed9 MOV 0x8(%RSP),%R13 |
(119) 0x424ede IMUL %R13,%RSI |
(119) 0x424ee2 MOV %RSI,0x218(%RSP) |
(119) 0x424eea MOV %RCX,%RDI |
(119) 0x424eed IMUL %RAX,%RDI |
(119) 0x424ef1 MOV %R8,%R14 |
(119) 0x424ef4 IMUL %RAX,%R14 |
(119) 0x424ef8 MOV %R15,%RCX |
(119) 0x424efb IMUL %R15,%R12 |
(119) 0x424eff IMUL %R15,%R8 |
(119) 0x424f03 MOV %R10,%R15 |
(119) 0x424f06 IMUL %RCX,%R15 |
(119) 0x424f0a IMUL %RAX,%R10 |
(119) 0x424f0e IMUL %RCX,%RDX |
(119) 0x424f12 MOV %RDX,0x1e8(%RSP) |
(119) 0x424f1a MOV %R11,%R9 |
(119) 0x424f1d IMUL %RCX,%R9 |
(119) 0x424f21 IMUL %RAX,%R11 |
(119) 0x424f25 IMUL %RCX,%R13 |
(119) 0x424f29 IMUL %RCX,%RBX |
(119) 0x424f2d MOV 0x10(%RSP),%RSI |
(119) 0x424f32 IMUL %RCX,%RSI |
(119) 0x424f36 MOV %RSI,0x10(%RSP) |
(119) 0x424f3b MOV 0x18(%RSP),%RSI |
(119) 0x424f40 IMUL %RCX,%RSI |
(119) 0x424f44 MOV %RSI,0x18(%RSP) |
(119) 0x424f49 MOV 0x40(%RSP),%RSI |
(119) 0x424f4e IMUL %RSI,%RAX |
(119) 0x424f52 IMUL %RSI,%RCX |
(119) 0x424f56 MOV 0x158(%RSP),%RDX |
(119) 0x424f5e ADD %RDX,%RDI |
(119) 0x424f61 MOV 0x150(%RSP),%RSI |
(119) 0x424f69 ADD %RSI,%R14 |
(119) 0x424f6c ADD %RDX,%R12 |
(119) 0x424f6f MOV %R12,0x8(%RSP) |
(119) 0x424f74 ADD %RSI,%R8 |
(119) 0x424f77 MOV %R8,0x38(%RSP) |
(119) 0x424f7c MOV 0x148(%RSP),%RSI |
(119) 0x424f84 ADD %RSI,%R15 |
(119) 0x424f87 ADD %RSI,%R10 |
(119) 0x424f8a MOV %R10,0x40(%RSP) |
(119) 0x424f8f MOV 0x1e8(%RSP),%RDX |
(119) 0x424f97 ADD 0x140(%RSP),%RDX |
(119) 0x424f9f MOV 0x138(%RSP),%RSI |
(119) 0x424fa7 ADD %RSI,%R9 |
(119) 0x424faa ADD %RSI,%R11 |
(119) 0x424fad MOV %R11,%R10 |
(119) 0x424fb0 ADD 0x130(%RSP),%R13 |
(119) 0x424fb8 MOV %R13,%R12 |
(119) 0x424fbb ADD 0x128(%RSP),%RBX |
(119) 0x424fc3 MOV %RBX,%R13 |
(119) 0x424fc6 MOV 0x10(%RSP),%R11 |
(119) 0x424fcb ADD 0x120(%RSP),%R11 |
(119) 0x424fd3 MOV 0x18(%RSP),%RBX |
(119) 0x424fd8 ADD 0x118(%RSP),%RBX |
(119) 0x424fe0 MOV 0x160(%RSP),%RSI |
(119) 0x424fe8 ADD %RSI,%RAX |
(119) 0x424feb ADD %RSI,%RCX |
(119) 0x424fee XOR %ESI,%ESI |
(120) 0x424ff0 VMOVUPD -0x8(%RDI,%RSI,8),%YMM1 |
(120) 0x424ff6 VMOVUPD (%RDI,%RSI,8),%YMM2 |
(120) 0x424ffb VMULPD -0x8(%R14,%RSI,8),%YMM1,%YMM1 |
(120) 0x425002 VFMADD231PD (%R14,%RSI,8),%YMM2,%YMM1 |
(120) 0x425008 MOV 0x8(%RSP),%R8 |
(120) 0x42500d VMOVUPD -0x8(%R8,%RSI,8),%YMM2 |
(120) 0x425014 VMOVUPD (%R8,%RSI,8),%YMM4 |
(120) 0x42501a MOV 0x38(%RSP),%R8 |
(120) 0x42501f VFMADD132PD (%R8,%RSI,8),%YMM1,%YMM4 |
(120) 0x425025 VFMADD231PD -0x8(%R8,%RSI,8),%YMM2,%YMM4 |
(120) 0x42502c VMULPD %YMM3,%YMM4,%YMM1 |
(120) 0x425030 VDIVPD %YMM1,%YMM0,%YMM1 |
(120) 0x425034 VMOVUPD (%RCX,%RSI,8),%YMM2 |
(120) 0x425039 VMOVUPD -0x8(%R15,%RSI,8),%YMM4 |
(120) 0x425040 VMOVUPD (%R15,%RSI,8),%YMM5 |
(120) 0x425046 VSUBPD %YMM5,%YMM4,%YMM6 |
(120) 0x42504a VMULPD %YMM2,%YMM6,%YMM6 |
(120) 0x42504e VMOVUPD (%RAX,%RSI,8),%YMM7 |
(120) 0x425053 MOV 0x40(%RSP),%R8 |
(120) 0x425058 VMOVUPD -0x8(%R8,%RSI,8),%YMM8 |
(120) 0x42505f VMOVUPD (%R8,%RSI,8),%YMM18 |
(120) 0x425066 VSUBPD %YMM18,%YMM8,%YMM19 |
(120) 0x42506c VFMADD213PD %YMM6,%YMM7,%YMM19 |
(120) 0x425072 MOV %RDX,%R8 |
(120) 0x425075 VMOVUPD -0x8(%RDX,%RSI,8),%YMM6 |
(120) 0x42507b VMOVUPD (%RDX,%RSI,8),%YMM20 |
(120) 0x425082 VSUBPD %YMM5,%YMM18,%YMM5 |
(120) 0x425088 VMULPD %YMM5,%YMM20,%YMM5 |
(120) 0x42508e VSUBPD %YMM4,%YMM8,%YMM4 |
(120) 0x425092 VFMADD213PD %YMM5,%YMM6,%YMM4 |
(120) 0x425097 VMOVUPD -0x8(%R9,%RSI,8),%YMM5 |
(120) 0x42509e VMOVUPD (%R9,%RSI,8),%YMM8 |
(120) 0x4250a4 MOV %R10,%R8 |
(120) 0x4250a7 VMOVUPD -0x8(%R10,%RSI,8),%YMM18 |
(120) 0x4250b2 VMOVUPD (%R10,%RSI,8),%YMM21 |
(120) 0x4250b9 VSUBPD %YMM8,%YMM5,%YMM22 |
(120) 0x4250bf VSUBPD %YMM21,%YMM18,%YMM23 |
(120) 0x4250c5 VFMADD213PD %YMM19,%YMM2,%YMM22 |
(120) 0x4250cb VFMADD231PD %YMM23,%YMM7,%YMM22 |
(120) 0x4250d1 VFMADD213PD (%RBX,%RSI,8),%YMM1,%YMM22 |
(120) 0x4250d8 VMOVUPD %YMM22,(%R11,%RSI,8) |
(120) 0x4250df VSUBPD %YMM8,%YMM21,%YMM2 |
(120) 0x4250e5 VSUBPD %YMM5,%YMM18,%YMM5 |
(120) 0x4250eb VFMADD213PD %YMM4,%YMM20,%YMM2 |
(120) 0x4250f1 VFMADD231PD %YMM5,%YMM6,%YMM2 |
(120) 0x4250f6 VFMADD213PD (%R13,%RSI,8),%YMM1,%YMM2 |
(120) 0x4250fd VMOVUPD %YMM2,(%R12,%RSI,8) |
(120) 0x425103 ADD $0x4,%RSI |
(120) 0x425107 CMP 0x20(%RSP),%RSI |
(120) 0x42510c JB 424ff0 |
(119) 0x425112 MOV 0x20(%RSP),%RCX |
(119) 0x425117 MOV %RCX,%RAX |
(119) 0x42511a CMP 0x170(%RSP),%RCX |
(119) 0x425122 MOV 0x2c(%RSP),%R12D |
(119) 0x425127 MOV 0x50(%RSP),%R15D |
(119) 0x42512c MOV 0x108(%RSP),%R13 |
(119) 0x425134 MOV 0x218(%RSP),%R11 |
(119) 0x42513c MOV 0x210(%RSP),%RBX |
(119) 0x425144 MOV 0x208(%RSP),%RDI |
(119) 0x42514c MOV 0x200(%RSP),%R9 |
(119) 0x425154 MOV 0x1f8(%RSP),%R10 |
(119) 0x42515c MOV 0x1f0(%RSP),%R14 |
(119) 0x425164 JNE 42495e |
(119) 0x42516a JMP 424d10 |
0x42516f NOP |
Path / |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 202 |
nb uops | 205 |
loop length | 993 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 76 |
micro-operation queue | 34.17 cycles |
front end | 34.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.40 | 12.40 | 16.67 | 16.67 | 30.50 | 12.40 | 12.40 | 30.50 | 30.50 | 30.50 | 12.40 | 16.67 |
cycles | 12.40 | 12.40 | 16.67 | 16.67 | 30.50 | 12.40 | 12.40 | 30.50 | 30.50 | 30.50 | 12.40 | 16.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 33.83 |
Stall cycles | 0.00 |
Front-end | 34.17 |
Dispatch | 30.50 |
Overall L1 | 34.17 |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x480,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1b0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42465f <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1bf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x110(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x60(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x64(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x736290,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404520 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x34(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 424679 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1d9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x7362b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x4c(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404110 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x110(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7362d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4045e0 <__kmpc_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %R15,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x20,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R9,%RAX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x20,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %EBX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD (%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RDX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %ESI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %EDX,%R15D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7ffffffc,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RSI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R11,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R15,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM1,0x220(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
SHR $0x20,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RBX,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RBX,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RDI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x1,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %RSI,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x188(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,0x158(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R13,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RCX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x130(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x128(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0xa0(%RSP),%R11 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R11,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xd0f51(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12D,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 424d23 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x883> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 202 |
nb uops | 205 |
loop length | 993 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 76 |
micro-operation queue | 34.17 cycles |
front end | 34.17 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 12.40 | 12.40 | 16.67 | 16.67 | 30.50 | 12.40 | 12.40 | 30.50 | 30.50 | 30.50 | 12.40 | 16.67 |
cycles | 12.40 | 12.40 | 16.67 | 16.67 | 30.50 | 12.40 | 12.40 | 30.50 | 30.50 | 30.50 | 12.40 | 16.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 33.83 |
Stall cycles | 0.00 |
Front-end | 34.17 |
Dispatch | 30.50 |
Overall L1 | 34.17 |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 2% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x480,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1b0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1a8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x198(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x5c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42465f <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1bf> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x110(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x60(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x64(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x38(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x736290,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 404520 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x34(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RSP),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 424679 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x1d9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x7362b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x4c(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 404110 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x110(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x7362d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 4045e0 <__kmpc_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R14,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %R15,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAL $0x20,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R9,%RAX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x20(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x20,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %EBX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD (%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RDX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %ESI,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $0x1,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVGE %EDX,%R15D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7ffffffc,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RSI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R9,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R11,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %XMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x170(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R15,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA %YMM1,0x220(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
SHR $0x20,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RBX,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RBX,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RDI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x1,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %RSI,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x188(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R10,0x178(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x90(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,0x158(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R13,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x150(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RCX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x138(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R11,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x130(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x128(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD 0xa0(%RSP),%R11 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R11,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xd0f51(%RIP),%YMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R12D,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 424d23 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x883> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel_.DIR.OMP.PARALLEL.2– | 5.3 | 4.08 |
▼Loop 119 - accelerate_kernel.f90:60-76 - exec– | 0 | 0 |
○Loop 120 - accelerate_kernel.f90:62-76 - exec | 5.29 | 4.07 |