Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 4.69% |
---|
Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 4.69% |
---|
/scratch_na/users/xoserete/qaas_runs/171-214-9740/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 57 - 79 |
-------------------------------------------------------------------------------- |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
64: +density0(j ,k-1)*volume(j ,k-1) & |
65: +density0(j ,k )*volume(j ,k ) & |
66: +density0(j-1,k )*volume(j-1,k )) & |
67: *0.25_8) |
68: |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
76: +yarea(j-1,k )*(viscosity(j-1,k )-viscosity(j-1,k-1))) |
77: ENDDO |
78: ENDDO |
79: !$OMP END DO |
0x42bf40 PUSH %RBP |
0x42bf41 MOV %RSP,%RBP |
0x42bf44 PUSH %R15 |
0x42bf46 PUSH %R14 |
0x42bf48 PUSH %R13 |
0x42bf4a PUSH %R12 |
0x42bf4c PUSH %RBX |
0x42bf4d AND $-0x40,%RSP |
0x42bf51 SUB $0x5c0,%RSP |
0x42bf58 MOV %R9,0x38(%RSP) |
0x42bf5d MOV 0x60(%RBP),%EBX |
0x42bf60 MOV 0x58(%RBP),%EAX |
0x42bf63 SUB %EBX,%EAX |
0x42bf65 INC %EAX |
0x42bf67 MOVL $0,0x34(%RSP) |
0x42bf6f JS 42bff7 |
0x42bf75 MOV %RDX,%R12 |
0x42bf78 MOV %R8,%R15 |
0x42bf7b MOV %RCX,%R14 |
0x42bf7e MOV %RDI,0x90(%RSP) |
0x42bf86 MOV (%RDI),%ESI |
0x42bf88 MOVL $0,0x1c(%RSP) |
0x42bf90 MOV %EAX,0x18(%RSP) |
0x42bf94 MOVL $0x1,0x30(%RSP) |
0x42bf9c SUB $0x8,%RSP |
0x42bfa0 LEA 0x38(%RSP),%RAX |
0x42bfa5 LEA 0x3c(%RSP),%RCX |
0x42bfaa LEA 0x24(%RSP),%R8 |
0x42bfaf LEA 0x20(%RSP),%R9 |
0x42bfb4 MOV $0x74bf70,%EDI |
0x42bfb9 MOV %ESI,0x30(%RSP) |
0x42bfbd MOV $0x22,%EDX |
0x42bfc2 PUSH $0x1 |
0x42bfc4 PUSH $0x1 |
0x42bfc6 PUSH %RAX |
0x42bfc7 CALL 4044c0 <__kmpc_for_static_init_4@plt> |
0x42bfcc ADD $0x20,%RSP |
0x42bfd0 MOV 0x1c(%RSP),%EAX |
0x42bfd4 MOV 0x18(%RSP),%R10D |
0x42bfd9 SUB %EAX,%R10D |
0x42bfdc JAE 42c040 |
0x42bfde MOV $0x74bf90,%EDI |
0x42bfe3 MOV 0x28(%RSP),%ESI |
0x42bfe7 VZEROUPPER |
0x42bfea CALL 4040b0 <__kmpc_for_static_fini@plt> |
0x42bfef MOV 0x90(%RSP),%RDI |
0x42bff7 MOV (%RDI),%ESI |
0x42bff9 MOV $0x74bfb0,%EDI |
0x42bffe CALL 404580 <__kmpc_barrier@plt> |
0x42c003 LEA -0x28(%RBP),%RSP |
0x42c007 POP %RBX |
0x42c008 POP %R12 |
0x42c00a POP %R13 |
0x42c00c POP %R14 |
0x42c00e POP %R15 |
0x42c010 POP %RBP |
0x42c011 RET |
0x42c012 NOPW %CS:(%RAX,%RAX,1) |
0x42c021 NOPW %CS:(%RAX,%RAX,1) |
0x42c030 NOPW %CS:(%RAX,%RAX,1) |
0x42c03f NOP |
0x42c040 MOV %RAX,%R8 |
0x42c043 MOV 0x48(%RBP),%R9 |
0x42c047 MOV 0x40(%RBP),%R11 |
0x42c04b VMOVQ %R12,%XMM0 |
0x42c050 SAL $0x20,%R15 |
0x42c054 MOV $-0x200000000,%RCX |
0x42c05e LEA (%R15,%RCX,1),%RDX |
0x42c062 MOV %RDX,%RAX |
0x42c065 SAR $0x20,%RAX |
0x42c069 MOV %RAX,0xa0(%RSP) |
0x42c071 SAL $0x20,%R14 |
0x42c075 ADD %R14,%RCX |
0x42c078 MOV %RCX,%R12 |
0x42c07b SAR $0x20,%R12 |
0x42c07f TEST %RDX,%RDX |
0x42c082 MOV $-0x1,%RSI |
0x42c089 CMOVNS %RDX,%RSI |
0x42c08d TEST %RSI,%RSI |
0x42c090 MOV $0x1,%EDI |
0x42c095 CMOVG %RDI,%RSI |
0x42c099 MOV $0x200000000,%R13 |
0x42c0a3 MOV %R13,%RAX |
0x42c0a6 SUB %R15,%RAX |
0x42c0a9 MOV 0x38(%RBP),%R15 |
0x42c0ad CMP %RAX,%RDX |
0x42c0b0 CMOVG %RDX,%RAX |
0x42c0b4 MOV $-0x1,%RDX |
0x42c0bb SHR $0x20,%RAX |
0x42c0bf IMUL %RSI,%RAX |
0x42c0c3 SAL $0x3,%RAX |
0x42c0c7 SUB %RAX,%R11 |
0x42c0ca MOV %R11,0xf0(%RSP) |
0x42c0d2 MOV %R8,%RSI |
0x42c0d5 ADD %EBX,%ESI |
0x42c0d7 SUB %RAX,%R15 |
0x42c0da MOV %R15,0xe8(%RSP) |
0x42c0e2 TEST %RCX,%RCX |
0x42c0e5 CMOVNS %RCX,%RDX |
0x42c0e9 TEST %RDX,%RDX |
0x42c0ec CMOVG %RDI,%RDX |
0x42c0f0 MOV 0x30(%RBP),%RDI |
0x42c0f4 SUB %R14,%R13 |
0x42c0f7 MOV 0x28(%RBP),%R8 |
0x42c0fb CMP %R13,%RCX |
0x42c0fe CMOVG %RCX,%R13 |
0x42c102 SHR $0x20,%R13 |
0x42c106 IMUL %RDX,%R13 |
0x42c10a MOV %R10D,%EDX |
0x42c10d SUB %RAX,%RDI |
0x42c110 MOV %RDI,0xe0(%RSP) |
0x42c118 SUB %RAX,%R9 |
0x42c11b MOV %R9,0xd8(%RSP) |
0x42c123 SUB %RAX,%R8 |
0x42c126 MOV %R8,0xd0(%RSP) |
0x42c12e MOV 0x38(%RSP),%RCX |
0x42c133 SUB %RAX,%RCX |
0x42c136 MOV %RCX,0xc8(%RSP) |
0x42c13e MOV 0x18(%RBP),%RCX |
0x42c142 SUB %RAX,%RCX |
0x42c145 MOV %RCX,0xc0(%RSP) |
0x42c14d MOV 0x10(%RBP),%RCX |
0x42c151 SUB %RAX,%RCX |
0x42c154 MOV %RCX,0xb8(%RSP) |
0x42c15c MOV 0x20(%RBP),%RCX |
0x42c160 SUB %RAX,%RCX |
0x42c163 MOV %RCX,0xb0(%RSP) |
0x42c16b MOV 0x50(%RBP),%RCX |
0x42c16f SUB %RAX,%RCX |
0x42c172 MOV %RCX,0x48(%RSP) |
0x42c177 MOV %R12,0x98(%RSP) |
0x42c17f NOT %R12 |
0x42c182 MOV %R12,0x40(%RSP) |
0x42c187 NEG %R13 |
0x42c18a MOV %R13,0xf8(%RSP) |
0x42c192 VBROADCASTSD 0xdeb44(%RIP),%ZMM1 |
0x42c19c VPBROADCASTQ %XMM0,%ZMM2 |
0x42c1a2 XOR %EDI,%EDI |
0x42c1a4 MOV %RSI,0xa8(%RSP) |
0x42c1ac MOV %ESI,%EBX |
0x42c1ae MOV %R10D,0x14(%RSP) |
0x42c1b3 JMP 42c1df |
0x42c1b5 NOPW %CS:(%RAX,%RAX,1) |
(224) 0x42c1c0 MOV %R9,%RDI |
(224) 0x42c1c3 NOPW %CS:(%RAX,%RAX,1) |
(224) 0x42c1d0 LEA 0x1(%RDI),%EAX |
(224) 0x42c1d3 INC %EBX |
(224) 0x42c1d5 CMP %EDX,%EDI |
(224) 0x42c1d7 MOV %EAX,%EDI |
(224) 0x42c1d9 JE 42bfde |
(224) 0x42c1df MOV 0x70(%RBP),%RAX |
(224) 0x42c1e3 MOVSXD (%RAX),%R8 |
(224) 0x42c1e6 MOV 0x68(%RBP),%RAX |
(224) 0x42c1ea MOV (%RAX),%ECX |
(224) 0x42c1ec MOV %ECX,%EAX |
(224) 0x42c1ee SUB %R8D,%EAX |
(224) 0x42c1f1 INC %EAX |
(224) 0x42c1f3 JS 42c1d0 |
(224) 0x42c1f5 MOV 0x78(%RBP),%RAX |
(224) 0x42c1f9 MOV (%RAX),%R13 |
(224) 0x42c1fc MOV 0x80(%RBP),%RAX |
(224) 0x42c203 MOV (%RAX),%R14 |
(224) 0x42c206 MOV 0x88(%RBP),%RAX |
(224) 0x42c20d MOV (%RAX),%RAX |
(224) 0x42c210 MOV %RAX,0x60(%RSP) |
(224) 0x42c215 MOV 0x90(%RBP),%RAX |
(224) 0x42c21c MOV (%RAX),%R11 |
(224) 0x42c21f MOV 0x98(%RBP),%RAX |
(224) 0x42c226 MOV (%RAX),%R12 |
(224) 0x42c229 MOV 0xa0(%RBP),%RAX |
(224) 0x42c230 MOV (%RAX),%R9 |
(224) 0x42c233 MOV 0xa8(%RBP),%RAX |
(224) 0x42c23a MOV (%RAX),%R15 |
(224) 0x42c23d MOV 0xb0(%RBP),%RAX |
(224) 0x42c244 MOV (%RAX),%RSI |
(224) 0x42c247 MOV 0xb8(%RBP),%RAX |
(224) 0x42c24e MOV (%RAX),%RAX |
(224) 0x42c251 MOV %RAX,0x78(%RSP) |
(224) 0x42c256 MOV 0xc0(%RBP),%RAX |
(224) 0x42c25d MOV (%RAX),%RAX |
(224) 0x42c260 MOV %RAX,0x58(%RSP) |
(224) 0x42c265 SUB %R8D,%ECX |
(224) 0x42c268 ADD $0x2,%ECX |
(224) 0x42c26b CMP $0x2,%ECX |
(224) 0x42c26e MOV $0x1,%EAX |
(224) 0x42c273 CMOVL %EAX,%ECX |
(224) 0x42c276 MOV %RCX,%RAX |
(224) 0x42c279 AND $0x7ffffff8,%RCX |
(224) 0x42c280 MOV %RDI,0x80(%RSP) |
(224) 0x42c288 MOV %R15,0x70(%RSP) |
(224) 0x42c28d MOV %R9,0x68(%RSP) |
(224) 0x42c292 JE 42c600 |
(224) 0x42c298 MOV %RAX,0x120(%RSP) |
(224) 0x42c2a0 MOV %EBX,0x2c(%RSP) |
(224) 0x42c2a4 MOVSXD %EBX,%RDI |
(224) 0x42c2a7 MOV 0x40(%RSP),%RAX |
(224) 0x42c2ac LEA (%RAX,%RDI,1),%R10 |
(224) 0x42c2b0 ADD 0xf8(%RSP),%RDI |
(224) 0x42c2b8 MOV %R14,%RAX |
(224) 0x42c2bb IMUL %R10,%RAX |
(224) 0x42c2bf LEA (%RAX,%R8,8),%RBX |
(224) 0x42c2c3 MOV %RCX,0x88(%RSP) |
(224) 0x42c2cb MOV 0xf0(%RSP),%RCX |
(224) 0x42c2d3 ADD %RCX,%RBX |
(224) 0x42c2d6 MOV %R13,%RAX |
(224) 0x42c2d9 IMUL %R10,%RAX |
(224) 0x42c2dd MOV %RSI,%R15 |
(224) 0x42c2e0 MOV %R11,0x50(%RSP) |
(224) 0x42c2e5 MOV %R14,%R9 |
(224) 0x42c2e8 LEA (%RAX,%R8,8),%R14 |
(224) 0x42c2ec MOV 0xe8(%RSP),%RDX |
(224) 0x42c2f4 ADD %RDX,%R14 |
(224) 0x42c2f7 MOV %R9,0x100(%RSP) |
(224) 0x42c2ff MOV %R9,%RAX |
(224) 0x42c302 IMUL %RDI,%RAX |
(224) 0x42c306 LEA (%RAX,%R8,8),%R9 |
(224) 0x42c30a ADD %RCX,%R9 |
(224) 0x42c30d MOV %R13,0x108(%RSP) |
(224) 0x42c315 IMUL %RDI,%R13 |
(224) 0x42c319 LEA (%R13,%R8,8),%R11 |
(224) 0x42c31e ADD %RDX,%R11 |
(224) 0x42c321 MOV %R12,%RAX |
(224) 0x42c324 IMUL %RDI,%RAX |
(224) 0x42c328 LEA (%RAX,%R8,8),%RAX |
(224) 0x42c32c MOV 0xe0(%RSP),%RSI |
(224) 0x42c334 ADD %RSI,%RAX |
(224) 0x42c337 MOV %R12,0x110(%RSP) |
(224) 0x42c33f IMUL %R10,%R12 |
(224) 0x42c343 MOV 0x58(%RSP),%RDX |
(224) 0x42c348 MOV %R10,0x20(%RSP) |
(224) 0x42c34d LEA (%R12,%R8,8),%R13 |
(224) 0x42c351 ADD %RSI,%R13 |
(224) 0x42c354 MOV %R15,0x118(%RSP) |
(224) 0x42c35c IMUL %RDI,%R15 |
(224) 0x42c360 LEA (%R15,%R8,8),%RSI |
(224) 0x42c364 ADD 0xd8(%RSP),%RSI |
(224) 0x42c36c MOV %RDX,%RCX |
(224) 0x42c36f IMUL %RDI,%RCX |
(224) 0x42c373 LEA (%RCX,%R8,8),%R12 |
(224) 0x42c377 MOV 0xd0(%RSP),%R15 |
(224) 0x42c37f ADD %R15,%R12 |
(224) 0x42c382 IMUL %R10,%RDX |
(224) 0x42c386 LEA (%RDX,%R8,8),%R10 |
(224) 0x42c38a ADD %R15,%R10 |
(224) 0x42c38d MOV 0x78(%RSP),%RCX |
(224) 0x42c392 IMUL %RDI,%RCX |
(224) 0x42c396 LEA (%RCX,%R8,8),%RCX |
(224) 0x42c39a ADD 0xc8(%RSP),%RCX |
(224) 0x42c3a2 MOV %RCX,0x138(%RSP) |
(224) 0x42c3aa MOV 0x70(%RSP),%RCX |
(224) 0x42c3af IMUL %RDI,%RCX |
(224) 0x42c3b3 LEA (%RCX,%R8,8),%RCX |
(224) 0x42c3b7 ADD 0xc0(%RSP),%RCX |
(224) 0x42c3bf MOV %RCX,0x130(%RSP) |
(224) 0x42c3c7 MOV 0x68(%RSP),%RCX |
(224) 0x42c3cc IMUL %RDI,%RCX |
(224) 0x42c3d0 LEA (%RCX,%R8,8),%RCX |
(224) 0x42c3d4 ADD 0xb8(%RSP),%RCX |
(224) 0x42c3dc MOV %RCX,0x128(%RSP) |
(224) 0x42c3e4 MOV 0x60(%RSP),%RDX |
(224) 0x42c3e9 MOV %RDI,%RCX |
(224) 0x42c3ec IMUL %RDI,%RDX |
(224) 0x42c3f0 LEA (%RDX,%R8,8),%R15 |
(224) 0x42c3f4 ADD 0xb0(%RSP),%R15 |
(224) 0x42c3fc MOV 0x50(%RSP),%RDX |
(224) 0x42c401 MOV 0x20(%RSP),%RDI |
(224) 0x42c406 IMUL %RDX,%RDI |
(224) 0x42c40a LEA (%RDI,%R8,8),%RDI |
(224) 0x42c40e ADD 0x48(%RSP),%RDI |
(224) 0x42c413 IMUL %RDX,%RCX |
(224) 0x42c417 MOV %R8,0x20(%RSP) |
(224) 0x42c41c LEA (%RCX,%R8,8),%RDX |
(224) 0x42c420 ADD 0x48(%RSP),%RDX |
(224) 0x42c425 XOR %R8D,%R8D |
(224) 0x42c428 NOPL (%RAX,%RAX,1) |
(225) 0x42c430 VMOVUPD -0x8(%RBX,%R8,8),%ZMM0 |
(225) 0x42c43b VMOVUPD (%RBX,%R8,8),%ZMM3 |
(225) 0x42c442 VMULPD -0x8(%R14,%R8,8),%ZMM0,%ZMM0 |
(225) 0x42c44d VFMADD231PD (%R14,%R8,8),%ZMM3,%ZMM0 |
(225) 0x42c454 VMOVUPD -0x8(%R9,%R8,8),%ZMM3 |
(225) 0x42c45f VMOVUPD (%R9,%R8,8),%ZMM4 |
(225) 0x42c466 VFMADD132PD (%R11,%R8,8),%ZMM0,%ZMM4 |
(225) 0x42c46d VFMADD231PD -0x8(%R11,%R8,8),%ZMM3,%ZMM4 |
(225) 0x42c478 VMULPD %ZMM1,%ZMM4,%ZMM0 |
(225) 0x42c47e VDIVPD %ZMM0,%ZMM2,%ZMM0 |
(225) 0x42c484 VMOVUPD (%RDX,%R8,8),%ZMM3 |
(225) 0x42c48b VMOVUPD -0x8(%RAX,%R8,8),%ZMM4 |
(225) 0x42c496 VMOVUPD (%RAX,%R8,8),%ZMM17 |
(225) 0x42c49d VSUBPD %ZMM17,%ZMM4,%ZMM18 |
(225) 0x42c4a3 VMULPD %ZMM3,%ZMM18,%ZMM18 |
(225) 0x42c4a9 VMOVUPD (%RDI,%R8,8),%ZMM19 |
(225) 0x42c4b0 VMOVUPD -0x8(%R13,%R8,8),%ZMM20 |
(225) 0x42c4bb VMOVUPD (%R13,%R8,8),%ZMM25 |
(225) 0x42c4c3 VSUBPD %ZMM25,%ZMM20,%ZMM26 |
(225) 0x42c4c9 VFMADD213PD %ZMM18,%ZMM19,%ZMM26 |
(225) 0x42c4cf VMOVUPD -0x8(%RSI,%R8,8),%ZMM18 |
(225) 0x42c4da VMOVUPD (%RSI,%R8,8),%ZMM27 |
(225) 0x42c4e1 VSUBPD %ZMM17,%ZMM25,%ZMM17 |
(225) 0x42c4e7 VMULPD %ZMM17,%ZMM27,%ZMM17 |
(225) 0x42c4ed VSUBPD %ZMM4,%ZMM20,%ZMM4 |
(225) 0x42c4f3 VFMADD213PD %ZMM17,%ZMM18,%ZMM4 |
(225) 0x42c4f9 VMOVUPD -0x8(%R12,%R8,8),%ZMM17 |
(225) 0x42c504 VMOVUPD (%R12,%R8,8),%ZMM20 |
(225) 0x42c50b VSUBPD %ZMM20,%ZMM17,%ZMM25 |
(225) 0x42c511 VMOVUPD -0x8(%R10,%R8,8),%ZMM28 |
(225) 0x42c51c VMOVUPD (%R10,%R8,8),%ZMM29 |
(225) 0x42c523 VSUBPD %ZMM29,%ZMM28,%ZMM30 |
(225) 0x42c529 VFMADD213PD %ZMM26,%ZMM3,%ZMM25 |
(225) 0x42c52f VFMADD231PD %ZMM30,%ZMM19,%ZMM25 |
(225) 0x42c535 VFMADD213PD (%R15,%R8,8),%ZMM0,%ZMM25 |
(225) 0x42c53c MOV 0x128(%RSP),%RCX |
(225) 0x42c544 VMOVUPD %ZMM25,(%RCX,%R8,8) |
(225) 0x42c54b VSUBPD %ZMM20,%ZMM29,%ZMM3 |
(225) 0x42c551 VSUBPD %ZMM17,%ZMM28,%ZMM17 |
(225) 0x42c557 VFMADD213PD %ZMM4,%ZMM27,%ZMM3 |
(225) 0x42c55d VFMADD231PD %ZMM17,%ZMM18,%ZMM3 |
(225) 0x42c563 MOV 0x130(%RSP),%RCX |
(225) 0x42c56b VFMADD213PD (%RCX,%R8,8),%ZMM0,%ZMM3 |
(225) 0x42c572 MOV 0x138(%RSP),%RCX |
(225) 0x42c57a VMOVUPD %ZMM3,(%RCX,%R8,8) |
(225) 0x42c581 ADD $0x8,%R8 |
(225) 0x42c585 CMP 0x88(%RSP),%R8 |
(225) 0x42c58d JB 42c430 |
(224) 0x42c593 MOV 0x120(%RSP),%RAX |
(224) 0x42c59b MOV 0x88(%RSP),%R10 |
(224) 0x42c5a3 CMP %RAX,%R10 |
(224) 0x42c5a6 MOV 0x14(%RSP),%EDX |
(224) 0x42c5aa MOV 0x2c(%RSP),%EBX |
(224) 0x42c5ae MOV 0x80(%RSP),%RDI |
(224) 0x42c5b6 JE 42c1d0 |
(224) 0x42c5bc MOV %RDI,%R9 |
(224) 0x42c5bf VPBROADCASTQ %RAX,%ZMM0 |
(224) 0x42c5c5 MOV 0x118(%RSP),%RSI |
(224) 0x42c5cd MOV 0x110(%RSP),%RDI |
(224) 0x42c5d5 MOV 0x50(%RSP),%R11 |
(224) 0x42c5da MOV 0x108(%RSP),%R13 |
(224) 0x42c5e2 MOV 0x100(%RSP),%R14 |
(224) 0x42c5ea MOV 0x20(%RSP),%R8 |
(224) 0x42c5ef JMP 42c60f |
0x42c5f1 NOPW %CS:(%RAX,%RAX,1) |
(224) 0x42c600 MOV %RDI,%R9 |
(224) 0x42c603 MOV %R12,%RDI |
(224) 0x42c606 VPBROADCASTQ %RAX,%ZMM0 |
(224) 0x42c60c XOR %R10D,%R10D |
(224) 0x42c60f VPBROADCASTQ %R10,%ZMM3 |
(224) 0x42c615 VPSUBQ %ZMM3,%ZMM0,%ZMM0 |
(224) 0x42c61b VPCMPNLEUQ 0xdd29a(%RIP),%ZMM0,%K1 |
(224) 0x42c626 KORTESTB %K1,%K1 |
(224) 0x42c62a JE 42c1c0 |
(224) 0x42c630 MOV 0xa8(%RSP),%RAX |
(224) 0x42c638 ADD %R9D,%EAX |
(224) 0x42c63b MOVSXD %EAX,%R15 |
(224) 0x42c63e ADD %R8,%R10 |
(224) 0x42c641 MOV 0x40(%RSP),%RAX |
(224) 0x42c646 MOV %RDI,%R8 |
(224) 0x42c649 LEA (%RAX,%R15,1),%R9 |
(224) 0x42c64d MOV %R14,%RAX |
(224) 0x42c650 IMUL %R9,%RAX |
(224) 0x42c654 MOV 0x40(%RBP),%RDX |
(224) 0x42c658 ADD %RDX,%RAX |
(224) 0x42c65b MOV %RSI,%RDI |
(224) 0x42c65e MOV 0xa0(%RSP),%RSI |
(224) 0x42c666 MOV %RSI,%RCX |
(224) 0x42c669 NOT %RCX |
(224) 0x42c66c ADD %R10,%RCX |
(224) 0x42c66f VMOVUPD (%RAX,%RCX,8),%ZMM25{%K1}{z} |
(224) 0x42c676 SUB %RSI,%R10 |
(224) 0x42c679 VMOVUPD (%RAX,%R10,8),%ZMM26{%K1}{z} |
(224) 0x42c680 MOV %R13,%RAX |
(224) 0x42c683 IMUL %R9,%RAX |
(224) 0x42c687 MOV 0x38(%RBP),%RSI |
(224) 0x42c68b ADD %RSI,%RAX |
(224) 0x42c68e VMOVUPD (%RAX,%RCX,8),%ZMM27{%K1}{z} |
(224) 0x42c695 VMOVUPD (%RAX,%R10,8),%ZMM28{%K1}{z} |
(224) 0x42c69c SUB 0x98(%RSP),%R15 |
(224) 0x42c6a4 IMUL %R15,%R14 |
(224) 0x42c6a8 ADD %RDX,%R14 |
(224) 0x42c6ab VMOVUPD (%R14,%R10,8),%ZMM29{%K1}{z} |
(224) 0x42c6b2 VMOVUPD (%R14,%RCX,8),%ZMM30{%K1}{z} |
(224) 0x42c6b9 IMUL %R15,%R13 |
(224) 0x42c6bd ADD %RSI,%R13 |
(224) 0x42c6c0 VMOVUPD (%R13,%R10,8),%ZMM31{%K1}{z} |
(224) 0x42c6c8 VMOVUPD (%R13,%RCX,8),%ZMM0{%K1}{z} |
(224) 0x42c6d0 MOV %R11,%RAX |
(224) 0x42c6d3 IMUL %R15,%RAX |
(224) 0x42c6d7 MOV 0x50(%RBP),%RSI |
(224) 0x42c6db ADD %RSI,%RAX |
(224) 0x42c6de VMOVUPD (%RAX,%R10,8),%ZMM3{%K1}{z} |
(224) 0x42c6e5 MOV %R8,%RAX |
(224) 0x42c6e8 IMUL %R15,%RAX |
(224) 0x42c6ec MOV 0x30(%RBP),%RDX |
(224) 0x42c6f0 ADD %RDX,%RAX |
(224) 0x42c6f3 VMOVUPD (%RAX,%R10,8),%ZMM4{%K1}{z} |
(224) 0x42c6fa VMOVUPD (%RAX,%RCX,8),%ZMM17{%K1}{z} |
(224) 0x42c701 IMUL %R9,%R11 |
(224) 0x42c705 ADD %RSI,%R11 |
(224) 0x42c708 VMOVUPD (%R11,%R10,8),%ZMM19{%K1}{z} |
(224) 0x42c70f IMUL %R9,%R8 |
(224) 0x42c713 ADD %RDX,%R8 |
(224) 0x42c716 VMOVUPD (%R8,%R10,8),%ZMM21{%K1}{z} |
(224) 0x42c71d VMOVUPD (%R8,%RCX,8),%ZMM22{%K1}{z} |
(224) 0x42c724 IMUL %R15,%RDI |
(224) 0x42c728 ADD 0x48(%RBP),%RDI |
(224) 0x42c72c VMOVUPD (%RDI,%R10,8),%ZMM18{%K1}{z} |
(224) 0x42c733 VMOVUPD (%RDI,%RCX,8),%ZMM20{%K1}{z} |
(224) 0x42c73a MOV 0x80(%RSP),%RDI |
(224) 0x42c742 MOV 0x58(%RSP),%RSI |
(224) 0x42c747 MOV %RSI,%RAX |
(224) 0x42c74a IMUL %R15,%RAX |
(224) 0x42c74e MOV 0x28(%RBP),%RDX |
(224) 0x42c752 ADD %RDX,%RAX |
(224) 0x42c755 VMOVUPD (%RAX,%R10,8),%ZMM23{%K1}{z} |
(224) 0x42c75c VMOVUPD (%RAX,%RCX,8),%ZMM24{%K1}{z} |
(224) 0x42c763 IMUL %R9,%RSI |
(224) 0x42c767 ADD %RDX,%RSI |
(224) 0x42c76a MOV 0x14(%RSP),%EDX |
(224) 0x42c76e VMOVUPD (%RSI,%RCX,8),%ZMM5{%K1}{z} |
(224) 0x42c775 VMOVUPD (%RSI,%R10,8),%ZMM6{%K1}{z} |
(224) 0x42c77c MOV 0x60(%RSP),%RAX |
(224) 0x42c781 IMUL %R15,%RAX |
(224) 0x42c785 ADD 0x20(%RBP),%RAX |
(224) 0x42c789 VMOVUPD (%RAX,%R10,8),%ZMM7{%K1}{z} |
(224) 0x42c790 VMOVAPD 0x180(%RSP),%ZMM11 |
(224) 0x42c798 VMOVAPD %ZMM25,%ZMM11{%K1} |
(224) 0x42c79e VMOVAPD 0x200(%RSP),%ZMM10 |
(224) 0x42c7a6 VMOVAPD %ZMM27,%ZMM10{%K1} |
(224) 0x42c7ac VMOVAPD 0x280(%RSP),%ZMM9 |
(224) 0x42c7b4 VMOVAPD %ZMM26,%ZMM9{%K1} |
(224) 0x42c7ba VMOVAPD 0x300(%RSP),%ZMM8 |
(224) 0x42c7c2 VMOVAPD %ZMM28,%ZMM8{%K1} |
(224) 0x42c7c8 VMOVAPD 0x340(%RSP),%ZMM28 |
(224) 0x42c7d0 VMOVAPD %ZMM29,%ZMM28{%K1} |
(224) 0x42c7d6 VMOVAPD 0x380(%RSP),%ZMM27 |
(224) 0x42c7de VMOVAPD %ZMM31,%ZMM27{%K1} |
(224) 0x42c7e4 VMOVAPD 0x3c0(%RSP),%ZMM26 |
(224) 0x42c7ec VMOVAPD %ZMM30,%ZMM26{%K1} |
(224) 0x42c7f2 VMOVAPD 0x400(%RSP),%ZMM25 |
(224) 0x42c7fa VMOVAPD %ZMM0,%ZMM25{%K1} |
(224) 0x42c800 VMOVAPD %ZMM3,%ZMM16{%K1} |
(224) 0x42c806 VMOVAPD %ZMM4,%ZMM15{%K1} |
(224) 0x42c80c VMOVAPD %ZMM17,%ZMM14{%K1} |
(224) 0x42c812 VMOVAPD %ZMM19,%ZMM13{%K1} |
(224) 0x42c818 VMOVAPD %ZMM21,%ZMM12{%K1} |
(224) 0x42c81e VSUBPD %ZMM15,%ZMM14,%ZMM0 |
(224) 0x42c824 VMULPD %ZMM16,%ZMM0,%ZMM0 |
(224) 0x42c82a VMOVAPD 0x140(%RSP),%ZMM29 |
(224) 0x42c832 VMOVAPD %ZMM22,%ZMM29{%K1} |
(224) 0x42c838 VSUBPD %ZMM12,%ZMM29,%ZMM3 |
(224) 0x42c83e VFMADD213PD %ZMM0,%ZMM13,%ZMM3 |
(224) 0x42c844 VMOVAPD 0x2c0(%RSP),%ZMM21 |
(224) 0x42c84c VMOVAPD %ZMM23,%ZMM21{%K1} |
(224) 0x42c852 VMOVAPD 0x440(%RSP),%ZMM19 |
(224) 0x42c85a VMOVAPD %ZMM24,%ZMM19{%K1} |
(224) 0x42c860 VMOVAPD 0x480(%RSP),%ZMM17 |
(224) 0x42c868 VMOVAPD %ZMM6,%ZMM17{%K1} |
(224) 0x42c86e VMOVAPD 0x4c0(%RSP),%ZMM6 |
(224) 0x42c876 VMOVAPD %ZMM5,%ZMM6{%K1} |
(224) 0x42c87c VSUBPD %ZMM21,%ZMM19,%ZMM0 |
(224) 0x42c882 VFMADD213PD %ZMM3,%ZMM16,%ZMM0 |
(224) 0x42c888 VSUBPD %ZMM17,%ZMM6,%ZMM3 |
(224) 0x42c88e VFMADD231PD %ZMM3,%ZMM13,%ZMM0 |
(224) 0x42c894 VMOVAPD %ZMM10,0x200(%RSP) |
(224) 0x42c89c VMOVAPD %ZMM11,0x180(%RSP) |
(224) 0x42c8a4 VMULPD %ZMM10,%ZMM11,%ZMM3 |
(224) 0x42c8aa VMOVAPD %ZMM8,0x300(%RSP) |
(224) 0x42c8b2 VMOVAPD %ZMM9,0x280(%RSP) |
(224) 0x42c8ba VFMADD231PD %ZMM8,%ZMM9,%ZMM3 |
(224) 0x42c8c0 VMOVAPD %ZMM27,0x380(%RSP) |
(224) 0x42c8c8 VMOVAPD %ZMM28,0x340(%RSP) |
(224) 0x42c8d0 VFMADD231PD %ZMM27,%ZMM28,%ZMM3 |
(224) 0x42c8d6 VMOVAPD %ZMM25,0x400(%RSP) |
(224) 0x42c8de VMOVAPD %ZMM26,0x3c0(%RSP) |
(224) 0x42c8e6 VFMADD231PD %ZMM25,%ZMM26,%ZMM3 |
(224) 0x42c8ec VMULPD %ZMM1,%ZMM3,%ZMM3 |
(224) 0x42c8f2 VDIVPD %ZMM3,%ZMM2,%ZMM3 |
(224) 0x42c8f8 VMOVAPD 0x500(%RSP),%ZMM4 |
(224) 0x42c900 VMOVAPD %ZMM7,%ZMM4{%K1} |
(224) 0x42c906 VMOVAPD %ZMM4,0x500(%RSP) |
(224) 0x42c90e VFMADD213PD %ZMM4,%ZMM3,%ZMM0 |
(224) 0x42c914 MOV 0x68(%RSP),%RAX |
(224) 0x42c919 IMUL %R15,%RAX |
(224) 0x42c91d ADD 0x10(%RBP),%RAX |
(224) 0x42c921 VMOVUPD %ZMM0,(%RAX,%R10,8){%K1} |
(224) 0x42c928 MOV 0x70(%RSP),%RAX |
(224) 0x42c92d IMUL %R15,%RAX |
(224) 0x42c931 ADD 0x18(%RBP),%RAX |
(224) 0x42c935 VMOVUPD (%RAX,%R10,8),%ZMM0{%K1}{z} |
(224) 0x42c93c MOV 0x78(%RSP),%RAX |
(224) 0x42c941 IMUL %R15,%RAX |
(224) 0x42c945 VMOVAPD 0x1c0(%RSP),%ZMM8 |
(224) 0x42c94d VMOVAPD %ZMM18,%ZMM8{%K1} |
(224) 0x42c953 VSUBPD %ZMM15,%ZMM12,%ZMM4 |
(224) 0x42c959 VMULPD %ZMM4,%ZMM8,%ZMM4 |
(224) 0x42c95f VMOVAPD 0x240(%RSP),%ZMM7 |
(224) 0x42c967 VMOVAPD %ZMM20,%ZMM7{%K1} |
(224) 0x42c96d VMOVAPD %ZMM29,0x140(%RSP) |
(224) 0x42c975 VSUBPD %ZMM14,%ZMM29,%ZMM5 |
(224) 0x42c97b VFMADD213PD %ZMM4,%ZMM7,%ZMM5 |
(224) 0x42c981 VMOVAPD %ZMM17,0x480(%RSP) |
(224) 0x42c989 VMOVAPD %ZMM21,0x2c0(%RSP) |
(224) 0x42c991 VSUBPD %ZMM21,%ZMM17,%ZMM4 |
(224) 0x42c997 VMOVAPD %ZMM6,0x4c0(%RSP) |
(224) 0x42c99f VMOVAPD %ZMM19,0x440(%RSP) |
(224) 0x42c9a7 VSUBPD %ZMM19,%ZMM6,%ZMM6 |
(224) 0x42c9ad VMOVAPD %ZMM8,0x1c0(%RSP) |
(224) 0x42c9b5 VFMADD213PD %ZMM5,%ZMM8,%ZMM4 |
(224) 0x42c9bb VMOVAPD %ZMM7,0x240(%RSP) |
(224) 0x42c9c3 VFMADD231PD %ZMM6,%ZMM7,%ZMM4 |
(224) 0x42c9c9 VMOVAPD 0x540(%RSP),%ZMM5 |
(224) 0x42c9d1 VMOVAPD %ZMM0,%ZMM5{%K1} |
(224) 0x42c9d7 VMOVAPD %ZMM5,0x540(%RSP) |
(224) 0x42c9df VFMADD213PD %ZMM5,%ZMM3,%ZMM4 |
(224) 0x42c9e5 ADD 0x38(%RSP),%RAX |
(224) 0x42c9ea VMOVUPD %ZMM4,(%RAX,%R10,8){%K1} |
(224) 0x42c9f1 JMP 42c1d0 |
0x42c9f6 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 145 |
nb uops | 149 |
loop length | 665 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 38 |
micro-operation queue | 24.83 cycles |
front end | 24.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
cycles | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 24.30-24.34 |
Stall cycles | 0.00 |
Front-end | 24.83 |
Dispatch | 17.50 |
Overall L1 | 24.83 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 11% |
load | 8% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 9% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x5c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42bff7 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x38(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x20(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x74bf70,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4044c0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c040 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x74bf90,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4040b0 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x74bfb0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404580 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SAL $0x20,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R15,%RCX,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x20,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RDX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDI,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RSI,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %EBX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %RDI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R13,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R10D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdeb44(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42c1df <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x29f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 145 |
nb uops | 149 |
loop length | 665 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 2 |
nb stack references | 38 |
micro-operation queue | 24.83 cycles |
front end | 24.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
cycles | 11.20 | 11.20 | 8.67 | 8.67 | 17.50 | 11.20 | 11.20 | 17.50 | 17.50 | 17.50 | 11.20 | 8.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 24.30-24.34 |
Stall cycles | 0.00 |
Front-end | 24.83 |
Dispatch | 17.50 |
Overall L1 | 24.83 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 3% |
all | 11% |
load | 8% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 11% |
load | 9% |
store | 10% |
mul | 12% |
add-sub | 10% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x5c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EBX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JS 42bff7 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0x1,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x38(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x20(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x74bf70,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4044c0 <__kmpc_for_static_init_4@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42c040 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x100> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x74bf90,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4040b0 <__kmpc_for_static_fini@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x74bfb0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 404580 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R12,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
SAL $0x20,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $-0x200000000,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
LEA (%R15,%RCX,1),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x20,%R14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD %R14,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SAR $0x20,%R12 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $-0x1,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVNS %RDX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV $0x1,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDI,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RDX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RSI,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD %EBX,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R15,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVNS %RCX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
CMOVG %RDI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R14,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R13,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RCX,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SHR $0x20,%R13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %RDX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R10D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOT %R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NEG %R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R13,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VBROADCASTSD 0xdeb44(%RIP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %XMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10D,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 42c1df <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x29f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel_.DIR.OMP.PARALLEL.2– | 4.69 | 1.46 |
▼Loop 224 - accelerate_kernel.f90:60-76 - exec– | 0.01 | 0 |
○Loop 225 - accelerate_kernel.f90:62-76 - exec | 4.68 | 1.43 |