Function: accelerate_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D< ... | Module: exec | Source: accelerate.cpp:40-54 [...] | Coverage: 2.96% |
---|
Function: accelerate_kernel(int, int, int, int, double, clover::Buffer2D<double>&, clover::Buffer2D< ... | Module: exec | Source: accelerate.cpp:40-54 [...] | Coverage: 2.96% |
---|
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/accelerate.cpp: 40 - 54 |
-------------------------------------------------------------------------------- |
40: #pragma omp parallel for simd collapse(2) |
41: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
42: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
43: double stepbymass_s = halfdt / ((density0(i - 1, j - 1) * volume(i - 1, j - 1) + density0(i - 1, j + 0) * volume(i - 1, j + 0) + |
44: density0(i, j) * volume(i, j) + density0(i + 0, j - 1) * volume(i + 0, j - 1)) * |
45: 0.25); |
46: xvel1(i, j) = xvel0(i, j) - stepbymass_s * (xarea(i, j) * (pressure(i, j) - pressure(i - 1, j + 0)) + |
47: xarea(i + 0, j - 1) * (pressure(i + 0, j - 1) - pressure(i - 1, j - 1))); |
48: yvel1(i, j) = yvel0(i, j) - stepbymass_s * (yarea(i, j) * (pressure(i, j) - pressure(i + 0, j - 1)) + |
49: yarea(i - 1, j + 0) * (pressure(i - 1, j + 0) - pressure(i - 1, j - 1))); |
50: xvel1(i, j) = xvel1(i, j) - stepbymass_s * (xarea(i, j) * (viscosity(i, j) - viscosity(i - 1, j + 0)) + |
51: xarea(i + 0, j - 1) * (viscosity(i + 0, j - 1) - viscosity(i - 1, j - 1))); |
52: yvel1(i, j) = yvel1(i, j) - stepbymass_s * (yarea(i, j) * (viscosity(i, j) - viscosity(i + 0, j - 1)) + |
53: yarea(i - 1, j + 0) * (viscosity(i - 1, j + 0) - viscosity(i - 1, j - 1))); |
54: } |
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41c000 PUSH %RBP |
0x41c001 MOV %RSP,%RBP |
0x41c004 PUSH %R15 |
0x41c006 PUSH %R14 |
0x41c008 PUSH %R13 |
0x41c00a PUSH %R12 |
0x41c00c PUSH %RBX |
0x41c00d AND $-0x40,%RSP |
0x41c011 SUB $0x280,%RSP |
0x41c018 MOV 0x68(%RBP),%RAX |
0x41c01c MOV 0x58(%RBP),%R12 |
0x41c020 MOV 0x50(%RBP),%RSI |
0x41c024 MOV 0x38(%RBP),%R11 |
0x41c028 MOV 0x30(%RBP),%R13 |
0x41c02c MOV 0x28(%RBP),%R10 |
0x41c030 MOV %R10,0x30(%RSP) |
0x41c035 MOV 0x20(%RBP),%RBX |
0x41c039 MOV 0x18(%RBP),%R15 |
0x41c03d VMOVQ 0x40(%RBP),%XMM0 |
0x41c042 MOV 0x10(%RBP),%R14 |
0x41c046 MOV 0x48(%RBP),%R10D |
0x41c04a MOV %R10D,0x44(%RSP) |
0x41c04f MOVL $0,0x6c(%RSP) |
0x41c057 TEST %RAX,%RAX |
0x41c05a JS 41c83b |
0x41c060 MOV %R15,0x48(%RSP) |
0x41c065 MOV %R11,0x58(%RSP) |
0x41c06a MOV %RSI,0x50(%RSP) |
0x41c06f MOV %R9,%R15 |
0x41c072 MOV %R8,0x120(%RSP) |
0x41c07a MOV %RDX,0x38(%RSP) |
0x41c07f MOV %RCX,0x140(%RSP) |
0x41c087 MOV (%RDI),%ESI |
0x41c089 MOVQ $0,0xc8(%RSP) |
0x41c095 MOV %RAX,0xc0(%RSP) |
0x41c09d MOVQ $0x1,0x118(%RSP) |
0x41c0a9 SUB $0x8,%RSP |
0x41c0ad LEA 0x120(%RSP),%RAX |
0x41c0b5 LEA 0x74(%RSP),%RCX |
0x41c0ba LEA 0xd0(%RSP),%R8 |
0x41c0c2 LEA 0xc8(%RSP),%R9 |
0x41c0ca MOV $0x6803b0,%EDI |
0x41c0cf MOV %ESI,0x70(%RSP) |
0x41c0d3 MOV $0x22,%EDX |
0x41c0d8 PUSH $0x1 |
0x41c0da PUSH $0x1 |
0x41c0dc PUSH %RAX |
0x41c0dd VMOVDQU %XMM0,0x190(%RSP) |
0x41c0e6 CALL 403180 <__kmpc_for_static_init_8@plt> |
0x41c0eb VMOVUPD 0x190(%RSP),%XMM3 |
0x41c0f4 ADD $0x20,%RSP |
0x41c0f8 MOV 0xc8(%RSP),%R11 |
0x41c100 MOV 0xc0(%RSP),%RAX |
0x41c108 MOV %RAX,0x98(%RSP) |
0x41c110 CMP %RAX,%R11 |
0x41c113 JA 41c81c |
0x41c119 MOV %R12,%RCX |
0x41c11c SUB 0x50(%RSP),%ECX |
0x41c120 MOV (%R15),%RAX |
0x41c123 MOV %RAX,0x60(%RSP) |
0x41c128 MOV 0x10(%R15),%R15 |
0x41c12c LEA 0x1(%R11),%RAX |
0x41c130 MOV 0x98(%RSP),%RDI |
0x41c138 LEA 0x1(%RDI),%R9 |
0x41c13c CMP %R9,%RAX |
0x41c13f CMOVG %RAX,%R9 |
0x41c143 MOV 0x120(%RSP),%R12 |
0x41c14b MOV (%R12),%RAX |
0x41c14f MOV %RAX,0x90(%RSP) |
0x41c157 MOV 0x10(%R12),%R12 |
0x41c15c MOV (%RBX),%RAX |
0x41c15f MOV %RAX,0x88(%RSP) |
0x41c167 MOV 0x10(%RBX),%RAX |
0x41c16b MOV %RAX,0xb8(%RSP) |
0x41c173 MOV 0x38(%RSP),%RAX |
0x41c178 MOV (%RAX),%RDX |
0x41c17b MOV %RDX,0x80(%RSP) |
0x41c183 MOV 0x10(%RAX),%RAX |
0x41c187 MOV %RAX,0x38(%RSP) |
0x41c18c MOV (%R14),%RDX |
0x41c18f MOV 0x10(%R14),%RAX |
0x41c193 MOV %RAX,0x78(%RSP) |
0x41c198 MOV (%R13),%R10 |
0x41c19c MOV 0x10(%R13),%RAX |
0x41c1a0 MOV %RAX,0xb0(%RSP) |
0x41c1a8 MOV 0x30(%RSP),%RAX |
0x41c1ad MOV (%RAX),%R14 |
0x41c1b0 MOV 0x10(%RAX),%RAX |
0x41c1b4 MOV %RAX,0xa8(%RSP) |
0x41c1bc MOV 0x140(%RSP),%RAX |
0x41c1c4 MOV (%RAX),%R13 |
0x41c1c7 MOV 0x10(%RAX),%RAX |
0x41c1cb MOV 0x58(%RSP),%RDI |
0x41c1d0 MOV (%RDI),%RSI |
0x41c1d3 MOV 0x10(%RDI),%RBX |
0x41c1d7 MOV %RBX,0x58(%RSP) |
0x41c1dc MOV 0x48(%RSP),%RDI |
0x41c1e1 MOV (%RDI),%R8 |
0x41c1e4 MOV 0x10(%RDI),%RDI |
0x41c1e8 SUB %R11,%R9 |
0x41c1eb MOV $-0x8,%EBX |
0x41c1f0 MOV %R9,0xd0(%RSP) |
0x41c1f8 AND %R9,%RBX |
0x41c1fb MOV %RBX,%R9 |
0x41c1fe MOV 0x38(%RSP),%RBX |
0x41c203 MOV %RCX,0x48(%RSP) |
0x41c208 MOV %R15,0x70(%RSP) |
0x41c20d MOV %R12,0x110(%RSP) |
0x41c215 MOV %RDX,0x108(%RSP) |
0x41c21d MOV %RSI,0xe8(%RSP) |
0x41c225 MOV %R8,0xe0(%RSP) |
0x41c22d MOV %RDI,0xd8(%RSP) |
0x41c235 MOV %R10,0x100(%RSP) |
0x41c23d MOV %R14,0xf8(%RSP) |
0x41c245 MOV %R13,0xf0(%RSP) |
0x41c24d MOV %RAX,0xa0(%RSP) |
0x41c255 JE 41cae2 |
0x41c25b VPBROADCASTQ %RCX,%ZMM16 |
0x41c261 MOV 0x44(%RSP),%EAX |
0x41c265 VPBROADCASTD %EAX,%YMM0 |
0x41c26b VMOVDQU %YMM0,0x140(%RSP) |
0x41c274 MOV 0x50(%RSP),%RAX |
0x41c279 VPBROADCASTD %EAX,%YMM0 |
0x41c27f VMOVDQU %YMM0,0x120(%RSP) |
0x41c288 MOV 0x60(%RSP),%RAX |
0x41c28d VPBROADCASTQ %RAX,%ZMM20 |
0x41c293 MOV 0x90(%RSP),%RAX |
0x41c29b VPBROADCASTQ %RAX,%ZMM21 |
0x41c2a1 VBROADCASTSD %XMM3,%ZMM0 |
0x41c2a7 VMOVUPD %ZMM0,0x200(%RSP) |
0x41c2af MOV 0x88(%RSP),%RAX |
0x41c2b7 VPBROADCASTQ %RAX,%ZMM0 |
0x41c2bd VMOVDQU64 %ZMM0,0x1c0(%RSP) |
0x41c2c5 MOV 0x80(%RSP),%RAX |
0x41c2cd VPBROADCASTQ %RAX,%ZMM24 |
0x41c2d3 VPBROADCASTQ %RDX,%ZMM25 |
0x41c2d9 VPBROADCASTQ %R10,%ZMM0 |
0x41c2df VMOVDQU64 %ZMM0,0x180(%RSP) |
0x41c2e7 VPBROADCASTQ %R14,%ZMM27 |
0x41c2ed VPBROADCASTQ %R13,%ZMM28 |
0x41c2f3 VPBROADCASTQ %RSI,%ZMM29 |
0x41c2f9 VPBROADCASTQ %R8,%ZMM30 |
0x41c2ff MOV %R11,0x30(%RSP) |
0x41c304 VPBROADCASTQ %R11,%ZMM0 |
0x41c30a VPADDQ 0x4736c(%RIP),%ZMM0,%ZMM17 |
0x41c314 XOR %R13D,%R13D |
0x41c317 MOV %RBX,%R14 |
0x41c31a MOV 0x78(%RSP),%RSI |
0x41c31f NOP |
(95) 0x41c320 VMOVDQA64 %ZMM17,%ZMM0 |
(95) 0x41c326 VMOVDQA64 %ZMM16,%ZMM1 |
(95) 0x41c32c MOV %R12,%RBX |
(95) 0x41c32f MOV %R15,%R12 |
(95) 0x41c332 MOV %R9,%R15 |
(95) 0x41c335 MOV $0x451520,%RAX |
(95) 0x41c33c CALL %RAX |
(95) 0x41c33e VPMOVQD %ZMM0,%YMM0 |
(95) 0x41c344 VPADDD 0x140(%RSP),%YMM0,%YMM31 |
(95) 0x41c34c VMOVDQA64 %ZMM17,%ZMM0 |
(95) 0x41c352 VMOVDQA64 %ZMM16,%ZMM1 |
(95) 0x41c358 CALL 4513a0 <__svml_i64rem8_z0> |
(95) 0x41c35e MOV %R15,%R9 |
(95) 0x41c361 MOV %R12,%R15 |
(95) 0x41c364 MOV %RBX,%R12 |
(95) 0x41c367 VPMOVQD %ZMM0,%YMM0 |
(95) 0x41c36d VPADDD 0x120(%RSP),%YMM0,%YMM2 |
(95) 0x41c376 VPCMPEQD %YMM1,%YMM1,%YMM1 |
(95) 0x41c37a VPADDD %YMM1,%YMM2,%YMM0 |
(95) 0x41c37e VPADDD %YMM1,%YMM31,%YMM1 |
(95) 0x41c384 VPMOVSXDQ %YMM1,%ZMM1 |
(95) 0x41c38a VPXOR %XMM5,%XMM5,%XMM5 |
(95) 0x41c38e VPMULLQ %ZMM1,%ZMM20,%ZMM5 |
(95) 0x41c394 VPMOVSXDQ %YMM0,%ZMM0 |
(95) 0x41c39a KXNORW %K0,%K0,%K1 |
(95) 0x41c39e VXORPD %XMM4,%XMM4,%XMM4 |
(95) 0x41c3a2 VPMULLQ %ZMM1,%ZMM21,%ZMM9 |
(95) 0x41c3a8 VPADDQ %ZMM0,%ZMM5,%ZMM7 |
(95) 0x41c3ae KXNORW %K0,%K0,%K2 |
(95) 0x41c3b2 VPMOVSXDQ %YMM31,%ZMM3 |
(95) 0x41c3b8 VPMULLQ %ZMM3,%ZMM20,%ZMM10 |
(95) 0x41c3be VPADDQ %ZMM0,%ZMM9,%ZMM8 |
(95) 0x41c3c4 VXORPD %XMM6,%XMM6,%XMM6 |
(95) 0x41c3c8 VGATHERQPD (%R15,%ZMM7,8),%ZMM4{%K1} |
(95) 0x41c3cf VPADDQ %ZMM0,%ZMM10,%ZMM11 |
(95) 0x41c3d5 VGATHERQPD (%RBX,%ZMM8,8),%ZMM6{%K2} |
(95) 0x41c3dc KXNORW %K0,%K0,%K1 |
(95) 0x41c3e0 VXORPD %XMM7,%XMM7,%XMM7 |
(95) 0x41c3e4 VPMULLQ %ZMM3,%ZMM21,%ZMM12 |
(95) 0x41c3ea VPADDQ %ZMM0,%ZMM12,%ZMM13 |
(95) 0x41c3f0 KXNORW %K0,%K0,%K2 |
(95) 0x41c3f4 VGATHERQPD (%R15,%ZMM11,8),%ZMM7{%K1} |
(95) 0x41c3fb VXORPD %XMM8,%XMM8,%XMM8 |
(95) 0x41c400 VGATHERQPD (%RBX,%ZMM13,8),%ZMM8{%K2} |
(95) 0x41c407 VPMOVSXDQ %YMM2,%ZMM2 |
(95) 0x41c40d VPADDQ %ZMM2,%ZMM10,%ZMM10 |
(95) 0x41c413 KXNORW %K0,%K0,%K1 |
(95) 0x41c417 VXORPD %XMM13,%XMM13,%XMM13 |
(95) 0x41c41c VPADDQ %ZMM2,%ZMM12,%ZMM11 |
(95) 0x41c422 KXNORW %K0,%K0,%K2 |
(95) 0x41c426 VGATHERQPD (%R15,%ZMM10,8),%ZMM13{%K1} |
(95) 0x41c42d VXORPD %XMM10,%XMM10,%XMM10 |
(95) 0x41c432 VGATHERQPD (%RBX,%ZMM11,8),%ZMM10{%K2} |
(95) 0x41c439 VPADDQ %ZMM2,%ZMM5,%ZMM5 |
(95) 0x41c43f KXNORW %K0,%K0,%K1 |
(95) 0x41c443 VXORPD %XMM14,%XMM14,%XMM14 |
(95) 0x41c448 VPADDQ %ZMM2,%ZMM9,%ZMM9 |
(95) 0x41c44e KXNORW %K0,%K0,%K2 |
(95) 0x41c452 VGATHERQPD (%R15,%ZMM5,8),%ZMM14{%K1} |
(95) 0x41c459 VXORPD %XMM15,%XMM15,%XMM15 |
(95) 0x41c45e VGATHERQPD (%RBX,%ZMM9,8),%ZMM15{%K2} |
(95) 0x41c465 VPXOR %XMM5,%XMM5,%XMM5 |
(95) 0x41c469 VPMULLQ %ZMM3,%ZMM24,%ZMM5 |
(95) 0x41c46f KXNORW %K0,%K0,%K1 |
(95) 0x41c473 VPXORD %XMM31,%XMM31,%XMM31 |
(95) 0x41c479 VPADDQ %ZMM2,%ZMM5,%ZMM5 |
(95) 0x41c47f VPXOR %XMM9,%XMM9,%XMM9 |
(95) 0x41c484 VPMULLQ %ZMM3,%ZMM25,%ZMM9 |
(95) 0x41c48a KXNORW %K0,%K0,%K2 |
(95) 0x41c48e VXORPD %XMM18,%XMM18,%XMM18 |
(95) 0x41c494 VPADDQ %ZMM2,%ZMM9,%ZMM11 |
(95) 0x41c49a VPADDQ %ZMM0,%ZMM9,%ZMM9 |
(95) 0x41c4a0 VGATHERQPD (%R14,%ZMM5,8),%ZMM31{%K1} |
(95) 0x41c4a7 KXNORW %K0,%K0,%K1 |
(95) 0x41c4ab VGATHERQPD (%RSI,%ZMM11,8),%ZMM18{%K2} |
(95) 0x41c4b2 VXORPD %XMM19,%XMM19,%XMM19 |
(95) 0x41c4b8 VGATHERQPD (%RSI,%ZMM9,8),%ZMM19{%K1} |
(95) 0x41c4bf VPXOR %XMM12,%XMM12,%XMM12 |
(95) 0x41c4c4 VPMULLQ %ZMM1,%ZMM25,%ZMM12 |
(95) 0x41c4ca KXNORW %K0,%K0,%K1 |
(95) 0x41c4ce VPADDQ %ZMM2,%ZMM12,%ZMM22 |
(95) 0x41c4d4 VXORPD %XMM23,%XMM23,%XMM23 |
(95) 0x41c4da VGATHERQPD (%RSI,%ZMM22,8),%ZMM23{%K1} |
(95) 0x41c4e1 VPADDQ %ZMM0,%ZMM12,%ZMM12 |
(95) 0x41c4e7 KXNORW %K0,%K0,%K1 |
(95) 0x41c4eb VMULPD %ZMM4,%ZMM6,%ZMM4 |
(95) 0x41c4f1 VXORPD %XMM26,%XMM26,%XMM26 |
(95) 0x41c4f7 VGATHERQPD (%RSI,%ZMM12,8),%ZMM26{%K1} |
(95) 0x41c4fe VFMADD213PD %ZMM4,%ZMM7,%ZMM8 |
(95) 0x41c504 VMOVDQU64 0x1c0(%RSP),%ZMM4 |
(95) 0x41c50c VPMULLQ %ZMM3,%ZMM4,%ZMM4 |
(95) 0x41c512 KXNORW %K0,%K0,%K1 |
(95) 0x41c516 VFMADD213PD %ZMM8,%ZMM13,%ZMM10 |
(95) 0x41c51c VXORPD %XMM8,%XMM8,%XMM8 |
(95) 0x41c521 VPXOR %XMM6,%XMM6,%XMM6 |
(95) 0x41c525 VPMULLQ %ZMM1,%ZMM24,%ZMM6 |
(95) 0x41c52b VPADDQ %ZMM2,%ZMM6,%ZMM6 |
(95) 0x41c531 VPADDQ %ZMM2,%ZMM4,%ZMM4 |
(95) 0x41c537 KXNORW %K0,%K0,%K2 |
(95) 0x41c53b MOV 0xb8(%RSP),%RAX |
(95) 0x41c543 VGATHERQPD (%RAX,%ZMM4,8),%ZMM8{%K1} |
(95) 0x41c54a VXORPD %XMM7,%XMM7,%XMM7 |
(95) 0x41c54e VGATHERQPD (%R14,%ZMM6,8),%ZMM7{%K2} |
(95) 0x41c555 VFMADD213PD %ZMM10,%ZMM14,%ZMM15 |
(95) 0x41c55b VSUBPD %ZMM18,%ZMM19,%ZMM10 |
(95) 0x41c561 VMULPD 0x470e5(%RIP){1to8},%ZMM15,%ZMM4 |
(95) 0x41c56b VMOVUPD 0x200(%RSP),%ZMM13 |
(95) 0x41c573 VDIVPD %ZMM4,%ZMM13,%ZMM4 |
(95) 0x41c579 VMULPD %ZMM31,%ZMM10,%ZMM10 |
(95) 0x41c57f VSUBPD %ZMM23,%ZMM26,%ZMM13 |
(95) 0x41c585 VMOVDQU64 0x180(%RSP),%ZMM14 |
(95) 0x41c58d VPMULLQ %ZMM3,%ZMM14,%ZMM14 |
(95) 0x41c593 VFMADD213PD %ZMM10,%ZMM7,%ZMM13 |
(95) 0x41c599 VPADDQ %ZMM2,%ZMM14,%ZMM7 |
(95) 0x41c59f KXNORW %K0,%K0,%K1 |
(95) 0x41c5a3 VPXOR %XMM10,%XMM10,%XMM10 |
(95) 0x41c5a8 VPMULLQ %ZMM3,%ZMM27,%ZMM10 |
(95) 0x41c5ae VFMADD213PD %ZMM8,%ZMM4,%ZMM13 |
(95) 0x41c5b4 VPXOR %XMM14,%XMM14,%XMM14 |
(95) 0x41c5b9 VPMULLQ %ZMM3,%ZMM28,%ZMM14 |
(95) 0x41c5bf KXNORW %K0,%K0,%K2 |
(95) 0x41c5c3 VXORPD %XMM15,%XMM15,%XMM15 |
(95) 0x41c5c8 MOV 0xb0(%RSP),%RDX |
(95) 0x41c5d0 VSCATTERQPD %ZMM13,(%RDX,%ZMM7,8){%K1} |
(95) 0x41c5d7 KXNORW %K0,%K0,%K1 |
(95) 0x41c5db VGATHERQPD (%RSI,%ZMM11,8),%ZMM15{%K2} |
(95) 0x41c5e2 VXORPD %XMM11,%XMM11,%XMM11 |
(95) 0x41c5e7 VGATHERQPD (%RSI,%ZMM22,8),%ZMM11{%K1} |
(95) 0x41c5ee VPADDQ %ZMM2,%ZMM14,%ZMM8 |
(95) 0x41c5f4 KXNORW %K0,%K0,%K1 |
(95) 0x41c5f8 VXORPD %XMM13,%XMM13,%XMM13 |
(95) 0x41c5fd KXNORW %K0,%K0,%K2 |
(95) 0x41c601 VXORPD %XMM18,%XMM18,%XMM18 |
(95) 0x41c607 MOV 0xa0(%RSP),%RAX |
(95) 0x41c60f VGATHERQPD (%RAX,%ZMM8,8),%ZMM13{%K1} |
(95) 0x41c616 KXNORW %K0,%K0,%K1 |
(95) 0x41c61a VGATHERQPD (%RSI,%ZMM9,8),%ZMM18{%K2} |
(95) 0x41c621 VXORPD %XMM19,%XMM19,%XMM19 |
(95) 0x41c627 VGATHERQPD (%RSI,%ZMM12,8),%ZMM19{%K1} |
(95) 0x41c62e KXNORW %K0,%K0,%K1 |
(95) 0x41c632 VXORPD %XMM12,%XMM12,%XMM12 |
(95) 0x41c637 VPADDQ %ZMM0,%ZMM14,%ZMM9 |
(95) 0x41c63d VPADDQ %ZMM2,%ZMM10,%ZMM10 |
(95) 0x41c643 KXNORW %K0,%K0,%K2 |
(95) 0x41c647 MOV 0xa8(%RSP),%RCX |
(95) 0x41c64f VGATHERQPD (%RCX,%ZMM10,8),%ZMM12{%K1} |
(95) 0x41c656 VXORPD %XMM10,%XMM10,%XMM10 |
(95) 0x41c65b VGATHERQPD (%RAX,%ZMM9,8),%ZMM10{%K2} |
(95) 0x41c662 VSUBPD %ZMM15,%ZMM11,%ZMM11 |
(95) 0x41c668 VMULPD %ZMM13,%ZMM11,%ZMM11 |
(95) 0x41c66e VSUBPD %ZMM18,%ZMM19,%ZMM13 |
(95) 0x41c674 VPXOR %XMM14,%XMM14,%XMM14 |
(95) 0x41c679 VPMULLQ %ZMM3,%ZMM29,%ZMM14 |
(95) 0x41c67f VFMADD213PD %ZMM11,%ZMM10,%ZMM13 |
(95) 0x41c685 VPADDQ %ZMM2,%ZMM14,%ZMM10 |
(95) 0x41c68b VPMULLQ %ZMM3,%ZMM30,%ZMM3 |
(95) 0x41c691 VFMADD213PD %ZMM12,%ZMM4,%ZMM13 |
(95) 0x41c697 KXNORW %K0,%K0,%K1 |
(95) 0x41c69b VPADDQ %ZMM2,%ZMM3,%ZMM11 |
(95) 0x41c6a1 KXNORW %K0,%K0,%K2 |
(95) 0x41c6a5 VXORPD %XMM12,%XMM12,%XMM12 |
(95) 0x41c6aa VPADDQ %ZMM0,%ZMM3,%ZMM3 |
(95) 0x41c6b0 MOV 0x58(%RSP),%RCX |
(95) 0x41c6b5 VSCATTERQPD %ZMM13,(%RCX,%ZMM10,8){%K1} |
(95) 0x41c6bc KXNORW %K0,%K0,%K1 |
(95) 0x41c6c0 VGATHERQPD (%RDI,%ZMM11,8),%ZMM12{%K2} |
(95) 0x41c6c7 VXORPD %XMM13,%XMM13,%XMM13 |
(95) 0x41c6cc VGATHERQPD (%RDI,%ZMM3,8),%ZMM13{%K1} |
(95) 0x41c6d3 VPMULLQ %ZMM1,%ZMM30,%ZMM1 |
(95) 0x41c6d9 KXNORW %K0,%K0,%K1 |
(95) 0x41c6dd VPXOR %XMM14,%XMM14,%XMM14 |
(95) 0x41c6e2 VGATHERQPD (%R14,%ZMM5,8),%ZMM14{%K1} |
(95) 0x41c6e9 KXNORW %K0,%K0,%K1 |
(95) 0x41c6ed VPADDQ %ZMM2,%ZMM1,%ZMM2 |
(95) 0x41c6f3 KXNORW %K0,%K0,%K2 |
(95) 0x41c6f7 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
(95) 0x41c6fd VPXOR %XMM1,%XMM1,%XMM1 |
(95) 0x41c701 VXORPD %XMM5,%XMM5,%XMM5 |
(95) 0x41c705 VGATHERQPD (%R14,%ZMM6,8),%ZMM1{%K1} |
(95) 0x41c70c KXNORW %K0,%K0,%K1 |
(95) 0x41c710 VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} |
(95) 0x41c717 VXORPD %XMM6,%XMM6,%XMM6 |
(95) 0x41c71b VGATHERQPD (%RDI,%ZMM0,8),%ZMM6{%K1} |
(95) 0x41c722 KXNORW %K0,%K0,%K1 |
(95) 0x41c726 VSUBPD %ZMM12,%ZMM13,%ZMM12 |
(95) 0x41c72c VXORPD %XMM13,%XMM13,%XMM13 |
(95) 0x41c731 VGATHERQPD (%RDX,%ZMM7,8),%ZMM13{%K1} |
(95) 0x41c738 VMULPD %ZMM14,%ZMM12,%ZMM12 |
(95) 0x41c73e VSUBPD %ZMM5,%ZMM6,%ZMM5 |
(95) 0x41c744 VFMADD213PD %ZMM12,%ZMM1,%ZMM5 |
(95) 0x41c74a VFMADD213PD %ZMM13,%ZMM4,%ZMM5 |
(95) 0x41c750 KXNORW %K0,%K0,%K1 |
(95) 0x41c754 VSCATTERQPD %ZMM5,(%RDX,%ZMM7,8){%K1} |
(95) 0x41c75b KXNORW %K0,%K0,%K1 |
(95) 0x41c75f VXORPD %XMM1,%XMM1,%XMM1 |
(95) 0x41c763 KXNORW %K0,%K0,%K2 |
(95) 0x41c767 VGATHERQPD (%RDI,%ZMM11,8),%ZMM1{%K1} |
(95) 0x41c76e VXORPD %XMM5,%XMM5,%XMM5 |
(95) 0x41c772 VGATHERQPD (%RDI,%ZMM2,8),%ZMM5{%K2} |
(95) 0x41c779 KXNORW %K0,%K0,%K1 |
(95) 0x41c77d VXORPD %XMM2,%XMM2,%XMM2 |
(95) 0x41c781 VGATHERQPD (%RAX,%ZMM8,8),%ZMM2{%K1} |
(95) 0x41c788 KXNORW %K0,%K0,%K1 |
(95) 0x41c78c VXORPD %XMM6,%XMM6,%XMM6 |
(95) 0x41c790 KXNORW %K0,%K0,%K2 |
(95) 0x41c794 VGATHERQPD (%RDI,%ZMM3,8),%ZMM6{%K1} |
(95) 0x41c79b VXORPD %XMM3,%XMM3,%XMM3 |
(95) 0x41c79f VGATHERQPD (%RDI,%ZMM0,8),%ZMM3{%K2} |
(95) 0x41c7a6 KXNORW %K0,%K0,%K1 |
(95) 0x41c7aa VXORPD %XMM0,%XMM0,%XMM0 |
(95) 0x41c7ae VGATHERQPD (%RAX,%ZMM9,8),%ZMM0{%K1} |
(95) 0x41c7b5 KXNORW %K0,%K0,%K1 |
(95) 0x41c7b9 VSUBPD %ZMM1,%ZMM5,%ZMM1 |
(95) 0x41c7bf VXORPD %XMM5,%XMM5,%XMM5 |
(95) 0x41c7c3 VGATHERQPD (%RCX,%ZMM10,8),%ZMM5{%K1} |
(95) 0x41c7ca VMULPD %ZMM2,%ZMM1,%ZMM1 |
(95) 0x41c7d0 VSUBPD %ZMM6,%ZMM3,%ZMM2 |
(95) 0x41c7d6 VFMADD213PD %ZMM1,%ZMM0,%ZMM2 |
(95) 0x41c7dc VFMADD213PD %ZMM5,%ZMM4,%ZMM2 |
(95) 0x41c7e2 KXNORW %K0,%K0,%K1 |
(95) 0x41c7e6 VSCATTERQPD %ZMM2,(%RCX,%ZMM10,8){%K1} |
(95) 0x41c7ed VPADDQ 0x46e61(%RIP){1to8},%ZMM17,%ZMM17 |
(95) 0x41c7f7 ADD $0x8,%R13 |
(95) 0x41c7fb CMP %R9,%R13 |
(95) 0x41c7fe JB 41c320 |
0x41c804 CMP %R9,0xd0(%RSP) |
0x41c80c VMOVUPD 0x170(%RSP),%XMM3 |
0x41c815 MOV 0x30(%RSP),%R11 |
0x41c81a JNE 41c84a |
0x41c81c MOV $0x6803d0,%EDI |
0x41c821 MOV 0x68(%RSP),%ESI |
0x41c825 LEA -0x28(%RBP),%RSP |
0x41c829 POP %RBX |
0x41c82a POP %R12 |
0x41c82c POP %R13 |
0x41c82e POP %R14 |
0x41c830 POP %R15 |
0x41c832 POP %RBP |
0x41c833 VZEROUPPER |
0x41c836 JMP 402fe0 |
0x41c83b LEA -0x28(%RBP),%RSP |
0x41c83f POP %RBX |
0x41c840 POP %R12 |
0x41c842 POP %R13 |
0x41c844 POP %R14 |
0x41c846 POP %R15 |
0x41c848 POP %RBP |
0x41c849 RET |
0x41c84a ADD %R9,%R11 |
0x41c84d JMP 41cae2 |
0x41c852 NOPW %CS:(%RAX,%RAX,1) |
(94) 0x41c860 MOV %R11,%RAX |
(94) 0x41c863 CQTO |
(94) 0x41c865 IDIV %R9 |
(94) 0x41c868 ADD 0x44(%RSP),%ECX |
(94) 0x41c86c ADD %R10D,%EDX |
(94) 0x41c86f LEA -0x1(%RCX),%EAX |
(94) 0x41c872 CLTQ |
(94) 0x41c874 MOV %RSI,%R8 |
(94) 0x41c877 IMUL %RAX,%R8 |
(94) 0x41c87b MOVSXD %EDX,%RDX |
(94) 0x41c87e LEA -0x1(%R8,%RDX,1),%R9 |
(94) 0x41c883 MOV 0x90(%RSP),%R12 |
(94) 0x41c88b MOV %R12,%R10 |
(94) 0x41c88e IMUL %RAX,%R10 |
(94) 0x41c892 LEA -0x1(%R10,%RDX,1),%R11 |
(94) 0x41c897 MOV 0x110(%RSP),%R14 |
(94) 0x41c89f VMOVSD (%R14,%R11,8),%XMM0 |
(94) 0x41c8a5 VMULSD (%R15,%R9,8),%XMM0,%XMM0 |
(94) 0x41c8ab MOVSXD %ECX,%RCX |
(94) 0x41c8ae MOV %RSI,%R9 |
(94) 0x41c8b1 IMUL %RCX,%R9 |
(94) 0x41c8b5 LEA -0x1(%R9,%RDX,1),%R11 |
(94) 0x41c8ba IMUL %RCX,%R12 |
(94) 0x41c8be LEA -0x1(%R12,%RDX,1),%R13 |
(94) 0x41c8c3 VMOVSD (%R14,%R13,8),%XMM1 |
(94) 0x41c8c9 VFMADD132SD (%R15,%R11,8),%XMM0,%XMM1 |
(94) 0x41c8cf ADD %RDX,%R9 |
(94) 0x41c8d2 ADD %RDX,%R12 |
(94) 0x41c8d5 VMOVSD (%R14,%R12,8),%XMM0 |
(94) 0x41c8db VFMADD132SD (%R15,%R9,8),%XMM1,%XMM0 |
(94) 0x41c8e1 ADD %RDX,%R8 |
(94) 0x41c8e4 ADD %RDX,%R10 |
(94) 0x41c8e7 VMOVSD (%R14,%R10,8),%XMM1 |
(94) 0x41c8ed VFMADD132SD (%R15,%R8,8),%XMM0,%XMM1 |
(94) 0x41c8f3 VMULSD 0x46d55(%RIP),%XMM1,%XMM0 |
(94) 0x41c8fb VDIVSD %XMM0,%XMM3,%XMM0 |
(94) 0x41c8ff MOV 0x88(%RSP),%R11 |
(94) 0x41c907 IMUL %RCX,%R11 |
(94) 0x41c90b ADD %RDX,%R11 |
(94) 0x41c90e MOV 0x80(%RSP),%RBX |
(94) 0x41c916 MOV %RBX,%R9 |
(94) 0x41c919 IMUL %RCX,%R9 |
(94) 0x41c91d ADD %RDX,%R9 |
(94) 0x41c920 MOV 0x108(%RSP),%R15 |
(94) 0x41c928 MOV %R15,%R8 |
(94) 0x41c92b IMUL %RCX,%R8 |
(94) 0x41c92f LEA -0x1(%R8,%RDX,1),%R13 |
(94) 0x41c934 ADD %RDX,%R8 |
(94) 0x41c937 MOV 0x78(%RSP),%RDI |
(94) 0x41c93c VMOVSD (%RDI,%R13,8),%XMM1 |
(94) 0x41c942 VSUBSD (%RDI,%R8,8),%XMM1,%XMM1 |
(94) 0x41c948 MOV 0x38(%RSP),%RSI |
(94) 0x41c94d VMULSD (%RSI,%R9,8),%XMM1,%XMM1 |
(94) 0x41c953 MOV %RAX,%R12 |
(94) 0x41c956 IMUL %RAX,%RBX |
(94) 0x41c95a ADD %RDX,%RBX |
(94) 0x41c95d IMUL %RAX,%R15 |
(94) 0x41c961 LEA -0x1(%R15,%RDX,1),%RAX |
(94) 0x41c966 ADD %RDX,%R15 |
(94) 0x41c969 VMOVSD (%RDI,%RAX,8),%XMM2 |
(94) 0x41c96e VSUBSD (%RDI,%R15,8),%XMM2,%XMM2 |
(94) 0x41c974 VFMADD132SD (%RSI,%RBX,8),%XMM1,%XMM2 |
(94) 0x41c97a MOV 0xb8(%RSP),%R14 |
(94) 0x41c982 VFMADD213SD (%R14,%R11,8),%XMM0,%XMM2 |
(94) 0x41c988 MOV 0x100(%RSP),%R11 |
(94) 0x41c990 IMUL %RCX,%R11 |
(94) 0x41c994 ADD %RDX,%R11 |
(94) 0x41c997 MOV 0xb0(%RSP),%R14 |
(94) 0x41c99f VMOVSD %XMM2,(%R14,%R11,8) |
(94) 0x41c9a5 VMOVSD (%RDI,%R15,8),%XMM1 |
(94) 0x41c9ab MOV 0xf8(%RSP),%R15 |
(94) 0x41c9b3 IMUL %RCX,%R15 |
(94) 0x41c9b7 ADD %RDX,%R15 |
(94) 0x41c9ba VSUBSD (%RDI,%R8,8),%XMM1,%XMM1 |
(94) 0x41c9c0 MOV 0xf0(%RSP),%R8 |
(94) 0x41c9c8 IMUL %RCX,%R8 |
(94) 0x41c9cc VMOVSD (%RDI,%RAX,8),%XMM2 |
(94) 0x41c9d1 LEA (%R8,%RDX,1),%RAX |
(94) 0x41c9d5 MOV %RAX,0x120(%RSP) |
(94) 0x41c9dd MOV 0xa0(%RSP),%R10 |
(94) 0x41c9e5 VMULSD (%R10,%RAX,8),%XMM1,%XMM1 |
(94) 0x41c9eb LEA -0x1(%R8,%RDX,1),%RAX |
(94) 0x41c9f0 MOV %RAX,0x140(%RSP) |
(94) 0x41c9f8 VSUBSD (%RDI,%R13,8),%XMM2,%XMM2 |
(94) 0x41c9fe VFMADD132SD (%R10,%RAX,8),%XMM1,%XMM2 |
(94) 0x41ca04 MOV 0xa8(%RSP),%RDI |
(94) 0x41ca0c VFMADD213SD (%RDI,%R15,8),%XMM0,%XMM2 |
(94) 0x41ca12 MOV 0xe8(%RSP),%R13 |
(94) 0x41ca1a IMUL %RCX,%R13 |
(94) 0x41ca1e ADD %RDX,%R13 |
(94) 0x41ca21 MOV 0x58(%RSP),%R8 |
(94) 0x41ca26 VMOVSD %XMM2,(%R8,%R13,8) |
(94) 0x41ca2c MOV 0xe0(%RSP),%RSI |
(94) 0x41ca34 IMUL %RSI,%RCX |
(94) 0x41ca38 LEA -0x1(%RCX,%RDX,1),%R15 |
(94) 0x41ca3d ADD %RDX,%RCX |
(94) 0x41ca40 MOV 0xd8(%RSP),%RAX |
(94) 0x41ca48 VMOVSD (%RAX,%R15,8),%XMM1 |
(94) 0x41ca4e VSUBSD (%RAX,%RCX,8),%XMM1,%XMM1 |
(94) 0x41ca53 MOV 0x38(%RSP),%RDI |
(94) 0x41ca58 VMULSD (%RDI,%R9,8),%XMM1,%XMM1 |
(94) 0x41ca5e IMUL %RSI,%R12 |
(94) 0x41ca62 LEA (%R12,%RDX,1),%R9 |
(94) 0x41ca66 LEA -0x1(%R12,%RDX,1),%RDX |
(94) 0x41ca6b MOV %RAX,%RSI |
(94) 0x41ca6e VMOVSD (%RAX,%RDX,8),%XMM2 |
(94) 0x41ca73 VSUBSD (%RAX,%R9,8),%XMM2,%XMM2 |
(94) 0x41ca79 VFMADD132SD (%RDI,%RBX,8),%XMM1,%XMM2 |
(94) 0x41ca7f VFMADD213SD (%R14,%R11,8),%XMM0,%XMM2 |
(94) 0x41ca85 VMOVSD %XMM2,(%R14,%R11,8) |
(94) 0x41ca8b VMOVSD (%RAX,%R9,8),%XMM1 |
(94) 0x41ca91 VSUBSD (%RAX,%RCX,8),%XMM1,%XMM1 |
(94) 0x41ca96 MOV 0x120(%RSP),%RAX |
(94) 0x41ca9e VMULSD (%R10,%RAX,8),%XMM1,%XMM1 |
(94) 0x41caa4 VMOVSD (%RSI,%RDX,8),%XMM2 |
(94) 0x41caa9 MOV %R10,%RAX |
(94) 0x41caac VSUBSD (%RSI,%R15,8),%XMM2,%XMM2 |
(94) 0x41cab2 MOV 0x140(%RSP),%RCX |
(94) 0x41caba VFMADD132SD (%R10,%RCX,8),%XMM1,%XMM2 |
(94) 0x41cac0 VFMADD213SD (%R8,%R13,8),%XMM0,%XMM2 |
(94) 0x41cac6 VMOVSD %XMM2,(%R8,%R13,8) |
(94) 0x41cacc MOV 0x30(%RSP),%R11 |
(94) 0x41cad1 INC %R11 |
(94) 0x41cad4 CMP 0x98(%RSP),%R11 |
(94) 0x41cadc JG 41c81c |
(94) 0x41cae2 MOV %R11,%R8 |
(94) 0x41cae5 SHR $0x20,%R8 |
(94) 0x41cae9 JE 41cb00 |
(94) 0x41caeb MOV %R11,%RAX |
(94) 0x41caee XOR %EDX,%EDX |
(94) 0x41caf0 MOV 0x48(%RSP),%R9 |
(94) 0x41caf5 DIV %R9 |
(94) 0x41caf8 MOV %RAX,%RCX |
(94) 0x41cafb JMP 41cb0f |
0x41cafd NOPL (%RAX) |
(94) 0x41cb00 MOV %R11D,%EAX |
(94) 0x41cb03 XOR %EDX,%EDX |
(94) 0x41cb05 MOV 0x48(%RSP),%R9 |
(94) 0x41cb0a DIV %R9D |
(94) 0x41cb0d MOV %EAX,%ECX |
(94) 0x41cb0f MOV 0x50(%RSP),%R10 |
(94) 0x41cb14 MOV 0x70(%RSP),%R15 |
(94) 0x41cb19 MOV 0x60(%RSP),%RSI |
(94) 0x41cb1e TEST %R8,%R8 |
(94) 0x41cb21 MOV %R11,0x30(%RSP) |
(94) 0x41cb26 JNE 41c860 |
(94) 0x41cb2c MOV %R11D,%EAX |
(94) 0x41cb2f XOR %EDX,%EDX |
(94) 0x41cb31 DIV %R9D |
(94) 0x41cb34 JMP 41c868 |
0x41cb39 NOPL (%RAX) |
Path / |
Source file and lines | accelerate.cpp:40-54 |
Module | exec |
nb instructions | 177 |
nb uops | 179 |
loop length | 902 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 1 |
used zmm registers | 11 |
nb stack references | 51 |
micro-operation queue | 29.83 cycles |
front end | 29.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.30 | 5.20 | 23.00 | 23.00 | 26.50 | 15.00 | 5.30 | 26.50 | 26.50 | 26.50 | 5.20 | 23.00 |
cycles | 5.30 | 5.20 | 23.00 | 23.00 | 26.50 | 15.00 | 5.30 | 26.50 | 26.50 | 26.50 | 5.20 | 23.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 29.67 |
Stall cycles | 0.00 |
Front-end | 29.83 |
Dispatch | 26.50 |
Overall L1 | 29.83 |
all | 9% |
load | 6% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 75% |
load | 100% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 17% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 16% |
load | 16% |
store | 18% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 53% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 40% |
load | 25% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 17% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 53% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x280,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ 0x40(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41c83b <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x83b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x120(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x74(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0xd0(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0xc8(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6803b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
VMOVDQU %XMM0,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVUPD 0x190(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41c81c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x81c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x50(%RSP),%ECX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R11),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x98(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RDI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x120(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x140(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R11,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R9,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x110(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x108(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41cae2 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0xae2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPBROADCASTQ %RCX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x44(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x90(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %ZMM0,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ %R14,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R13,%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4736c(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x78(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,0xd0(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
VMOVUPD 0x170(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x30(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41c84a <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x84a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6803d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %R9,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41cae2 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0xae2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | accelerate.cpp:40-54 |
Module | exec |
nb instructions | 177 |
nb uops | 179 |
loop length | 902 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 1 |
used zmm registers | 11 |
nb stack references | 51 |
micro-operation queue | 29.83 cycles |
front end | 29.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.30 | 5.20 | 23.00 | 23.00 | 26.50 | 15.00 | 5.30 | 26.50 | 26.50 | 26.50 | 5.20 | 23.00 |
cycles | 5.30 | 5.20 | 23.00 | 23.00 | 26.50 | 15.00 | 5.30 | 26.50 | 26.50 | 26.50 | 5.20 | 23.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 29.67 |
Stall cycles | 0.00 |
Front-end | 29.83 |
Dispatch | 26.50 |
Overall L1 | 29.83 |
all | 9% |
load | 6% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 75% |
load | 100% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 17% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 4% |
all | 16% |
load | 16% |
store | 18% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 53% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 40% |
load | 25% |
store | 100% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 17% |
load | 17% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 53% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x280,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ 0x40(%RBP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x48(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41c83b <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x83b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0xc8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x118(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x120(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x74(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0xd0(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0xc8(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6803b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
VMOVDQU %XMM0,0x190(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
VMOVUPD 0x190(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41c81c <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x81c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x50(%RSP),%ECX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R11),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x98(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RDI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x120(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R13),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x140(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R11,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R9,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x110(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x108(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0xe8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0xd8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,0xf0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41cae2 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0xae2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPBROADCASTQ %RCX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x44(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x90(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD %ZMM0,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x80(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ %R14,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R13,%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R11,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4736c(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x78(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,0xd0(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
VMOVUPD 0x170(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x30(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41c84a <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0x84a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6803d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %R9,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41cae2 <_Z17accelerate_kerneliiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_.extracted+0xae2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel(int, int, int, int, double, clover::Buffer2D | 2.96 | 2.64 |
○Loop 95 - accelerate.cpp:41-54 - exec | 2.96 | 2.63 |
○Loop 94 - accelerate.cpp:41-54 - exec | 0 | 0 |