Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 0.82% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 0.82% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
0x421570 PUSH %RBP |
0x421571 MOV %RSP,%RBP |
0x421574 PUSH %R15 |
0x421576 PUSH %R14 |
0x421578 PUSH %R13 |
0x42157a PUSH %R12 |
0x42157c PUSH %RBX |
0x42157d AND $-0x20,%RSP |
0x421581 SUB $0x120,%RSP |
0x421588 MOV %RDX,%R13 |
0x42158b MOV 0x38(%RBP),%RAX |
0x42158f MOV 0x28(%RBP),%R14 |
0x421593 MOV 0x20(%RBP),%RSI |
0x421597 MOV 0x10(%RBP),%RBX |
0x42159b MOV 0x18(%RBP),%EDX |
0x42159e MOV %EDX,0x1c(%RSP) |
0x4215a2 MOVL $0,0x34(%RSP) |
0x4215aa TEST %RAX,%RAX |
0x4215ad JS 421b26 |
0x4215b3 MOV %R9,%R15 |
0x4215b6 MOV %R8,%R12 |
0x4215b9 MOV %RCX,0x28(%RSP) |
0x4215be MOV %RSI,0x20(%RSP) |
0x4215c3 MOV (%RDI),%ESI |
0x4215c5 MOVQ $0,0x68(%RSP) |
0x4215ce MOV %RAX,0x60(%RSP) |
0x4215d3 MOVQ $0x1,0x98(%RSP) |
0x4215df SUB $0x8,%RSP |
0x4215e3 LEA 0xa0(%RSP),%RAX |
0x4215eb LEA 0x3c(%RSP),%RCX |
0x4215f0 LEA 0x70(%RSP),%R8 |
0x4215f5 LEA 0x68(%RSP),%R9 |
0x4215fa MOV $0x4807c0,%EDI |
0x4215ff MOV %ESI,0x38(%RSP) |
0x421603 MOV $0x22,%EDX |
0x421608 PUSH $0x1 |
0x42160a PUSH $0x1 |
0x42160c PUSH %RAX |
0x42160d CALL 4031e0 <__kmpc_for_static_init_8@plt> |
0x421612 ADD $0x20,%RSP |
0x421616 MOV 0x68(%RSP),%RSI |
0x42161b MOV 0x60(%RSP),%RAX |
0x421620 MOV %RAX,0x58(%RSP) |
0x421625 CMP %RAX,%RSI |
0x421628 JA 421b07 |
0x42162e MOV %R14,%RCX |
0x421631 SUB 0x20(%RSP),%ECX |
0x421635 MOV (%R13),%R9 |
0x421639 MOV 0x10(%R13),%R13 |
0x42163d MOV (%RBX),%R10 |
0x421640 MOV 0x10(%RBX),%RDI |
0x421644 MOV (%R12),%R11 |
0x421648 MOV 0x10(%R12),%R12 |
0x42164d MOV 0x28(%RSP),%RAX |
0x421652 MOV (%RAX),%RBX |
0x421655 MOV 0x10(%RAX),%R14 |
0x421659 MOV (%R15),%RDX |
0x42165c MOV 0x10(%R15),%R15 |
0x421660 LEA 0x1(%RSI),%RAX |
0x421664 MOV 0x58(%RSP),%R8 |
0x421669 INC %R8 |
0x42166c CMP %R8,%RAX |
0x42166f CMOVG %RAX,%R8 |
0x421673 SUB %RSI,%R8 |
0x421676 MOV $-0x8,%EAX |
0x42167b AND %R8,%RAX |
0x42167e MOV %RCX,0x50(%RSP) |
0x421683 MOV %R9,0x48(%RSP) |
0x421688 MOV %R10,0x40(%RSP) |
0x42168d MOV %R11,0x28(%RSP) |
0x421692 MOV %R12,0x90(%RSP) |
0x42169a MOV %RBX,0x88(%RSP) |
0x4216a2 MOV %RDX,0x80(%RSP) |
0x4216aa JE 421c32 |
0x4216b0 MOV %R8,0x78(%RSP) |
0x4216b5 VPBROADCASTQ %RCX,%YMM8 |
0x4216bb MOV %RAX,0x38(%RSP) |
0x4216c0 MOV 0x1c(%RSP),%EAX |
0x4216c4 VPBROADCASTD %EAX,%YMM0 |
0x4216ca VMOVDQU %YMM0,0xe0(%RSP) |
0x4216d3 MOV 0x20(%RSP),%RAX |
0x4216d8 VPBROADCASTD %EAX,%YMM0 |
0x4216de VMOVDQU %YMM0,0xc0(%RSP) |
0x4216e7 VPBROADCASTQ %R9,%YMM14 |
0x4216ed VPBROADCASTQ %R10,%YMM15 |
0x4216f3 VPBROADCASTQ %R11,%YMM0 |
0x4216f9 VMOVDQU %YMM0,0xa0(%RSP) |
0x421702 VPBROADCASTQ %RBX,%YMM17 |
0x421708 VPBROADCASTQ %RDX,%YMM18 |
0x42170e MOV %RSI,0x70(%RSP) |
0x421713 VPBROADCASTQ %RSI,%YMM0 |
0x421719 VPADDQ 0x42f5f(%RIP),%YMM0,%YMM9 |
0x421721 VPADDQ 0x42dd7(%RIP),%YMM0,%YMM10 |
0x421729 XOR %EBX,%EBX |
0x42172b NOPL (%RAX,%RAX,1) |
(172) 0x421730 VMOVDQA %YMM10,%YMM0 |
(172) 0x421734 VMOVDQA %YMM8,%YMM1 |
(172) 0x421738 MOV $0x452aa0,%RSI |
(172) 0x42173f CALL %RSI |
(172) 0x421741 VMOVDQA %YMM0,%YMM11 |
(172) 0x421745 VMOVDQA %YMM9,%YMM0 |
(172) 0x421749 VMOVDQA %YMM8,%YMM1 |
(172) 0x42174d CALL %RSI |
(172) 0x42174f VPMOVQD %YMM11,%XMM1 |
(172) 0x421755 VPMOVQD %YMM0,%XMM0 |
(172) 0x42175b VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(172) 0x421761 VPADDD 0xe0(%RSP),%YMM0,%YMM22 |
(172) 0x421769 VMOVDQA %YMM10,%YMM0 |
(172) 0x42176d VMOVDQA %YMM8,%YMM1 |
(172) 0x421771 MOV $0x452870,%RSI |
(172) 0x421778 CALL %RSI |
(172) 0x42177a VMOVDQA %YMM0,%YMM11 |
(172) 0x42177e VMOVDQA %YMM9,%YMM0 |
(172) 0x421782 VMOVDQA %YMM8,%YMM1 |
(172) 0x421786 CALL %RSI |
(172) 0x421788 VPMOVQD %YMM11,%XMM1 |
(172) 0x42178e VPMOVQD %YMM0,%XMM0 |
(172) 0x421794 VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(172) 0x42179a VPCMPEQD %YMM12,%YMM12,%YMM12 |
(172) 0x42179f VPADDD %YMM12,%YMM22,%YMM2 |
(172) 0x4217a5 VEXTRACTI128 $0x1,%YMM2,%XMM1 |
(172) 0x4217ab VPMOVSXDQ %XMM1,%YMM1 |
(172) 0x4217b0 VPMOVSXDQ %XMM2,%YMM2 |
(172) 0x4217b5 VPADDD 0xc0(%RSP),%YMM0,%YMM5 |
(172) 0x4217be VPMULLQ %YMM2,%YMM14,%YMM7 |
(172) 0x4217c4 VXORPS %XMM4,%XMM4,%XMM4 |
(172) 0x4217c8 VPMULLQ %YMM1,%YMM14,%YMM4 |
(172) 0x4217ce VPMOVSXDQ %XMM5,%YMM0 |
(172) 0x4217d3 VPMOVSXDQ %XMM22,%YMM3 |
(172) 0x4217d9 VXORPS %XMM11,%XMM11,%XMM11 |
(172) 0x4217de VPMULLQ %YMM3,%YMM14,%YMM11 |
(172) 0x4217e4 VPADDQ %YMM0,%YMM11,%YMM23 |
(172) 0x4217ea KXNORW %K0,%K0,%K1 |
(172) 0x4217ee VXORPD %XMM6,%XMM6,%XMM6 |
(172) 0x4217f2 VPMULLQ %YMM3,%YMM15,%YMM24 |
(172) 0x4217f8 VGATHERQPD (%R13,%YMM23,8),%YMM6{%K1} |
(172) 0x421800 VPADDQ %YMM0,%YMM7,%YMM23 |
(172) 0x421806 VPADDQ %YMM0,%YMM24,%YMM25 |
(172) 0x42180c KXNORW %K0,%K0,%K1 |
(172) 0x421810 VXORPD %XMM26,%XMM26,%XMM26 |
(172) 0x421816 VGATHERQPD (%RDI,%YMM25,8),%YMM26{%K1} |
(172) 0x42181d KXNORW %K0,%K0,%K1 |
(172) 0x421821 VXORPD %XMM25,%XMM25,%XMM25 |
(172) 0x421827 VPMULLQ %YMM2,%YMM15,%YMM27 |
(172) 0x42182d VGATHERQPD (%R13,%YMM23,8),%YMM25{%K1} |
(172) 0x421835 VPADDQ %YMM0,%YMM27,%YMM23 |
(172) 0x42183b KXNORW %K0,%K0,%K1 |
(172) 0x42183f VXORPD %XMM28,%XMM28,%XMM28 |
(172) 0x421845 VGATHERQPD (%RDI,%YMM23,8),%YMM28{%K1} |
(172) 0x42184c VPADDD %YMM12,%YMM5,%YMM23 |
(172) 0x421852 VPMOVSXDQ %XMM23,%YMM29 |
(172) 0x421858 VPADDQ %YMM29,%YMM7,%YMM7 |
(172) 0x42185e KXNORW %K0,%K0,%K1 |
(172) 0x421862 VXORPD %XMM30,%XMM30,%XMM30 |
(172) 0x421868 VGATHERQPD (%R13,%YMM7,8),%YMM30{%K1} |
(172) 0x421870 VEXTRACTI128 $0x1,%YMM5,%XMM5 |
(172) 0x421876 VPADDQ %YMM29,%YMM27,%YMM7 |
(172) 0x42187c KXNORW %K0,%K0,%K1 |
(172) 0x421880 VPXORD %XMM27,%XMM27,%XMM27 |
(172) 0x421886 VGATHERQPD (%RDI,%YMM7,8),%YMM27{%K1} |
(172) 0x42188d VPMOVSXDQ %XMM5,%YMM5 |
(172) 0x421892 KXNORW %K0,%K0,%K1 |
(172) 0x421896 VXORPS %XMM7,%XMM7,%XMM7 |
(172) 0x42189a VPMULLQ %YMM1,%YMM15,%YMM7 |
(172) 0x4218a0 VPADDQ %YMM5,%YMM4,%YMM31 |
(172) 0x4218a6 VXORPD %XMM19,%XMM19,%XMM19 |
(172) 0x4218ac VPADDQ %YMM5,%YMM7,%YMM13 |
(172) 0x4218b0 KXNORW %K0,%K0,%K2 |
(172) 0x4218b4 VPXOR %XMM12,%XMM12,%XMM12 |
(172) 0x4218b9 VEXTRACTI32X4 $0x1,%YMM22,%XMM22 |
(172) 0x4218c0 VGATHERQPD (%R13,%YMM31,8),%YMM19{%K1} |
(172) 0x4218c8 VPMOVSXDQ %XMM22,%YMM22 |
(172) 0x4218ce VPXORD %XMM31,%XMM31,%XMM31 |
(172) 0x4218d4 VPMULLQ %YMM22,%YMM14,%YMM31 |
(172) 0x4218da VPADDQ %YMM5,%YMM31,%YMM20 |
(172) 0x4218e0 VGATHERQPD (%RDI,%YMM13,8),%YMM12{%K2} |
(172) 0x4218e7 KXNORW %K0,%K0,%K1 |
(172) 0x4218eb VXORPD %XMM13,%XMM13,%XMM13 |
(172) 0x4218f0 VPMULLQ %YMM22,%YMM15,%YMM21 |
(172) 0x4218f6 VGATHERQPD (%R13,%YMM20,8),%YMM13{%K1} |
(172) 0x4218fe VPADDQ %YMM5,%YMM21,%YMM20 |
(172) 0x421904 KXNORW %K0,%K0,%K1 |
(172) 0x421908 VXORPD %XMM16,%XMM16,%XMM16 |
(172) 0x42190e VGATHERQPD (%RDI,%YMM20,8),%YMM16{%K1} |
(172) 0x421915 VEXTRACTI32X4 $0x1,%YMM23,%XMM20 |
(172) 0x42191c VPMOVSXDQ %XMM20,%YMM20 |
(172) 0x421922 VPADDQ %YMM20,%YMM4,%YMM4 |
(172) 0x421928 KXNORW %K0,%K0,%K1 |
(172) 0x42192c VPXORD %XMM23,%XMM23,%XMM23 |
(172) 0x421932 VGATHERQPD (%R13,%YMM4,8),%YMM23{%K1} |
(172) 0x42193a VPADDQ %YMM20,%YMM7,%YMM4 |
(172) 0x421940 KXNORW %K0,%K0,%K1 |
(172) 0x421944 VPXOR %XMM7,%XMM7,%XMM7 |
(172) 0x421948 VGATHERQPD (%RDI,%YMM4,8),%YMM7{%K1} |
(172) 0x42194f VPADDQ %YMM29,%YMM11,%YMM4 |
(172) 0x421955 KXNORW %K0,%K0,%K1 |
(172) 0x421959 VPXOR %XMM11,%XMM11,%XMM11 |
(172) 0x42195e VGATHERQPD (%R13,%YMM4,8),%YMM11{%K1} |
(172) 0x421966 VPADDQ %YMM29,%YMM24,%YMM4 |
(172) 0x42196c KXNORW %K0,%K0,%K1 |
(172) 0x421970 VPXORD %XMM24,%XMM24,%XMM24 |
(172) 0x421976 VGATHERQPD (%RDI,%YMM4,8),%YMM24{%K1} |
(172) 0x42197d VPADDQ %YMM20,%YMM31,%YMM4 |
(172) 0x421983 KXNORW %K0,%K0,%K1 |
(172) 0x421987 VPXORD %XMM29,%XMM29,%XMM29 |
(172) 0x42198d VGATHERQPD (%R13,%YMM4,8),%YMM29{%K1} |
(172) 0x421995 VMULPD %YMM19,%YMM12,%YMM4 |
(172) 0x42199b VFMADD213PD %YMM4,%YMM13,%YMM16 |
(172) 0x4219a1 VPADDQ %YMM20,%YMM21,%YMM4 |
(172) 0x4219a7 KXNORW %K0,%K0,%K1 |
(172) 0x4219ab VXORPD %XMM12,%XMM12,%XMM12 |
(172) 0x4219b0 VGATHERQPD (%RDI,%YMM4,8),%YMM12{%K1} |
(172) 0x4219b7 VMULPD %YMM25,%YMM28,%YMM4 |
(172) 0x4219bd VFMADD213PD %YMM4,%YMM6,%YMM26 |
(172) 0x4219c3 VFMADD213PD %YMM26,%YMM30,%YMM27 |
(172) 0x4219c9 VFMADD213PD %YMM16,%YMM23,%YMM7 |
(172) 0x4219cf VFMADD213PD %YMM27,%YMM11,%YMM24 |
(172) 0x4219d5 VFMADD213PD %YMM7,%YMM29,%YMM12 |
(172) 0x4219db VMOVDQU 0xa0(%RSP),%YMM6 |
(172) 0x4219e4 VXORPS %XMM4,%XMM4,%XMM4 |
(172) 0x4219e8 VPMULLQ %YMM3,%YMM6,%YMM4 |
(172) 0x4219ee VPMULLQ %YMM22,%YMM6,%YMM6 |
(172) 0x4219f4 VPADDQ %YMM5,%YMM6,%YMM6 |
(172) 0x4219f8 VPADDQ %YMM0,%YMM4,%YMM4 |
(172) 0x4219fc VPMULLQ %YMM1,%YMM17,%YMM1 |
(172) 0x421a02 VBROADCASTSD 0x42ae5(%RIP),%YMM11 |
(172) 0x421a0b VMULPD %YMM11,%YMM24,%YMM7 |
(172) 0x421a11 KXNORW %K0,%K0,%K1 |
(172) 0x421a15 KXNORW %K0,%K0,%K2 |
(172) 0x421a19 VPMULLQ %YMM2,%YMM17,%YMM2 |
(172) 0x421a1f VMULPD %YMM11,%YMM12,%YMM11 |
(172) 0x421a24 VPADDQ %YMM0,%YMM2,%YMM2 |
(172) 0x421a28 VSCATTERQPD %YMM7,(%R12,%YMM4,8){%K1} |
(172) 0x421a2f VPADDQ %YMM5,%YMM1,%YMM1 |
(172) 0x421a33 VSCATTERQPD %YMM11,(%R12,%YMM6,8){%K2} |
(172) 0x421a3a KXNORW %K0,%K0,%K1 |
(172) 0x421a3e VXORPD %XMM4,%XMM4,%XMM4 |
(172) 0x421a42 VXORPS %XMM6,%XMM6,%XMM6 |
(172) 0x421a46 VPMULLQ %YMM3,%YMM17,%YMM6 |
(172) 0x421a4c VGATHERQPD (%R14,%YMM1,8),%YMM4{%K1} |
(172) 0x421a53 KXNORW %K0,%K0,%K1 |
(172) 0x421a57 VXORPD %XMM1,%XMM1,%XMM1 |
(172) 0x421a5b VXORPS %XMM12,%XMM12,%XMM12 |
(172) 0x421a60 VPMULLQ %YMM22,%YMM17,%YMM12 |
(172) 0x421a66 VGATHERQPD (%R14,%YMM2,8),%YMM1{%K1} |
(172) 0x421a6d VPADDQ %YMM0,%YMM6,%YMM2 |
(172) 0x421a71 KXNORW %K0,%K0,%K1 |
(172) 0x421a75 VPXOR %XMM6,%XMM6,%XMM6 |
(172) 0x421a79 VGATHERQPD (%R14,%YMM2,8),%YMM6{%K1} |
(172) 0x421a80 VPADDQ %YMM5,%YMM12,%YMM2 |
(172) 0x421a84 KXNORW %K0,%K0,%K1 |
(172) 0x421a88 VPXOR %XMM12,%XMM12,%XMM12 |
(172) 0x421a8d VGATHERQPD (%R14,%YMM2,8),%YMM12{%K1} |
(172) 0x421a94 VSUBPD %YMM1,%YMM7,%YMM1 |
(172) 0x421a98 VXORPS %XMM2,%XMM2,%XMM2 |
(172) 0x421a9c VPMULLQ %YMM3,%YMM18,%YMM2 |
(172) 0x421aa2 VPADDQ %YMM0,%YMM2,%YMM0 |
(172) 0x421aa6 VADDPD %YMM6,%YMM1,%YMM1 |
(172) 0x421aaa KXNORW %K0,%K0,%K1 |
(172) 0x421aae VSCATTERQPD %YMM1,(%R15,%YMM0,8){%K1} |
(172) 0x421ab5 VSUBPD %YMM4,%YMM11,%YMM0 |
(172) 0x421ab9 VADDPD %YMM0,%YMM12,%YMM0 |
(172) 0x421abd VXORPS %XMM1,%XMM1,%XMM1 |
(172) 0x421ac1 VPMULLQ %YMM22,%YMM18,%YMM1 |
(172) 0x421ac7 VPADDQ %YMM5,%YMM1,%YMM1 |
(172) 0x421acb KXNORW %K0,%K0,%K1 |
(172) 0x421acf VSCATTERQPD %YMM0,(%R15,%YMM1,8){%K1} |
(172) 0x421ad6 VPBROADCASTQ 0x42bc1(%RIP),%YMM0 |
(172) 0x421adf VPADDQ %YMM0,%YMM10,%YMM10 |
(172) 0x421ae3 VPADDQ %YMM0,%YMM9,%YMM9 |
(172) 0x421ae7 ADD $0x8,%RBX |
(172) 0x421aeb CMP 0x38(%RSP),%RBX |
(172) 0x421af0 JB 421730 |
0x421af6 MOV 0x38(%RSP),%RAX |
0x421afb CMP %RAX,0x78(%RSP) |
0x421b00 MOV 0x70(%RSP),%RSI |
0x421b05 JNE 421b35 |
0x421b07 MOV $0x4807e0,%EDI |
0x421b0c MOV 0x30(%RSP),%ESI |
0x421b10 LEA -0x28(%RBP),%RSP |
0x421b14 POP %RBX |
0x421b15 POP %R12 |
0x421b17 POP %R13 |
0x421b19 POP %R14 |
0x421b1b POP %R15 |
0x421b1d POP %RBP |
0x421b1e VZEROUPPER |
0x421b21 JMP 403050 |
0x421b26 LEA -0x28(%RBP),%RSP |
0x421b2a POP %RBX |
0x421b2b POP %R12 |
0x421b2d POP %R13 |
0x421b2f POP %R14 |
0x421b31 POP %R15 |
0x421b33 POP %RBP |
0x421b34 RET |
0x421b35 ADD %RAX,%RSI |
0x421b38 JMP 421c32 |
0x421b3d NOPL (%RAX) |
(171) 0x421b40 MOV %RSI,%RAX |
(171) 0x421b43 CQTO |
(171) 0x421b45 IDIV %R9 |
(171) 0x421b48 MOV 0x20(%RSP),%RAX |
(171) 0x421b4d ADD %EAX,%EDX |
(171) 0x421b4f MOVSXD %EDX,%RAX |
(171) 0x421b52 LEA -0x1(%RCX),%EDX |
(171) 0x421b55 MOVSXD %EDX,%RDX |
(171) 0x421b58 MOV %RBX,%R8 |
(171) 0x421b5b IMUL %RDX,%R8 |
(171) 0x421b5f LEA (%R8,%RAX,1),%R9 |
(171) 0x421b63 MOV %R12,%R10 |
(171) 0x421b66 IMUL %RDX,%R10 |
(171) 0x421b6a LEA (%R10,%RAX,1),%R11 |
(171) 0x421b6e VMOVSD (%RDI,%R11,8),%XMM0 |
(171) 0x421b74 VMULSD (%R13,%R9,8),%XMM0,%XMM0 |
(171) 0x421b7b MOVSXD %ECX,%RCX |
(171) 0x421b7e MOV %RBX,%R9 |
(171) 0x421b81 IMUL %RCX,%R9 |
(171) 0x421b85 IMUL %RCX,%R12 |
(171) 0x421b89 LEA (%R12,%RAX,1),%RBX |
(171) 0x421b8d VMOVSD (%RDI,%RBX,8),%XMM1 |
(171) 0x421b92 LEA (%R9,%RAX,1),%RBX |
(171) 0x421b96 VFMADD132SD (%R13,%RBX,8),%XMM0,%XMM1 |
(171) 0x421b9d LEA -0x1(%R10,%RAX,1),%R10 |
(171) 0x421ba2 VMOVSD (%RDI,%R10,8),%XMM0 |
(171) 0x421ba8 LEA -0x1(%R8,%RAX,1),%R8 |
(171) 0x421bad VFMADD132SD (%R13,%R8,8),%XMM1,%XMM0 |
(171) 0x421bb4 LEA -0x1(%R12,%RAX,1),%R8 |
(171) 0x421bb9 VMOVSD (%RDI,%R8,8),%XMM1 |
(171) 0x421bbf LEA -0x1(%R9,%RAX,1),%R8 |
(171) 0x421bc4 VFMADD132SD (%R13,%R8,8),%XMM0,%XMM1 |
(171) 0x421bcb VMULSD 0x4291d(%RIP),%XMM1,%XMM0 |
(171) 0x421bd3 MOV 0x28(%RSP),%R8 |
(171) 0x421bd8 IMUL %RCX,%R8 |
(171) 0x421bdc ADD %RAX,%R8 |
(171) 0x421bdf MOV 0x90(%RSP),%R9 |
(171) 0x421be7 VMOVSD %XMM0,(%R9,%R8,8) |
(171) 0x421bed MOV 0x88(%RSP),%R8 |
(171) 0x421bf5 IMUL %R8,%RDX |
(171) 0x421bf9 ADD %RAX,%RDX |
(171) 0x421bfc VSUBSD (%R14,%RDX,8),%XMM0,%XMM0 |
(171) 0x421c02 MOV %R8,%RDX |
(171) 0x421c05 IMUL %RCX,%RDX |
(171) 0x421c09 ADD %RAX,%RDX |
(171) 0x421c0c VADDSD (%R14,%RDX,8),%XMM0,%XMM0 |
(171) 0x421c12 IMUL 0x80(%RSP),%RCX |
(171) 0x421c1b ADD %RAX,%RCX |
(171) 0x421c1e VMOVSD %XMM0,(%R15,%RCX,8) |
(171) 0x421c24 INC %RSI |
(171) 0x421c27 CMP 0x58(%RSP),%RSI |
(171) 0x421c2c JG 421b07 |
(171) 0x421c32 MOV %RSI,%R8 |
(171) 0x421c35 SHR $0x20,%R8 |
(171) 0x421c39 JE 421c50 |
(171) 0x421c3b MOV %RSI,%RAX |
(171) 0x421c3e XOR %EDX,%EDX |
(171) 0x421c40 MOV 0x50(%RSP),%R9 |
(171) 0x421c45 DIV %R9 |
(171) 0x421c48 MOV %RAX,%RCX |
(171) 0x421c4b JMP 421c5e |
0x421c4d NOPL (%RAX) |
(171) 0x421c50 MOV %ESI,%EAX |
(171) 0x421c52 XOR %EDX,%EDX |
(171) 0x421c54 MOV 0x50(%RSP),%R9 |
(171) 0x421c59 DIV %R9D |
(171) 0x421c5c MOV %EAX,%ECX |
(171) 0x421c5e MOV 0x48(%RSP),%RBX |
(171) 0x421c63 MOV 0x40(%RSP),%R12 |
(171) 0x421c68 ADD 0x1c(%RSP),%ECX |
(171) 0x421c6c TEST %R8,%R8 |
(171) 0x421c6f JNE 421b40 |
(171) 0x421c75 MOV %ESI,%EAX |
(171) 0x421c77 XOR %EDX,%EDX |
(171) 0x421c79 DIV %R9D |
(171) 0x421c7c JMP 421b48 |
0x421c81 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 123 |
nb uops | 125 |
loop length | 540 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 8 |
used zmm registers | 0 |
nb stack references | 28 |
micro-operation queue | 20.83 cycles |
front end | 20.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 14.00 | 14.00 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 14.00 |
cycles | 5.60 | 5.60 | 14.00 | 14.00 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.64 |
Stall cycles | 0.00 |
Front-end | 20.83 |
Dispatch | 16.00 |
Overall L1 | 20.83 |
all | 11% |
load | 16% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 15% |
load | 17% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 421b26 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4807c0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 421b07 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x597> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x20(%RSP),%ECX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 421c32 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RBX,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x42f5f(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x42dd7(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x78(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 421b35 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4807e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 421c32 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 123 |
nb uops | 125 |
loop length | 540 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 8 |
used zmm registers | 0 |
nb stack references | 28 |
micro-operation queue | 20.83 cycles |
front end | 20.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 14.00 | 14.00 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 14.00 |
cycles | 5.60 | 5.60 | 14.00 | 14.00 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.64 |
Stall cycles | 0.00 |
Front-end | 20.83 |
Dispatch | 16.00 |
Overall L1 | 20.83 |
all | 11% |
load | 16% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 15% |
load | 17% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 421b26 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4807c0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 421b07 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x597> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x20(%RSP),%ECX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 421c32 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c2> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RBX,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x42f5f(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x42dd7(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x78(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 421b35 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5c5> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4807e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 421c32 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12– | 0.82 | 1.16 |
○Loop 172 - advec_mom.cpp:167-172 - exec | 0.82 | 1.16 |
○Loop 171 - advec_mom.cpp:167-172 - exec | 0 | 0 |