Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:117-125 [...] | Coverage: 0.9% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:117-125 [...] | Coverage: 0.9% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 117 - 125 |
-------------------------------------------------------------------------------- |
117: #pragma omp parallel for simd collapse(2) |
118: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
119: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
120: double pre_mass_s = density1(i, j) * pre_vol(i, j); |
121: double post_mass_s = pre_mass_s + mass_flux_x(i, j) - mass_flux_x(i + 1, j + 0); |
122: double post_ener_s = (energy1(i, j) * pre_mass_s + ener_flux(i, j) - ener_flux(i + 1, j + 0)) / post_mass_s; |
123: double advec_vol_s = pre_vol(i, j) + vol_flux_x(i, j) - vol_flux_x(i + 1, j + 0); |
124: density1(i, j) = post_mass_s / advec_vol_s; |
125: energy1(i, j) = post_ener_s; |
0x41e500 PUSH %RBP |
0x41e501 MOV %RSP,%RBP |
0x41e504 PUSH %R15 |
0x41e506 PUSH %R14 |
0x41e508 PUSH %R13 |
0x41e50a PUSH %R12 |
0x41e50c PUSH %RBX |
0x41e50d AND $-0x20,%RSP |
0x41e511 SUB $0xe0,%RSP |
0x41e518 MOV %RCX,%RSI |
0x41e51b MOV 0x40(%RBP),%RAX |
0x41e51f MOV 0x30(%RBP),%R15 |
0x41e523 MOV 0x28(%RBP),%R10 |
0x41e527 MOV 0x20(%RBP),%RCX |
0x41e52b MOV %RCX,0x58(%RSP) |
0x41e530 MOV 0x18(%RBP),%RBX |
0x41e534 MOV 0x10(%RBP),%R14 |
0x41e538 MOVL $0,0x34(%RSP) |
0x41e540 TEST %RAX,%RAX |
0x41e543 JS 41ea99 |
0x41e549 MOV %R9,%R12 |
0x41e54c MOV %RDX,%R13 |
0x41e54f MOV %R8,0x10(%RSP) |
0x41e554 MOV %RSI,0x18(%RSP) |
0x41e559 MOV (%RDI),%ESI |
0x41e55b MOVQ $0,0x68(%RSP) |
0x41e564 MOV %RAX,0x60(%RSP) |
0x41e569 MOVQ $0x1,0x98(%RSP) |
0x41e575 SUB $0x8,%RSP |
0x41e579 LEA 0xa0(%RSP),%RAX |
0x41e581 LEA 0x3c(%RSP),%RCX |
0x41e586 LEA 0x70(%RSP),%R8 |
0x41e58b LEA 0x68(%RSP),%R9 |
0x41e590 MOV $0x480580,%EDI |
0x41e595 MOV %ESI,0x38(%RSP) |
0x41e599 MOV $0x22,%EDX |
0x41e59e PUSH $0x1 |
0x41e5a0 PUSH $0x1 |
0x41e5a2 PUSH %RAX |
0x41e5a3 MOV %R10,0x58(%RSP) |
0x41e5a8 CALL 4031e0 <__kmpc_for_static_init_8@plt> |
0x41e5ad MOV 0x58(%RSP),%R11 |
0x41e5b2 ADD $0x20,%RSP |
0x41e5b6 MOV 0x68(%RSP),%RSI |
0x41e5bb MOV 0x60(%RSP),%RAX |
0x41e5c0 MOV %RAX,0x50(%RSP) |
0x41e5c5 CMP %RAX,%RSI |
0x41e5c8 JA 41ea7a |
0x41e5ce SUB %R11D,%R15D |
0x41e5d1 MOV (%R13),%R9 |
0x41e5d5 MOV 0x10(%R13),%R13 |
0x41e5d9 MOV (%R14),%RAX |
0x41e5dc MOV %RAX,0x20(%RSP) |
0x41e5e1 MOV 0x10(%R14),%RAX |
0x41e5e5 MOV %RAX,0x28(%RSP) |
0x41e5ea MOV 0x10(%RSP),%RAX |
0x41e5ef MOV (%RAX),%RCX |
0x41e5f2 MOV %RCX,0x48(%RSP) |
0x41e5f7 MOV %R12,%R10 |
0x41e5fa MOV 0x10(%RAX),%R12 |
0x41e5fe MOV 0x18(%RSP),%RAX |
0x41e603 MOV (%RAX),%RCX |
0x41e606 MOV %RCX,0x10(%RSP) |
0x41e60b MOV 0x10(%RAX),%R14 |
0x41e60f MOV (%RBX),%RDX |
0x41e612 MOV 0x10(%RBX),%RDI |
0x41e616 LEA 0x1(%RSI),%RAX |
0x41e61a MOV 0x50(%RSP),%RCX |
0x41e61f LEA 0x1(%RCX),%R8 |
0x41e623 CMP %R8,%RAX |
0x41e626 CMOVG %RAX,%R8 |
0x41e62a MOV (%R10),%RCX |
0x41e62d MOV 0x10(%R10),%RBX |
0x41e631 SUB %RSI,%R8 |
0x41e634 MOV $-0x8,%EAX |
0x41e639 MOV %R8,0x80(%RSP) |
0x41e641 AND %R8,%RAX |
0x41e644 MOV %R9,%R10 |
0x41e647 MOV %R9,0x40(%RSP) |
0x41e64c MOV 0x20(%RSP),%R9 |
0x41e651 MOV %RDX,0x18(%RSP) |
0x41e656 MOV %RCX,0x90(%RSP) |
0x41e65e JE 41eb75 |
0x41e664 MOV %RAX,%R8 |
0x41e667 MOV %R15,0x78(%RSP) |
0x41e66c VPBROADCASTQ %R15,%YMM8 |
0x41e672 MOV 0x58(%RSP),%RAX |
0x41e677 VPBROADCASTQ %RAX,%YMM12 |
0x41e67d VPBROADCASTD %R11D,%YMM0 |
0x41e683 VMOVDQU %YMM0,0xa0(%RSP) |
0x41e68c VPBROADCASTQ %R10,%YMM14 |
0x41e692 VPBROADCASTQ %R9,%YMM15 |
0x41e698 MOV 0x48(%RSP),%RAX |
0x41e69d VPBROADCASTQ %RAX,%YMM16 |
0x41e6a3 MOV 0x10(%RSP),%RAX |
0x41e6a8 VPBROADCASTQ %RAX,%YMM17 |
0x41e6ae VPBROADCASTQ %RDX,%YMM18 |
0x41e6b4 MOV %RSI,0x70(%RSP) |
0x41e6b9 VPBROADCASTQ %RSI,%YMM0 |
0x41e6bf VPADDQ 0x45fb9(%RIP),%YMM0,%YMM9 |
0x41e6c7 VPADDQ 0x45e31(%RIP),%YMM0,%YMM10 |
0x41e6cf VPBROADCASTQ %RCX,%YMM19 |
0x41e6d5 XOR %ESI,%ESI |
0x41e6d7 MOV %RBX,0x88(%RSP) |
0x41e6df NOP |
(160) 0x41e6e0 VMOVDQA %YMM10,%YMM0 |
(160) 0x41e6e4 VMOVDQA %YMM8,%YMM1 |
(160) 0x41e6e8 MOV %RDI,%RBX |
(160) 0x41e6eb MOV %R14,%RDI |
(160) 0x41e6ee MOV %R12,%R14 |
(160) 0x41e6f1 MOV %R13,%R12 |
(160) 0x41e6f4 MOV %R8,%R13 |
(160) 0x41e6f7 MOV $0x452aa0,%R15 |
(160) 0x41e6fe CALL %R15 |
(160) 0x41e701 VMOVDQA %YMM0,%YMM11 |
(160) 0x41e705 VMOVDQA %YMM9,%YMM0 |
(160) 0x41e709 VMOVDQA %YMM8,%YMM1 |
(160) 0x41e70d CALL %R15 |
(160) 0x41e710 VPADDQ %YMM12,%YMM0,%YMM21 |
(160) 0x41e716 VPADDQ %YMM12,%YMM11,%YMM22 |
(160) 0x41e71c VMOVDQA %YMM10,%YMM0 |
(160) 0x41e720 VMOVDQA %YMM8,%YMM1 |
(160) 0x41e724 MOV $0x452870,%R15 |
(160) 0x41e72b CALL %R15 |
(160) 0x41e72e VMOVDQA %YMM0,%YMM11 |
(160) 0x41e732 VMOVDQA %YMM9,%YMM0 |
(160) 0x41e736 VMOVDQA %YMM8,%YMM1 |
(160) 0x41e73a CALL %R15 |
(160) 0x41e73d MOV %R13,%R8 |
(160) 0x41e740 MOV %R12,%R13 |
(160) 0x41e743 MOV %R14,%R12 |
(160) 0x41e746 MOV %RDI,%R14 |
(160) 0x41e749 MOV %RBX,%RDI |
(160) 0x41e74c MOV 0x88(%RSP),%RBX |
(160) 0x41e754 VPMOVQD %YMM11,%XMM1 |
(160) 0x41e75a VPMOVQD %YMM0,%XMM0 |
(160) 0x41e760 VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(160) 0x41e766 VPADDD 0xa0(%RSP),%YMM0,%YMM11 |
(160) 0x41e76f VPSLLQ $0x20,%YMM22,%YMM0 |
(160) 0x41e776 VPSRAQ $0x20,%YMM0,%YMM0 |
(160) 0x41e77d VPSLLQ $0x20,%YMM21,%YMM1 |
(160) 0x41e784 VPSRAQ $0x20,%YMM1,%YMM1 |
(160) 0x41e78b VXORPS %XMM2,%XMM2,%XMM2 |
(160) 0x41e78f VPMULLQ %YMM1,%YMM15,%YMM2 |
(160) 0x41e795 VXORPS %XMM4,%XMM4,%XMM4 |
(160) 0x41e799 VPMULLQ %YMM0,%YMM15,%YMM4 |
(160) 0x41e79f VEXTRACTI128 $0x1,%YMM11,%XMM3 |
(160) 0x41e7a5 VPMOVSXDQ %XMM3,%YMM3 |
(160) 0x41e7aa VPADDQ %YMM3,%YMM2,%YMM5 |
(160) 0x41e7ae KXNORW %K0,%K0,%K1 |
(160) 0x41e7b2 VPXOR %XMM2,%XMM2,%XMM2 |
(160) 0x41e7b6 MOV 0x28(%RSP),%RCX |
(160) 0x41e7bb VGATHERQPD (%RCX,%YMM5,8),%YMM2{%K1} |
(160) 0x41e7c2 VPMOVSXDQ %XMM11,%YMM5 |
(160) 0x41e7c7 VPADDQ %YMM5,%YMM4,%YMM6 |
(160) 0x41e7cb KXNORW %K0,%K0,%K1 |
(160) 0x41e7cf VPXOR %XMM4,%XMM4,%XMM4 |
(160) 0x41e7d3 VGATHERQPD (%RCX,%YMM6,8),%YMM4{%K1} |
(160) 0x41e7da VPXORD %XMM21,%XMM21,%XMM21 |
(160) 0x41e7e0 VPMULLQ %YMM0,%YMM16,%YMM21 |
(160) 0x41e7e6 VPXORD %XMM22,%XMM22,%XMM22 |
(160) 0x41e7ec VPMULLQ %YMM1,%YMM16,%YMM22 |
(160) 0x41e7f2 VPADDQ %YMM5,%YMM21,%YMM6 |
(160) 0x41e7f8 KXNORW %K0,%K0,%K1 |
(160) 0x41e7fc VXORPD %XMM7,%XMM7,%XMM7 |
(160) 0x41e800 VGATHERQPD (%R12,%YMM6,8),%YMM7{%K1} |
(160) 0x41e807 VPADDQ %YMM3,%YMM22,%YMM23 |
(160) 0x41e80d KXNORW %K0,%K0,%K1 |
(160) 0x41e811 VXORPD %XMM6,%XMM6,%XMM6 |
(160) 0x41e815 VGATHERQPD (%R12,%YMM23,8),%YMM6{%K1} |
(160) 0x41e81c VPCMPEQD %YMM13,%YMM13,%YMM13 |
(160) 0x41e821 VPSUBD %YMM13,%YMM11,%YMM23 |
(160) 0x41e827 VEXTRACTI32X4 $0x1,%YMM23,%XMM11 |
(160) 0x41e82e VPMOVSXDQ %XMM11,%YMM24 |
(160) 0x41e834 VPADDQ %YMM24,%YMM22,%YMM22 |
(160) 0x41e83a KXNORW %K0,%K0,%K1 |
(160) 0x41e83e VPXOR %XMM11,%XMM11,%XMM11 |
(160) 0x41e843 VGATHERQPD (%R12,%YMM22,8),%YMM11{%K1} |
(160) 0x41e84a VPMOVSXDQ %XMM23,%YMM22 |
(160) 0x41e850 VPADDQ %YMM22,%YMM21,%YMM23 |
(160) 0x41e856 KXNORW %K0,%K0,%K1 |
(160) 0x41e85a VPXORD %XMM21,%XMM21,%XMM21 |
(160) 0x41e860 VGATHERQPD (%R12,%YMM23,8),%YMM21{%K1} |
(160) 0x41e867 VPXORD %XMM23,%XMM23,%XMM23 |
(160) 0x41e86d VPMULLQ %YMM0,%YMM18,%YMM23 |
(160) 0x41e873 VPMULLQ %YMM1,%YMM18,%YMM25 |
(160) 0x41e879 VPADDQ %YMM5,%YMM23,%YMM26 |
(160) 0x41e87f KXNORW %K0,%K0,%K1 |
(160) 0x41e883 VXORPD %XMM27,%XMM27,%XMM27 |
(160) 0x41e889 VGATHERQPD (%RDI,%YMM26,8),%YMM27{%K1} |
(160) 0x41e890 VPADDQ %YMM3,%YMM25,%YMM26 |
(160) 0x41e896 KXNORW %K0,%K0,%K1 |
(160) 0x41e89a VXORPD %XMM28,%XMM28,%XMM28 |
(160) 0x41e8a0 VGATHERQPD (%RDI,%YMM26,8),%YMM28{%K1} |
(160) 0x41e8a7 VPADDQ %YMM24,%YMM25,%YMM25 |
(160) 0x41e8ad KXNORW %K0,%K0,%K1 |
(160) 0x41e8b1 VXORPD %XMM26,%XMM26,%XMM26 |
(160) 0x41e8b7 VGATHERQPD (%RDI,%YMM25,8),%YMM26{%K1} |
(160) 0x41e8be VPADDQ %YMM22,%YMM23,%YMM23 |
(160) 0x41e8c4 KXNORW %K0,%K0,%K1 |
(160) 0x41e8c8 VXORPD %XMM25,%XMM25,%XMM25 |
(160) 0x41e8ce VGATHERQPD (%RDI,%YMM23,8),%YMM25{%K1} |
(160) 0x41e8d5 VPXORD %XMM23,%XMM23,%XMM23 |
(160) 0x41e8db VPMULLQ %YMM0,%YMM19,%YMM23 |
(160) 0x41e8e1 VPMULLQ %YMM1,%YMM19,%YMM29 |
(160) 0x41e8e7 VPADDQ %YMM5,%YMM23,%YMM30 |
(160) 0x41e8ed KXNORW %K0,%K0,%K1 |
(160) 0x41e8f1 VXORPD %XMM31,%XMM31,%XMM31 |
(160) 0x41e8f7 VGATHERQPD (%RBX,%YMM30,8),%YMM31{%K1} |
(160) 0x41e8fe VPADDQ %YMM3,%YMM29,%YMM30 |
(160) 0x41e904 KXNORW %K0,%K0,%K1 |
(160) 0x41e908 VPXOR %XMM13,%XMM13,%XMM13 |
(160) 0x41e90d VGATHERQPD (%RBX,%YMM30,8),%YMM13{%K1} |
(160) 0x41e914 VPXORD %XMM30,%XMM30,%XMM30 |
(160) 0x41e91a VPMULLQ %YMM1,%YMM14,%YMM30 |
(160) 0x41e920 VPMULLQ %YMM0,%YMM14,%YMM20 |
(160) 0x41e926 VPADDQ %YMM24,%YMM29,%YMM24 |
(160) 0x41e92c KXNORW %K0,%K0,%K1 |
(160) 0x41e930 VPXORD %XMM29,%XMM29,%XMM29 |
(160) 0x41e936 VGATHERQPD (%RBX,%YMM24,8),%YMM29{%K1} |
(160) 0x41e93d VPADDQ %YMM3,%YMM30,%YMM24 |
(160) 0x41e943 KXNORW %K0,%K0,%K1 |
(160) 0x41e947 VPXORD %XMM30,%XMM30,%XMM30 |
(160) 0x41e94d VGATHERQPD (%R13,%YMM24,8),%YMM30{%K1} |
(160) 0x41e955 VPADDQ %YMM22,%YMM23,%YMM22 |
(160) 0x41e95b KXNORW %K0,%K0,%K1 |
(160) 0x41e95f VPXORD %XMM23,%XMM23,%XMM23 |
(160) 0x41e965 VGATHERQPD (%RBX,%YMM22,8),%YMM23{%K1} |
(160) 0x41e96c VPADDQ %YMM5,%YMM20,%YMM20 |
(160) 0x41e972 KXNORW %K0,%K0,%K1 |
(160) 0x41e976 VXORPD %XMM22,%XMM22,%XMM22 |
(160) 0x41e97c VGATHERQPD (%R13,%YMM20,8),%YMM22{%K1} |
(160) 0x41e984 VPMULLQ %YMM0,%YMM17,%YMM0 |
(160) 0x41e98a VPMULLQ %YMM1,%YMM17,%YMM1 |
(160) 0x41e990 VPADDQ %YMM5,%YMM0,%YMM0 |
(160) 0x41e994 KXNORW %K0,%K0,%K1 |
(160) 0x41e998 VPXOR %XMM5,%XMM5,%XMM5 |
(160) 0x41e99c VGATHERQPD (%R14,%YMM0,8),%YMM5{%K1} |
(160) 0x41e9a3 VPADDQ %YMM3,%YMM1,%YMM1 |
(160) 0x41e9a7 KXNORW %K0,%K0,%K1 |
(160) 0x41e9ab VPXOR %XMM3,%XMM3,%XMM3 |
(160) 0x41e9af VGATHERQPD (%R14,%YMM1,8),%YMM3{%K1} |
(160) 0x41e9b6 VMULPD %YMM22,%YMM4,%YMM22 |
(160) 0x41e9bc VADDPD %YMM22,%YMM7,%YMM7 |
(160) 0x41e9c2 VSUBPD %YMM21,%YMM7,%YMM7 |
(160) 0x41e9c8 VADDPD %YMM4,%YMM31,%YMM4 |
(160) 0x41e9ce VSUBPD %YMM23,%YMM4,%YMM4 |
(160) 0x41e9d4 VDIVPD %YMM4,%YMM7,%YMM4 |
(160) 0x41e9d8 KXNORW %K0,%K0,%K1 |
(160) 0x41e9dc VSCATTERQPD %YMM4,(%R13,%YMM20,8){%K1} |
(160) 0x41e9e4 VMULPD %YMM30,%YMM2,%YMM4 |
(160) 0x41e9ea VADDPD %YMM4,%YMM6,%YMM6 |
(160) 0x41e9ee VSUBPD %YMM11,%YMM6,%YMM6 |
(160) 0x41e9f3 VADDPD %YMM2,%YMM13,%YMM2 |
(160) 0x41e9f7 VSUBPD %YMM29,%YMM2,%YMM2 |
(160) 0x41e9fd VDIVPD %YMM2,%YMM6,%YMM2 |
(160) 0x41ea01 KXNORW %K0,%K0,%K1 |
(160) 0x41ea05 VSCATTERQPD %YMM2,(%R13,%YMM24,8){%K1} |
(160) 0x41ea0d VFMADD231PD %YMM5,%YMM22,%YMM27 |
(160) 0x41ea13 VSUBPD %YMM25,%YMM27,%YMM2 |
(160) 0x41ea19 VDIVPD %YMM7,%YMM2,%YMM2 |
(160) 0x41ea1d KXNORW %K0,%K0,%K1 |
(160) 0x41ea21 VSCATTERQPD %YMM2,(%R14,%YMM0,8){%K1} |
(160) 0x41ea28 VFMADD231PD %YMM3,%YMM4,%YMM28 |
(160) 0x41ea2e VSUBPD %YMM26,%YMM28,%YMM0 |
(160) 0x41ea34 VDIVPD %YMM6,%YMM0,%YMM0 |
(160) 0x41ea38 KXNORW %K0,%K0,%K1 |
(160) 0x41ea3c VSCATTERQPD %YMM0,(%R14,%YMM1,8){%K1} |
(160) 0x41ea43 VPBROADCASTQ 0x45c54(%RIP),%YMM0 |
(160) 0x41ea4c VPADDQ %YMM0,%YMM10,%YMM10 |
(160) 0x41ea50 VPADDQ %YMM0,%YMM9,%YMM9 |
(160) 0x41ea54 ADD $0x8,%RSI |
(160) 0x41ea58 CMP %R8,%RSI |
(160) 0x41ea5b JB 41e6e0 |
0x41ea61 CMP %R8,0x80(%RSP) |
0x41ea69 MOV 0x78(%RSP),%R15 |
0x41ea6e MOV 0x38(%RSP),%R11 |
0x41ea73 MOV 0x70(%RSP),%RSI |
0x41ea78 JNE 41eaa8 |
0x41ea7a MOV $0x4805a0,%EDI |
0x41ea7f MOV 0x30(%RSP),%ESI |
0x41ea83 LEA -0x28(%RBP),%RSP |
0x41ea87 POP %RBX |
0x41ea88 POP %R12 |
0x41ea8a POP %R13 |
0x41ea8c POP %R14 |
0x41ea8e POP %R15 |
0x41ea90 POP %RBP |
0x41ea91 VZEROUPPER |
0x41ea94 JMP 403050 |
0x41ea99 LEA -0x28(%RBP),%RSP |
0x41ea9d POP %RBX |
0x41ea9e POP %R12 |
0x41eaa0 POP %R13 |
0x41eaa2 POP %R14 |
0x41eaa4 POP %R15 |
0x41eaa6 POP %RBP |
0x41eaa7 RET |
0x41eaa8 ADD %R8,%RSI |
0x41eaab JMP 41eb75 |
(159) 0x41eab0 MOV %RSI,%RAX |
(159) 0x41eab3 CQTO |
(159) 0x41eab5 IDIV %R15 |
(159) 0x41eab8 MOV 0x40(%RSP),%R8 |
(159) 0x41eabd ADD %R11D,%EDX |
(159) 0x41eac0 MOVSXD %EDX,%RAX |
(159) 0x41eac3 MOVSXD %ECX,%RDX |
(159) 0x41eac6 MOV %R8,%RCX |
(159) 0x41eac9 IMUL %RDX,%RCX |
(159) 0x41eacd ADD %RAX,%RCX |
(159) 0x41ead0 MOV %R9,%R8 |
(159) 0x41ead3 IMUL %RDX,%R8 |
(159) 0x41ead7 ADD %RAX,%R8 |
(159) 0x41eada VMOVSD (%R10,%R8,8),%XMM0 |
(159) 0x41eae0 VMULSD (%R13,%RCX,8),%XMM0,%XMM1 |
(159) 0x41eae7 MOV 0x48(%RSP),%R8 |
(159) 0x41eaec IMUL %RDX,%R8 |
(159) 0x41eaf0 LEA (%R8,%RAX,1),%R9 |
(159) 0x41eaf4 VADDSD (%R12,%R9,8),%XMM1,%XMM2 |
(159) 0x41eafa LEA 0x1(%R8,%RAX,1),%R8 |
(159) 0x41eaff VSUBSD (%R12,%R8,8),%XMM2,%XMM2 |
(159) 0x41eb05 MOV 0x10(%RSP),%R8 |
(159) 0x41eb0a IMUL %RDX,%R8 |
(159) 0x41eb0e ADD %RAX,%R8 |
(159) 0x41eb11 VMOVSD (%R14,%R8,8),%XMM3 |
(159) 0x41eb17 MOV 0x18(%RSP),%R9 |
(159) 0x41eb1c IMUL %RDX,%R9 |
(159) 0x41eb20 LEA (%R9,%RAX,1),%R10 |
(159) 0x41eb24 VFMADD213SD (%RDI,%R10,8),%XMM1,%XMM3 |
(159) 0x41eb2a LEA 0x1(%R9,%RAX,1),%R9 |
(159) 0x41eb2f VSUBSD (%RDI,%R9,8),%XMM3,%XMM1 |
(159) 0x41eb35 IMUL 0x90(%RSP),%RDX |
(159) 0x41eb3e LEA (%RDX,%RAX,1),%R9 |
(159) 0x41eb42 VADDSD (%RBX,%R9,8),%XMM0,%XMM0 |
(159) 0x41eb48 LEA 0x1(%RDX,%RAX,1),%RAX |
(159) 0x41eb4d VSUBSD (%RBX,%RAX,8),%XMM0,%XMM0 |
(159) 0x41eb52 VDIVSD %XMM0,%XMM2,%XMM0 |
(159) 0x41eb56 VMOVSD %XMM0,(%R13,%RCX,8) |
(159) 0x41eb5d VDIVSD %XMM2,%XMM1,%XMM0 |
(159) 0x41eb61 VMOVSD %XMM0,(%R14,%R8,8) |
(159) 0x41eb67 INC %RSI |
(159) 0x41eb6a CMP 0x50(%RSP),%RSI |
(159) 0x41eb6f JG 41ea7a |
(159) 0x41eb75 MOV %RSI,%R8 |
(159) 0x41eb78 SHR $0x20,%R8 |
(159) 0x41eb7c JE 41eb90 |
(159) 0x41eb7e MOV %RSI,%RAX |
(159) 0x41eb81 XOR %EDX,%EDX |
(159) 0x41eb83 DIV %R15 |
(159) 0x41eb86 MOV %RAX,%RCX |
(159) 0x41eb89 JMP 41eb99 |
0x41eb8b NOPL (%RAX,%RAX,1) |
(159) 0x41eb90 MOV %ESI,%EAX |
(159) 0x41eb92 XOR %EDX,%EDX |
(159) 0x41eb94 DIV %R15D |
(159) 0x41eb97 MOV %EAX,%ECX |
(159) 0x41eb99 MOV 0x20(%RSP),%R9 |
(159) 0x41eb9e MOV 0x28(%RSP),%R10 |
(159) 0x41eba3 ADD 0x58(%RSP),%RCX |
(159) 0x41eba8 TEST %R8,%R8 |
(159) 0x41ebab JNE 41eab0 |
(159) 0x41ebb1 MOV %ESI,%EAX |
(159) 0x41ebb3 XOR %EDX,%EDX |
(159) 0x41ebb5 DIV %R15D |
(159) 0x41ebb8 JMP 41eab8 |
0x41ebbd NOPL (%RAX) |
Path / |
Source file and lines | advec_cell.cpp:117-125 |
Module | exec |
nb instructions | 133 |
nb uops | 135 |
loop length | 567 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 11 |
used zmm registers | 0 |
nb stack references | 28 |
micro-operation queue | 22.50 cycles |
front end | 22.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 16.33 | 16.33 | 16.00 | 10.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 16.33 |
cycles | 5.60 | 5.60 | 16.33 | 16.33 | 16.00 | 10.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 16.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.29 |
Stall cycles | 0.00 |
Front-end | 22.50 |
Dispatch | 16.33 |
Overall L1 | 22.50 |
all | 8% |
load | 18% |
store | 4% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 14% |
load | 19% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xe0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41ea99 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x599> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x480580,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R10,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41ea7a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x57a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %R11D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%R10),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R10),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41eb75 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x675> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R15,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %R11D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x45fb9(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x45e31(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,0x80(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x78(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41eaa8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x5a8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4805a0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %R8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41eb75 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x675> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:117-125 |
Module | exec |
nb instructions | 133 |
nb uops | 135 |
loop length | 567 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 11 |
used zmm registers | 0 |
nb stack references | 28 |
micro-operation queue | 22.50 cycles |
front end | 22.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 16.33 | 16.33 | 16.00 | 10.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 16.33 |
cycles | 5.60 | 5.60 | 16.33 | 16.33 | 16.00 | 10.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 16.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.29 |
Stall cycles | 0.00 |
Front-end | 22.50 |
Dispatch | 16.33 |
Overall L1 | 22.50 |
all | 8% |
load | 18% |
store | 4% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 14% |
load | 19% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xe0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41ea99 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x599> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x480580,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R10,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41ea7a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x57a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
SUB %R11D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV (%R10),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R10),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41eb75 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x675> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R15,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %R11D,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R10,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x45fb9(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x45e31(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPBROADCASTQ %RCX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R8,0x80(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x78(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41eaa8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x5a8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x4805a0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %R8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 41eb75 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22+0x675> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.22– | 0.9 | 1.27 |
○Loop 160 - advec_cell.cpp:117-125 - exec | 0.9 | 1.26 |
○Loop 159 - advec_cell.cpp:117-125 - exec | 0 | 0 |