Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage: 0.8% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:95-100 [...] | Coverage: 0.8% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 95 - 100 |
-------------------------------------------------------------------------------- |
95: #pragma omp parallel for simd collapse(2) |
96: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
97: for (int i = (x_min - 1 + 1); i < (x_max + 2 + 2); i++) { |
98: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
99: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
100: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i - 1, j + 0) + node_flux(i, j); |
0x422d50 PUSH %RBP |
0x422d51 MOV %RSP,%RBP |
0x422d54 PUSH %R15 |
0x422d56 PUSH %R14 |
0x422d58 PUSH %R13 |
0x422d5a PUSH %R12 |
0x422d5c PUSH %RBX |
0x422d5d AND $-0x20,%RSP |
0x422d61 SUB $0xe0,%RSP |
0x422d68 MOV %RCX,%R12 |
0x422d6b MOV %RDX,%R13 |
0x422d6e MOV 0x38(%RBP),%RAX |
0x422d72 MOV 0x28(%RBP),%R15 |
0x422d76 MOV 0x20(%RBP),%RCX |
0x422d7a MOV 0x10(%RBP),%RBX |
0x422d7e MOV 0x18(%RBP),%EDX |
0x422d81 MOV %EDX,0x4(%RSP) |
0x422d85 MOVL $0,0x1c(%RSP) |
0x422d8d TEST %RAX,%RAX |
0x422d90 JS 4232dd |
0x422d96 MOV %R9,%R14 |
0x422d99 MOV %R8,0x8(%RSP) |
0x422d9e MOV %RCX,0x10(%RSP) |
0x422da3 MOV (%RDI),%ESI |
0x422da5 MOVQ $0,0x40(%RSP) |
0x422dae MOV %RAX,0x38(%RSP) |
0x422db3 MOVQ $0x1,0x78(%RSP) |
0x422dbc SUB $0x8,%RSP |
0x422dc0 LEA 0x80(%RSP),%RAX |
0x422dc8 LEA 0x24(%RSP),%RCX |
0x422dcd LEA 0x48(%RSP),%R8 |
0x422dd2 LEA 0x40(%RSP),%R9 |
0x422dd7 MOV $0x480940,%EDI |
0x422ddc MOV %ESI,0x20(%RSP) |
0x422de0 MOV $0x22,%EDX |
0x422de5 PUSH $0x1 |
0x422de7 PUSH $0x1 |
0x422de9 PUSH %RAX |
0x422dea CALL 4031e0 <__kmpc_for_static_init_8@plt> |
0x422def ADD $0x20,%RSP |
0x422df3 MOV 0x40(%RSP),%RSI |
0x422df8 MOV 0x38(%RSP),%RAX |
0x422dfd MOV %RAX,0x30(%RSP) |
0x422e02 CMP %RAX,%RSI |
0x422e05 JA 4232be |
0x422e0b MOV 0x10(%RSP),%RCX |
0x422e10 SUB %ECX,%R15D |
0x422e13 MOV (%R13),%R8 |
0x422e17 MOV 0x10(%R13),%R13 |
0x422e1b MOV (%RBX),%RDI |
0x422e1e MOV %R12,%RDX |
0x422e21 MOV 0x10(%RBX),%R12 |
0x422e25 MOV 0x8(%RSP),%RAX |
0x422e2a MOV (%RAX),%R9 |
0x422e2d MOV 0x10(%RAX),%RAX |
0x422e31 MOV %RAX,0x8(%RSP) |
0x422e36 MOV (%RDX),%R10 |
0x422e39 MOV 0x10(%RDX),%RBX |
0x422e3d MOV (%R14),%R11 |
0x422e40 MOV 0x10(%R14),%R14 |
0x422e44 LEA 0x1(%RSI),%RAX |
0x422e48 MOV 0x30(%RSP),%RDX |
0x422e4d INC %RDX |
0x422e50 CMP %RDX,%RAX |
0x422e53 CMOVG %RAX,%RDX |
0x422e57 SUB %RSI,%RDX |
0x422e5a MOV $-0x8,%EAX |
0x422e5f AND %RDX,%RAX |
0x422e62 MOV %R8,0x28(%RSP) |
0x422e67 MOV %RDI,0x70(%RSP) |
0x422e6c MOV %R9,0x68(%RSP) |
0x422e71 MOV %R10,0x60(%RSP) |
0x422e76 MOV %R11,0x58(%RSP) |
0x422e7b JE 4233e5 |
0x422e81 MOV %RDX,0x48(%RSP) |
0x422e86 MOV %R15,0x50(%RSP) |
0x422e8b VPBROADCASTQ %R15,%YMM8 |
0x422e91 MOV %RAX,0x20(%RSP) |
0x422e96 MOV 0x4(%RSP),%EAX |
0x422e9a VPBROADCASTD %EAX,%YMM0 |
0x422ea0 VMOVDQU %YMM0,0xa0(%RSP) |
0x422ea9 VPBROADCASTD %ECX,%YMM0 |
0x422eaf VMOVDQU %YMM0,0x80(%RSP) |
0x422eb8 VPBROADCASTQ %R8,%YMM14 |
0x422ebe VPBROADCASTQ %RDI,%YMM15 |
0x422ec4 VPBROADCASTQ %R9,%YMM16 |
0x422eca VPBROADCASTQ %R10,%YMM17 |
0x422ed0 VPBROADCASTQ %R11,%YMM18 |
0x422ed6 VPBROADCASTQ %RSI,%YMM0 |
0x422edc VPADDQ 0x4179c(%RIP),%YMM0,%YMM9 |
0x422ee4 VPADDQ 0x41614(%RIP),%YMM0,%YMM10 |
0x422eec XOR %R15D,%R15D |
0x422eef VPBROADCASTQ 0x417a7(%RIP),%YMM21 |
0x422ef9 NOPL (%RAX) |
(180) 0x422f00 VMOVDQA %YMM10,%YMM0 |
(180) 0x422f04 VMOVDQA %YMM8,%YMM1 |
(180) 0x422f08 MOV $0x452aa0,%RDI |
(180) 0x422f0f CALL %RDI |
(180) 0x422f11 VMOVDQA %YMM0,%YMM11 |
(180) 0x422f15 VMOVDQA %YMM9,%YMM0 |
(180) 0x422f19 VMOVDQA %YMM8,%YMM1 |
(180) 0x422f1d CALL %RDI |
(180) 0x422f1f VPMOVQD %YMM11,%XMM1 |
(180) 0x422f25 VPMOVQD %YMM0,%XMM0 |
(180) 0x422f2b VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(180) 0x422f31 VPADDD 0xa0(%RSP),%YMM0,%YMM22 |
(180) 0x422f39 VMOVDQA %YMM10,%YMM0 |
(180) 0x422f3d VMOVDQA %YMM8,%YMM1 |
(180) 0x422f41 MOV $0x452870,%RDI |
(180) 0x422f48 CALL %RDI |
(180) 0x422f4a VMOVDQA %YMM0,%YMM11 |
(180) 0x422f4e VMOVDQA %YMM9,%YMM0 |
(180) 0x422f52 VMOVDQA %YMM8,%YMM1 |
(180) 0x422f56 CALL %RDI |
(180) 0x422f58 VPMOVQD %YMM11,%XMM1 |
(180) 0x422f5e VPMOVQD %YMM0,%XMM0 |
(180) 0x422f64 VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(180) 0x422f6a VPCMPEQD %YMM12,%YMM12,%YMM12 |
(180) 0x422f6f VPADDD %YMM12,%YMM22,%YMM1 |
(180) 0x422f75 VEXTRACTI128 $0x1,%YMM1,%XMM2 |
(180) 0x422f7b VPMOVSXDQ %XMM2,%YMM3 |
(180) 0x422f80 VPMOVSXDQ %XMM1,%YMM5 |
(180) 0x422f85 VPADDD 0x80(%RSP),%YMM0,%YMM6 |
(180) 0x422f8e VPMULLQ %YMM5,%YMM14,%YMM7 |
(180) 0x422f94 VXORPS %XMM2,%XMM2,%XMM2 |
(180) 0x422f98 VPMULLQ %YMM3,%YMM14,%YMM2 |
(180) 0x422f9e VPMOVSXDQ %XMM6,%YMM0 |
(180) 0x422fa3 VPMOVSXDQ %XMM22,%YMM1 |
(180) 0x422fa9 VXORPS %XMM11,%XMM11,%XMM11 |
(180) 0x422fae VPMULLQ %YMM1,%YMM14,%YMM11 |
(180) 0x422fb4 VPADDQ %YMM0,%YMM11,%YMM23 |
(180) 0x422fba KXNORW %K0,%K0,%K1 |
(180) 0x422fbe VXORPD %XMM4,%XMM4,%XMM4 |
(180) 0x422fc2 VPMULLQ %YMM1,%YMM15,%YMM24 |
(180) 0x422fc8 VGATHERQPD (%R13,%YMM23,8),%YMM4{%K1} |
(180) 0x422fd0 VPADDQ %YMM0,%YMM7,%YMM23 |
(180) 0x422fd6 VPADDQ %YMM0,%YMM24,%YMM25 |
(180) 0x422fdc KXNORW %K0,%K0,%K1 |
(180) 0x422fe0 VXORPD %XMM26,%XMM26,%XMM26 |
(180) 0x422fe6 VGATHERQPD (%R12,%YMM25,8),%YMM26{%K1} |
(180) 0x422fed KXNORW %K0,%K0,%K1 |
(180) 0x422ff1 VXORPD %XMM25,%XMM25,%XMM25 |
(180) 0x422ff7 VPMULLQ %YMM5,%YMM15,%YMM5 |
(180) 0x422ffd VGATHERQPD (%R13,%YMM23,8),%YMM25{%K1} |
(180) 0x423005 VPADDQ %YMM0,%YMM5,%YMM23 |
(180) 0x42300b KXNORW %K0,%K0,%K1 |
(180) 0x42300f VXORPD %XMM27,%XMM27,%XMM27 |
(180) 0x423015 VGATHERQPD (%R12,%YMM23,8),%YMM27{%K1} |
(180) 0x42301c VPADDD %YMM12,%YMM6,%YMM23 |
(180) 0x423022 VPMOVSXDQ %XMM23,%YMM28 |
(180) 0x423028 VPADDQ %YMM28,%YMM7,%YMM7 |
(180) 0x42302e KXNORW %K0,%K0,%K1 |
(180) 0x423032 VXORPD %XMM29,%XMM29,%XMM29 |
(180) 0x423038 VGATHERQPD (%R13,%YMM7,8),%YMM29{%K1} |
(180) 0x423040 VEXTRACTI128 $0x1,%YMM6,%XMM6 |
(180) 0x423046 VPADDQ %YMM28,%YMM5,%YMM5 |
(180) 0x42304c KXNORW %K0,%K0,%K1 |
(180) 0x423050 VXORPD %XMM7,%XMM7,%XMM7 |
(180) 0x423054 VGATHERQPD (%R12,%YMM5,8),%YMM7{%K1} |
(180) 0x42305b VPMOVSXDQ %XMM6,%YMM5 |
(180) 0x423060 KXNORW %K0,%K0,%K1 |
(180) 0x423064 VPMULLQ %YMM3,%YMM15,%YMM3 |
(180) 0x42306a VPADDQ %YMM5,%YMM2,%YMM6 |
(180) 0x42306e VXORPD %XMM30,%XMM30,%XMM30 |
(180) 0x423074 VPADDQ %YMM5,%YMM3,%YMM31 |
(180) 0x42307a KXNORW %K0,%K0,%K2 |
(180) 0x42307e VXORPD %XMM19,%XMM19,%XMM19 |
(180) 0x423084 VEXTRACTI32X4 $0x1,%YMM22,%XMM22 |
(180) 0x42308b VGATHERQPD (%R13,%YMM6,8),%YMM30{%K1} |
(180) 0x423093 VPMOVSXDQ %XMM22,%YMM6 |
(180) 0x423099 VPXORD %XMM22,%XMM22,%XMM22 |
(180) 0x42309f VPMULLQ %YMM6,%YMM14,%YMM22 |
(180) 0x4230a5 VPADDQ %YMM5,%YMM22,%YMM13 |
(180) 0x4230ab VGATHERQPD (%R12,%YMM31,8),%YMM19{%K2} |
(180) 0x4230b2 KXNORW %K0,%K0,%K1 |
(180) 0x4230b6 VXORPD %XMM31,%XMM31,%XMM31 |
(180) 0x4230bc VXORPS %XMM12,%XMM12,%XMM12 |
(180) 0x4230c1 VPMULLQ %YMM6,%YMM15,%YMM12 |
(180) 0x4230c7 VGATHERQPD (%R13,%YMM13,8),%YMM31{%K1} |
(180) 0x4230cf VPADDQ %YMM5,%YMM12,%YMM13 |
(180) 0x4230d3 KXNORW %K0,%K0,%K1 |
(180) 0x4230d7 VXORPD %XMM20,%XMM20,%XMM20 |
(180) 0x4230dd VGATHERQPD (%R12,%YMM13,8),%YMM20{%K1} |
(180) 0x4230e4 VEXTRACTI32X4 $0x1,%YMM23,%XMM13 |
(180) 0x4230eb VPMOVSXDQ %XMM13,%YMM13 |
(180) 0x4230f0 VPADDQ %YMM2,%YMM13,%YMM2 |
(180) 0x4230f4 KXNORW %K0,%K0,%K1 |
(180) 0x4230f8 VPXORD %XMM23,%XMM23,%XMM23 |
(180) 0x4230fe VGATHERQPD (%R13,%YMM2,8),%YMM23{%K1} |
(180) 0x423106 VPADDQ %YMM3,%YMM13,%YMM2 |
(180) 0x42310a KXNORW %K0,%K0,%K1 |
(180) 0x42310e VPXOR %XMM3,%XMM3,%XMM3 |
(180) 0x423112 VGATHERQPD (%R12,%YMM2,8),%YMM3{%K1} |
(180) 0x423119 VPADDQ %YMM28,%YMM11,%YMM2 |
(180) 0x42311f KXNORW %K0,%K0,%K1 |
(180) 0x423123 VPXOR %XMM11,%XMM11,%XMM11 |
(180) 0x423128 VGATHERQPD (%R13,%YMM2,8),%YMM11{%K1} |
(180) 0x423130 VPADDQ %YMM28,%YMM24,%YMM2 |
(180) 0x423136 KXNORW %K0,%K0,%K1 |
(180) 0x42313a VPXORD %XMM24,%XMM24,%XMM24 |
(180) 0x423140 VGATHERQPD (%R12,%YMM2,8),%YMM24{%K1} |
(180) 0x423147 VPADDQ %YMM13,%YMM22,%YMM2 |
(180) 0x42314d KXNORW %K0,%K0,%K1 |
(180) 0x423151 VPXORD %XMM22,%XMM22,%XMM22 |
(180) 0x423157 VGATHERQPD (%R13,%YMM2,8),%YMM22{%K1} |
(180) 0x42315f VPADDQ %YMM13,%YMM12,%YMM2 |
(180) 0x423164 KXNORW %K0,%K0,%K1 |
(180) 0x423168 VPXOR %XMM12,%XMM12,%XMM12 |
(180) 0x42316d VGATHERQPD (%R12,%YMM2,8),%YMM12{%K1} |
(180) 0x423174 VMULPD %YMM25,%YMM27,%YMM2 |
(180) 0x42317a VMULPD %YMM30,%YMM19,%YMM19 |
(180) 0x423180 VPMULLQ %YMM1,%YMM16,%YMM25 |
(180) 0x423186 VFMADD213PD %YMM19,%YMM31,%YMM20 |
(180) 0x42318c VFMADD213PD %YMM2,%YMM4,%YMM26 |
(180) 0x423192 VFMADD213PD %YMM26,%YMM29,%YMM7 |
(180) 0x423198 VXORPS %XMM2,%XMM2,%XMM2 |
(180) 0x42319c VPMULLQ %YMM6,%YMM16,%YMM2 |
(180) 0x4231a2 VFMADD213PD %YMM20,%YMM23,%YMM3 |
(180) 0x4231a8 VFMADD213PD %YMM3,%YMM22,%YMM12 |
(180) 0x4231ae VFMADD213PD %YMM7,%YMM11,%YMM24 |
(180) 0x4231b4 VXORPS %XMM3,%XMM3,%XMM3 |
(180) 0x4231b8 VPMULLQ %YMM6,%YMM17,%YMM3 |
(180) 0x4231be VBROADCASTSD 0x41329(%RIP),%YMM7 |
(180) 0x4231c7 VMULPD %YMM7,%YMM12,%YMM4 |
(180) 0x4231cb VMULPD %YMM7,%YMM24,%YMM7 |
(180) 0x4231d1 VPADDQ %YMM0,%YMM25,%YMM11 |
(180) 0x4231d7 VXORPS %XMM12,%XMM12,%XMM12 |
(180) 0x4231dc VPMULLQ %YMM1,%YMM17,%YMM12 |
(180) 0x4231e2 VPADDQ %YMM5,%YMM2,%YMM2 |
(180) 0x4231e6 KXNORW %K0,%K0,%K1 |
(180) 0x4231ea MOV 0x8(%RSP),%RAX |
(180) 0x4231ef VSCATTERQPD %YMM7,(%RAX,%YMM11,8){%K1} |
(180) 0x4231f6 KXNORW %K0,%K0,%K1 |
(180) 0x4231fa VSCATTERQPD %YMM4,(%RAX,%YMM2,8){%K1} |
(180) 0x423201 VPADDQ %YMM3,%YMM13,%YMM2 |
(180) 0x423205 KXNORW %K0,%K0,%K1 |
(180) 0x423209 VXORPD %XMM11,%XMM11,%XMM11 |
(180) 0x42320e VGATHERQPD (%RBX,%YMM2,8),%YMM11{%K1} |
(180) 0x423215 VPADDQ %YMM28,%YMM12,%YMM2 |
(180) 0x42321b KXNORW %K0,%K0,%K1 |
(180) 0x42321f VPXOR %XMM13,%XMM13,%XMM13 |
(180) 0x423224 VGATHERQPD (%RBX,%YMM2,8),%YMM13{%K1} |
(180) 0x42322b VPADDQ %YMM0,%YMM12,%YMM2 |
(180) 0x42322f KXNORW %K0,%K0,%K1 |
(180) 0x423233 VPXOR %XMM12,%XMM12,%XMM12 |
(180) 0x423238 VGATHERQPD (%RBX,%YMM2,8),%YMM12{%K1} |
(180) 0x42323f VPADDQ %YMM5,%YMM3,%YMM2 |
(180) 0x423243 KXNORW %K0,%K0,%K1 |
(180) 0x423247 VPXOR %XMM3,%XMM3,%XMM3 |
(180) 0x42324b VGATHERQPD (%RBX,%YMM2,8),%YMM3{%K1} |
(180) 0x423252 VSUBPD %YMM13,%YMM7,%YMM2 |
(180) 0x423257 VSUBPD %YMM11,%YMM4,%YMM4 |
(180) 0x42325c VPMULLQ %YMM1,%YMM18,%YMM1 |
(180) 0x423262 VADDPD %YMM2,%YMM12,%YMM2 |
(180) 0x423266 VADDPD %YMM3,%YMM4,%YMM3 |
(180) 0x42326a VXORPS %XMM4,%XMM4,%XMM4 |
(180) 0x42326e VPMULLQ %YMM6,%YMM18,%YMM4 |
(180) 0x423274 VPADDQ %YMM0,%YMM1,%YMM0 |
(180) 0x423278 VPADDQ %YMM5,%YMM4,%YMM1 |
(180) 0x42327c KXNORW %K0,%K0,%K1 |
(180) 0x423280 VSCATTERQPD %YMM2,(%R14,%YMM0,8){%K1} |
(180) 0x423287 KXNORW %K0,%K0,%K1 |
(180) 0x42328b VSCATTERQPD %YMM3,(%R14,%YMM1,8){%K1} |
(180) 0x423292 VPADDQ %YMM21,%YMM10,%YMM10 |
(180) 0x423298 VPADDQ %YMM21,%YMM9,%YMM9 |
(180) 0x42329e ADD $0x8,%R15 |
(180) 0x4232a2 CMP 0x20(%RSP),%R15 |
(180) 0x4232a7 JB 422f00 |
0x4232ad MOV 0x20(%RSP),%RAX |
0x4232b2 CMP %RAX,0x48(%RSP) |
0x4232b7 MOV 0x50(%RSP),%R15 |
0x4232bc JNE 4232ec |
0x4232be MOV $0x480960,%EDI |
0x4232c3 MOV 0x18(%RSP),%ESI |
0x4232c7 LEA -0x28(%RBP),%RSP |
0x4232cb POP %RBX |
0x4232cc POP %R12 |
0x4232ce POP %R13 |
0x4232d0 POP %R14 |
0x4232d2 POP %R15 |
0x4232d4 POP %RBP |
0x4232d5 VZEROUPPER |
0x4232d8 JMP 403050 |
0x4232dd LEA -0x28(%RBP),%RSP |
0x4232e1 POP %RBX |
0x4232e2 POP %R12 |
0x4232e4 POP %R13 |
0x4232e6 POP %R14 |
0x4232e8 POP %R15 |
0x4232ea POP %RBP |
0x4232eb RET |
0x4232ec ADD %RAX,%RSI |
0x4232ef JMP 4233e5 |
0x4232f4 NOPW %CS:(%RAX,%RAX,1) |
(179) 0x423300 MOV %RSI,%RAX |
(179) 0x423303 CQTO |
(179) 0x423305 IDIV %R15 |
(179) 0x423308 MOV 0x10(%RSP),%RAX |
(179) 0x42330d ADD %EAX,%EDX |
(179) 0x42330f MOVSXD %EDX,%RAX |
(179) 0x423312 LEA -0x1(%RCX),%EDX |
(179) 0x423315 MOVSXD %EDX,%RDX |
(179) 0x423318 MOV %R11,%R8 |
(179) 0x42331b IMUL %RDX,%R8 |
(179) 0x42331f LEA (%R8,%RAX,1),%R9 |
(179) 0x423323 MOV 0x70(%RSP),%RDI |
(179) 0x423328 IMUL %RDI,%RDX |
(179) 0x42332c LEA (%RDX,%RAX,1),%R10 |
(179) 0x423330 VMOVSD (%R12,%R10,8),%XMM0 |
(179) 0x423336 VMULSD (%R13,%R9,8),%XMM0,%XMM0 |
(179) 0x42333d MOVSXD %ECX,%RCX |
(179) 0x423340 MOV %R11,%R9 |
(179) 0x423343 IMUL %RCX,%R9 |
(179) 0x423347 IMUL %RCX,%RDI |
(179) 0x42334b LEA (%RDI,%RAX,1),%R11 |
(179) 0x42334f VMOVSD (%R12,%R11,8),%XMM1 |
(179) 0x423355 LEA (%R9,%RAX,1),%R11 |
(179) 0x423359 VFMADD132SD (%R13,%R11,8),%XMM0,%XMM1 |
(179) 0x423360 LEA -0x1(%RDX,%RAX,1),%RDX |
(179) 0x423365 VMOVSD (%R12,%RDX,8),%XMM0 |
(179) 0x42336b LEA -0x1(%R8,%RAX,1),%RDX |
(179) 0x423370 VFMADD132SD (%R13,%RDX,8),%XMM1,%XMM0 |
(179) 0x423377 LEA -0x1(%R9,%RAX,1),%RDX |
(179) 0x42337c LEA -0x1(%RDI,%RAX,1),%R8 |
(179) 0x423381 VMOVSD (%R12,%R8,8),%XMM1 |
(179) 0x423387 VFMADD132SD (%R13,%RDX,8),%XMM0,%XMM1 |
(179) 0x42338e VMULSD 0x4115a(%RIP),%XMM1,%XMM0 |
(179) 0x423396 MOV 0x68(%RSP),%RDX |
(179) 0x42339b IMUL %RCX,%RDX |
(179) 0x42339f ADD %RAX,%RDX |
(179) 0x4233a2 MOV 0x8(%RSP),%RDI |
(179) 0x4233a7 VMOVSD %XMM0,(%RDI,%RDX,8) |
(179) 0x4233ac MOV 0x60(%RSP),%RDX |
(179) 0x4233b1 IMUL %RCX,%RDX |
(179) 0x4233b5 LEA -0x1(%RDX,%RAX,1),%R8 |
(179) 0x4233ba VSUBSD (%RBX,%R8,8),%XMM0,%XMM0 |
(179) 0x4233c0 ADD %RAX,%RDX |
(179) 0x4233c3 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(179) 0x4233c8 IMUL 0x58(%RSP),%RCX |
(179) 0x4233ce ADD %RAX,%RCX |
(179) 0x4233d1 VMOVSD %XMM0,(%R14,%RCX,8) |
(179) 0x4233d7 INC %RSI |
(179) 0x4233da CMP 0x30(%RSP),%RSI |
(179) 0x4233df JG 4232be |
(179) 0x4233e5 MOV %RSI,%R8 |
(179) 0x4233e8 SHR $0x20,%R8 |
(179) 0x4233ec JE 423410 |
(179) 0x4233ee MOV %RSI,%RAX |
(179) 0x4233f1 XOR %EDX,%EDX |
(179) 0x4233f3 DIV %R15 |
(179) 0x4233f6 MOV %RAX,%RCX |
(179) 0x4233f9 MOV 0x28(%RSP),%R11 |
(179) 0x4233fe ADD 0x4(%RSP),%ECX |
(179) 0x423402 TEST %R8,%R8 |
(179) 0x423405 JNE 423300 |
(179) 0x42340b JMP 42342b |
0x42340d NOPL (%RAX) |
(179) 0x423410 MOV %ESI,%EAX |
(179) 0x423412 XOR %EDX,%EDX |
(179) 0x423414 DIV %R15D |
(179) 0x423417 MOV %EAX,%ECX |
(179) 0x423419 MOV 0x28(%RSP),%R11 |
(179) 0x42341e ADD 0x4(%RSP),%ECX |
(179) 0x423422 TEST %R8,%R8 |
(179) 0x423425 JNE 423300 |
(179) 0x42342b MOV %ESI,%EAX |
(179) 0x42342d XOR %EDX,%EDX |
(179) 0x42342f DIV %R15D |
(179) 0x423432 JMP 423308 |
0x423437 NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:95-100 |
Module | exec |
nb instructions | 122 |
nb uops | 124 |
loop length | 527 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 10 |
used zmm registers | 0 |
nb stack references | 26 |
micro-operation queue | 20.67 cycles |
front end | 20.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 14.00 | 14.00 | 15.00 | 9.00 | 5.40 | 15.00 | 15.00 | 15.00 | 5.40 | 14.00 |
cycles | 5.60 | 5.60 | 14.00 | 14.00 | 15.00 | 9.00 | 5.40 | 15.00 | 15.00 | 15.00 | 5.40 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.47 |
Stall cycles | 0.00 |
Front-end | 20.67 |
Dispatch | 15.00 |
Overall L1 | 20.67 |
all | 10% |
load | 15% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 14% |
load | 18% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xe0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 4232dd <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x58d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x48(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x40(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x480940,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 4232be <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x56e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x30(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 4233e5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x695> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R15,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x4(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %ECX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDI,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4179c(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x41614(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x417a7(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x48(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4232ec <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x59c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480960,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 4233e5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x695> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:95-100 |
Module | exec |
nb instructions | 122 |
nb uops | 124 |
loop length | 527 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 10 |
used zmm registers | 0 |
nb stack references | 26 |
micro-operation queue | 20.67 cycles |
front end | 20.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 14.00 | 14.00 | 15.00 | 9.00 | 5.40 | 15.00 | 15.00 | 15.00 | 5.40 | 14.00 |
cycles | 5.60 | 5.60 | 14.00 | 14.00 | 15.00 | 9.00 | 5.40 | 15.00 | 15.00 | 15.00 | 5.40 | 14.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.47 |
Stall cycles | 0.00 |
Front-end | 20.67 |
Dispatch | 15.00 |
Overall L1 | 20.67 |
all | 10% |
load | 15% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 14% |
load | 18% |
store | 14% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 35% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xe0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 4232dd <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x58d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x80(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x24(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x48(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x40(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x480940,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 4232be <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x56e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ECX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x30(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 4233e5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x695> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R15,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x4(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %ECX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDI,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4179c(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x41614(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x417a7(%RIP),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x48(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x50(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4232ec <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x59c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480960,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x18(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 4233e5 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32+0x695> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.32– | 0.8 | 1.13 |
○Loop 180 - advec_mom.cpp:95-100 - exec | 0.8 | 1.12 |
○Loop 179 - advec_mom.cpp:95-100 - exec | 0 | 0 |