Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-173 [...] | Coverage: 0.86% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-173 [...] | Coverage: 0.86% |
---|
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-291-1828/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 173 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
173: } |
0x422e70 PUSH %RBP |
0x422e71 MOV %RSP,%RBP |
0x422e74 PUSH %R15 |
0x422e76 PUSH %R14 |
0x422e78 PUSH %R13 |
0x422e7a PUSH %R12 |
0x422e7c PUSH %RBX |
0x422e7d AND $-0x20,%RSP |
0x422e81 SUB $0x120,%RSP |
0x422e88 MOV %RDX,%R13 |
0x422e8b MOV 0x38(%RBP),%RAX |
0x422e8f MOV 0x28(%RBP),%R14 |
0x422e93 MOV 0x20(%RBP),%RSI |
0x422e97 MOV 0x10(%RBP),%RBX |
0x422e9b MOV 0x18(%RBP),%EDX |
0x422e9e MOV %EDX,0x1c(%RSP) |
0x422ea2 MOVL $0,0x2c(%RSP) |
0x422eaa TEST %RAX,%RAX |
0x422ead JS 42341f |
0x422eb3 MOV %R9,%R15 |
0x422eb6 MOV %R8,%R12 |
0x422eb9 MOV %RCX,0x20(%RSP) |
0x422ebe MOV %RSI,0x58(%RSP) |
0x422ec3 MOV (%RDI),%ESI |
0x422ec5 MOVQ $0,0x68(%RSP) |
0x422ece MOV %RAX,0x60(%RSP) |
0x422ed3 MOVQ $0x1,0x98(%RSP) |
0x422edf SUB $0x8,%RSP |
0x422ee3 LEA 0xa0(%RSP),%RAX |
0x422eeb LEA 0x34(%RSP),%RCX |
0x422ef0 LEA 0x70(%RSP),%R8 |
0x422ef5 LEA 0x68(%RSP),%R9 |
0x422efa MOV $0x6827b0,%EDI |
0x422eff MOV %ESI,0x30(%RSP) |
0x422f03 MOV $0x22,%EDX |
0x422f08 PUSH $0x1 |
0x422f0a PUSH $0x1 |
0x422f0c PUSH %RAX |
0x422f0d CALL 403020 <__kmpc_for_static_init_8@plt> |
0x422f12 ADD $0x20,%RSP |
0x422f16 MOV 0x68(%RSP),%RSI |
0x422f1b MOV 0x60(%RSP),%RAX |
0x422f20 MOV %RAX,0x50(%RSP) |
0x422f25 CMP %RAX,%RSI |
0x422f28 JA 423400 |
0x422f2e MOV %R14,%RCX |
0x422f31 MOV 0x58(%RSP),%RDX |
0x422f36 SUB %EDX,%ECX |
0x422f38 MOV (%R13),%R9 |
0x422f3c MOV 0x10(%R13),%R13 |
0x422f40 MOV (%RBX),%R10 |
0x422f43 MOV %R15,%RAX |
0x422f46 MOV 0x10(%RBX),%R15 |
0x422f4a MOV (%R12),%R11 |
0x422f4e MOV 0x10(%R12),%R12 |
0x422f53 MOV 0x20(%RSP),%RDI |
0x422f58 MOV (%RDI),%R14 |
0x422f5b MOV 0x10(%RDI),%RDI |
0x422f5f MOV (%RAX),%RBX |
0x422f62 MOV 0x10(%RAX),%RAX |
0x422f66 MOV %RAX,0x20(%RSP) |
0x422f6b LEA 0x1(%RSI),%RAX |
0x422f6f MOV 0x50(%RSP),%R8 |
0x422f74 INC %R8 |
0x422f77 CMP %R8,%RAX |
0x422f7a CMOVG %RAX,%R8 |
0x422f7e SUB %RSI,%R8 |
0x422f81 MOV $-0x8,%EAX |
0x422f86 AND %R8,%RAX |
0x422f89 MOV %RCX,0x48(%RSP) |
0x422f8e MOV %R9,0x40(%RSP) |
0x422f93 MOV %R10,0x38(%RSP) |
0x422f98 MOV %R11,0x90(%RSP) |
0x422fa0 MOV %RBX,0x80(%RSP) |
0x422fa8 MOV %RDI,0x88(%RSP) |
0x422fb0 JE 423537 |
0x422fb6 MOV %R8,0x78(%RSP) |
0x422fbb VPBROADCASTQ %RCX,%YMM8 |
0x422fc1 MOV %RAX,0x30(%RSP) |
0x422fc6 MOV 0x1c(%RSP),%EAX |
0x422fca VPBROADCASTD %EAX,%YMM0 |
0x422fd0 VMOVDQU %YMM0,0xe0(%RSP) |
0x422fd9 VPBROADCASTD %EDX,%YMM0 |
0x422fdf VMOVDQU %YMM0,0xc0(%RSP) |
0x422fe8 VPBROADCASTQ %R9,%YMM14 |
0x422fee VPBROADCASTQ %R10,%YMM15 |
0x422ff4 VPBROADCASTQ %R11,%YMM0 |
0x422ffa VMOVDQU %YMM0,0xa0(%RSP) |
0x423003 MOV %R14,0x70(%RSP) |
0x423008 VPBROADCASTQ %R14,%YMM17 |
0x42300e VPBROADCASTQ %RBX,%YMM18 |
0x423014 VPBROADCASTQ %RSI,%YMM0 |
0x42301a VPADDQ 0x4325e(%RIP),%YMM0,%YMM9 |
0x423022 VPADDQ 0x430d6(%RIP),%YMM0,%YMM10 |
0x42302a XOR %EBX,%EBX |
0x42302c NOPL (%RAX) |
(123) 0x423030 VMOVDQA %YMM10,%YMM0 |
(123) 0x423034 VMOVDQA %YMM8,%YMM1 |
(123) 0x423038 MOV $0x454690,%R14 |
(123) 0x42303f CALL %R14 |
(123) 0x423042 VMOVDQA %YMM0,%YMM11 |
(123) 0x423046 VMOVDQA %YMM9,%YMM0 |
(123) 0x42304a VMOVDQA %YMM8,%YMM1 |
(123) 0x42304e CALL %R14 |
(123) 0x423051 VPMOVQD %YMM11,%XMM1 |
(123) 0x423057 VPMOVQD %YMM0,%XMM0 |
(123) 0x42305d VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(123) 0x423063 VPADDD 0xe0(%RSP),%YMM0,%YMM22 |
(123) 0x42306b VMOVDQA %YMM10,%YMM0 |
(123) 0x42306f VMOVDQA %YMM8,%YMM1 |
(123) 0x423073 MOV $0x454460,%R14 |
(123) 0x42307a CALL %R14 |
(123) 0x42307d VMOVDQA %YMM0,%YMM11 |
(123) 0x423081 VMOVDQA %YMM9,%YMM0 |
(123) 0x423085 VMOVDQA %YMM8,%YMM1 |
(123) 0x423089 CALL %R14 |
(123) 0x42308c VPMOVQD %YMM11,%XMM1 |
(123) 0x423092 VPMOVQD %YMM0,%XMM0 |
(123) 0x423098 VINSERTI128 $0x1,%XMM0,%YMM1,%YMM0 |
(123) 0x42309e VPCMPEQD %YMM12,%YMM12,%YMM12 |
(123) 0x4230a3 VPADDD %YMM12,%YMM22,%YMM1 |
(123) 0x4230a9 VEXTRACTI128 $0x1,%YMM1,%XMM2 |
(123) 0x4230af VPMOVSXDQ %XMM2,%YMM3 |
(123) 0x4230b4 VPMOVSXDQ %XMM1,%YMM2 |
(123) 0x4230b9 VPADDD 0xc0(%RSP),%YMM0,%YMM4 |
(123) 0x4230c2 VPMULLQ %YMM2,%YMM14,%YMM5 |
(123) 0x4230c8 VXORPS %XMM7,%XMM7,%XMM7 |
(123) 0x4230cc VPMULLQ %YMM3,%YMM14,%YMM7 |
(123) 0x4230d2 VPMOVSXDQ %XMM4,%YMM0 |
(123) 0x4230d7 VPMOVSXDQ %XMM22,%YMM1 |
(123) 0x4230dd VPMULLQ %YMM1,%YMM14,%YMM24 |
(123) 0x4230e3 VPADDQ %YMM0,%YMM24,%YMM11 |
(123) 0x4230e9 KXNORW %K0,%K0,%K1 |
(123) 0x4230ed VXORPD %XMM6,%XMM6,%XMM6 |
(123) 0x4230f1 VPMULLQ %YMM1,%YMM15,%YMM26 |
(123) 0x4230f7 VGATHERQPD (%R13,%YMM11,8),%YMM6{%K1} |
(123) 0x4230ff VPADDQ %YMM0,%YMM5,%YMM25 |
(123) 0x423105 VPADDQ %YMM0,%YMM26,%YMM23 |
(123) 0x42310b KXNORW %K0,%K0,%K1 |
(123) 0x42310f VXORPD %XMM11,%XMM11,%XMM11 |
(123) 0x423114 VGATHERQPD (%R15,%YMM23,8),%YMM11{%K1} |
(123) 0x42311b KXNORW %K0,%K0,%K1 |
(123) 0x42311f VXORPD %XMM23,%XMM23,%XMM23 |
(123) 0x423125 VPMULLQ %YMM2,%YMM15,%YMM28 |
(123) 0x42312b VGATHERQPD (%R13,%YMM25,8),%YMM23{%K1} |
(123) 0x423133 VPADDQ %YMM0,%YMM28,%YMM27 |
(123) 0x423139 KXNORW %K0,%K0,%K1 |
(123) 0x42313d VXORPD %XMM25,%XMM25,%XMM25 |
(123) 0x423143 VGATHERQPD (%R15,%YMM27,8),%YMM25{%K1} |
(123) 0x42314a VPADDD %YMM12,%YMM4,%YMM29 |
(123) 0x423150 VPMOVSXDQ %XMM29,%YMM30 |
(123) 0x423156 VPADDQ %YMM30,%YMM5,%YMM5 |
(123) 0x42315c KXNORW %K0,%K0,%K1 |
(123) 0x423160 VXORPD %XMM27,%XMM27,%XMM27 |
(123) 0x423166 VGATHERQPD (%R13,%YMM5,8),%YMM27{%K1} |
(123) 0x42316e VEXTRACTI128 $0x1,%YMM4,%XMM4 |
(123) 0x423174 VPADDQ %YMM30,%YMM28,%YMM5 |
(123) 0x42317a KXNORW %K0,%K0,%K1 |
(123) 0x42317e VPXORD %XMM28,%XMM28,%XMM28 |
(123) 0x423184 VGATHERQPD (%R15,%YMM5,8),%YMM28{%K1} |
(123) 0x42318b VPMOVSXDQ %XMM4,%YMM4 |
(123) 0x423190 KXNORW %K0,%K0,%K1 |
(123) 0x423194 VPMULLQ %YMM3,%YMM15,%YMM31 |
(123) 0x42319a VPADDQ %YMM4,%YMM7,%YMM5 |
(123) 0x42319e VXORPD %XMM19,%XMM19,%XMM19 |
(123) 0x4231a4 VPADDQ %YMM4,%YMM31,%YMM13 |
(123) 0x4231aa KXNORW %K0,%K0,%K2 |
(123) 0x4231ae VPXOR %XMM12,%XMM12,%XMM12 |
(123) 0x4231b3 VEXTRACTI32X4 $0x1,%YMM22,%XMM22 |
(123) 0x4231ba VGATHERQPD (%R13,%YMM5,8),%YMM19{%K1} |
(123) 0x4231c2 VPMOVSXDQ %XMM22,%YMM5 |
(123) 0x4231c8 VPXORD %XMM22,%XMM22,%XMM22 |
(123) 0x4231ce VPMULLQ %YMM5,%YMM14,%YMM22 |
(123) 0x4231d4 VPADDQ %YMM4,%YMM22,%YMM20 |
(123) 0x4231da VGATHERQPD (%R15,%YMM13,8),%YMM12{%K2} |
(123) 0x4231e1 KXNORW %K0,%K0,%K1 |
(123) 0x4231e5 VXORPD %XMM13,%XMM13,%XMM13 |
(123) 0x4231ea VPMULLQ %YMM5,%YMM15,%YMM21 |
(123) 0x4231f0 VGATHERQPD (%R13,%YMM20,8),%YMM13{%K1} |
(123) 0x4231f8 VPADDQ %YMM4,%YMM21,%YMM20 |
(123) 0x4231fe KXNORW %K0,%K0,%K1 |
(123) 0x423202 VXORPD %XMM16,%XMM16,%XMM16 |
(123) 0x423208 VGATHERQPD (%R15,%YMM20,8),%YMM16{%K1} |
(123) 0x42320f VEXTRACTI32X4 $0x1,%YMM29,%XMM20 |
(123) 0x423216 VPMOVSXDQ %XMM20,%YMM20 |
(123) 0x42321c VPADDQ %YMM20,%YMM7,%YMM7 |
(123) 0x423222 KXNORW %K0,%K0,%K1 |
(123) 0x423226 VPXORD %XMM29,%XMM29,%XMM29 |
(123) 0x42322c VGATHERQPD (%R13,%YMM7,8),%YMM29{%K1} |
(123) 0x423234 VPADDQ %YMM20,%YMM31,%YMM7 |
(123) 0x42323a KXNORW %K0,%K0,%K1 |
(123) 0x42323e VPXORD %XMM31,%XMM31,%XMM31 |
(123) 0x423244 VGATHERQPD (%R15,%YMM7,8),%YMM31{%K1} |
(123) 0x42324b VPADDQ %YMM30,%YMM24,%YMM7 |
(123) 0x423251 KXNORW %K0,%K0,%K1 |
(123) 0x423255 VPXORD %XMM24,%XMM24,%XMM24 |
(123) 0x42325b VGATHERQPD (%R13,%YMM7,8),%YMM24{%K1} |
(123) 0x423263 VPADDQ %YMM30,%YMM26,%YMM7 |
(123) 0x423269 KXNORW %K0,%K0,%K1 |
(123) 0x42326d VPXORD %XMM26,%XMM26,%XMM26 |
(123) 0x423273 VGATHERQPD (%R15,%YMM7,8),%YMM26{%K1} |
(123) 0x42327a VPADDQ %YMM20,%YMM22,%YMM7 |
(123) 0x423280 KXNORW %K0,%K0,%K1 |
(123) 0x423284 VPXORD %XMM22,%XMM22,%XMM22 |
(123) 0x42328a VGATHERQPD (%R13,%YMM7,8),%YMM22{%K1} |
(123) 0x423292 VMULPD %YMM19,%YMM12,%YMM7 |
(123) 0x423298 VFMADD213PD %YMM7,%YMM13,%YMM16 |
(123) 0x42329e VPADDQ %YMM20,%YMM21,%YMM7 |
(123) 0x4232a4 KXNORW %K0,%K0,%K1 |
(123) 0x4232a8 VXORPD %XMM12,%XMM12,%XMM12 |
(123) 0x4232ad VGATHERQPD (%R15,%YMM7,8),%YMM12{%K1} |
(123) 0x4232b4 VMULPD %YMM23,%YMM25,%YMM7 |
(123) 0x4232ba VFMADD213PD %YMM7,%YMM6,%YMM11 |
(123) 0x4232bf VFMADD213PD %YMM11,%YMM27,%YMM28 |
(123) 0x4232c5 VFMADD213PD %YMM16,%YMM29,%YMM31 |
(123) 0x4232cb VFMADD213PD %YMM28,%YMM24,%YMM26 |
(123) 0x4232d1 VFMADD213PD %YMM31,%YMM22,%YMM12 |
(123) 0x4232d7 VMOVDQU 0xa0(%RSP),%YMM7 |
(123) 0x4232e0 VPMULLQ %YMM1,%YMM7,%YMM6 |
(123) 0x4232e6 VPMULLQ %YMM5,%YMM7,%YMM7 |
(123) 0x4232ec VPADDQ %YMM4,%YMM7,%YMM7 |
(123) 0x4232f0 VPADDQ %YMM0,%YMM6,%YMM6 |
(123) 0x4232f4 VPMULLQ %YMM3,%YMM17,%YMM3 |
(123) 0x4232fa VBROADCASTSD 0x42ded(%RIP),%YMM13 |
(123) 0x423303 VMULPD %YMM13,%YMM26,%YMM11 |
(123) 0x423309 KXNORW %K0,%K0,%K1 |
(123) 0x42330d KXNORW %K0,%K0,%K2 |
(123) 0x423311 VPMULLQ %YMM2,%YMM17,%YMM2 |
(123) 0x423317 VMULPD %YMM13,%YMM12,%YMM12 |
(123) 0x42331c VPADDQ %YMM0,%YMM2,%YMM2 |
(123) 0x423320 VSCATTERQPD %YMM11,(%R12,%YMM6,8){%K1} |
(123) 0x423327 VPADDQ %YMM4,%YMM3,%YMM3 |
(123) 0x42332b VSCATTERQPD %YMM12,(%R12,%YMM7,8){%K2} |
(123) 0x423332 KXNORW %K0,%K0,%K1 |
(123) 0x423336 VXORPD %XMM6,%XMM6,%XMM6 |
(123) 0x42333a VXORPS %XMM7,%XMM7,%XMM7 |
(123) 0x42333e VPMULLQ %YMM1,%YMM17,%YMM7 |
(123) 0x423344 VGATHERQPD (%RDI,%YMM3,8),%YMM6{%K1} |
(123) 0x42334b KXNORW %K0,%K0,%K1 |
(123) 0x42334f VXORPD %XMM3,%XMM3,%XMM3 |
(123) 0x423353 VXORPS %XMM13,%XMM13,%XMM13 |
(123) 0x423358 VPMULLQ %YMM5,%YMM17,%YMM13 |
(123) 0x42335e VGATHERQPD (%RDI,%YMM2,8),%YMM3{%K1} |
(123) 0x423365 VPADDQ %YMM0,%YMM7,%YMM2 |
(123) 0x423369 KXNORW %K0,%K0,%K1 |
(123) 0x42336d VPXOR %XMM7,%XMM7,%XMM7 |
(123) 0x423371 VGATHERQPD (%RDI,%YMM2,8),%YMM7{%K1} |
(123) 0x423378 VPADDQ %YMM4,%YMM13,%YMM2 |
(123) 0x42337c KXNORW %K0,%K0,%K1 |
(123) 0x423380 VPXOR %XMM13,%XMM13,%XMM13 |
(123) 0x423385 VGATHERQPD (%RDI,%YMM2,8),%YMM13{%K1} |
(123) 0x42338c VSUBPD %YMM3,%YMM11,%YMM2 |
(123) 0x423390 VPMULLQ %YMM1,%YMM18,%YMM1 |
(123) 0x423396 VPADDQ %YMM0,%YMM1,%YMM0 |
(123) 0x42339a VADDPD %YMM7,%YMM2,%YMM1 |
(123) 0x42339e KXNORW %K0,%K0,%K1 |
(123) 0x4233a2 MOV 0x20(%RSP),%RAX |
(123) 0x4233a7 VSCATTERQPD %YMM1,(%RAX,%YMM0,8){%K1} |
(123) 0x4233ae VSUBPD %YMM6,%YMM12,%YMM0 |
(123) 0x4233b2 VADDPD %YMM0,%YMM13,%YMM0 |
(123) 0x4233b6 VXORPS %XMM1,%XMM1,%XMM1 |
(123) 0x4233ba VPMULLQ %YMM5,%YMM18,%YMM1 |
(123) 0x4233c0 VPADDQ %YMM4,%YMM1,%YMM1 |
(123) 0x4233c4 KXNORW %K0,%K0,%K1 |
(123) 0x4233c8 VSCATTERQPD %YMM0,(%RAX,%YMM1,8){%K1} |
(123) 0x4233cf VPBROADCASTQ 0x42ec8(%RIP),%YMM0 |
(123) 0x4233d8 VPADDQ %YMM0,%YMM10,%YMM10 |
(123) 0x4233dc VPADDQ %YMM0,%YMM9,%YMM9 |
(123) 0x4233e0 ADD $0x8,%RBX |
(123) 0x4233e4 CMP 0x30(%RSP),%RBX |
(123) 0x4233e9 JB 423030 |
0x4233ef MOV 0x30(%RSP),%RAX |
0x4233f4 CMP %RAX,0x78(%RSP) |
0x4233f9 MOV 0x70(%RSP),%R14 |
0x4233fe JNE 42342e |
0x423400 MOV $0x6827d0,%EDI |
0x423405 MOV 0x28(%RSP),%ESI |
0x423409 LEA -0x28(%RBP),%RSP |
0x42340d POP %RBX |
0x42340e POP %R12 |
0x423410 POP %R13 |
0x423412 POP %R14 |
0x423414 POP %R15 |
0x423416 POP %RBP |
0x423417 VZEROUPPER |
0x42341a JMP 402e90 |
0x42341f LEA -0x28(%RBP),%RSP |
0x423423 POP %RBX |
0x423424 POP %R12 |
0x423426 POP %R13 |
0x423428 POP %R14 |
0x42342a POP %R15 |
0x42342c POP %RBP |
0x42342d RET |
0x42342e ADD %RAX,%RSI |
0x423431 JMP 423537 |
0x423436 NOPW %CS:(%RAX,%RAX,1) |
(122) 0x423440 MOV %RSI,%RAX |
(122) 0x423443 CQTO |
(122) 0x423445 IDIV %R9 |
(122) 0x423448 ADD 0x1c(%RSP),%ECX |
(122) 0x42344c ADD 0x58(%RSP),%EDX |
(122) 0x423450 MOVSXD %EDX,%RAX |
(122) 0x423453 LEA -0x1(%RCX),%EDX |
(122) 0x423456 MOVSXD %EDX,%RDX |
(122) 0x423459 MOV %RBX,%R8 |
(122) 0x42345c IMUL %RDX,%R8 |
(122) 0x423460 LEA (%R8,%RAX,1),%R9 |
(122) 0x423464 MOV %R12,%R10 |
(122) 0x423467 IMUL %RDX,%R10 |
(122) 0x42346b LEA (%R10,%RAX,1),%R11 |
(122) 0x42346f VMOVSD (%R15,%R11,8),%XMM0 |
(122) 0x423475 VMULSD (%R13,%R9,8),%XMM0,%XMM0 |
(122) 0x42347c MOVSXD %ECX,%RCX |
(122) 0x42347f MOV %RBX,%R9 |
(122) 0x423482 IMUL %RCX,%R9 |
(122) 0x423486 IMUL %RCX,%R12 |
(122) 0x42348a LEA (%R12,%RAX,1),%RBX |
(122) 0x42348e VMOVSD (%R15,%RBX,8),%XMM1 |
(122) 0x423494 LEA (%R9,%RAX,1),%RBX |
(122) 0x423498 VFMADD132SD (%R13,%RBX,8),%XMM0,%XMM1 |
(122) 0x42349f LEA -0x1(%R10,%RAX,1),%R10 |
(122) 0x4234a4 VMOVSD (%R15,%R10,8),%XMM0 |
(122) 0x4234aa LEA -0x1(%R8,%RAX,1),%R8 |
(122) 0x4234af VFMADD132SD (%R13,%R8,8),%XMM1,%XMM0 |
(122) 0x4234b6 LEA -0x1(%R12,%RAX,1),%R8 |
(122) 0x4234bb VMOVSD (%R15,%R8,8),%XMM1 |
(122) 0x4234c1 LEA -0x1(%R9,%RAX,1),%R8 |
(122) 0x4234c6 VFMADD132SD (%R13,%R8,8),%XMM0,%XMM1 |
(122) 0x4234cd VMULSD 0x42c1b(%RIP),%XMM1,%XMM0 |
(122) 0x4234d5 MOV 0x90(%RSP),%R8 |
(122) 0x4234dd IMUL %RCX,%R8 |
(122) 0x4234e1 ADD %RAX,%R8 |
(122) 0x4234e4 MOV %R14,%R12 |
(122) 0x4234e7 VMOVSD %XMM0,(%R14,%R8,8) |
(122) 0x4234ed MOV %RDI,%R14 |
(122) 0x4234f0 IMUL %RDI,%RDX |
(122) 0x4234f4 ADD %RAX,%RDX |
(122) 0x4234f7 MOV 0x88(%RSP),%RDI |
(122) 0x4234ff VSUBSD (%RDI,%RDX,8),%XMM0,%XMM0 |
(122) 0x423504 MOV %R14,%RDX |
(122) 0x423507 IMUL %RCX,%RDX |
(122) 0x42350b ADD %RAX,%RDX |
(122) 0x42350e VADDSD (%RDI,%RDX,8),%XMM0,%XMM0 |
(122) 0x423513 IMUL 0x80(%RSP),%RCX |
(122) 0x42351c ADD %RAX,%RCX |
(122) 0x42351f MOV 0x20(%RSP),%RAX |
(122) 0x423524 VMOVSD %XMM0,(%RAX,%RCX,8) |
(122) 0x423529 INC %RSI |
(122) 0x42352c CMP 0x50(%RSP),%RSI |
(122) 0x423531 JG 423400 |
(122) 0x423537 MOV %R14,%RDI |
(122) 0x42353a MOV %R12,%R14 |
(122) 0x42353d MOV %RSI,%R8 |
(122) 0x423540 SHR $0x20,%R8 |
(122) 0x423544 JE 423570 |
(122) 0x423546 MOV %RSI,%RAX |
(122) 0x423549 XOR %EDX,%EDX |
(122) 0x42354b MOV 0x48(%RSP),%R9 |
(122) 0x423550 DIV %R9 |
(122) 0x423553 MOV %RAX,%RCX |
(122) 0x423556 MOV 0x40(%RSP),%RBX |
(122) 0x42355b MOV 0x38(%RSP),%R12 |
(122) 0x423560 TEST %R8,%R8 |
(122) 0x423563 JNE 423440 |
(122) 0x423569 JMP 423591 |
0x42356b NOPL (%RAX,%RAX,1) |
(122) 0x423570 MOV %ESI,%EAX |
(122) 0x423572 XOR %EDX,%EDX |
(122) 0x423574 MOV 0x48(%RSP),%R9 |
(122) 0x423579 DIV %R9D |
(122) 0x42357c MOV %EAX,%ECX |
(122) 0x42357e MOV 0x40(%RSP),%RBX |
(122) 0x423583 MOV 0x38(%RSP),%R12 |
(122) 0x423588 TEST %R8,%R8 |
(122) 0x42358b JNE 423440 |
(122) 0x423591 MOV %ESI,%EAX |
(122) 0x423593 XOR %EDX,%EDX |
(122) 0x423595 DIV %R9D |
(122) 0x423598 JMP 423448 |
0x42359d NOPL (%RAX) |
Path / |
Source file and lines | advec_mom.cpp:167-173 |
Module | exec |
nb instructions | 124 |
nb uops | 126 |
loop length | 537 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 8 |
used zmm registers | 0 |
nb stack references | 28 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 13.67 | 13.67 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 13.67 |
cycles | 5.60 | 5.60 | 13.67 | 13.67 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 13.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.80 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 16.00 |
Overall L1 | 21.00 |
all | 10% |
load | 15% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 15% |
load | 17% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 42341f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5af> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6827b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 423400 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x590> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 423537 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %EDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R14,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4325e(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x430d6(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x78(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 42342e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5be> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6827d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 423537 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c7> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:167-173 |
Module | exec |
nb instructions | 124 |
nb uops | 126 |
loop length | 537 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 8 |
used zmm registers | 0 |
nb stack references | 28 |
micro-operation queue | 21.00 cycles |
front end | 21.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 13.67 | 13.67 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 13.67 |
cycles | 5.60 | 5.60 | 13.67 | 13.67 | 16.00 | 9.00 | 5.40 | 16.00 | 16.00 | 16.00 | 5.40 | 13.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 20.80 |
Stall cycles | 0.00 |
Front-end | 21.00 |
Dispatch | 16.00 |
Overall L1 | 21.00 |
all | 10% |
load | 15% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 40% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 5% |
all | 15% |
load | 17% |
store | 16% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 26% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x120,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EDX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 42341f <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5af> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6827b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 423400 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x590> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV (%R13),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x50(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %R8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 423537 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R8,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RCX,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %EDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R9,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %R14,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x4325e(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x430d6(%RIP),%YMM0,%YMM10 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RAX,0x78(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 42342e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x5be> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6827d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 423537 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii.extracted.12+0x6c7> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 0.86 | 1.37 |
○Loop 123 - advec_mom.cpp:168-173 - exec | 0.86 | 1.37 |
○Loop 122 - advec_mom.cpp:168-173 - exec | 0 | 0 |