Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 1.5% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 1.5% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/home/eoseret/qaas_runs_CPU_9468/171-112-9712/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 65 - 110 |
-------------------------------------------------------------------------------- |
65: #pragma omp parallel for simd collapse(2) |
66: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
67: for (int i = (x_min + 1); i < (x_max + 2 + 2); i++) |
68: ({ |
69: int upwind, donor, downwind, dif; |
70: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
71: if (vol_flux_x(i, j) > 0.0) { |
72: upwind = i - 2; |
73: donor = i - 1; |
74: downwind = i; |
75: dif = donor; |
76: } else { |
77: upwind = std::min(i + 1, x_max + 2); |
78: donor = i; |
79: downwind = i - 1; |
80: dif = upwind; |
81: } |
82: sigmat = std::fabs(vol_flux_x(i, j)) / pre_vol(donor, j); |
83: sigma3 = (1.0 + sigmat) * (vertexdx[i] / vertexdx[dif]); |
84: sigma4 = 2.0 - sigmat; |
85: sigmav = sigmat; |
86: diffuw = density1(donor, j) - density1(upwind, j); |
87: diffdw = density1(downwind, j) - density1(donor, j); |
88: wind = 1.0; |
89: if (diffdw <= 0.0) wind = -1.0; |
90: if (diffuw * diffdw > 0.0) { |
91: limiter = (1.0 - sigmav) * wind * |
92: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
93: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
94: } else { |
95: limiter = 0.0; |
96: } |
97: mass_flux_x(i, j) = vol_flux_x(i, j) * (density1(donor, j) + limiter); |
98: sigmam = std::fabs(mass_flux_x(i, j)) / (density1(donor, j) * pre_vol(donor, j)); |
99: diffuw = energy1(donor, j) - energy1(upwind, j); |
100: diffdw = energy1(downwind, j) - energy1(donor, j); |
101: wind = 1.0; |
102: if (diffdw <= 0.0) wind = -1.0; |
103: if (diffuw * diffdw > 0.0) { |
104: limiter = (1.0 - sigmam) * wind * |
105: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
106: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
107: } else { |
108: limiter = 0.0; |
109: } |
110: ener_flux(i, j) = mass_flux_x(i, j) * (energy1(donor, j) + limiter); |
0x41ebc0 PUSH %RBP |
0x41ebc1 MOV %RSP,%RBP |
0x41ebc4 PUSH %R15 |
0x41ebc6 PUSH %R14 |
0x41ebc8 PUSH %R13 |
0x41ebca PUSH %R12 |
0x41ebcc PUSH %RBX |
0x41ebcd AND $-0x20,%RSP |
0x41ebd1 SUB $0x280,%RSP |
0x41ebd8 MOV %R8,%R12 |
0x41ebdb MOV 0x50(%RBP),%RAX |
0x41ebdf MOV 0x40(%RBP),%R10 |
0x41ebe3 MOV 0x38(%RBP),%RSI |
0x41ebe7 MOV 0x30(%RBP),%R8 |
0x41ebeb MOV %R8,0x40(%RSP) |
0x41ebf0 MOV 0x28(%RBP),%RBX |
0x41ebf4 MOV 0x20(%RBP),%R15 |
0x41ebf8 MOV 0x18(%RBP),%R13 |
0x41ebfc MOV 0x10(%RBP),%R14 |
0x41ec00 MOVL $0,0x2c(%RSP) |
0x41ec08 TEST %RAX,%RAX |
0x41ec0b JS 41f4ed |
0x41ec11 MOV %RCX,0x10(%RSP) |
0x41ec16 MOV %R9,0xa0(%RSP) |
0x41ec1e MOV %RSI,(%RSP) |
0x41ec22 MOV %R10,0x8(%RSP) |
0x41ec27 MOV %RDX,0x18(%RSP) |
0x41ec2c MOV (%RDI),%ESI |
0x41ec2e MOVQ $0,0x60(%RSP) |
0x41ec37 MOV %RAX,0x58(%RSP) |
0x41ec3c MOVQ $0x1,0x98(%RSP) |
0x41ec48 SUB $0x8,%RSP |
0x41ec4c LEA 0xa0(%RSP),%RAX |
0x41ec54 LEA 0x34(%RSP),%RCX |
0x41ec59 LEA 0x68(%RSP),%R8 |
0x41ec5e LEA 0x60(%RSP),%R9 |
0x41ec63 MOV $0x4805e0,%EDI |
0x41ec68 MOV %ESI,0x30(%RSP) |
0x41ec6c MOV $0x22,%EDX |
0x41ec71 PUSH $0x1 |
0x41ec73 PUSH $0x1 |
0x41ec75 PUSH %RAX |
0x41ec76 CALL 4031e0 <__kmpc_for_static_init_8@plt> |
0x41ec7b ADD $0x20,%RSP |
0x41ec7f MOV 0x60(%RSP),%RSI |
0x41ec84 MOV 0x58(%RSP),%RAX |
0x41ec89 MOV %RAX,0x38(%RSP) |
0x41ec8e CMP %RAX,%RSI |
0x41ec91 JA 41f4ce |
0x41ec97 MOV 0x8(%RSP),%RDX |
0x41ec9c SUB (%RSP),%EDX |
0x41ec9f MOV (%R13),%RAX |
0x41eca3 MOV %RAX,0x20(%RSP) |
0x41eca8 MOV 0x10(%R13),%RAX |
0x41ecac MOV %RAX,0x50(%RSP) |
0x41ecb1 MOV (%R15),%RAX |
0x41ecb4 MOV %RAX,0x30(%RSP) |
0x41ecb9 MOV 0x10(%R15),%R13 |
0x41ecbd MOV 0x10(%RSP),%RAX |
0x41ecc2 MOV 0x8(%RAX),%R15 |
0x41ecc6 MOV (%R12),%R11 |
0x41ecca MOV 0x10(%R12),%R12 |
0x41eccf MOV (%R14),%RDI |
0x41ecd2 MOV 0x10(%R14),%RAX |
0x41ecd6 MOV %RAX,0x10(%RSP) |
0x41ecdb MOV 0x18(%RSP),%RCX |
0x41ece0 ADD $0x2,%ECX |
0x41ece3 LEA 0x1(%RSI),%RAX |
0x41ece7 MOV 0x38(%RSP),%R8 |
0x41ecec LEA 0x1(%R8),%R9 |
0x41ecf0 CMP %R9,%RAX |
0x41ecf3 CMOVG %RAX,%R9 |
0x41ecf7 MOV 0xa0(%RSP),%RAX |
0x41ecff MOV (%RAX),%R8 |
0x41ed02 MOV 0x10(%RAX),%R14 |
0x41ed06 MOV (%RBX),%R10 |
0x41ed09 MOV 0x10(%RBX),%RAX |
0x41ed0d SUB %RSI,%R9 |
0x41ed10 MOV $-0x8,%EBX |
0x41ed15 MOV %R9,0x70(%RSP) |
0x41ed1a AND %R9,%RBX |
0x41ed1d MOV %RBX,%R9 |
0x41ed20 MOV 0x20(%RSP),%RBX |
0x41ed25 MOV %RCX,0x18(%RSP) |
0x41ed2a MOV %RDX,0x8(%RSP) |
0x41ed2f MOV %RDI,0x88(%RSP) |
0x41ed37 MOV %R10,0x78(%RSP) |
0x41ed3c MOV %R11,0x90(%RSP) |
0x41ed44 MOV %R8,0x80(%RSP) |
0x41ed4c MOV %RAX,0x48(%RSP) |
0x41ed51 JE 41f4ff |
0x41ed57 VPBROADCASTQ %RDX,%YMM0 |
0x41ed5d VMOVDQU %YMM0,0x1e0(%RSP) |
0x41ed66 MOV 0x40(%RSP),%RAX |
0x41ed6b VPBROADCASTQ %RAX,%YMM0 |
0x41ed71 VMOVDQU %YMM0,0x1c0(%RSP) |
0x41ed7a MOV (%RSP),%RAX |
0x41ed7e VPBROADCASTD %EAX,%YMM0 |
0x41ed84 VMOVDQU %YMM0,0x1a0(%RSP) |
0x41ed8d VPBROADCASTQ %RBX,%YMM0 |
0x41ed93 VMOVDQU %YMM0,0x180(%RSP) |
0x41ed9c VPBROADCASTD %ECX,%YMM0 |
0x41eda2 VMOVDQU %YMM0,0x160(%RSP) |
0x41edab MOV 0x30(%RSP),%RAX |
0x41edb0 VPBROADCASTQ %RAX,%YMM0 |
0x41edb6 VMOVDQU %YMM0,0x140(%RSP) |
0x41edbf VPBROADCASTQ %R11,%YMM0 |
0x41edc5 VMOVDQU %YMM0,0x120(%RSP) |
0x41edce VPBROADCASTQ %RDI,%YMM0 |
0x41edd4 VMOVDQU %YMM0,0x100(%RSP) |
0x41eddd VPBROADCASTQ %R8,%YMM0 |
0x41ede3 VMOVDQU %YMM0,0xe0(%RSP) |
0x41edec VPBROADCASTQ %R10,%YMM0 |
0x41edf2 VMOVDQU %YMM0,0xc0(%RSP) |
0x41edfb MOV %RSI,0x68(%RSP) |
0x41ee00 VPBROADCASTQ %RSI,%YMM0 |
0x41ee06 VPADDQ 0x45872(%RIP),%YMM0,%YMM9 |
0x41ee0e VPADDQ 0x456ea(%RIP),%YMM0,%YMM1 |
0x41ee16 XOR %EBX,%EBX |
0x41ee18 NOPL (%RAX,%RAX,1) |
(162) 0x41ee20 VMOVDQU %YMM1,0xa0(%RSP) |
(162) 0x41ee29 VMOVDQU %YMM9,0x200(%RSP) |
(162) 0x41ee32 VMOVDQA %YMM1,%YMM0 |
(162) 0x41ee36 VMOVUPS 0x1e0(%RSP),%YMM8 |
(162) 0x41ee3f VMOVAPS %YMM8,%YMM1 |
(162) 0x41ee43 MOV %R9,%RDI |
(162) 0x41ee46 MOV $0x452aa0,%RSI |
(162) 0x41ee4d CALL %RSI |
(162) 0x41ee4f VMOVDQA %YMM0,%YMM11 |
(162) 0x41ee53 VMOVDQA %YMM9,%YMM0 |
(162) 0x41ee57 VMOVAPS %YMM8,%YMM1 |
(162) 0x41ee5b CALL %RSI |
(162) 0x41ee5d VMOVDQU 0x1c0(%RSP),%YMM1 |
(162) 0x41ee66 VPADDQ %YMM1,%YMM11,%YMM12 |
(162) 0x41ee6a VPADDQ %YMM1,%YMM0,%YMM13 |
(162) 0x41ee6e VMOVDQU 0xa0(%RSP),%YMM0 |
(162) 0x41ee77 VMOVAPS %YMM8,%YMM1 |
(162) 0x41ee7b MOV $0x452870,%RSI |
(162) 0x41ee82 CALL %RSI |
(162) 0x41ee84 VMOVDQA %YMM0,%YMM11 |
(162) 0x41ee88 VMOVDQA %YMM9,%YMM0 |
(162) 0x41ee8c VMOVAPS %YMM8,%YMM1 |
(162) 0x41ee90 CALL %RSI |
(162) 0x41ee92 MOV %RDI,%R9 |
(162) 0x41ee95 VPMOVQD %YMM11,%XMM1 |
(162) 0x41ee9b VPMOVQD %YMM0,%XMM3 |
(162) 0x41eea1 VPSLLQ $0x20,%YMM12,%YMM0 |
(162) 0x41eea7 VPSRAQ $0x20,%YMM0,%YMM2 |
(162) 0x41eeae VMOVDQU 0x180(%RSP),%YMM5 |
(162) 0x41eeb7 VXORPS %XMM4,%XMM4,%XMM4 |
(162) 0x41eebb VPMULLQ %YMM2,%YMM5,%YMM4 |
(162) 0x41eec1 VPSLLQ $0x20,%YMM13,%YMM0 |
(162) 0x41eec7 VPSRAQ $0x20,%YMM0,%YMM0 |
(162) 0x41eece VPMULLQ %YMM0,%YMM5,%YMM5 |
(162) 0x41eed4 VINSERTI128 $0x1,%XMM3,%YMM1,%YMM1 |
(162) 0x41eeda VPADDD 0x1a0(%RSP),%YMM1,%YMM11 |
(162) 0x41eee3 VPMOVSXDQ %XMM11,%YMM3 |
(162) 0x41eee8 VPADDQ %YMM3,%YMM4,%YMM1 |
(162) 0x41eeec KXNORW %K0,%K0,%K1 |
(162) 0x41eef0 VXORPD %XMM7,%XMM7,%XMM7 |
(162) 0x41eef4 MOV 0x50(%RSP),%RAX |
(162) 0x41eef9 VGATHERQPD (%RAX,%YMM1,8),%YMM7{%K1} |
(162) 0x41ef00 VEXTRACTI128 $0x1,%YMM11,%XMM12 |
(162) 0x41ef06 VPMOVSXDQ %XMM12,%YMM1 |
(162) 0x41ef0b VPADDQ %YMM1,%YMM5,%YMM4 |
(162) 0x41ef0f VMOVDQA64 %YMM1,%YMM18 |
(162) 0x41ef15 KXNORW %K0,%K0,%K1 |
(162) 0x41ef19 VXORPD %XMM6,%XMM6,%XMM6 |
(162) 0x41ef1d VGATHERQPD (%RAX,%YMM4,8),%YMM6{%K1} |
(162) 0x41ef24 VMOVDQU 0x140(%RSP),%YMM8 |
(162) 0x41ef2d VXORPS %XMM4,%XMM4,%XMM4 |
(162) 0x41ef31 VPMULLQ %YMM2,%YMM8,%YMM4 |
(162) 0x41ef37 VPXOR %XMM1,%XMM1,%XMM1 |
(162) 0x41ef3b VCMPPD $0x1,%YMM6,%YMM1,%K1 |
(162) 0x41ef42 VPCMPEQD %YMM9,%YMM9,%YMM9 |
(162) 0x41ef47 VPADDD %YMM9,%YMM11,%YMM5 |
(162) 0x41ef4c VPMOVSXDQ %XMM5,%YMM14 |
(162) 0x41ef51 VPMULLQ %YMM0,%YMM8,%YMM15 |
(162) 0x41ef57 VCMPPD $0x1,%YMM7,%YMM1,%K2 |
(162) 0x41ef5e VEXTRACTI128 $0x1,%YMM5,%XMM13 |
(162) 0x41ef64 VPBLENDMQ %YMM14,%YMM3,%YMM31{%K2} |
(162) 0x41ef6a VPADDQ %YMM31,%YMM4,%YMM4 |
(162) 0x41ef70 VMOVDQU %YMM4,0x220(%RSP) |
(162) 0x41ef79 KXNORW %K0,%K0,%K3 |
(162) 0x41ef7d VXORPD %XMM16,%XMM16,%XMM16 |
(162) 0x41ef83 VGATHERQPD (%R13,%YMM4,8),%YMM16{%K3} |
(162) 0x41ef8b VPMOVSXDQ %XMM13,%YMM17 |
(162) 0x41ef91 VPBLENDMQ %YMM17,%YMM18,%YMM13{%K1} |
(162) 0x41ef97 VMOVDQA64 %YMM18,%YMM21 |
(162) 0x41ef9d VPADDQ %YMM13,%YMM15,%YMM4 |
(162) 0x41efa2 VMOVDQU %YMM4,0x240(%RSP) |
(162) 0x41efab KXNORW %K0,%K0,%K3 |
(162) 0x41efaf VPXOR %XMM15,%XMM15,%XMM15 |
(162) 0x41efb4 VGATHERQPD (%R13,%YMM4,8),%YMM15{%K3} |
(162) 0x41efbc KXNORW %K0,%K0,%K3 |
(162) 0x41efc0 VXORPD %XMM22,%XMM22,%XMM22 |
(162) 0x41efc6 VGATHERDPD (%R15,%XMM11,8),%YMM22{%K3} |
(162) 0x41efcd VPSUBD %YMM9,%YMM11,%YMM23 |
(162) 0x41efd3 KXNORW %K0,%K0,%K3 |
(162) 0x41efd7 VPXORD %XMM18,%XMM18,%XMM18 |
(162) 0x41efdd VGATHERDPD (%R15,%XMM12,8),%YMM18{%K3} |
(162) 0x41efe4 VPMINSD 0x160(%RSP),%YMM23,%YMM12 |
(162) 0x41efec VEXTRACTI32X4 $0x1,%YMM12,%XMM23 |
(162) 0x41eff3 VPMOVSXDQ %XMM23,%YMM23 |
(162) 0x41eff9 VPMOVSXDQ %XMM12,%YMM12 |
(162) 0x41effe VMOVDQA64 %YMM12,%YMM29 |
(162) 0x41f004 VMOVDQA64 %YMM23,%YMM26 |
(162) 0x41f00a VMOVDQA64 %YMM17,%YMM23{%K1} |
(162) 0x41f010 VMOVDQA64 %YMM14,%YMM12{%K2} |
(162) 0x41f016 KXNORW %K0,%K0,%K3 |
(162) 0x41f01a VXORPD %XMM19,%XMM19,%XMM19 |
(162) 0x41f020 VMOVDQU 0x120(%RSP),%YMM8 |
(162) 0x41f029 VPMULLQ %YMM0,%YMM8,%YMM20 |
(162) 0x41f02f VGATHERQPD (%R15,%YMM12,8),%YMM19{%K3} |
(162) 0x41f036 KXNORW %K0,%K0,%K3 |
(162) 0x41f03a VXORPD %XMM12,%XMM12,%XMM12 |
(162) 0x41f03f VPMULLQ %YMM2,%YMM8,%YMM25 |
(162) 0x41f045 VGATHERQPD (%R15,%YMM23,8),%YMM12{%K3} |
(162) 0x41f04c VPADDQ %YMM31,%YMM25,%YMM5 |
(162) 0x41f052 KXNORW %K0,%K0,%K3 |
(162) 0x41f056 VXORPD %XMM27,%XMM27,%XMM27 |
(162) 0x41f05c VGATHERQPD (%R12,%YMM5,8),%YMM27{%K3} |
(162) 0x41f063 VPBLENDMQ %YMM3,%YMM14,%YMM14{%K2} |
(162) 0x41f069 VMOVDQA %YMM3,%YMM4 |
(162) 0x41f06d VPADDQ %YMM14,%YMM25,%YMM23 |
(162) 0x41f073 KXNORW %K0,%K0,%K3 |
(162) 0x41f077 VXORPD %XMM28,%XMM28,%XMM28 |
(162) 0x41f07d VGATHERQPD (%R12,%YMM23,8),%YMM28{%K3} |
(162) 0x41f084 VPBLENDMQ %YMM21,%YMM17,%YMM17{%K1} |
(162) 0x41f08a VMOVDQA64 %YMM21,%YMM3 |
(162) 0x41f090 VPADDQ %YMM17,%YMM20,%YMM23 |
(162) 0x41f096 KXNORW %K0,%K0,%K3 |
(162) 0x41f09a VPXOR %XMM8,%XMM8,%XMM8 |
(162) 0x41f09f VGATHERQPD (%R12,%YMM23,8),%YMM8{%K3} |
(162) 0x41f0a6 VPADDD 0x45610(%RIP){1to8},%YMM11,%YMM11 |
(162) 0x41f0b0 VPMOVSXDQ %XMM11,%YMM29{%K2} |
(162) 0x41f0b6 VPADDQ %YMM29,%YMM25,%YMM23 |
(162) 0x41f0bc KXNORW %K0,%K0,%K2 |
(162) 0x41f0c0 VPXORD %XMM25,%XMM25,%XMM25 |
(162) 0x41f0c6 VGATHERQPD (%R12,%YMM23,8),%YMM25{%K2} |
(162) 0x41f0cd VEXTRACTI128 $0x1,%YMM11,%XMM11 |
(162) 0x41f0d3 VPMOVSXDQ %XMM11,%YMM26{%K1} |
(162) 0x41f0d9 VPADDQ %YMM26,%YMM20,%YMM11 |
(162) 0x41f0df KXNORW %K0,%K0,%K1 |
(162) 0x41f0e3 VPXORD %XMM21,%XMM21,%XMM21 |
(162) 0x41f0e9 VGATHERQPD (%R12,%YMM11,8),%YMM21{%K1} |
(162) 0x41f0f0 VBROADCASTSD 0x449e7(%RIP),%YMM9 |
(162) 0x41f0f9 VANDPD %YMM7,%YMM9,%YMM11 |
(162) 0x41f0fd VDIVPD %YMM16,%YMM11,%YMM24 |
(162) 0x41f103 VANDPD %YMM6,%YMM9,%YMM11 |
(162) 0x41f107 VDIVPD %YMM15,%YMM11,%YMM11 |
(162) 0x41f10c VFMADD213PD %YMM22,%YMM24,%YMM22 |
(162) 0x41f112 VDIVPD %YMM19,%YMM22,%YMM22 |
(162) 0x41f118 VFMADD213PD %YMM18,%YMM11,%YMM18 |
(162) 0x41f11e VDIVPD %YMM12,%YMM18,%YMM18 |
(162) 0x41f124 VPADDQ %YMM13,%YMM20,%YMM16 |
(162) 0x41f12a KXNORW %K0,%K0,%K2 |
(162) 0x41f12e VSUBPD %YMM25,%YMM27,%YMM12 |
(162) 0x41f134 VSUBPD %YMM27,%YMM28,%YMM15 |
(162) 0x41f13a VMULPD %YMM12,%YMM15,%YMM19 |
(162) 0x41f140 VBROADCASTSD 0x4555e(%RIP),%YMM28 |
(162) 0x41f14a VSUBPD %YMM24,%YMM28,%YMM23 |
(162) 0x41f150 VCMPPD $0x1,%YMM19,%YMM1,%K1 |
(162) 0x41f157 VCMPPD $0x1,%YMM15,%YMM1,%K3 |
(162) 0x41f15e VBROADCASTSD 0x44961(%RIP),%YMM10 |
(162) 0x41f167 VSUBPD %YMM24,%YMM10,%YMM19 |
(162) 0x41f16d VBROADCASTSD 0x45539(%RIP),%YMM30 |
(162) 0x41f177 VXORPD %YMM30,%YMM19,%YMM20 |
(162) 0x41f17d VANDPD %YMM9,%YMM12,%YMM12 |
(162) 0x41f182 VMOVAPD %YMM19,%YMM20{%K3} |
(162) 0x41f188 VANDPD %YMM9,%YMM15,%YMM15 |
(162) 0x41f18d VMINPD %YMM15,%YMM12,%YMM19 |
(162) 0x41f193 VMULPD %YMM22,%YMM12,%YMM12 |
(162) 0x41f199 VFMADD231PD %YMM15,%YMM23,%YMM12 |
(162) 0x41f19f VXORPD %XMM24,%XMM24,%XMM24 |
(162) 0x41f1a5 VGATHERQPD (%R12,%YMM16,8),%YMM24{%K2} |
(162) 0x41f1ac VBROADCASTSD 0x45502(%RIP),%YMM25 |
(162) 0x41f1b6 VMULPD %YMM25,%YMM12,%YMM12 |
(162) 0x41f1bc VMINPD %YMM12,%YMM19,%YMM12 |
(162) 0x41f1c2 VSUBPD %YMM21,%YMM24,%YMM19 |
(162) 0x41f1c8 VSUBPD %YMM24,%YMM8,%YMM8 |
(162) 0x41f1ce VFMADD231PD %YMM20,%YMM12,%YMM27{%K1} |
(162) 0x41f1d4 VSUBPD %YMM11,%YMM28,%YMM15 |
(162) 0x41f1da VMULPD %YMM19,%YMM8,%YMM12 |
(162) 0x41f1e0 VCMPPD $0x1,%YMM12,%YMM1,%K1 |
(162) 0x41f1e7 VCMPPD $0x1,%YMM8,%YMM1,%K2 |
(162) 0x41f1ee VSUBPD %YMM11,%YMM10,%YMM11 |
(162) 0x41f1f3 VXORPD %YMM30,%YMM11,%YMM20 |
(162) 0x41f1f9 VMOVAPD %YMM11,%YMM20{%K2} |
(162) 0x41f1ff VANDPD %YMM9,%YMM19,%YMM11 |
(162) 0x41f205 VANDPD %YMM9,%YMM8,%YMM8 |
(162) 0x41f20a VMINPD %YMM8,%YMM11,%YMM12 |
(162) 0x41f20f VMULPD %YMM18,%YMM11,%YMM11 |
(162) 0x41f215 VFMADD231PD %YMM8,%YMM15,%YMM11 |
(162) 0x41f21a VMULPD %YMM25,%YMM11,%YMM8 |
(162) 0x41f220 VMINPD %YMM8,%YMM12,%YMM8 |
(162) 0x41f225 VMOVDQU64 0x100(%RSP),%YMM19 |
(162) 0x41f22d VXORPS %XMM11,%XMM11,%XMM11 |
(162) 0x41f232 VPMULLQ %YMM2,%YMM19,%YMM11 |
(162) 0x41f238 VMOVDQA64 %YMM4,%YMM28 |
(162) 0x41f23e VPADDQ %YMM4,%YMM11,%YMM11 |
(162) 0x41f242 VMULPD %YMM7,%YMM27,%YMM12 |
(162) 0x41f248 KXNORW %K0,%K0,%K2 |
(162) 0x41f24c MOV 0x10(%RSP),%RAX |
(162) 0x41f251 VSCATTERQPD %YMM12,(%RAX,%YMM11,8){%K2} |
(162) 0x41f258 VPMULLQ %YMM0,%YMM19,%YMM7 |
(162) 0x41f25e VMOVDQU64 0xe0(%RSP),%YMM19 |
(162) 0x41f266 VXORPS %XMM11,%XMM11,%XMM11 |
(162) 0x41f26b VPMULLQ %YMM0,%YMM19,%YMM11 |
(162) 0x41f271 VFMADD231PD %YMM20,%YMM8,%YMM24{%K1} |
(162) 0x41f277 VMULPD %YMM6,%YMM24,%YMM6 |
(162) 0x41f27d VXORPS %XMM8,%XMM8,%XMM8 |
(162) 0x41f282 VPMULLQ %YMM2,%YMM19,%YMM8 |
(162) 0x41f288 VMOVDQA64 %YMM3,%YMM24 |
(162) 0x41f28e VPADDQ %YMM3,%YMM7,%YMM7 |
(162) 0x41f292 KXNORW %K0,%K0,%K1 |
(162) 0x41f296 VSCATTERQPD %YMM6,(%RAX,%YMM7,8){%K1} |
(162) 0x41f29d VPADDQ %YMM13,%YMM11,%YMM13 |
(162) 0x41f2a2 KXNORW %K0,%K0,%K1 |
(162) 0x41f2a6 VXORPD %XMM7,%XMM7,%XMM7 |
(162) 0x41f2aa VGATHERQPD (%R14,%YMM13,8),%YMM7{%K1} |
(162) 0x41f2b1 VPADDQ %YMM31,%YMM8,%YMM19 |
(162) 0x41f2b7 KXNORW %K0,%K0,%K1 |
(162) 0x41f2bb VXORPD %XMM13,%XMM13,%XMM13 |
(162) 0x41f2c0 VGATHERQPD (%R14,%YMM19,8),%YMM13{%K1} |
(162) 0x41f2c7 VPADDQ %YMM29,%YMM8,%YMM19 |
(162) 0x41f2cd KXNORW %K0,%K0,%K1 |
(162) 0x41f2d1 VXORPD %XMM20,%XMM20,%XMM20 |
(162) 0x41f2d7 VGATHERQPD (%R14,%YMM19,8),%YMM20{%K1} |
(162) 0x41f2de VPADDQ %YMM26,%YMM11,%YMM19 |
(162) 0x41f2e4 KXNORW %K0,%K0,%K1 |
(162) 0x41f2e8 VXORPD %XMM21,%XMM21,%XMM21 |
(162) 0x41f2ee VGATHERQPD (%R14,%YMM19,8),%YMM21{%K1} |
(162) 0x41f2f5 VPADDQ %YMM14,%YMM8,%YMM8 |
(162) 0x41f2fa KXNORW %K0,%K0,%K1 |
(162) 0x41f2fe VPXOR %XMM14,%XMM14,%XMM14 |
(162) 0x41f303 VGATHERQPD (%R14,%YMM8,8),%YMM14{%K1} |
(162) 0x41f30a VPADDQ %YMM17,%YMM11,%YMM8 |
(162) 0x41f310 KXNORW %K0,%K0,%K1 |
(162) 0x41f314 VPXOR %XMM11,%XMM11,%XMM11 |
(162) 0x41f319 VGATHERQPD (%R14,%YMM8,8),%YMM11{%K1} |
(162) 0x41f320 VSUBPD %YMM20,%YMM13,%YMM8 |
(162) 0x41f326 VSUBPD %YMM13,%YMM14,%YMM14 |
(162) 0x41f32b VMULPD %YMM8,%YMM14,%YMM17 |
(162) 0x41f331 VCMPPD $0x1,%YMM17,%YMM1,%K2 |
(162) 0x41f338 VXORPD %XMM17,%XMM17,%XMM17 |
(162) 0x41f33e KMOVQ %K2,%K1 |
(162) 0x41f343 VGATHERQPD (%R12,%YMM5,8),%YMM17{%K1} |
(162) 0x41f34a VSUBPD %YMM21,%YMM7,%YMM19 |
(162) 0x41f350 VSUBPD %YMM7,%YMM11,%YMM11 |
(162) 0x41f354 VMULPD %YMM19,%YMM11,%YMM20 |
(162) 0x41f35a VCMPPD $0x1,%YMM20,%YMM1,%K1 |
(162) 0x41f361 KMOVQ %K1,%K3 |
(162) 0x41f366 VXORPD %XMM20,%XMM20,%XMM20 |
(162) 0x41f36c VGATHERQPD (%R12,%YMM16,8),%YMM20{%K3} |
(162) 0x41f373 VXORPD %XMM16,%XMM16,%XMM16 |
(162) 0x41f379 KMOVQ %K2,%K3 |
(162) 0x41f37e VMOVUPD 0x220(%RSP),%YMM3 |
(162) 0x41f387 VGATHERQPD (%R13,%YMM3,8),%YMM16{%K3} |
(162) 0x41f38f KMOVQ %K1,%K3 |
(162) 0x41f394 VXORPD %XMM5,%XMM5,%XMM5 |
(162) 0x41f398 VMOVUPD 0x240(%RSP),%YMM3 |
(162) 0x41f3a1 VGATHERQPD (%R13,%YMM3,8),%YMM5{%K3} |
(162) 0x41f3a9 VMULPD %YMM17,%YMM16,%YMM4 |
(162) 0x41f3af VCMPPD $0x1,%YMM14,%YMM1,%K3 |
(162) 0x41f3b6 VANDPD %YMM9,%YMM8,%YMM8 |
(162) 0x41f3bb VANDPD %YMM9,%YMM14,%YMM14 |
(162) 0x41f3c0 VMULPD %YMM22,%YMM8,%YMM16 |
(162) 0x41f3c6 VFMADD231PD %YMM23,%YMM14,%YMM16 |
(162) 0x41f3cc VANDPD %YMM9,%YMM12,%YMM17 |
(162) 0x41f3d2 VDIVPD %YMM4,%YMM17,%YMM4 |
(162) 0x41f3d8 VMINPD %YMM14,%YMM8,%YMM8 |
(162) 0x41f3dd VSUBPD %YMM4,%YMM10,%YMM4 |
(162) 0x41f3e1 VMULPD %YMM25,%YMM16,%YMM14 |
(162) 0x41f3e7 VMINPD %YMM14,%YMM8,%YMM8 |
(162) 0x41f3ec VXORPD %YMM30,%YMM4,%YMM14 |
(162) 0x41f3f2 VMOVAPD %YMM4,%YMM14{%K3} |
(162) 0x41f3f8 VFMADD231PD %YMM8,%YMM14,%YMM13{%K2} |
(162) 0x41f3fe VMOVDQU 0xc0(%RSP),%YMM8 |
(162) 0x41f407 VPMULLQ %YMM2,%YMM8,%YMM2 |
(162) 0x41f40d VPADDQ %YMM28,%YMM2,%YMM2 |
(162) 0x41f413 VMULPD %YMM12,%YMM13,%YMM3 |
(162) 0x41f418 KXNORW %K0,%K0,%K2 |
(162) 0x41f41c MOV 0x48(%RSP),%RAX |
(162) 0x41f421 VSCATTERQPD %YMM3,(%RAX,%YMM2,8){%K2} |
(162) 0x41f428 VMULPD %YMM20,%YMM5,%YMM2 |
(162) 0x41f42e VANDPD %YMM6,%YMM9,%YMM3 |
(162) 0x41f432 VDIVPD %YMM2,%YMM3,%YMM2 |
(162) 0x41f436 VANDPD %YMM9,%YMM19,%YMM3 |
(162) 0x41f43c VANDPD %YMM9,%YMM11,%YMM4 |
(162) 0x41f441 VMOVDQU 0x200(%RSP),%YMM9 |
(162) 0x41f44a VMULPD %YMM18,%YMM3,%YMM5 |
(162) 0x41f450 VFMADD231PD %YMM15,%YMM4,%YMM5 |
(162) 0x41f455 VCMPPD $0x1,%YMM11,%YMM1,%K2 |
(162) 0x41f45c VMOVDQU 0xa0(%RSP),%YMM1 |
(162) 0x41f465 VMINPD %YMM4,%YMM3,%YMM3 |
(162) 0x41f469 VSUBPD %YMM2,%YMM10,%YMM2 |
(162) 0x41f46d VMULPD %YMM25,%YMM5,%YMM4 |
(162) 0x41f473 VMINPD %YMM4,%YMM3,%YMM3 |
(162) 0x41f477 VXORPD %YMM30,%YMM2,%YMM4 |
(162) 0x41f47d VMOVAPD %YMM2,%YMM4{%K2} |
(162) 0x41f483 VFMADD231PD %YMM3,%YMM4,%YMM7{%K1} |
(162) 0x41f489 VMULPD %YMM6,%YMM7,%YMM2 |
(162) 0x41f48d VPMULLQ %YMM0,%YMM8,%YMM0 |
(162) 0x41f493 VPADDQ %YMM24,%YMM0,%YMM0 |
(162) 0x41f499 KXNORW %K0,%K0,%K1 |
(162) 0x41f49d VSCATTERQPD %YMM2,(%RAX,%YMM0,8){%K1} |
(162) 0x41f4a4 VPBROADCASTQ 0x451f3(%RIP),%YMM0 |
(162) 0x41f4ad VPADDQ %YMM0,%YMM1,%YMM1 |
(162) 0x41f4b1 VPADDQ %YMM0,%YMM9,%YMM9 |
(162) 0x41f4b5 ADD $0x8,%RBX |
(162) 0x41f4b9 CMP %RDI,%RBX |
(162) 0x41f4bc JB 41ee20 |
0x41f4c2 CMP %R9,0x70(%RSP) |
0x41f4c7 MOV 0x68(%RSP),%RSI |
0x41f4cc JNE 41f4fc |
0x41f4ce MOV $0x480600,%EDI |
0x41f4d3 MOV 0x28(%RSP),%ESI |
0x41f4d7 LEA -0x28(%RBP),%RSP |
0x41f4db POP %RBX |
0x41f4dc POP %R12 |
0x41f4de POP %R13 |
0x41f4e0 POP %R14 |
0x41f4e2 POP %R15 |
0x41f4e4 POP %RBP |
0x41f4e5 VZEROUPPER |
0x41f4e8 JMP 403050 |
0x41f4ed LEA -0x28(%RBP),%RSP |
0x41f4f1 POP %RBX |
0x41f4f2 POP %R12 |
0x41f4f4 POP %R13 |
0x41f4f6 POP %R14 |
0x41f4f8 POP %R15 |
0x41f4fa POP %RBP |
0x41f4fb RET |
0x41f4fc ADD %R9,%RSI |
0x41f4ff VPXOR %XMM0,%XMM0,%XMM0 |
0x41f503 VMOVDDUP 0x445d5(%RIP),%XMM1 |
0x41f50b VMOVSD 0x45195(%RIP),%XMM2 |
0x41f513 VMOVSD 0x445ad(%RIP),%XMM3 |
0x41f51b VMOVDDUP 0x4518d(%RIP),%XMM4 |
0x41f523 VMOVDDUP 0x445b5(%RIP),%XMM5 |
0x41f52b VMOVSD 0x45185(%RIP),%XMM6 |
0x41f533 JMP 41f56a |
0x41f535 NOPW %CS:(%RAX,%RAX,1) |
(161) 0x41f540 VADDSD %XMM11,%XMM10,%XMM8 |
(161) 0x41f545 VMULSD %XMM7,%XMM8,%XMM7 |
(161) 0x41f549 IMUL 0x78(%RSP),%RCX |
(161) 0x41f54f ADD %RAX,%RCX |
(161) 0x41f552 MOV 0x48(%RSP),%RAX |
(161) 0x41f557 VMOVSD %XMM7,(%RAX,%RCX,8) |
(161) 0x41f55c INC %RSI |
(161) 0x41f55f CMP 0x38(%RSP),%RSI |
(161) 0x41f564 JG 41f4ce |
(161) 0x41f56a MOV %RSI,%R8 |
(161) 0x41f56d SHR $0x20,%R8 |
(161) 0x41f571 JE 41f5a0 |
(161) 0x41f573 MOV %RSI,%RAX |
(161) 0x41f576 XOR %EDX,%EDX |
(161) 0x41f578 MOV 0x8(%RSP),%RDI |
(161) 0x41f57d DIV %RDI |
(161) 0x41f580 MOV %RAX,%RCX |
(161) 0x41f583 MOV (%RSP),%R10 |
(161) 0x41f587 MOV 0x20(%RSP),%R11 |
(161) 0x41f58c TEST %R8,%R8 |
(161) 0x41f58f JE 41f5bb |
(161) 0x41f591 MOV %RSI,%RAX |
(161) 0x41f594 CQTO |
(161) 0x41f596 IDIV %RDI |
(161) 0x41f599 JMP 41f5c1 |
0x41f59b NOPL (%RAX,%RAX,1) |
(161) 0x41f5a0 MOV %ESI,%EAX |
(161) 0x41f5a2 XOR %EDX,%EDX |
(161) 0x41f5a4 MOV 0x8(%RSP),%RDI |
(161) 0x41f5a9 DIV %EDI |
(161) 0x41f5ab MOV %EAX,%ECX |
(161) 0x41f5ad MOV (%RSP),%R10 |
(161) 0x41f5b1 MOV 0x20(%RSP),%R11 |
(161) 0x41f5b6 TEST %R8,%R8 |
(161) 0x41f5b9 JNE 41f591 |
(161) 0x41f5bb MOV %ESI,%EAX |
(161) 0x41f5bd XOR %EDX,%EDX |
(161) 0x41f5bf DIV %EDI |
(161) 0x41f5c1 ADD 0x40(%RSP),%RCX |
(161) 0x41f5c6 LEA (%RDX,%R10,1),%R9D |
(161) 0x41f5ca MOVSXD %R9D,%RAX |
(161) 0x41f5cd MOVSXD %ECX,%RCX |
(161) 0x41f5d0 MOV %R11,%R8 |
(161) 0x41f5d3 IMUL %RCX,%R8 |
(161) 0x41f5d7 ADD %RAX,%R8 |
(161) 0x41f5da MOV 0x50(%RSP),%RDI |
(161) 0x41f5df VMOVSD (%RDI,%R8,8),%XMM7 |
(161) 0x41f5e5 VUCOMISD %XMM7,%XMM0 |
(161) 0x41f5e9 LEA -0x1(%RDX,%R10,1),%EDX |
(161) 0x41f5ee JAE 41f610 |
(161) 0x41f5f0 ADD $-0x2,%R9D |
(161) 0x41f5f4 MOVSXD %EDX,%R11 |
(161) 0x41f5f7 MOVSXD %R9D,%RDX |
(161) 0x41f5fa MOV %RAX,%R8 |
(161) 0x41f5fd MOV %R11,%R10 |
(161) 0x41f600 JMP 41f62b |
0x41f602 NOPW %CS:(%RAX,%RAX,1) |
(161) 0x41f610 INC %R9D |
(161) 0x41f613 MOV 0x18(%RSP),%RDI |
(161) 0x41f618 CMP %R9D,%EDI |
(161) 0x41f61b CMOVL %EDI,%R9D |
(161) 0x41f61f MOVSXD %EDX,%R8 |
(161) 0x41f622 MOVSXD %R9D,%RDX |
(161) 0x41f625 MOV %RDX,%R11 |
(161) 0x41f628 MOV %RAX,%R10 |
(161) 0x41f62b VANDPD %XMM1,%XMM7,%XMM8 |
(161) 0x41f62f MOV 0x30(%RSP),%R9 |
(161) 0x41f634 IMUL %RCX,%R9 |
(161) 0x41f638 ADD %R10,%R9 |
(161) 0x41f63b VDIVSD (%R13,%R9,8),%XMM8,%XMM12 |
(161) 0x41f642 VMOVSD (%R15,%RAX,8),%XMM8 |
(161) 0x41f648 VFMADD213SD %XMM8,%XMM12,%XMM8 |
(161) 0x41f64d VDIVSD (%R15,%R11,8),%XMM8,%XMM8 |
(161) 0x41f653 MOV 0x90(%RSP),%RBX |
(161) 0x41f65b IMUL %RCX,%RBX |
(161) 0x41f65f LEA (%RBX,%R10,1),%R11 |
(161) 0x41f663 VMOVSD (%R12,%R11,8),%XMM11 |
(161) 0x41f669 LEA (%RBX,%RDX,1),%RDI |
(161) 0x41f66d VSUBSD (%R12,%RDI,8),%XMM11,%XMM13 |
(161) 0x41f673 ADD %R8,%RBX |
(161) 0x41f676 VMOVSD (%R12,%RBX,8),%XMM10 |
(161) 0x41f67c VSUBSD %XMM12,%XMM2,%XMM9 |
(161) 0x41f681 VSUBSD %XMM11,%XMM10,%XMM14 |
(161) 0x41f686 VMULSD %XMM13,%XMM14,%XMM15 |
(161) 0x41f68b VXORPD %XMM10,%XMM10,%XMM10 |
(161) 0x41f690 VUCOMISD %XMM10,%XMM15 |
(161) 0x41f695 VXORPD %XMM15,%XMM15,%XMM15 |
(161) 0x41f69a JBE 41f6d7 |
(161) 0x41f69c VSUBSD %XMM12,%XMM3,%XMM12 |
(161) 0x41f6a1 VXORPD %XMM4,%XMM12,%XMM15 |
(161) 0x41f6a5 VCMPSD $0x1,%XMM14,%XMM0,%K1 |
(161) 0x41f6ac VMOVSD %XMM12,%XMM15,%XMM15{%K1} |
(161) 0x41f6b2 VANDPD %XMM5,%XMM13,%XMM12 |
(161) 0x41f6b6 VANDPD %XMM5,%XMM14,%XMM13 |
(161) 0x41f6ba VMINSD %XMM13,%XMM12,%XMM14 |
(161) 0x41f6bf VMULSD %XMM8,%XMM12,%XMM12 |
(161) 0x41f6c4 VFMADD231SD %XMM13,%XMM9,%XMM12 |
(161) 0x41f6c9 VMULSD %XMM6,%XMM12,%XMM12 |
(161) 0x41f6cd VMINSD %XMM12,%XMM14,%XMM12 |
(161) 0x41f6d2 VMULSD %XMM15,%XMM12,%XMM15 |
(161) 0x41f6d7 VADDSD %XMM11,%XMM15,%XMM11 |
(161) 0x41f6dc VMULSD %XMM7,%XMM11,%XMM7 |
(161) 0x41f6e0 MOV 0x88(%RSP),%RDI |
(161) 0x41f6e8 IMUL %RCX,%RDI |
(161) 0x41f6ec ADD %RAX,%RDI |
(161) 0x41f6ef MOV 0x10(%RSP),%RBX |
(161) 0x41f6f4 VMOVSD %XMM7,(%RBX,%RDI,8) |
(161) 0x41f6f9 MOV 0x80(%RSP),%RDI |
(161) 0x41f701 IMUL %RCX,%RDI |
(161) 0x41f705 ADD %RDI,%R10 |
(161) 0x41f708 VMOVSD (%R14,%R10,8),%XMM11 |
(161) 0x41f70e ADD %RDI,%RDX |
(161) 0x41f711 VSUBSD (%R14,%RDX,8),%XMM11,%XMM12 |
(161) 0x41f717 ADD %R8,%RDI |
(161) 0x41f71a VMOVSD (%R14,%RDI,8),%XMM13 |
(161) 0x41f720 VSUBSD %XMM11,%XMM13,%XMM13 |
(161) 0x41f725 VMULSD %XMM12,%XMM13,%XMM14 |
(161) 0x41f72a VUCOMISD %XMM10,%XMM14 |
(161) 0x41f72f JBE 41f540 |
(161) 0x41f735 VANDPD %XMM5,%XMM7,%XMM10 |
(161) 0x41f739 VMOVSD (%R13,%R9,8),%XMM14 |
(161) 0x41f740 VMULSD (%R12,%R11,8),%XMM14,%XMM14 |
(161) 0x41f746 VDIVSD %XMM14,%XMM10,%XMM10 |
(161) 0x41f74b VSUBSD %XMM10,%XMM3,%XMM10 |
(161) 0x41f750 VXORPD %XMM4,%XMM10,%XMM14 |
(161) 0x41f754 VCMPSD $0x1,%XMM13,%XMM0,%K1 |
(161) 0x41f75b VMOVSD %XMM10,%XMM14,%XMM14{%K1} |
(161) 0x41f761 VANDPD %XMM5,%XMM12,%XMM10 |
(161) 0x41f765 VANDPD %XMM5,%XMM13,%XMM12 |
(161) 0x41f769 VMINSD %XMM12,%XMM10,%XMM13 |
(161) 0x41f76e VMULSD %XMM8,%XMM10,%XMM8 |
(161) 0x41f773 VFMADD213SD %XMM8,%XMM12,%XMM9 |
(161) 0x41f778 VMULSD %XMM6,%XMM9,%XMM8 |
(161) 0x41f77c VMINSD %XMM8,%XMM13,%XMM8 |
(161) 0x41f781 VMULSD %XMM8,%XMM14,%XMM10 |
(161) 0x41f786 JMP 41f540 |
0x41f78b NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 155 |
nb uops | 157 |
loop length | 758 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 42 |
micro-operation queue | 26.17 cycles |
front end | 26.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.80 | 5.80 | 19.33 | 19.33 | 22.50 | 11.00 | 5.80 | 22.50 | 22.50 | 22.50 | 5.60 | 19.33 |
cycles | 5.80 | 5.80 | 19.33 | 19.33 | 22.50 | 11.00 | 5.80 | 22.50 | 22.50 | 22.50 | 5.60 | 19.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 25.99 |
Stall cycles | 0.00 |
Front-end | 26.17 |
Dispatch | 22.50 |
Overall L1 | 26.17 |
all | 22% |
load | 22% |
store | 28% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 20% |
load | 13% |
store | 28% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 19% |
load | 20% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 18% |
load | 17% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x280,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41f4ed <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x92d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4805e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41f4ce <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x90e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB (%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R15),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R9,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41f4ff <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x93f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPBROADCASTQ %RDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV (%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RBX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %ECX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x45872(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x456ea(%RIP),%YMM0,%YMM1 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41f4fc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x93c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480600,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x445d5(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45195(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x445ad(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4518d(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x445b5(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45185(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41f56a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x9aa> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 155 |
nb uops | 157 |
loop length | 758 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 3 |
used zmm registers | 0 |
nb stack references | 42 |
micro-operation queue | 26.17 cycles |
front end | 26.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.80 | 5.80 | 19.33 | 19.33 | 22.50 | 11.00 | 5.80 | 22.50 | 22.50 | 22.50 | 5.60 | 19.33 |
cycles | 5.80 | 5.80 | 19.33 | 19.33 | 22.50 | 11.00 | 5.80 | 22.50 | 22.50 | 22.50 | 5.60 | 19.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 25.99 |
Stall cycles | 0.00 |
Front-end | 26.17 |
Dispatch | 22.50 |
Overall L1 | 26.17 |
all | 22% |
load | 22% |
store | 28% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 20% |
load | 13% |
store | 28% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 19% |
load | 20% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 18% |
load | 17% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x280,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x2c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41f4ed <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x92d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x34(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x60(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x4805e0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 4031e0 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41f4ce <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x90e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB (%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R15),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R9,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RBX,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41f4ff <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x93f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPBROADCASTQ %RDX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV (%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RBX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTD %ECX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x160(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R8,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV %RSI,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x45872(%RIP),%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
VPADDQ 0x456ea(%RIP),%YMM0,%YMM1 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R9,0x70(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 41f4fc <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x93c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x480600,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x28(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 403050 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x445d5(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45195(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x445ad(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4518d(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x445b5(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45185(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41f56a <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x9aa> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27– | 1.5 | 2.11 |
○Loop 162 - advec_cell.cpp:65-110 - exec | 1.5 | 2.1 |
○Loop 161 - advec_cell.cpp:65-110 - exec | 0 | 0 |