Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 2.2% |
---|
Function: advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1 ... | Module: exec | Source: advec_cell.cpp:65-110 [...] | Coverage: 2.2% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-415-4687/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 65 - 110 |
-------------------------------------------------------------------------------- |
65: #pragma omp parallel for simd collapse(2) |
66: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
67: for (int i = (x_min + 1); i < (x_max + 2 + 2); i++) |
68: ({ |
69: int upwind, donor, downwind, dif; |
70: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
71: if (vol_flux_x(i, j) > 0.0) { |
72: upwind = i - 2; |
73: donor = i - 1; |
74: downwind = i; |
75: dif = donor; |
76: } else { |
77: upwind = std::min(i + 1, x_max + 2); |
78: donor = i; |
79: downwind = i - 1; |
80: dif = upwind; |
81: } |
82: sigmat = std::fabs(vol_flux_x(i, j)) / pre_vol(donor, j); |
83: sigma3 = (1.0 + sigmat) * (vertexdx[i] / vertexdx[dif]); |
84: sigma4 = 2.0 - sigmat; |
85: sigmav = sigmat; |
86: diffuw = density1(donor, j) - density1(upwind, j); |
87: diffdw = density1(downwind, j) - density1(donor, j); |
88: wind = 1.0; |
89: if (diffdw <= 0.0) wind = -1.0; |
90: if (diffuw * diffdw > 0.0) { |
91: limiter = (1.0 - sigmav) * wind * |
92: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
93: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
94: } else { |
95: limiter = 0.0; |
96: } |
97: mass_flux_x(i, j) = vol_flux_x(i, j) * (density1(donor, j) + limiter); |
98: sigmam = std::fabs(mass_flux_x(i, j)) / (density1(donor, j) * pre_vol(donor, j)); |
99: diffuw = energy1(donor, j) - energy1(upwind, j); |
100: diffdw = energy1(downwind, j) - energy1(donor, j); |
101: wind = 1.0; |
102: if (diffdw <= 0.0) wind = -1.0; |
103: if (diffuw * diffdw > 0.0) { |
104: limiter = (1.0 - sigmam) * wind * |
105: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
106: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
107: } else { |
108: limiter = 0.0; |
109: } |
110: ener_flux(i, j) = mass_flux_x(i, j) * (energy1(donor, j) + limiter); |
0x41eb40 PUSH %RBP |
0x41eb41 MOV %RSP,%RBP |
0x41eb44 PUSH %R15 |
0x41eb46 PUSH %R14 |
0x41eb48 PUSH %R13 |
0x41eb4a PUSH %R12 |
0x41eb4c PUSH %RBX |
0x41eb4d AND $-0x40,%RSP |
0x41eb51 SUB $0x1c0,%RSP |
0x41eb58 MOV 0x50(%RBP),%RAX |
0x41eb5c MOV 0x40(%RBP),%R12 |
0x41eb60 MOV 0x38(%RBP),%RSI |
0x41eb64 MOV 0x30(%RBP),%R10 |
0x41eb68 MOV %R10,0x78(%RSP) |
0x41eb6d MOV 0x28(%RBP),%R15 |
0x41eb71 MOV 0x20(%RBP),%RBX |
0x41eb75 MOV 0x18(%RBP),%R14 |
0x41eb79 MOV 0x10(%RBP),%R13 |
0x41eb7d MOVL $0,0x54(%RSP) |
0x41eb85 TEST %RAX,%RAX |
0x41eb88 JS 41f113 |
0x41eb8e MOV %RSI,0x30(%RSP) |
0x41eb93 MOV %RDX,0x48(%RSP) |
0x41eb98 MOV %RCX,0x28(%RSP) |
0x41eb9d MOV %R8,0x40(%RSP) |
0x41eba2 MOV %R9,0x38(%RSP) |
0x41eba7 MOV (%RDI),%ESI |
0x41eba9 MOVQ $0,0x90(%RSP) |
0x41ebb5 MOV %RAX,0x88(%RSP) |
0x41ebbd MOVQ $0x1,0xb8(%RSP) |
0x41ebc9 SUB $0x8,%RSP |
0x41ebcd LEA 0xc0(%RSP),%RAX |
0x41ebd5 LEA 0x5c(%RSP),%RCX |
0x41ebda LEA 0x98(%RSP),%R8 |
0x41ebe2 LEA 0x90(%RSP),%R9 |
0x41ebea MOV $0x6805f0,%EDI |
0x41ebef MOV %ESI,0x58(%RSP) |
0x41ebf3 MOV $0x22,%EDX |
0x41ebf8 PUSH $0x1 |
0x41ebfa PUSH $0x1 |
0x41ebfc PUSH %RAX |
0x41ebfd CALL 403180 <__kmpc_for_static_init_8@plt> |
0x41ec02 ADD $0x20,%RSP |
0x41ec06 MOV 0x90(%RSP),%RSI |
0x41ec0e MOV 0x88(%RSP),%RAX |
0x41ec16 MOV %RAX,0x70(%RSP) |
0x41ec1b CMP %RAX,%RSI |
0x41ec1e JA 41f12c |
0x41ec24 MOV %R12,%RDX |
0x41ec27 SUB 0x30(%RSP),%EDX |
0x41ec2b MOV (%R14),%RAX |
0x41ec2e MOV %RAX,0x68(%RSP) |
0x41ec33 MOV 0x10(%R14),%RAX |
0x41ec37 MOV %RAX,0x80(%RSP) |
0x41ec3f MOV (%RBX),%RAX |
0x41ec42 MOV (%RAX),%RCX |
0x41ec45 MOV %RCX,0x60(%RSP) |
0x41ec4a MOV 0x10(%RAX),%R14 |
0x41ec4e MOV 0x28(%RSP),%RAX |
0x41ec53 MOV 0x8(%RAX),%R12 |
0x41ec57 MOV 0x40(%RSP),%RAX |
0x41ec5c MOV (%RAX),%RAX |
0x41ec5f MOV (%RAX),%RCX |
0x41ec62 MOV %RCX,0x28(%RSP) |
0x41ec67 MOV 0x10(%RAX),%RBX |
0x41ec6b MOV (%R13),%R8 |
0x41ec6f MOV 0x10(%R13),%RAX |
0x41ec73 MOV %RAX,0x40(%RSP) |
0x41ec78 MOV 0x48(%RSP),%RCX |
0x41ec7d ADD $0x2,%ECX |
0x41ec80 LEA 0x1(%RSI),%RAX |
0x41ec84 MOV 0x70(%RSP),%RDI |
0x41ec89 LEA 0x1(%RDI),%R11 |
0x41ec8d CMP %R11,%RAX |
0x41ec90 CMOVG %RAX,%R11 |
0x41ec94 MOV 0x38(%RSP),%RAX |
0x41ec99 MOV (%RAX),%R9 |
0x41ec9c MOV 0x10(%RAX),%R13 |
0x41eca0 MOV (%R15),%R10 |
0x41eca3 MOV 0x10(%R15),%RAX |
0x41eca7 MOV %RAX,0x38(%RSP) |
0x41ecac SUB %RSI,%R11 |
0x41ecaf MOV $-0x8,%EDI |
0x41ecb4 MOV %R11,0x98(%RSP) |
0x41ecbc AND %R11,%RDI |
0x41ecbf MOV 0x28(%RSP),%R11 |
0x41ecc4 MOV %RCX,0x48(%RSP) |
0x41ecc9 MOV %RDX,0x58(%RSP) |
0x41ecce MOV %R8,0xb0(%RSP) |
0x41ecd6 MOV %R9,0xa8(%RSP) |
0x41ecde MOV %R10,0xa0(%RSP) |
0x41ece6 JE 41f14e |
0x41ecec VPBROADCASTQ %RDX,%ZMM16 |
0x41ecf2 MOV 0x78(%RSP),%RAX |
0x41ecf7 VPBROADCASTQ %RAX,%ZMM0 |
0x41ecfd VMOVDQU64 %ZMM0,0x140(%RSP) |
0x41ed05 MOV 0x30(%RSP),%RAX |
0x41ed0a VPBROADCASTD %EAX,%YMM19 |
0x41ed10 MOV 0x68(%RSP),%RAX |
0x41ed15 VPBROADCASTQ %RAX,%ZMM20 |
0x41ed1b VPBROADCASTD %ECX,%YMM21 |
0x41ed21 MOV 0x60(%RSP),%RAX |
0x41ed26 VPBROADCASTQ %RAX,%ZMM22 |
0x41ed2c VPBROADCASTQ %R11,%ZMM23 |
0x41ed32 VPBROADCASTQ %R8,%ZMM24 |
0x41ed38 VPBROADCASTQ %R9,%ZMM25 |
0x41ed3e VPBROADCASTQ %R10,%ZMM26 |
0x41ed44 VPBROADCASTQ %RSI,%ZMM0 |
0x41ed4a VPADDQ 0x44c2c(%RIP),%ZMM0,%ZMM17 |
0x41ed54 XOR %R15D,%R15D |
0x41ed57 VBROADCASTSD 0x43c07(%RIP),%ZMM28 |
0x41ed61 VPBROADCASTQ %R14,%ZMM0 |
0x41ed67 VMOVDQU64 %ZMM0,0x100(%RSP) |
0x41ed6f VBROADCASTSD 0x43bd7(%RIP),%ZMM31 |
0x41ed79 VPBROADCASTQ %RBX,%ZMM0 |
0x41ed7f VMOVDQU64 %ZMM0,0xc0(%RSP) |
0x41ed87 VBROADCASTSD 0x4592f(%RIP),%ZMM30 |
0x41ed91 VBROADCASTSD 0x4592d(%RIP),%ZMM18 |
0x41ed9b VXORPD %XMM27,%XMM27,%XMM27 |
0x41eda1 JMP 41edef |
0x41eda3 NOPW %CS:(%RAX,%RAX,1) |
(107) 0x41edb0 VADDPD %ZMM10,%ZMM6,%ZMM6{%K1} |
(107) 0x41edb6 VMULPD %ZMM3,%ZMM6,%ZMM2 |
(107) 0x41edbc VPMULLQ %ZMM1,%ZMM26,%ZMM1 |
(107) 0x41edc2 VPADDQ %ZMM0,%ZMM1,%ZMM0 |
(107) 0x41edc8 KXNORW %K0,%K0,%K1 |
(107) 0x41edcc MOV 0x38(%RSP),%RAX |
(107) 0x41edd1 VSCATTERQPD %ZMM2,(%RAX,%ZMM0,8){%K1} |
(107) 0x41edd8 VPADDQ 0x44876(%RIP){1to8},%ZMM17,%ZMM17 |
(107) 0x41ede2 ADD $0x8,%R15 |
(107) 0x41ede6 CMP %RDI,%R15 |
(107) 0x41ede9 JAE 41f122 |
(107) 0x41edef VMOVDQA64 %ZMM17,%ZMM0 |
(107) 0x41edf5 VMOVDQA64 %ZMM16,%ZMM1 |
(107) 0x41edfb MOV $0x451520,%RAX |
(107) 0x41ee02 CALL %RAX |
(107) 0x41ee04 VMOVAPD %ZMM30,%ZMM29 |
(107) 0x41ee0a VPADDQ 0x140(%RSP),%ZMM0,%ZMM30 |
(107) 0x41ee12 VMOVDQA64 %ZMM17,%ZMM0 |
(107) 0x41ee18 VMOVDQA64 %ZMM16,%ZMM1 |
(107) 0x41ee1e CALL 4513a0 <__svml_i64rem8_z0> |
(107) 0x41ee24 VPMOVQD %ZMM0,%YMM0 |
(107) 0x41ee2a VPADDD %YMM0,%YMM19,%YMM4 |
(107) 0x41ee30 VPMOVSXDQ %YMM4,%ZMM0 |
(107) 0x41ee36 VPSLLQ $0x20,%ZMM30,%ZMM1 |
(107) 0x41ee3d VMOVAPD %ZMM29,%ZMM30 |
(107) 0x41ee43 VPSRAQ $0x20,%ZMM1,%ZMM1 |
(107) 0x41ee4a VPXOR %XMM2,%XMM2,%XMM2 |
(107) 0x41ee4e VPMULLQ %ZMM1,%ZMM20,%ZMM2 |
(107) 0x41ee54 VPADDQ %ZMM0,%ZMM2,%ZMM2 |
(107) 0x41ee5a VXORPD %XMM3,%XMM3,%XMM3 |
(107) 0x41ee5e KXNORW %K0,%K0,%K1 |
(107) 0x41ee62 MOV 0x80(%RSP),%RAX |
(107) 0x41ee6a VGATHERQPD (%RAX,%ZMM2,8),%ZMM3{%K1} |
(107) 0x41ee71 VCMPPD $0x1,%ZMM3,%ZMM27,%K1 |
(107) 0x41ee78 VPCMPEQD %YMM7,%YMM7,%YMM7 |
(107) 0x41ee7c VPADDD %YMM7,%YMM4,%YMM2 |
(107) 0x41ee80 VPMOVSXDQ %YMM2,%ZMM5 |
(107) 0x41ee86 VPBLENDMQ %ZMM5,%ZMM0,%ZMM6{%K1} |
(107) 0x41ee8c VPXOR %XMM2,%XMM2,%XMM2 |
(107) 0x41ee90 VPMULLQ %ZMM1,%ZMM22,%ZMM2 |
(107) 0x41ee96 VPADDQ %ZMM6,%ZMM2,%ZMM2 |
(107) 0x41ee9c VXORPD %XMM8,%XMM8,%XMM8 |
(107) 0x41eea1 KXNORW %K0,%K0,%K2 |
(107) 0x41eea5 VGATHERQPD (%R14,%ZMM2,8),%ZMM8{%K2} |
(107) 0x41eeac VPSUBD %YMM7,%YMM4,%YMM7 |
(107) 0x41eeb0 VXORPD %XMM9,%XMM9,%XMM9 |
(107) 0x41eeb5 KXNORW %K0,%K0,%K2 |
(107) 0x41eeb9 VGATHERDPD (%R12,%YMM4,8),%ZMM9{%K2} |
(107) 0x41eec0 VPMINSD %YMM7,%YMM21,%YMM7 |
(107) 0x41eec6 VPMOVSXDQ %YMM7,%ZMM7 |
(107) 0x41eecc VPADDD 0x45802(%RIP){1to8},%YMM4,%YMM4 |
(107) 0x41eed6 VMOVDQA64 %ZMM7,%ZMM10 |
(107) 0x41eedc VPMOVSXDQ %YMM4,%ZMM10{%K1} |
(107) 0x41eee2 VMOVDQA64 %ZMM5,%ZMM7{%K1} |
(107) 0x41eee8 VPXOR %XMM4,%XMM4,%XMM4 |
(107) 0x41eeec KXNORW %K0,%K0,%K2 |
(107) 0x41eef0 VGATHERQPD (%R12,%ZMM7,8),%ZMM4{%K2} |
(107) 0x41eef7 VPBLENDMQ %ZMM0,%ZMM5,%ZMM11{%K1} |
(107) 0x41eefd VPXOR %XMM5,%XMM5,%XMM5 |
(107) 0x41ef01 VPMULLQ %ZMM1,%ZMM23,%ZMM5 |
(107) 0x41ef07 VPADDQ %ZMM6,%ZMM5,%ZMM7 |
(107) 0x41ef0d VXORPD %XMM12,%XMM12,%XMM12 |
(107) 0x41ef12 KXNORW %K0,%K0,%K1 |
(107) 0x41ef16 VGATHERQPD (%RBX,%ZMM7,8),%ZMM12{%K1} |
(107) 0x41ef1d VANDPD %ZMM28,%ZMM3,%ZMM13 |
(107) 0x41ef23 VDIVPD %ZMM8,%ZMM13,%ZMM8 |
(107) 0x41ef29 VPADDQ %ZMM10,%ZMM5,%ZMM13 |
(107) 0x41ef2f VXORPD %XMM14,%XMM14,%XMM14 |
(107) 0x41ef34 KXNORW %K0,%K0,%K1 |
(107) 0x41ef38 VGATHERQPD (%RBX,%ZMM13,8),%ZMM14{%K1} |
(107) 0x41ef3f VFMADD213PD %ZMM9,%ZMM8,%ZMM9 |
(107) 0x41ef45 VDIVPD %ZMM4,%ZMM9,%ZMM4 |
(107) 0x41ef4b VPADDQ %ZMM11,%ZMM5,%ZMM5 |
(107) 0x41ef51 VXORPD %XMM9,%XMM9,%XMM9 |
(107) 0x41ef56 KXNORW %K0,%K0,%K1 |
(107) 0x41ef5a VGATHERQPD (%RBX,%ZMM5,8),%ZMM9{%K1} |
(107) 0x41ef61 VBROADCASTSD 0x4574d(%RIP),%ZMM5 |
(107) 0x41ef6b VSUBPD %ZMM8,%ZMM5,%ZMM5 |
(107) 0x41ef71 VSUBPD %ZMM14,%ZMM12,%ZMM13 |
(107) 0x41ef77 VSUBPD %ZMM12,%ZMM9,%ZMM9 |
(107) 0x41ef7d VMULPD %ZMM13,%ZMM9,%ZMM14 |
(107) 0x41ef83 VCMPPD $0x1,%ZMM14,%ZMM27,%K1 |
(107) 0x41ef8a VCMPPD $0x1,%ZMM9,%ZMM27,%K2 |
(107) 0x41ef91 VSUBPD %ZMM8,%ZMM31,%ZMM8 |
(107) 0x41ef97 VXORPD %ZMM29,%ZMM8,%ZMM14 |
(107) 0x41ef9d VMOVAPD %ZMM8,%ZMM14{%K2} |
(107) 0x41efa3 VANDPD %ZMM28,%ZMM13,%ZMM8 |
(107) 0x41efa9 VANDPD %ZMM28,%ZMM9,%ZMM9 |
(107) 0x41efaf VMINPD %ZMM9,%ZMM8,%ZMM13 |
(107) 0x41efb5 VMULPD %ZMM4,%ZMM8,%ZMM8 |
(107) 0x41efbb VFMADD231PD %ZMM9,%ZMM5,%ZMM8 |
(107) 0x41efc1 VMULPD %ZMM18,%ZMM8,%ZMM8 |
(107) 0x41efc7 VMINPD %ZMM8,%ZMM13,%ZMM8 |
(107) 0x41efcd VFMADD231PD %ZMM14,%ZMM8,%ZMM12{%K1} |
(107) 0x41efd3 VMULPD %ZMM3,%ZMM12,%ZMM3 |
(107) 0x41efd9 VPXOR %XMM8,%XMM8,%XMM8 |
(107) 0x41efde VPMULLQ %ZMM1,%ZMM24,%ZMM8 |
(107) 0x41efe4 VPADDQ %ZMM0,%ZMM8,%ZMM8 |
(107) 0x41efea KXNORW %K0,%K0,%K1 |
(107) 0x41efee MOV 0x40(%RSP),%RAX |
(107) 0x41eff3 VSCATTERQPD %ZMM3,(%RAX,%ZMM8,8){%K1} |
(107) 0x41effa VPXOR %XMM8,%XMM8,%XMM8 |
(107) 0x41efff VPMULLQ %ZMM1,%ZMM25,%ZMM8 |
(107) 0x41f005 VPADDQ %ZMM6,%ZMM8,%ZMM9 |
(107) 0x41f00b VPXOR %XMM6,%XMM6,%XMM6 |
(107) 0x41f00f KXNORW %K0,%K0,%K1 |
(107) 0x41f013 VGATHERQPD (%R13,%ZMM9,8),%ZMM6{%K1} |
(107) 0x41f01b VPADDQ %ZMM10,%ZMM8,%ZMM9 |
(107) 0x41f021 VPXOR %XMM10,%XMM10,%XMM10 |
(107) 0x41f026 KXNORW %K0,%K0,%K1 |
(107) 0x41f02a VGATHERQPD (%R13,%ZMM9,8),%ZMM10{%K1} |
(107) 0x41f032 VPADDQ %ZMM11,%ZMM8,%ZMM8 |
(107) 0x41f038 VXORPD %XMM9,%XMM9,%XMM9 |
(107) 0x41f03d KXNORW %K0,%K0,%K1 |
(107) 0x41f041 VGATHERQPD (%R13,%ZMM8,8),%ZMM9{%K1} |
(107) 0x41f049 VSUBPD %ZMM10,%ZMM6,%ZMM8 |
(107) 0x41f04f VSUBPD %ZMM6,%ZMM9,%ZMM9 |
(107) 0x41f055 VMULPD %ZMM8,%ZMM9,%ZMM10 |
(107) 0x41f05b VCMPPD $0x1,%ZMM10,%ZMM27,%K1 |
(107) 0x41f062 KORTESTB %K1,%K1 |
(107) 0x41f066 VXORPD %XMM10,%XMM10,%XMM10 |
(107) 0x41f06b JE 41edb0 |
(107) 0x41f071 VPSLLQ $0x3,%ZMM2,%ZMM2 |
(107) 0x41f078 VPADDQ 0x100(%RSP),%ZMM2,%ZMM2 |
(107) 0x41f080 VPSLLQ $0x3,%ZMM7,%ZMM7 |
(107) 0x41f087 VPADDQ 0xc0(%RSP),%ZMM7,%ZMM7 |
(107) 0x41f08f KMOVQ %K1,%K2 |
(107) 0x41f094 VGATHERQPD (,%ZMM7,1),%ZMM10{%K2} |
(107) 0x41f09f VXORPD %XMM7,%XMM7,%XMM7 |
(107) 0x41f0a3 KMOVQ %K1,%K2 |
(107) 0x41f0a8 VGATHERQPD (,%ZMM2,1),%ZMM7{%K2} |
(107) 0x41f0b3 VANDPD %ZMM28,%ZMM3,%ZMM2 |
(107) 0x41f0b9 VMULPD %ZMM10,%ZMM7,%ZMM7 |
(107) 0x41f0bf VDIVPD %ZMM7,%ZMM2,%ZMM2 |
(107) 0x41f0c5 VCMPPD $0x1,%ZMM9,%ZMM27,%K2 |
(107) 0x41f0cc VSUBPD %ZMM2,%ZMM31,%ZMM2 |
(107) 0x41f0d2 VXORPD %ZMM30,%ZMM2,%ZMM7 |
(107) 0x41f0d8 VMOVAPD %ZMM2,%ZMM7{%K2} |
(107) 0x41f0de VANDPD %ZMM28,%ZMM8,%ZMM2 |
(107) 0x41f0e4 VANDPD %ZMM28,%ZMM9,%ZMM8 |
(107) 0x41f0ea VMINPD %ZMM8,%ZMM2,%ZMM9 |
(107) 0x41f0f0 VMULPD %ZMM4,%ZMM2,%ZMM2 |
(107) 0x41f0f6 VFMADD213PD %ZMM2,%ZMM8,%ZMM5 |
(107) 0x41f0fc VMULPD %ZMM18,%ZMM5,%ZMM2 |
(107) 0x41f102 VMINPD %ZMM2,%ZMM9,%ZMM2 |
(107) 0x41f108 VMULPD %ZMM2,%ZMM7,%ZMM10 |
(107) 0x41f10e JMP 41edb0 |
0x41f113 LEA -0x28(%RBP),%RSP |
0x41f117 POP %RBX |
0x41f118 POP %R12 |
0x41f11a POP %R13 |
0x41f11c POP %R14 |
0x41f11e POP %R15 |
0x41f120 POP %RBP |
0x41f121 RET |
0x41f122 CMP %RDI,0x98(%RSP) |
0x41f12a JNE 41f14b |
0x41f12c MOV $0x680610,%EDI |
0x41f131 MOV 0x50(%RSP),%ESI |
0x41f135 LEA -0x28(%RBP),%RSP |
0x41f139 POP %RBX |
0x41f13a POP %R12 |
0x41f13c POP %R13 |
0x41f13e POP %R14 |
0x41f140 POP %R15 |
0x41f142 POP %RBP |
0x41f143 VZEROUPPER |
0x41f146 JMP 402fe0 |
0x41f14b ADD %RDI,%RSI |
0x41f14e VPXOR %XMM0,%XMM0,%XMM0 |
0x41f152 VMOVDDUP 0x4380e(%RIP),%XMM1 |
0x41f15a VMOVSD 0x45556(%RIP),%XMM2 |
0x41f162 VMOVSD 0x437e6(%RIP),%XMM3 |
0x41f16a VMOVDDUP 0x4554e(%RIP),%XMM4 |
0x41f172 VMOVDDUP 0x437ee(%RIP),%XMM5 |
0x41f17a VMOVSD 0x45546(%RIP),%XMM6 |
0x41f182 JMP 41f1bd |
0x41f184 NOPW %CS:(%RAX,%RAX,1) |
(106) 0x41f190 VADDSD %XMM11,%XMM10,%XMM8 |
(106) 0x41f195 VMULSD %XMM7,%XMM8,%XMM7 |
(106) 0x41f199 IMUL 0xa0(%RSP),%RCX |
(106) 0x41f1a2 ADD %RAX,%RCX |
(106) 0x41f1a5 MOV 0x38(%RSP),%RAX |
(106) 0x41f1aa VMOVSD %XMM7,(%RAX,%RCX,8) |
(106) 0x41f1af INC %RSI |
(106) 0x41f1b2 CMP 0x70(%RSP),%RSI |
(106) 0x41f1b7 JG 41f12c |
(106) 0x41f1bd MOV %RSI,%RDI |
(106) 0x41f1c0 SHR $0x20,%RDI |
(106) 0x41f1c4 JE 41f1e0 |
(106) 0x41f1c6 MOV %RSI,%RAX |
(106) 0x41f1c9 XOR %EDX,%EDX |
(106) 0x41f1cb MOV 0x58(%RSP),%R8 |
(106) 0x41f1d0 DIV %R8 |
(106) 0x41f1d3 MOV %RAX,%RCX |
(106) 0x41f1d6 JMP 41f1ee |
0x41f1d8 NOPL (%RAX,%RAX,1) |
(106) 0x41f1e0 MOV %ESI,%EAX |
(106) 0x41f1e2 XOR %EDX,%EDX |
(106) 0x41f1e4 MOV 0x58(%RSP),%R8 |
(106) 0x41f1e9 DIV %R8D |
(106) 0x41f1ec MOV %EAX,%ECX |
(106) 0x41f1ee MOV 0x30(%RSP),%R9 |
(106) 0x41f1f3 TEST %RDI,%RDI |
(106) 0x41f1f6 MOV 0x28(%RSP),%R11 |
(106) 0x41f1fb JE 41f210 |
(106) 0x41f1fd MOV %RSI,%RAX |
(106) 0x41f200 CQTO |
(106) 0x41f202 IDIV %R8 |
(106) 0x41f205 JMP 41f217 |
0x41f207 NOPW (%RAX,%RAX,1) |
(106) 0x41f210 MOV %ESI,%EAX |
(106) 0x41f212 XOR %EDX,%EDX |
(106) 0x41f214 DIV %R8D |
(106) 0x41f217 ADD 0x78(%RSP),%RCX |
(106) 0x41f21c ADD %R9D,%EDX |
(106) 0x41f21f MOVSXD %EDX,%RAX |
(106) 0x41f222 MOVSXD %ECX,%RCX |
(106) 0x41f225 MOV 0x68(%RSP),%RDI |
(106) 0x41f22a IMUL %RCX,%RDI |
(106) 0x41f22e ADD %RAX,%RDI |
(106) 0x41f231 MOV 0x80(%RSP),%R8 |
(106) 0x41f239 VMOVSD (%R8,%RDI,8),%XMM7 |
(106) 0x41f23f VUCOMISD %XMM7,%XMM0 |
(106) 0x41f243 JAE 41f260 |
(106) 0x41f245 LEA -0x2(%RDX),%EDI |
(106) 0x41f248 DEC %EDX |
(106) 0x41f24a MOVSXD %EDX,%R10 |
(106) 0x41f24d MOVSXD %EDI,%RDX |
(106) 0x41f250 MOV %RAX,%RDI |
(106) 0x41f253 MOV %R10,%R9 |
(106) 0x41f256 JMP 41f27e |
0x41f258 NOPL (%RAX,%RAX,1) |
(106) 0x41f260 LEA 0x1(%RDX),%R8D |
(106) 0x41f264 MOV 0x48(%RSP),%RDI |
(106) 0x41f269 CMP %R8D,%EDI |
(106) 0x41f26c CMOVL %EDI,%R8D |
(106) 0x41f270 DEC %EDX |
(106) 0x41f272 MOVSXD %EDX,%RDI |
(106) 0x41f275 MOVSXD %R8D,%RDX |
(106) 0x41f278 MOV %RDX,%R10 |
(106) 0x41f27b MOV %RAX,%R9 |
(106) 0x41f27e VANDPD %XMM1,%XMM7,%XMM8 |
(106) 0x41f282 MOV 0x60(%RSP),%R8 |
(106) 0x41f287 IMUL %RCX,%R8 |
(106) 0x41f28b ADD %R9,%R8 |
(106) 0x41f28e VDIVSD (%R14,%R8,8),%XMM8,%XMM12 |
(106) 0x41f294 VMOVSD (%R12,%RAX,8),%XMM8 |
(106) 0x41f29a VFMADD213SD %XMM8,%XMM12,%XMM8 |
(106) 0x41f29f VDIVSD (%R12,%R10,8),%XMM8,%XMM8 |
(106) 0x41f2a5 IMUL %RCX,%R11 |
(106) 0x41f2a9 LEA (%R11,%R9,1),%R10 |
(106) 0x41f2ad VMOVSD (%RBX,%R10,8),%XMM11 |
(106) 0x41f2b3 LEA (%R11,%RDX,1),%R15 |
(106) 0x41f2b7 VSUBSD (%RBX,%R15,8),%XMM11,%XMM13 |
(106) 0x41f2bd ADD %RDI,%R11 |
(106) 0x41f2c0 VMOVSD (%RBX,%R11,8),%XMM10 |
(106) 0x41f2c6 VSUBSD %XMM12,%XMM2,%XMM9 |
(106) 0x41f2cb VSUBSD %XMM11,%XMM10,%XMM14 |
(106) 0x41f2d0 VMULSD %XMM13,%XMM14,%XMM15 |
(106) 0x41f2d5 VXORPD %XMM10,%XMM10,%XMM10 |
(106) 0x41f2da VUCOMISD %XMM10,%XMM15 |
(106) 0x41f2df VXORPD %XMM15,%XMM15,%XMM15 |
(106) 0x41f2e4 JBE 41f321 |
(106) 0x41f2e6 VSUBSD %XMM12,%XMM3,%XMM12 |
(106) 0x41f2eb VXORPD %XMM4,%XMM12,%XMM15 |
(106) 0x41f2ef VCMPSD $0x1,%XMM14,%XMM0,%K1 |
(106) 0x41f2f6 VMOVSD %XMM12,%XMM15,%XMM15{%K1} |
(106) 0x41f2fc VANDPD %XMM5,%XMM13,%XMM12 |
(106) 0x41f300 VANDPD %XMM5,%XMM14,%XMM13 |
(106) 0x41f304 VMINSD %XMM13,%XMM12,%XMM14 |
(106) 0x41f309 VMULSD %XMM8,%XMM12,%XMM12 |
(106) 0x41f30e VFMADD231SD %XMM13,%XMM9,%XMM12 |
(106) 0x41f313 VMULSD %XMM6,%XMM12,%XMM12 |
(106) 0x41f317 VMINSD %XMM12,%XMM14,%XMM12 |
(106) 0x41f31c VMULSD %XMM15,%XMM12,%XMM15 |
(106) 0x41f321 VADDSD %XMM11,%XMM15,%XMM11 |
(106) 0x41f326 VMULSD %XMM7,%XMM11,%XMM7 |
(106) 0x41f32a MOV 0xb0(%RSP),%R11 |
(106) 0x41f332 IMUL %RCX,%R11 |
(106) 0x41f336 ADD %RAX,%R11 |
(106) 0x41f339 MOV 0x40(%RSP),%R15 |
(106) 0x41f33e VMOVSD %XMM7,(%R15,%R11,8) |
(106) 0x41f344 MOV 0xa8(%RSP),%R11 |
(106) 0x41f34c IMUL %RCX,%R11 |
(106) 0x41f350 ADD %R11,%R9 |
(106) 0x41f353 VMOVSD (%R13,%R9,8),%XMM11 |
(106) 0x41f35a ADD %R11,%RDX |
(106) 0x41f35d VSUBSD (%R13,%RDX,8),%XMM11,%XMM12 |
(106) 0x41f364 ADD %RDI,%R11 |
(106) 0x41f367 VMOVSD (%R13,%R11,8),%XMM13 |
(106) 0x41f36e VSUBSD %XMM11,%XMM13,%XMM13 |
(106) 0x41f373 VMULSD %XMM12,%XMM13,%XMM14 |
(106) 0x41f378 VUCOMISD %XMM10,%XMM14 |
(106) 0x41f37d JBE 41f190 |
(106) 0x41f383 VANDPD %XMM5,%XMM7,%XMM10 |
(106) 0x41f387 VMOVSD (%R14,%R8,8),%XMM14 |
(106) 0x41f38d VMULSD (%RBX,%R10,8),%XMM14,%XMM14 |
(106) 0x41f393 VDIVSD %XMM14,%XMM10,%XMM10 |
(106) 0x41f398 VSUBSD %XMM10,%XMM3,%XMM10 |
(106) 0x41f39d VXORPD %XMM4,%XMM10,%XMM14 |
(106) 0x41f3a1 VCMPSD $0x1,%XMM13,%XMM0,%K1 |
(106) 0x41f3a8 VMOVSD %XMM10,%XMM14,%XMM14{%K1} |
(106) 0x41f3ae VANDPD %XMM5,%XMM12,%XMM10 |
(106) 0x41f3b2 VANDPD %XMM5,%XMM13,%XMM12 |
(106) 0x41f3b6 VMINSD %XMM12,%XMM10,%XMM13 |
(106) 0x41f3bb VMULSD %XMM8,%XMM10,%XMM8 |
(106) 0x41f3c0 VFMADD213SD %XMM8,%XMM12,%XMM9 |
(106) 0x41f3c5 VMULSD %XMM6,%XMM9,%XMM8 |
(106) 0x41f3c9 VMINSD %XMM8,%XMM13,%XMM8 |
(106) 0x41f3ce VMULSD %XMM8,%XMM14,%XMM10 |
(106) 0x41f3d3 JMP 41f190 |
0x41f3d8 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 156 |
nb uops | 158 |
loop length | 782 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 13 |
nb stack references | 33 |
micro-operation queue | 26.33 cycles |
front end | 26.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
cycles | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.18 |
Stall cycles | 0.00 |
Front-end | 26.33 |
Dispatch | 21.00 |
Overall L1 | 26.33 |
all | 9% |
load | 7% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 9% |
load | 4% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 17% |
load | 18% |
store | 21% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 16% |
store | 21% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41f113 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x5d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x5c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x98(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x90(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6805f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x90(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41f12c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x5ec> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x30(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x70(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RDI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R11,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV 0x28(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41f14e <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x60e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x44c2c(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x43c07(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R14,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x43bd7(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RBX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4592f(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x4592d(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41edef <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x2af> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RDI,0x98(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 41f14b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x680610,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
ADD %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4380e(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45556(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x437e6(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4554e(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x437ee(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45546(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41f1bd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x67d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_cell.cpp:65-110 |
Module | exec |
nb instructions | 156 |
nb uops | 158 |
loop length | 782 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 2 |
used zmm registers | 13 |
nb stack references | 33 |
micro-operation queue | 26.33 cycles |
front end | 26.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
cycles | 5.50 | 5.60 | 21.00 | 21.00 | 18.50 | 13.00 | 5.50 | 18.50 | 18.50 | 18.50 | 5.40 | 21.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.18 |
Stall cycles | 0.00 |
Front-end | 26.33 |
Dispatch | 21.00 |
Overall L1 | 26.33 |
all | 9% |
load | 7% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 9% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 9% |
load | 4% |
store | 11% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
all | 17% |
load | 18% |
store | 21% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 13% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 16% |
load | 16% |
store | 21% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 31% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVL $0,0x54(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 41f113 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x5d3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x5c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x98(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x90(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6805f0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403180 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x90(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 41f12c <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x5ec> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB 0x30(%RSP),%EDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV (%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%R14),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x70(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RDI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %RAX,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RAX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R15),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R15),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %RSI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x8,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND %R11,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV 0x28(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 41f14e <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x60e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPBROADCASTQ %RDX,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV 0x30(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %EAX,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x68(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %ECX,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x60(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R8,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ 0x44c2c(%RIP),%ZMM0,%ZMM17 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD 0x43c07(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %R14,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x43bd7(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RBX,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU64 %ZMM0,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x4592f(%RIP),%ZMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x4592d(%RIP),%ZMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VXORPD %XMM27,%XMM27,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41edef <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x2af> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %RDI,0x98(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 41f14b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x60b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x680610,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x50(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402fe0 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
ADD %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4380e(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45556(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x437e6(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x4554e(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x437ee(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x45546(%RIP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41f1bd <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_.extracted.27+0x67d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D | 2.2 | 1.88 |
○Loop 107 - advec_cell.cpp:66-110 - exec | 2.2 | 1.87 |
○Loop 106 - advec_cell.cpp:66-110 - exec | 0 | 0 |