Function: .omp_outlined..16 | Module: exec | Source: advec_mom.cpp:157-160 [...] | Coverage: 1.25% |
---|
Function: .omp_outlined..16 | Module: exec | Source: advec_mom.cpp:157-160 [...] | Coverage: 1.25% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 157 - 160 |
-------------------------------------------------------------------------------- |
157: #pragma omp parallel for simd collapse(2) |
158: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
159: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
160: node_flux(i, j) = 0.25 * (mass_flux_y(i - 1, j + 0) + mass_flux_y(i, j) + mass_flux_y(i - 1, j + 1) + mass_flux_y(i + 0, j + 1)); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/update_tile_halo_kernel.cpp: 142 - 144 |
-------------------------------------------------------------------------------- |
142: #pragma omp parallel for simd |
143: for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { |
144: for (int j = 0; j < depth; ++j) { |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x23abf0 PUSH %RBP |
0x23abf1 MOV %RSP,%RBP |
0x23abf4 PUSH %R15 |
0x23abf6 PUSH %R14 |
0x23abf8 PUSH %R13 |
0x23abfa PUSH %R12 |
0x23abfc PUSH %RBX |
0x23abfd SUB $0x68,%RSP |
0x23ac01 MOV (%RCX),%R15D |
0x23ac04 MOV (%RDX),%EBX |
0x23ac06 ADD $0x4,%R15D |
0x23ac0a CMP %EBX,%R15D |
0x23ac0d JL 23b22b |
0x23ac13 MOV (%R9),%R13D |
0x23ac16 MOV (%R8),%EAX |
0x23ac19 LEA 0x1(%RAX),%R12D |
0x23ac1d ADD $0x3,%R13D |
0x23ac21 CMP %R12D,%R13D |
0x23ac24 JLE 23b22b |
0x23ac2a LEA -0x1(%RBX),%ECX |
0x23ac2d SUB %R12D,%R13D |
0x23ac30 MOV (%RDI),%ESI |
0x23ac32 MOVQ $0,-0x58(%RBP) |
0x23ac3a MOVQ $0x1,-0x90(%RBP) |
0x23ac45 MOVL $0,-0x48(%RBP) |
0x23ac4c SUB %ECX,%R15D |
0x23ac4f MOV %RCX,-0x38(%RBP) |
0x23ac53 IMUL %R13,%R15 |
0x23ac57 DEC %R15 |
0x23ac5a MOV %R15,-0x40(%RBP) |
0x23ac5e SUB $0x8,%RSP |
0x23ac62 LEA -0x48(%RBP),%RCX |
0x23ac66 MOV %RAX,%R14 |
0x23ac69 LEA -0x90(%RBP),%RAX |
0x23ac70 LEA 0x261d1(%RIP),%RDI |
0x23ac77 LEA -0x58(%RBP),%R8 |
0x23ac7b LEA -0x40(%RBP),%R9 |
0x23ac7f MOV %ESI,-0x44(%RBP) |
0x23ac82 MOV $0x22,%EDX |
0x23ac87 PUSH $0x1 |
0x23ac89 PUSH $0x1 |
0x23ac8b PUSH %RAX |
0x23ac8c CALL 25f740 <@plt_start@+0x530> |
0x23ac91 ADD $0x20,%RSP |
0x23ac95 MOV -0x40(%RBP),%RAX |
0x23ac99 MOV -0x58(%RBP),%RCX |
0x23ac9d CMP %R15,%RAX |
0x23aca0 CMOVL %RAX,%R15 |
0x23aca4 MOV %R15,-0x40(%RBP) |
0x23aca8 CMP %R15,%RCX |
0x23acab JG 23b219 |
0x23acb1 MOV 0x18(%RBP),%RAX |
0x23acb5 MOV 0x10(%RBP),%RDX |
0x23acb9 MOV %R14,%R9 |
0x23acbc MOV %R13,-0x30(%RBP) |
0x23acc0 MOV %R15,%R11 |
0x23acc3 SUB %RCX,%R11 |
0x23acc6 INC %R11 |
0x23acc9 MOV (%RAX),%RSI |
0x23accc MOV 0x10(%RAX),%RDI |
0x23acd0 MOV (%RDX),%R14 |
0x23acd3 MOV 0x10(%RDX),%R13 |
0x23acd7 MOV %RSI,-0x50(%RBP) |
0x23acdb CMP $0x8,%R11 |
0x23acdf JAE 23acf1 |
0x23ace1 MOV -0x38(%RBP),%RSI |
0x23ace5 MOV -0x30(%RBP),%R8 |
0x23ace9 MOV %R9,%R10 |
0x23acec JMP 23b17b |
0x23acf1 MOV -0x30(%RBP),%RAX |
0x23acf5 VBROADCASTSD -0x294e7(%RIP),%ZMM10 |
0x23acff VPBROADCASTQ %RCX,%ZMM0 |
0x23ad05 VPADDQ -0x28f4f(%RIP),%ZMM0,%ZMM0 |
0x23ad0f VPBROADCASTQ -0x29511(%RIP),%ZMM11 |
0x23ad19 MOV %R11,-0x68(%RBP) |
0x23ad1d AND $-0x8,%R11 |
0x23ad21 VPBROADCASTD %R12D,%YMM3 |
0x23ad27 VPBROADCASTD %R9D,%YMM4 |
0x23ad2d VPBROADCASTQ %RSI,%ZMM5 |
0x23ad33 VPBROADCASTD %EBX,%YMM6 |
0x23ad39 VPBROADCASTQ %R14,%ZMM7 |
0x23ad3f MOV %R12,-0x78(%RBP) |
0x23ad43 MOV %R9,-0x80(%RBP) |
0x23ad47 MOV %RBX,-0x88(%RBP) |
0x23ad4e MOV %R14,-0x70(%RBP) |
0x23ad52 ADD %R11,%RCX |
0x23ad55 MOV %R11,-0x60(%RBP) |
0x23ad59 VPBROADCASTQ %RAX,%ZMM1 |
0x23ad5f MOV -0x38(%RBP),%RAX |
0x23ad63 VEXTRACTI32X4 $0x3,%ZMM1,%XMM8 |
0x23ad6a VEXTRACTI32X4 $0x2,%ZMM1,%XMM9 |
0x23ad71 VPBROADCASTD %EAX,%YMM2 |
0x23ad77 NOPW (%RAX,%RAX,1) |
(316) 0x23ad80 VEXTRACTI32X4 $0x3,%ZMM0,%XMM12 |
(316) 0x23ad87 VPEXTRQ $0x1,%XMM8,%R10 |
(316) 0x23ad8d KXNORW %K0,%K0,%K1 |
(316) 0x23ad91 VPEXTRQ $0x1,%XMM12,%RAX |
(316) 0x23ad97 CQTO |
(316) 0x23ad99 IDIV %R10 |
(316) 0x23ad9c VMOVQ %XMM8,%R10 |
(316) 0x23ada1 VMOVQ %RAX,%XMM13 |
(316) 0x23ada6 VMOVQ %XMM12,%RAX |
(316) 0x23adab CQTO |
(316) 0x23adad IDIV %R10 |
(316) 0x23adb0 VPEXTRQ $0x1,%XMM9,%R10 |
(316) 0x23adb6 VMOVQ %RAX,%XMM12 |
(316) 0x23adbb VPUNPCKLQDQ %XMM13,%XMM12,%XMM12 |
(316) 0x23adc0 VEXTRACTI32X4 $0x2,%ZMM0,%XMM13 |
(316) 0x23adc7 VPEXTRQ $0x1,%XMM13,%RAX |
(316) 0x23adcd CQTO |
(316) 0x23adcf IDIV %R10 |
(316) 0x23add2 VMOVQ %XMM9,%R10 |
(316) 0x23add7 VMOVQ %RAX,%XMM14 |
(316) 0x23addc VMOVQ %XMM13,%RAX |
(316) 0x23ade1 CQTO |
(316) 0x23ade3 IDIV %R10 |
(316) 0x23ade6 VMOVQ %RAX,%XMM13 |
(316) 0x23adeb VPUNPCKLQDQ %XMM14,%XMM13,%XMM13 |
(316) 0x23adf0 VEXTRACTI128 $0x1,%YMM0,%XMM14 |
(316) 0x23adf6 VPEXTRQ $0x1,%XMM14,%RAX |
(316) 0x23adfc VINSERTI128 $0x1,%XMM12,%YMM13,%YMM12 |
(316) 0x23ae02 VEXTRACTI128 $0x1,%YMM1,%XMM13 |
(316) 0x23ae08 VPEXTRQ $0x1,%XMM13,%R10 |
(316) 0x23ae0e CQTO |
(316) 0x23ae10 IDIV %R10 |
(316) 0x23ae13 VMOVQ %XMM13,%R10 |
(316) 0x23ae18 VMOVQ %RAX,%XMM15 |
(316) 0x23ae1d VMOVQ %XMM14,%RAX |
(316) 0x23ae22 CQTO |
(316) 0x23ae24 IDIV %R10 |
(316) 0x23ae27 VPEXTRQ $0x1,%XMM1,%R10 |
(316) 0x23ae2d VMOVQ %RAX,%XMM13 |
(316) 0x23ae32 VPEXTRQ $0x1,%XMM0,%RAX |
(316) 0x23ae38 CQTO |
(316) 0x23ae3a VPUNPCKLQDQ %XMM15,%XMM13,%XMM13 |
(316) 0x23ae3f IDIV %R10 |
(316) 0x23ae42 VMOVQ %XMM1,%R10 |
(316) 0x23ae47 VMOVQ %RAX,%XMM14 |
(316) 0x23ae4c VMOVQ %XMM0,%RAX |
(316) 0x23ae51 CQTO |
(316) 0x23ae53 IDIV %R10 |
(316) 0x23ae56 ADD $-0x8,%R11 |
(316) 0x23ae5a VMOVQ %RAX,%XMM15 |
(316) 0x23ae5f VPUNPCKLQDQ %XMM14,%XMM15,%XMM14 |
(316) 0x23ae64 VINSERTI128 $0x1,%XMM13,%YMM14,%YMM13 |
(316) 0x23ae6a VINSERTI64X4 $0x1,%YMM12,%ZMM13,%ZMM12 |
(316) 0x23ae71 VPMOVQD %ZMM12,%YMM14 |
(316) 0x23ae77 VPMULLQ %ZMM1,%ZMM12,%ZMM12 |
(316) 0x23ae7d VPSUBQ %ZMM12,%ZMM0,%ZMM12 |
(316) 0x23ae83 VPADDQ %ZMM11,%ZMM0,%ZMM0 |
(316) 0x23ae89 VPMOVQD %ZMM12,%YMM12 |
(316) 0x23ae8f VPADDD %YMM2,%YMM14,%YMM13 |
(316) 0x23ae93 VPADDD %YMM6,%YMM14,%YMM14 |
(316) 0x23ae97 VPMOVSXDQ %YMM13,%ZMM21 |
(316) 0x23ae9d VPMOVSXDQ %YMM14,%ZMM14 |
(316) 0x23aea3 VPADDD %YMM3,%YMM12,%YMM15 |
(316) 0x23aea7 VPADDD %YMM4,%YMM12,%YMM12 |
(316) 0x23aeab VPMOVSXDQ %YMM12,%ZMM16 |
(316) 0x23aeb1 VPMULLQ %ZMM21,%ZMM5,%ZMM17 |
(316) 0x23aeb7 VPMOVSXDQ %YMM15,%ZMM20 |
(316) 0x23aebd VPMULLQ %ZMM14,%ZMM5,%ZMM14 |
(316) 0x23aec3 VPMULLQ %ZMM21,%ZMM7,%ZMM12 |
(316) 0x23aec9 VPADDQ %ZMM16,%ZMM17,%ZMM13 |
(316) 0x23aecf VPADDQ %ZMM20,%ZMM17,%ZMM15 |
(316) 0x23aed5 VPADDQ %ZMM16,%ZMM14,%ZMM16 |
(316) 0x23aedb VPADDQ %ZMM20,%ZMM14,%ZMM14 |
(316) 0x23aee1 VPADDQ %ZMM20,%ZMM12,%ZMM12 |
(316) 0x23aee7 VEXTRACTI32X4 $0x1,%YMM13,%XMM18 |
(316) 0x23aeee VMOVQ %XMM13,%RAX |
(316) 0x23aef3 VPEXTRQ $0x1,%XMM13,%RDX |
(316) 0x23aef9 VEXTRACTI32X4 $0x1,%YMM15,%XMM17 |
(316) 0x23af00 VMOVQ %XMM18,%R10 |
(316) 0x23af06 VPEXTRQ $0x1,%XMM18,%R8 |
(316) 0x23af0d VEXTRACTI32X4 $0x2,%ZMM13,%XMM18 |
(316) 0x23af14 VEXTRACTI32X4 $0x3,%ZMM13,%XMM13 |
(316) 0x23af1b VMOVSD (%RDI,%RAX,8),%XMM19 |
(316) 0x23af22 VMOVQ %XMM15,%RAX |
(316) 0x23af27 VMOVQ %XMM18,%R12 |
(316) 0x23af2d VMOVQ %XMM13,%RBX |
(316) 0x23af32 VPEXTRQ $0x1,%XMM18,%R9 |
(316) 0x23af39 VPEXTRQ $0x1,%XMM13,%R14 |
(316) 0x23af3f VMOVHPD (%RDI,%RDX,8),%XMM19,%XMM19 |
(316) 0x23af46 VPEXTRQ $0x1,%XMM15,%RDX |
(316) 0x23af4c VMOVSD (%RDI,%RBX,8),%XMM13 |
(316) 0x23af51 VMOVSD (%RDI,%R12,8),%XMM18 |
(316) 0x23af58 VMOVHPD (%RDI,%R9,8),%XMM18,%XMM18 |
(316) 0x23af5f VMOVHPD (%RDI,%R14,8),%XMM13,%XMM13 |
(316) 0x23af65 VPEXTRQ $0x1,%XMM17,%R9 |
(316) 0x23af6c VINSERTF32X4 $0x1,%XMM13,%YMM18,%YMM13 |
(316) 0x23af73 VMOVSD (%RDI,%R10,8),%XMM18 |
(316) 0x23af7a VMOVHPD (%RDI,%R8,8),%XMM18,%XMM18 |
(316) 0x23af81 VMOVQ %XMM17,%R8 |
(316) 0x23af87 VEXTRACTI32X4 $0x2,%ZMM15,%XMM17 |
(316) 0x23af8e VEXTRACTI32X4 $0x3,%ZMM15,%XMM15 |
(316) 0x23af95 VMOVQ %XMM17,%R10 |
(316) 0x23af9b VMOVQ %XMM15,%R14 |
(316) 0x23afa0 VPEXTRQ $0x1,%XMM17,%RBX |
(316) 0x23afa7 VPEXTRQ $0x1,%XMM15,%R12 |
(316) 0x23afad VMOVSD (%RDI,%R14,8),%XMM15 |
(316) 0x23afb3 VMOVSD (%RDI,%R10,8),%XMM17 |
(316) 0x23afba VMOVHPD (%RDI,%R12,8),%XMM15,%XMM15 |
(316) 0x23afc0 VMOVHPD (%RDI,%RBX,8),%XMM17,%XMM17 |
(316) 0x23afc7 VINSERTF32X4 $0x1,%XMM18,%YMM19,%YMM18 |
(316) 0x23afce VMOVSD (%RDI,%RAX,8),%XMM19 |
(316) 0x23afd5 VPEXTRQ $0x1,%XMM16,%RAX |
(316) 0x23afdc VMOVHPD (%RDI,%RDX,8),%XMM19,%XMM19 |
(316) 0x23afe3 VMOVQ %XMM16,%RDX |
(316) 0x23afe9 VINSERTF64X4 $0x1,%YMM13,%ZMM18,%ZMM18 |
(316) 0x23aff0 VINSERTF32X4 $0x1,%XMM15,%YMM17,%YMM15 |
(316) 0x23aff7 VMOVSD (%RDI,%R8,8),%XMM17 |
(316) 0x23affe VMOVHPD (%RDI,%R9,8),%XMM17,%XMM17 |
(316) 0x23b005 VINSERTF32X4 $0x1,%XMM17,%YMM19,%YMM17 |
(316) 0x23b00c VINSERTF64X4 $0x1,%YMM15,%ZMM17,%ZMM15 |
(316) 0x23b013 VEXTRACTI32X4 $0x1,%YMM16,%XMM17 |
(316) 0x23b01a VMOVQ %XMM17,%R8 |
(316) 0x23b020 VPEXTRQ $0x1,%XMM17,%R9 |
(316) 0x23b027 VEXTRACTI32X4 $0x2,%ZMM16,%XMM17 |
(316) 0x23b02e VEXTRACTI32X4 $0x3,%ZMM16,%XMM16 |
(316) 0x23b035 VMOVQ %XMM17,%R10 |
(316) 0x23b03b VMOVQ %XMM16,%R12 |
(316) 0x23b041 VPEXTRQ $0x1,%XMM17,%RBX |
(316) 0x23b048 VPEXTRQ $0x1,%XMM16,%R14 |
(316) 0x23b04f VADDPD %ZMM15,%ZMM18,%ZMM15 |
(316) 0x23b055 VMOVSD (%RDI,%RDX,8),%XMM18 |
(316) 0x23b05c VPEXTRQ $0x1,%XMM14,%RDX |
(316) 0x23b062 VMOVSD (%RDI,%R12,8),%XMM16 |
(316) 0x23b069 VMOVSD (%RDI,%R10,8),%XMM17 |
(316) 0x23b070 VMOVHPD (%RDI,%RAX,8),%XMM18,%XMM18 |
(316) 0x23b077 VMOVQ %XMM14,%RAX |
(316) 0x23b07c VMOVHPD (%RDI,%R14,8),%XMM16,%XMM16 |
(316) 0x23b083 VMOVHPD (%RDI,%RBX,8),%XMM17,%XMM17 |
(316) 0x23b08a VINSERTF32X4 $0x1,%XMM16,%YMM17,%YMM16 |
(316) 0x23b091 VMOVSD (%RDI,%R8,8),%XMM17 |
(316) 0x23b098 VMOVHPD (%RDI,%R9,8),%XMM17,%XMM17 |
(316) 0x23b09f VINSERTF32X4 $0x1,%XMM17,%YMM18,%YMM17 |
(316) 0x23b0a6 VINSERTF64X4 $0x1,%YMM16,%ZMM17,%ZMM16 |
(316) 0x23b0ad VMOVSD (%RDI,%RAX,8),%XMM17 |
(316) 0x23b0b4 VMOVHPD (%RDI,%RDX,8),%XMM17,%XMM17 |
(316) 0x23b0bb VADDPD %ZMM16,%ZMM15,%ZMM15 |
(316) 0x23b0c1 VEXTRACTI32X4 $0x1,%YMM14,%XMM16 |
(316) 0x23b0c8 VMOVQ %XMM16,%R8 |
(316) 0x23b0ce VPEXTRQ $0x1,%XMM16,%R9 |
(316) 0x23b0d5 VEXTRACTI32X4 $0x2,%ZMM14,%XMM16 |
(316) 0x23b0dc VEXTRACTI32X4 $0x3,%ZMM14,%XMM14 |
(316) 0x23b0e3 VMOVQ %XMM16,%RBX |
(316) 0x23b0e9 VMOVQ %XMM14,%R14 |
(316) 0x23b0ee VPEXTRQ $0x1,%XMM16,%R10 |
(316) 0x23b0f5 VPEXTRQ $0x1,%XMM14,%R12 |
(316) 0x23b0fb VMOVSD (%RDI,%R14,8),%XMM14 |
(316) 0x23b101 VMOVSD (%RDI,%RBX,8),%XMM16 |
(316) 0x23b108 VMOVHPD (%RDI,%R12,8),%XMM14,%XMM14 |
(316) 0x23b10e VMOVHPD (%RDI,%R10,8),%XMM16,%XMM16 |
(316) 0x23b115 VINSERTF32X4 $0x1,%XMM14,%YMM16,%YMM14 |
(316) 0x23b11c VMOVSD (%RDI,%R8,8),%XMM16 |
(316) 0x23b123 VMOVHPD (%RDI,%R9,8),%XMM16,%XMM16 |
(316) 0x23b12a VINSERTF32X4 $0x1,%XMM16,%YMM17,%YMM16 |
(316) 0x23b131 VINSERTF64X4 $0x1,%YMM14,%ZMM16,%ZMM14 |
(316) 0x23b138 VADDPD %ZMM14,%ZMM15,%ZMM14 |
(316) 0x23b13e VMULPD %ZMM10,%ZMM14,%ZMM14 |
(316) 0x23b144 VSCATTERQPD %ZMM14,(%R13,%ZMM12,8){%K1} |
(316) 0x23b14c JNE 23ad80 |
0x23b152 MOV -0x60(%RBP),%RAX |
0x23b156 MOV -0x88(%RBP),%RBX |
0x23b15d MOV -0x80(%RBP),%R10 |
0x23b161 MOV -0x78(%RBP),%R12 |
0x23b165 MOV -0x38(%RBP),%RSI |
0x23b169 MOV -0x70(%RBP),%R14 |
0x23b16d MOV -0x30(%RBP),%R8 |
0x23b171 CMP %RAX,-0x68(%RBP) |
0x23b175 JE 23b219 |
0x23b17b VMOVSD -0x2996b(%RIP),%XMM0 |
0x23b183 NOPW %CS:(%RAX,%RAX,1) |
(317) 0x23b190 MOV %RCX,%RAX |
(317) 0x23b193 CQTO |
(317) 0x23b195 IDIV %R8 |
(317) 0x23b198 LEA (%R12,%RDX,1),%R9D |
(317) 0x23b19c ADD %R10D,%EDX |
(317) 0x23b19f MOVSXD %EDX,%RDX |
(317) 0x23b1a2 MOVSXD %R9D,%R9 |
(317) 0x23b1a5 LEA (%RSI,%RAX,1),%R8D |
(317) 0x23b1a9 MOV %R12,%RSI |
(317) 0x23b1ac MOV %RBX,%R12 |
(317) 0x23b1af MOV %R10,%RBX |
(317) 0x23b1b2 MOV -0x50(%RBP),%R10 |
(317) 0x23b1b6 MOVSXD %R8D,%R8 |
(317) 0x23b1b9 IMUL %R8,%R10 |
(317) 0x23b1bd IMUL %R14,%R8 |
(317) 0x23b1c1 LEA (%R10,%RDX,1),%R11 |
(317) 0x23b1c5 ADD %R9,%R10 |
(317) 0x23b1c8 ADD %R9,%R8 |
(317) 0x23b1cb VMOVSD (%RDI,%R11,8),%XMM1 |
(317) 0x23b1d1 VADDSD (%RDI,%R10,8),%XMM1,%XMM1 |
(317) 0x23b1d7 MOV %RBX,%R10 |
(317) 0x23b1da MOV %R12,%RBX |
(317) 0x23b1dd ADD %EBX,%EAX |
(317) 0x23b1df MOV %RSI,%R12 |
(317) 0x23b1e2 MOV -0x38(%RBP),%RSI |
(317) 0x23b1e6 CLTQ |
(317) 0x23b1e8 IMUL -0x50(%RBP),%RAX |
(317) 0x23b1ed ADD %RAX,%RDX |
(317) 0x23b1f0 ADD %R9,%RAX |
(317) 0x23b1f3 VADDSD (%RDI,%RDX,8),%XMM1,%XMM1 |
(317) 0x23b1f8 VADDSD (%RDI,%RAX,8),%XMM1,%XMM1 |
(317) 0x23b1fd VMULSD %XMM0,%XMM1,%XMM1 |
(317) 0x23b201 VMOVSD %XMM1,(%R13,%R8,8) |
(317) 0x23b208 MOV -0x30(%RBP),%R8 |
(317) 0x23b20c CMP %R15,%RCX |
(317) 0x23b20f LEA 0x1(%RCX),%RCX |
(317) 0x23b213 JL 23b190 |
0x23b219 MOV -0x44(%RBP),%ESI |
0x23b21c LEA 0x25c3d(%RIP),%RDI |
0x23b223 VZEROUPPER |
0x23b226 CALL 25f750 <@plt_start@+0x540> |
0x23b22b ADD $0x68,%RSP |
0x23b22f POP %RBX |
0x23b230 POP %R12 |
0x23b232 POP %R13 |
0x23b234 POP %R14 |
0x23b236 POP %R15 |
0x23b238 POP %RBP |
0x23b239 RET |
0x23b23a NOPW (%RAX,%RAX,1) |
0x24d04d NOPL (%RAX) |
0x25337f NOP |
0x25340b NOPL (%RAX,%RAX,1) |
0x253421 NOPW %CS:(%RAX,%RAX,1) |
0x253596 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_mom.cpp:157-160 |
Module | exec |
nb instructions | 122 |
nb uops | 124 |
loop length | 535 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 4 |
used zmm registers | 6 |
nb stack references | 16 |
micro-operation queue | 20.67 cycles |
front end | 20.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 15.67 | 15.67 | 15.67 | 0.50 | 7.50 | 7.50 | 0.50 | 0.00 | 0.00 |
cycles | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 16.00 | 16.00 | 16.00 | 0.50 | 7.50 | 7.50 | 0.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 20.67 |
Dispatch | 16.00 |
Overall L1 | 20.67 |
all | 6% |
load | 6% |
store | 0% |
mul | 0% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 6% |
load | 5% |
store | 0% |
mul | 0% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 16% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 16% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EBX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 23b22b <.omp_outlined..16+0x63b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x3,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23b22b <.omp_outlined..16+0x63b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RBX),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %ECX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R13,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x90(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x261d1(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x58(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 23b219 <.omp_outlined..16+0x629> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 23acf1 <.omp_outlined..16+0x101> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 23b17b <.omp_outlined..16+0x58b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VBROADCASTSD -0x294e7(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x28f4f(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VPBROADCASTQ -0x29511(%RIP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %R11,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTD %R12D,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R9D,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%ZMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %EBX,%YMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R11,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTQ %RAX,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x78(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x68(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 23b219 <.omp_outlined..16+0x629> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD -0x2996b(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x44(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x25c3d(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_mom.cpp:157-160 |
Module | exec |
nb instructions | 122 |
nb uops | 124 |
loop length | 535 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 4 |
used zmm registers | 6 |
nb stack references | 16 |
micro-operation queue | 20.67 cycles |
front end | 20.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 15.67 | 15.67 | 15.67 | 0.50 | 7.50 | 7.50 | 0.50 | 0.00 | 0.00 |
cycles | 8.75 | 8.75 | 8.50 | 8.50 | 4.50 | 16.00 | 16.00 | 16.00 | 0.50 | 7.50 | 7.50 | 0.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 20.67 |
Dispatch | 16.00 |
Overall L1 | 20.67 |
all | 6% |
load | 6% |
store | 0% |
mul | 0% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 6% |
load | 5% |
store | 0% |
mul | 0% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 16% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 16% |
store | 10% |
mul | 12% |
add-sub | 20% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EBX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 23b22b <.omp_outlined..16+0x63b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x1(%RAX),%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD $0x3,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JLE 23b22b <.omp_outlined..16+0x63b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RBX),%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %ECX,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R13,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x48(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x90(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x261d1(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x58(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x44(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x58(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R15,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R15 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R15,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 23b219 <.omp_outlined..16+0x629> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 23acf1 <.omp_outlined..16+0x101> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 23b17b <.omp_outlined..16+0x58b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VBROADCASTSD -0x294e7(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x28f4f(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VPBROADCASTQ -0x29511(%RIP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
MOV %R11,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
AND $-0x8,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTD %R12D,%YMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R9D,%YMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RSI,%ZMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %EBX,%YMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R14,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R9,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R14,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %R11,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTQ %RAX,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x88(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x80(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x78(%RBP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x70(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,-0x68(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 23b219 <.omp_outlined..16+0x629> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
VMOVSD -0x2996b(%RIP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV -0x44(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x25c3d(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined..16– | 1.25 | 0.64 |
○Loop 316 - advec_mom.cpp:158-160 - exec | 1.24 | 0.63 |
○Loop 317 - advec_mom.cpp:158-160 - exec | 0 | 0 |