Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:54-58 [...] | Coverage: 1.25% |
---|
Function: _Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_ ... | Module: exec | Source: advec_cell.cpp:54-58 [...] | Coverage: 1.25% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 54 - 58 |
-------------------------------------------------------------------------------- |
54: #pragma omp parallel for simd collapse(2) |
55: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
56: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
57: pre_vol(i, j) = volume(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
58: post_vol(i, j) = volume(i, j); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x427440 PUSH %RBP |
0x427441 MOV %RSP,%RBP |
0x427444 PUSH %R15 |
0x427446 PUSH %R14 |
0x427448 PUSH %R13 |
0x42744a PUSH %R12 |
0x42744c PUSH %RBX |
0x42744d MOV %RDI,%R13 |
0x427450 AND $-0x40,%RSP |
0x427454 SUB $0x40,%RSP |
0x427458 MOV 0x28(%RDI),%EAX |
0x42745b MOV 0x2c(%RDI),%EDX |
0x42745e MOV 0x20(%RDI),%EDI |
0x427461 MOV 0x24(%R13),%ECX |
0x427465 ADD $0x4,%EDX |
0x427468 DEC %EDI |
0x42746a LEA -0x1(%RAX),%R14D |
0x42746e MOV %EDX,0x18(%RSP) |
0x427472 MOV %EDI,0x14(%RSP) |
0x427476 CMP %EDX,%R14D |
0x427479 JGE 427923 |
0x42747f MOV %EDX,%R12D |
0x427482 LEA 0x4(%RCX),%R15D |
0x427486 SUB %R14D,%R12D |
0x427489 CMP %R15D,%EDI |
0x42748c JGE 427923 |
0x427492 MOV %R15D,%EBX |
0x427495 SUB %EDI,%EBX |
0x427497 MOV %EBX,0x1c(%RSP) |
0x42749b CALL 404650 <omp_get_num_threads@plt> |
0x4274a0 MOV %EAX,%EBX |
0x4274a2 CALL 404540 <omp_get_thread_num@plt> |
0x4274a7 XOR %EDX,%EDX |
0x4274a9 MOV %EAX,%R8D |
0x4274ac MOV 0x1c(%RSP),%EAX |
0x4274b0 IMUL %R12D,%EAX |
0x4274b4 DIV %EBX |
0x4274b6 MOV %EAX,%EBX |
0x4274b8 CMP %EDX,%R8D |
0x4274bb JB 42794d |
0x4274c1 IMUL %EBX,%R8D |
0x4274c5 LEA (%R8,%RDX,1),%EDI |
0x4274c9 LEA (%RBX,%RDI,1),%R9D |
0x4274cd MOV %R9D,0x10(%RSP) |
0x4274d2 CMP %R9D,%EDI |
0x4274d5 JAE 427923 |
0x4274db MOV %EDI,%EAX |
0x4274dd XOR %EDX,%EDX |
0x4274df MOV 0x14(%RSP),%R10D |
0x4274e4 VMOVQ (%R13),%XMM3 |
0x4274ea DIVL 0x1c(%RSP) |
0x4274ee VMOVQ 0x8(%R13),%XMM4 |
0x4274f4 VMOVQ 0x10(%R13),%XMM14 |
0x4274fa VMOVQ 0x18(%R13),%XMM1 |
0x427500 ADD %EDX,%R10D |
0x427503 LEA (%RAX,%R14,1),%R11D |
0x427507 SUB %R10D,%R15D |
0x42750a MOVSXD %R11D,%R8 |
0x42750d MOV %R10D,0x3c(%RSP) |
0x427512 NOPW %CS:(%RAX,%RAX,1) |
0x42751d NOPL (%RAX) |
(161) 0x427520 CMP %R15D,%EBX |
(161) 0x427523 MOV %R15D,%ECX |
(161) 0x427526 CMOVBE %EBX,%ECX |
(161) 0x427529 LEA (%RDI,%RCX,1),%ESI |
(161) 0x42752c MOV %ESI,0x38(%RSP) |
(161) 0x427530 CMP %ESI,%EDI |
(161) 0x427532 JAE 4278f8 |
(161) 0x427538 VMOVQ %XMM3,%R15 |
(161) 0x42753d VMOVQ %XMM14,%RBX |
(161) 0x427542 LEA -0x1(%RCX),%EAX |
(161) 0x427545 VMOVQ %XMM1,%R9 |
(161) 0x42754a MOV (%R15),%R13 |
(161) 0x42754d VMOVQ 0x10(%R15),%XMM6 |
(161) 0x427553 MOV (%RBX),%RDX |
(161) 0x427556 MOV (%R9),%R15 |
(161) 0x427559 VMOVQ %XMM4,%R14 |
(161) 0x42755e VMOVQ 0x10(%R9),%XMM5 |
(161) 0x427564 MOV (%R14),%R11 |
(161) 0x427567 MOV 0x10(%R14),%R12 |
(161) 0x42756b MOV 0x10(%RBX),%R14 |
(161) 0x42756f IMUL %R8,%R13 |
(161) 0x427573 IMUL %R8,%RDX |
(161) 0x427577 IMUL %R8,%R15 |
(161) 0x42757b IMUL %R8,%R11 |
(161) 0x42757f MOV %R13,0x20(%RSP) |
(161) 0x427584 MOV %RDX,0x28(%RSP) |
(161) 0x427589 MOV %R15,0x30(%RSP) |
(161) 0x42758e CMP $0x6,%EAX |
(161) 0x427591 JBE 427940 |
(161) 0x427597 MOVSXD 0x3c(%RSP),%RAX |
(161) 0x42759c LEA (%R13,%RAX,1),%RSI |
(161) 0x4275a1 LEA (%RDX,%RAX,1),%RDX |
(161) 0x4275a5 LEA 0x1(%R11,%RAX,1),%RBX |
(161) 0x4275aa ADD %R15,%RAX |
(161) 0x4275ad VMOVQ %XMM5,%R15 |
(161) 0x4275b2 LEA (%R14,%RDX,8),%R10 |
(161) 0x4275b6 VMOVQ %XMM6,%R13 |
(161) 0x4275bb LEA (%R15,%RAX,8),%RDX |
(161) 0x4275bf MOV %ECX,%R15D |
(161) 0x4275c2 LEA (%R13,%RSI,8),%R9 |
(161) 0x4275c7 XOR %EAX,%EAX |
(161) 0x4275c9 SHR $0x3,%R15D |
(161) 0x4275cd SAL $0x6,%R15 |
(161) 0x4275d1 LEA -0x40(%R15),%RSI |
(161) 0x4275d5 SHR $0x6,%RSI |
(161) 0x4275d9 SAL $0x3,%RBX |
(161) 0x4275dd INC %RSI |
(161) 0x4275e0 LEA (%R12,%RBX,1),%R13 |
(161) 0x4275e4 LEA -0x8(%R12,%RBX,1),%RBX |
(161) 0x4275e9 AND $0x3,%ESI |
(161) 0x4275ec JE 42768f |
(161) 0x4275f2 CMP $0x1,%RSI |
(161) 0x4275f6 JE 427657 |
(161) 0x4275f8 CMP $0x2,%RSI |
(161) 0x4275fc JE 427628 |
(161) 0x4275fe VMOVUPD (%R9),%ZMM7 |
(161) 0x427604 VADDPD (%R13),%ZMM7,%ZMM9 |
(161) 0x42760b MOV $0x40,%EAX |
(161) 0x427610 VSUBPD (%RBX),%ZMM9,%ZMM0 |
(161) 0x427616 VMOVUPD %ZMM0,(%R10) |
(161) 0x42761c VMOVUPD (%R9),%ZMM2 |
(161) 0x427622 VMOVUPD %ZMM2,(%RDX) |
(161) 0x427628 VMOVUPD (%R9,%RAX,1),%ZMM8 |
(161) 0x42762f VADDPD (%R13,%RAX,1),%ZMM8,%ZMM10 |
(161) 0x427637 VSUBPD (%RBX,%RAX,1),%ZMM10,%ZMM11 |
(161) 0x42763e VMOVUPD %ZMM11,(%R10,%RAX,1) |
(161) 0x427645 VMOVUPD (%R9,%RAX,1),%ZMM12 |
(161) 0x42764c VMOVUPD %ZMM12,(%RDX,%RAX,1) |
(161) 0x427653 ADD $0x40,%RAX |
(161) 0x427657 VMOVUPD (%R9,%RAX,1),%ZMM13 |
(161) 0x42765e VADDPD (%R13,%RAX,1),%ZMM13,%ZMM15 |
(161) 0x427666 VSUBPD (%RBX,%RAX,1),%ZMM15,%ZMM7 |
(161) 0x42766d VMOVUPD %ZMM7,(%R10,%RAX,1) |
(161) 0x427674 VMOVUPD (%R9,%RAX,1),%ZMM9 |
(161) 0x42767b VMOVUPD %ZMM9,(%RDX,%RAX,1) |
(161) 0x427682 ADD $0x40,%RAX |
(161) 0x427686 CMP %RAX,%R15 |
(161) 0x427689 JE 427759 |
(162) 0x42768f VMOVUPD (%R9,%RAX,1),%ZMM0 |
(162) 0x427696 VADDPD (%R13,%RAX,1),%ZMM0,%ZMM2 |
(162) 0x42769e VSUBPD (%RBX,%RAX,1),%ZMM2,%ZMM8 |
(162) 0x4276a5 VMOVUPD %ZMM8,(%R10,%RAX,1) |
(162) 0x4276ac VMOVUPD (%R9,%RAX,1),%ZMM10 |
(162) 0x4276b3 VMOVUPD %ZMM10,(%RDX,%RAX,1) |
(162) 0x4276ba VMOVUPD 0x40(%R9,%RAX,1),%ZMM11 |
(162) 0x4276c2 VADDPD 0x40(%R13,%RAX,1),%ZMM11,%ZMM12 |
(162) 0x4276ca VSUBPD 0x40(%RBX,%RAX,1),%ZMM12,%ZMM13 |
(162) 0x4276d2 VMOVUPD %ZMM13,0x40(%R10,%RAX,1) |
(162) 0x4276da VMOVUPD 0x40(%R9,%RAX,1),%ZMM15 |
(162) 0x4276e2 VMOVUPD %ZMM15,0x40(%RDX,%RAX,1) |
(162) 0x4276ea VMOVUPD 0x80(%R9,%RAX,1),%ZMM7 |
(162) 0x4276f2 VADDPD 0x80(%R13,%RAX,1),%ZMM7,%ZMM9 |
(162) 0x4276fa VSUBPD 0x80(%RBX,%RAX,1),%ZMM9,%ZMM0 |
(162) 0x427702 VMOVUPD %ZMM0,0x80(%R10,%RAX,1) |
(162) 0x42770a VMOVUPD 0x80(%R9,%RAX,1),%ZMM2 |
(162) 0x427712 VMOVUPD %ZMM2,0x80(%RDX,%RAX,1) |
(162) 0x42771a VMOVUPD 0xc0(%R9,%RAX,1),%ZMM8 |
(162) 0x427722 VADDPD 0xc0(%R13,%RAX,1),%ZMM8,%ZMM10 |
(162) 0x42772a VSUBPD 0xc0(%RBX,%RAX,1),%ZMM10,%ZMM11 |
(162) 0x427732 VMOVUPD %ZMM11,0xc0(%R10,%RAX,1) |
(162) 0x42773a VMOVUPD 0xc0(%R9,%RAX,1),%ZMM12 |
(162) 0x427742 VMOVUPD %ZMM12,0xc0(%RDX,%RAX,1) |
(162) 0x42774a ADD $0x100,%RAX |
(162) 0x427750 CMP %RAX,%R15 |
(162) 0x427753 JNE 42768f |
(161) 0x427759 MOV 0x3c(%RSP),%R9D |
(161) 0x42775e MOV %ECX,%R10D |
(161) 0x427761 AND $-0x8,%R10D |
(161) 0x427765 ADD %R10D,%EDI |
(161) 0x427768 LEA (%R10,%R9,1),%R15D |
(161) 0x42776c TEST $0x7,%CL |
(161) 0x42776f JE 4278f4 |
(161) 0x427775 SUB %R10D,%ECX |
(161) 0x427778 LEA -0x1(%RCX),%R13D |
(161) 0x42777c CMP $0x2,%R13D |
(161) 0x427780 JBE 4277f5 |
(161) 0x427782 MOVSXD 0x3c(%RSP),%RSI |
(161) 0x427787 MOV 0x20(%RSP),%RBX |
(161) 0x42778c VMOVQ %XMM6,%RAX |
(161) 0x427791 LEA (%RBX,%RSI,1),%RDX |
(161) 0x427795 LEA (%R11,%RSI,1),%R13 |
(161) 0x427799 ADD %R10,%RDX |
(161) 0x42779c LEA 0x1(%R10,%R13,1),%RBX |
(161) 0x4277a1 LEA (%RAX,%RDX,8),%R9 |
(161) 0x4277a5 VMOVUPD (%R12,%RBX,8),%YMM13 |
(161) 0x4277ab VSUBPD -0x8(%R12,%RBX,8),%YMM13,%YMM15 |
(161) 0x4277b2 MOV 0x28(%RSP),%RDX |
(161) 0x4277b7 VADDPD (%R9),%YMM15,%YMM7 |
(161) 0x4277bc LEA (%RDX,%RSI,1),%RAX |
(161) 0x4277c0 ADD %R10,%RAX |
(161) 0x4277c3 VMOVUPD %YMM7,(%R14,%RAX,8) |
(161) 0x4277c9 VMOVUPD (%R9),%YMM9 |
(161) 0x4277ce MOV 0x30(%RSP),%R9 |
(161) 0x4277d3 ADD %R9,%RSI |
(161) 0x4277d6 ADD %R10,%RSI |
(161) 0x4277d9 VMOVQ %XMM5,%R10 |
(161) 0x4277de VMOVUPD %YMM9,(%R10,%RSI,8) |
(161) 0x4277e4 TEST $0x3,%CL |
(161) 0x4277e7 JE 4278f4 |
(161) 0x4277ed AND $-0x4,%ECX |
(161) 0x4277f0 ADD %ECX,%EDI |
(161) 0x4277f2 ADD %ECX,%R15D |
(161) 0x4277f5 MOV 0x20(%RSP),%RBX |
(161) 0x4277fa LEA 0x1(%R15),%EAX |
(161) 0x4277fe MOVSXD %R15D,%RDX |
(161) 0x427801 VMOVQ %XMM6,%RCX |
(161) 0x427806 CLTQ |
(161) 0x427808 LEA (%R11,%RDX,1),%R10 |
(161) 0x42780c LEA (%R11,%RAX,1),%R13 |
(161) 0x427810 LEA (%RBX,%RDX,1),%RSI |
(161) 0x427814 LEA (%RCX,%RSI,8),%RCX |
(161) 0x427818 LEA (%R12,%R13,8),%RSI |
(161) 0x42781c MOV 0x28(%RSP),%R13 |
(161) 0x427821 VMOVSD (%RSI),%XMM0 |
(161) 0x427825 VSUBSD (%R12,%R10,8),%XMM0,%XMM2 |
(161) 0x42782b VADDSD (%RCX),%XMM2,%XMM8 |
(161) 0x42782f MOV 0x38(%RSP),%R10D |
(161) 0x427834 LEA (%R13,%RDX,1),%R9 |
(161) 0x427839 VMOVSD %XMM8,(%R14,%R9,8) |
(161) 0x42783f VMOVSD (%RCX),%XMM10 |
(161) 0x427843 MOV 0x30(%RSP),%RCX |
(161) 0x427848 VMOVQ %XMM5,%R9 |
(161) 0x42784d ADD %RCX,%RDX |
(161) 0x427850 VMOVSD %XMM10,(%R9,%RDX,8) |
(161) 0x427856 LEA 0x1(%RDI),%EDX |
(161) 0x427859 CMP %R10D,%EDX |
(161) 0x42785c JAE 4278f4 |
(161) 0x427862 LEA 0x2(%R15),%R9D |
(161) 0x427866 LEA (%RAX,%RBX,1),%RCX |
(161) 0x42786a MOV %RBX,%R10 |
(161) 0x42786d VMOVQ %XMM6,%RBX |
(161) 0x427872 MOVSXD %R9D,%RDX |
(161) 0x427875 LEA (%RBX,%RCX,8),%RCX |
(161) 0x427879 LEA (%R13,%RAX,1),%R9 |
(161) 0x42787e ADD $0x2,%EDI |
(161) 0x427881 LEA (%R11,%RDX,1),%RBX |
(161) 0x427885 LEA (%R12,%RBX,8),%RBX |
(161) 0x427889 VMOVSD (%RBX),%XMM11 |
(161) 0x42788d VADDSD (%RCX),%XMM11,%XMM12 |
(161) 0x427891 VSUBSD (%RSI),%XMM12,%XMM13 |
(161) 0x427895 VMOVSD %XMM13,(%R14,%R9,8) |
(161) 0x42789b VMOVSD (%RCX),%XMM15 |
(161) 0x42789f MOV 0x30(%RSP),%RCX |
(161) 0x4278a4 VMOVQ %XMM5,%RSI |
(161) 0x4278a9 ADD %RCX,%RAX |
(161) 0x4278ac VMOVSD %XMM15,(%RSI,%RAX,8) |
(161) 0x4278b1 MOV 0x38(%RSP),%EAX |
(161) 0x4278b5 CMP %EAX,%EDI |
(161) 0x4278b7 JAE 4278f4 |
(161) 0x4278b9 ADD $0x3,%R15D |
(161) 0x4278bd VMOVQ %XMM6,%RDI |
(161) 0x4278c2 ADD %RDX,%R10 |
(161) 0x4278c5 ADD %RDX,%R13 |
(161) 0x4278c8 MOVSXD %R15D,%R15 |
(161) 0x4278cb LEA (%RDI,%R10,8),%R10 |
(161) 0x4278cf ADD %RDX,%RCX |
(161) 0x4278d2 ADD %R11,%R15 |
(161) 0x4278d5 VMOVSD (%R12,%R15,8),%XMM6 |
(161) 0x4278db VADDSD (%R10),%XMM6,%XMM7 |
(161) 0x4278e0 VSUBSD (%RBX),%XMM7,%XMM9 |
(161) 0x4278e4 VMOVSD %XMM9,(%R14,%R13,8) |
(161) 0x4278ea VMOVSD (%R10),%XMM0 |
(161) 0x4278ef VMOVSD %XMM0,(%RSI,%RCX,8) |
(161) 0x4278f4 MOV 0x38(%RSP),%EDI |
(161) 0x4278f8 INC %R8 |
(161) 0x4278fb LEA (%R8),%R11D |
(161) 0x4278fe CMP %R11D,0x18(%RSP) |
(161) 0x427903 JLE 427920 |
(161) 0x427905 MOV 0x10(%RSP),%EBX |
(161) 0x427909 MOV 0x14(%RSP),%R14D |
(161) 0x42790e MOV 0x1c(%RSP),%R15D |
(161) 0x427913 SUB %EDI,%EBX |
(161) 0x427915 MOV %R14D,0x3c(%RSP) |
(161) 0x42791a JMP 427520 |
0x42791f NOP |
0x427920 VZEROUPPER |
0x427923 LEA -0x28(%RBP),%RSP |
0x427927 POP %RBX |
0x427928 POP %R12 |
0x42792a POP %R13 |
0x42792c POP %R14 |
0x42792e POP %R15 |
0x427930 POP %RBP |
0x427931 RET |
0x427932 NOPW %CS:(%RAX,%RAX,1) |
0x42793d NOPL (%RAX) |
(161) 0x427940 MOV 0x3c(%RSP),%R15D |
(161) 0x427945 XOR %R10D,%R10D |
(161) 0x427948 JMP 427775 |
0x42794d INC %EBX |
0x42794f XOR %EDX,%EDX |
0x427951 JMP 4274c1 |
0x427956 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | advec_cell.cpp:54-58 |
Module | exec |
nb instructions | 77 |
nb uops | 76 |
loop length | 276 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 12.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 427923 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RCX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 427923 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EBX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42794d <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x50d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EBX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%RDI,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 427923 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x14(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x1c(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R13),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R14,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R10D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R11D,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4274c1 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | advec_cell.cpp:54-58 |
Module | exec |
nb instructions | 77 |
nb uops | 76 |
loop length | 276 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 7.00 | 7.00 | 7.00 | 7.00 | 6.00 | 6.00 | 6.00 | 6.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 12.00 |
Front-end | 12.67 |
Dispatch | 7.00 |
DIV/SQRT | 12.00 |
Overall L1 | 12.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 10% |
store | 6% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RDI),%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x24(%R13),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
DEC %EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%RAX),%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EDI,0x14(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %EDX,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 427923 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x4(%RCX),%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R14D,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JGE 427923 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R15D,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %EDI,%EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %EBX,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CALL 404650 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 404540 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x1c(%RSP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
IMUL %R12D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %EBX | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %EDX,%R8D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JB 42794d <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x50d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
IMUL %EBX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RBX,%RDI,1),%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9D,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R9D,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 427923 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x4e3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %EDI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x14(%RSP),%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVQ (%R13),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DIVL 0x1c(%RSP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-17 | 6 |
VMOVQ 0x8(%R13),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x10(%R13),%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x18(%R13),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RAX,%R14,1),%R11D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R10D,%R15D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVSXD %R11D,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10D,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
INC %EBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4274c1 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.1– | 1.25 | 0.63 |
▼Loop 161 - advec_cell.cpp:56-58 - exec– | 0 | 0 |
○Loop 162 - advec_cell.cpp:57-58 - exec | 1.25 | 0.63 |