Function: .omp_outlined.#0x230940 | Module: exec | Source: advec_cell.cpp:44-48 [...] | Coverage: 1.47% |
---|
Function: .omp_outlined.#0x230940 | Module: exec | Source: advec_cell.cpp:44-48 [...] | Coverage: 1.47% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 44 - 48 |
-------------------------------------------------------------------------------- |
44: #pragma omp parallel for simd collapse(2) |
45: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
46: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
47: pre_vol(i, j) = volume(i, j) + (vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j) + vol_flux_y(i + 0, j + 1) - vol_flux_y(i, j)); |
48: post_vol(i, j) = pre_vol(i, j) - (vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j)); |
/beegfs/hackathon/users/eoseret/qaas_runs/170-854-8685/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x230940 PUSH %RBP |
0x230941 MOV %RSP,%RBP |
0x230944 PUSH %R15 |
0x230946 PUSH %R14 |
0x230948 PUSH %R13 |
0x23094a PUSH %R12 |
0x23094c PUSH %RBX |
0x23094d AND $-0x40,%RSP |
0x230951 SUB $0x1c0,%RSP |
0x230958 MOV (%RCX),%R14D |
0x23095b MOV (%RDX),%R15D |
0x23095e ADD $0x4,%R14D |
0x230962 CMP %R15D,%R14D |
0x230965 JL 23129b |
0x23096b MOV (%R9),%R12D |
0x23096e MOV (%R8),%EBX |
0x230971 ADD $0x4,%R12D |
0x230975 CMP %EBX,%R12D |
0x230978 JL 23129b |
0x23097e LEA -0x1(%RBX),%EAX |
0x230981 LEA -0x1(%R15),%R13D |
0x230985 MOV (%RDI),%ESI |
0x230987 MOVQ $0,0x60(%RSP) |
0x230990 MOVQ $0x1,0xf8(%RSP) |
0x23099c MOVL $0,0x34(%RSP) |
0x2309a4 SUB %EAX,%R12D |
0x2309a7 SUB %R13D,%R14D |
0x2309aa MOV %RAX,0x48(%RSP) |
0x2309af IMUL %R12,%R14 |
0x2309b3 DEC %R14 |
0x2309b6 MOV %R14,0x28(%RSP) |
0x2309bb SUB $0x8,%RSP |
0x2309bf LEA 0x3c(%RSP),%RCX |
0x2309c4 LEA 0x100(%RSP),%RAX |
0x2309cc LEA 0x2fff5(%RIP),%RDI |
0x2309d3 LEA 0x68(%RSP),%R8 |
0x2309d8 LEA 0x30(%RSP),%R9 |
0x2309dd MOV %ESI,0x38(%RSP) |
0x2309e1 MOV $0x22,%EDX |
0x2309e6 PUSH $0x1 |
0x2309e8 PUSH $0x1 |
0x2309ea PUSH %RAX |
0x2309eb CALL 25f740 <@plt_start@+0x530> |
0x2309f0 ADD $0x20,%RSP |
0x2309f4 MOV 0x28(%RSP),%RAX |
0x2309f9 MOV 0x60(%RSP),%RCX |
0x2309fe CMP %R14,%RAX |
0x230a01 CMOVL %RAX,%R14 |
0x230a05 MOV %R14,0x28(%RSP) |
0x230a0a CMP %R14,%RCX |
0x230a0d JG 231288 |
0x230a13 MOV 0x28(%RBP),%RDX |
0x230a17 MOV 0x10(%RBP),%R8 |
0x230a1b MOV %R13,0x38(%RSP) |
0x230a20 MOV 0x20(%RBP),%RSI |
0x230a24 MOV 0x18(%RBP),%RDI |
0x230a28 MOV 0x30(%RBP),%RAX |
0x230a2c MOV %RBX,0x20(%RSP) |
0x230a31 MOV %R12,0x40(%RSP) |
0x230a36 MOV (%RDX),%R13 |
0x230a39 MOV 0x10(%RDX),%R10 |
0x230a3d MOV (%R8),%RDX |
0x230a40 MOV 0x10(%R8),%R8 |
0x230a44 MOV (%RDI),%RBX |
0x230a47 MOV 0x10(%RDI),%R9 |
0x230a4b MOV (%RSI),%RDI |
0x230a4e MOV 0x10(%RSI),%R11 |
0x230a52 MOV 0x10(%RAX),%RSI |
0x230a56 MOV %R8,0x58(%RSP) |
0x230a5b MOV (%RAX),%R8 |
0x230a5e MOV %R14,%RAX |
0x230a61 SUB %RCX,%RAX |
0x230a64 MOV %RDI,0xa8(%RSP) |
0x230a6c MOV %R13,0xa0(%RSP) |
0x230a74 MOV %RDX,0x98(%RSP) |
0x230a7c MOV %RSI,0x50(%RSP) |
0x230a81 INC %RAX |
0x230a84 MOV %R8,0x90(%RSP) |
0x230a8c CMP $0x8,%RAX |
0x230a90 JAE 230a9c |
0x230a92 MOV 0x20(%RSP),%R13 |
0x230a97 JMP 2311a0 |
0x230a9c MOV %R14,0x68(%RSP) |
0x230aa1 MOV %RBX,%R14 |
0x230aa4 MOV %RAX,%RBX |
0x230aa7 MOV %RAX,0x78(%RSP) |
0x230aac MOV 0x38(%RSP),%RAX |
0x230ab1 VPBROADCASTQ %RCX,%ZMM0 |
0x230ab7 VPADDQ -0x1f041(%RIP),%ZMM0,%ZMM0 |
0x230ac1 VPBROADCASTQ -0x1f2c3(%RIP),%ZMM13 |
0x230acb AND $-0x8,%RBX |
0x230acf VPBROADCASTQ %R12,%ZMM1 |
0x230ad5 VPBROADCASTQ %RDI,%ZMM6 |
0x230adb VPBROADCASTD %R15D,%YMM7 |
0x230ae1 VPBROADCASTQ %R13,%ZMM8 |
0x230ae7 VPBROADCASTQ %RDX,%ZMM9 |
0x230aed VPBROADCASTQ %R8,%ZMM10 |
0x230af3 MOV %R14,0x80(%RSP) |
0x230afb MOV %R15,0x88(%RSP) |
0x230b03 ADD %RBX,%RCX |
0x230b06 VEXTRACTI32X4 $0x3,%ZMM1,%XMM11 |
0x230b0d VEXTRACTI32X4 $0x2,%ZMM1,%XMM12 |
0x230b14 MOV %RBX,0x70(%RSP) |
0x230b19 VPBROADCASTD %EAX,%YMM2 |
0x230b1f MOV 0x48(%RSP),%RAX |
0x230b24 VMOVDQA %YMM2,0x120(%RSP) |
0x230b2d VPBROADCASTD %EAX,%YMM2 |
0x230b33 MOV 0x20(%RSP),%RAX |
0x230b38 VMOVDQA %YMM2,0x100(%RSP) |
0x230b41 VPBROADCASTQ %R14,%ZMM2 |
0x230b47 VMOVDQA64 %ZMM2,0x140(%RSP) |
0x230b4f VPBROADCASTD %EAX,%YMM5 |
0x230b55 MOV %RBX,%RAX |
0x230b58 NOPL (%RAX,%RAX,1) |
(178) 0x230b60 VEXTRACTI32X4 $0x3,%ZMM0,%XMM14 |
(178) 0x230b67 MOV %RAX,0xf0(%RSP) |
(178) 0x230b6f VPEXTRQ $0x1,%XMM11,%RSI |
(178) 0x230b75 VMOVQ %XMM11,%RDI |
(178) 0x230b7a VPEXTRQ $0x1,%XMM12,%R8 |
(178) 0x230b80 VEXTRACTI32X4 $0x1,%YMM1,%XMM18 |
(178) 0x230b87 KXNORW %K0,%K0,%K1 |
(178) 0x230b8b VPEXTRQ $0x1,%XMM14,%RAX |
(178) 0x230b91 CQTO |
(178) 0x230b93 IDIV %RSI |
(178) 0x230b96 MOV %RAX,%RSI |
(178) 0x230b99 VMOVQ %XMM14,%RAX |
(178) 0x230b9e VEXTRACTI32X4 $0x2,%ZMM0,%XMM14 |
(178) 0x230ba5 VMOVQ %RSI,%XMM15 |
(178) 0x230baa VMOVQ %XMM12,%RSI |
(178) 0x230baf CQTO |
(178) 0x230bb1 IDIV %RDI |
(178) 0x230bb4 MOV %RAX,%RDI |
(178) 0x230bb7 VPEXTRQ $0x1,%XMM14,%RAX |
(178) 0x230bbd VMOVQ %RDI,%XMM16 |
(178) 0x230bc3 VMOVQ %XMM18,%RDI |
(178) 0x230bc9 CQTO |
(178) 0x230bcb VPUNPCKLQDQ %XMM15,%XMM16,%XMM15 |
(178) 0x230bd1 IDIV %R8 |
(178) 0x230bd4 VMOVQ %RAX,%XMM16 |
(178) 0x230bda VMOVQ %XMM14,%RAX |
(178) 0x230bdf VEXTRACTI128 $0x1,%YMM0,%XMM14 |
(178) 0x230be5 CQTO |
(178) 0x230be7 IDIV %RSI |
(178) 0x230bea VPEXTRQ $0x1,%XMM18,%RSI |
(178) 0x230bf1 VMOVQ %RAX,%XMM17 |
(178) 0x230bf7 VPEXTRQ $0x1,%XMM14,%RAX |
(178) 0x230bfd CQTO |
(178) 0x230bff IDIV %RSI |
(178) 0x230c02 MOV %RAX,%RSI |
(178) 0x230c05 VMOVQ %XMM14,%RAX |
(178) 0x230c0a VPUNPCKLQDQ %XMM16,%XMM17,%XMM14 |
(178) 0x230c10 CQTO |
(178) 0x230c12 VINSERTI128 $0x1,%XMM15,%YMM14,%YMM14 |
(178) 0x230c18 VMOVQ %RSI,%XMM15 |
(178) 0x230c1d VPEXTRQ $0x1,%XMM1,%RSI |
(178) 0x230c23 IDIV %RDI |
(178) 0x230c26 VPEXTRQ $0x1,%XMM0,%RDX |
(178) 0x230c2c VMOVQ %XMM1,%RDI |
(178) 0x230c31 VMOVQ %RAX,%XMM16 |
(178) 0x230c37 MOV %RDX,%RAX |
(178) 0x230c3a CQTO |
(178) 0x230c3c IDIV %RSI |
(178) 0x230c3f VPUNPCKLQDQ %XMM15,%XMM16,%XMM15 |
(178) 0x230c45 MOV %RAX,%RSI |
(178) 0x230c48 VMOVQ %XMM0,%RAX |
(178) 0x230c4d VMOVQ %RSI,%XMM16 |
(178) 0x230c53 CQTO |
(178) 0x230c55 IDIV %RDI |
(178) 0x230c58 VMOVQ %RAX,%XMM17 |
(178) 0x230c5e VPUNPCKLQDQ %XMM16,%XMM17,%XMM16 |
(178) 0x230c64 VINSERTI32X4 $0x1,%XMM15,%YMM16,%YMM15 |
(178) 0x230c6b VINSERTI64X4 $0x1,%YMM14,%ZMM15,%ZMM14 |
(178) 0x230c72 VPMOVQD %ZMM14,%YMM17 |
(178) 0x230c78 VPADDD 0x120(%RSP),%YMM17,%YMM15 |
(178) 0x230c80 VPMULLQ %ZMM1,%ZMM14,%ZMM14 |
(178) 0x230c86 VPSUBQ %ZMM14,%ZMM0,%ZMM14 |
(178) 0x230c8c VPADDQ %ZMM13,%ZMM0,%ZMM0 |
(178) 0x230c92 VPMOVQD %ZMM14,%YMM16 |
(178) 0x230c98 VPADDD 0x100(%RSP),%YMM16,%YMM14 |
(178) 0x230ca0 VPADDD %YMM17,%YMM7,%YMM17 |
(178) 0x230ca6 VPMOVSXDQ %YMM17,%ZMM17 |
(178) 0x230cac VPMOVSXDQ %YMM15,%ZMM15 |
(178) 0x230cb2 VPMULLQ 0x140(%RSP),%ZMM15,%ZMM18 |
(178) 0x230cba VPADDD %YMM16,%YMM5,%YMM16 |
(178) 0x230cc0 VPMOVSXDQ %YMM16,%ZMM16 |
(178) 0x230cc6 VPMULLQ %ZMM17,%ZMM8,%ZMM17 |
(178) 0x230ccc VPMOVSXDQ %YMM14,%ZMM14 |
(178) 0x230cd2 VPADDQ %ZMM14,%ZMM17,%ZMM17 |
(178) 0x230cd8 VEXTRACTI32X4 $0x3,%ZMM17,%XMM30 |
(178) 0x230cdf VEXTRACTI32X4 $0x2,%ZMM17,%XMM29 |
(178) 0x230ce6 VEXTRACTI32X4 $0x1,%YMM17,%XMM28 |
(178) 0x230ced VPADDQ %ZMM14,%ZMM18,%ZMM18 |
(178) 0x230cf3 VEXTRACTI32X4 $0x1,%YMM18,%XMM19 |
(178) 0x230cfa VEXTRACTI32X4 $0x2,%ZMM18,%XMM2 |
(178) 0x230d01 VMOVQ %XMM18,%RSI |
(178) 0x230d07 VPEXTRQ $0x1,%XMM18,%RDX |
(178) 0x230d0e VEXTRACTI32X4 $0x3,%ZMM18,%XMM18 |
(178) 0x230d15 VMOVQ %XMM19,%RDI |
(178) 0x230d1b VPEXTRQ $0x1,%XMM19,%R14 |
(178) 0x230d22 VPMULLQ %ZMM15,%ZMM6,%ZMM19 |
(178) 0x230d28 VMOVQ %XMM18,%RBX |
(178) 0x230d2e VPEXTRQ $0x1,%XMM18,%RAX |
(178) 0x230d35 VPEXTRQ $0x1,%XMM2,0xc0(%RSP) |
(178) 0x230d40 VMOVSD (%R9,%RSI,8),%XMM18 |
(178) 0x230d47 VMOVQ %XMM2,%R8 |
(178) 0x230d4c VMOVSD (%R9,%RBX,8),%XMM22 |
(178) 0x230d53 VMOVSD (%R9,%RDI,8),%XMM25 |
(178) 0x230d5a VMOVSD (%R9,%R8,8),%XMM23 |
(178) 0x230d61 VMOVHPD (%R9,%RAX,8),%XMM22,%XMM22 |
(178) 0x230d68 MOV 0xc0(%RSP),%RAX |
(178) 0x230d70 VMOVHPD (%R9,%R14,8),%XMM25,%XMM25 |
(178) 0x230d77 VPADDQ %ZMM16,%ZMM19,%ZMM20 |
(178) 0x230d7d VPADDQ %ZMM14,%ZMM19,%ZMM19 |
(178) 0x230d83 VEXTRACTI32X4 $0x1,%YMM20,%XMM16 |
(178) 0x230d8a VMOVQ %XMM20,%RSI |
(178) 0x230d90 VPEXTRQ $0x1,%XMM20,%R8 |
(178) 0x230d97 VEXTRACTI32X4 $0x2,%ZMM19,%XMM24 |
(178) 0x230d9e VEXTRACTI32X4 $0x1,%YMM19,%XMM21 |
(178) 0x230da5 VMOVQ %XMM16,%RDI |
(178) 0x230dab VPEXTRQ $0x1,%XMM16,%R15 |
(178) 0x230db2 VEXTRACTI32X4 $0x2,%ZMM20,%XMM16 |
(178) 0x230db9 MOV %R8,0xc8(%RSP) |
(178) 0x230dc1 MOV %RSI,0xd0(%RSP) |
(178) 0x230dc9 VMOVQ %XMM16,%RBX |
(178) 0x230dcf VPEXTRQ $0x1,%XMM16,%R12 |
(178) 0x230dd6 VMOVHPD (%R9,%RDX,8),%XMM18,%XMM16 |
(178) 0x230ddd VEXTRACTI32X4 $0x3,%ZMM20,%XMM18 |
(178) 0x230de4 VMOVSD (%R11,%RDI,8),%XMM20 |
(178) 0x230deb MOV %RDI,0xd8(%RSP) |
(178) 0x230df3 MOV %R15,%RDI |
(178) 0x230df6 VMOVQ %XMM18,%RDX |
(178) 0x230dfc VPEXTRQ $0x1,%XMM18,%R13 |
(178) 0x230e03 VMOVSD (%R11,%RSI,8),%XMM18 |
(178) 0x230e0a VMOVSD (%R11,%RBX,8),%XMM27 |
(178) 0x230e11 MOV %RBX,0xe0(%RSP) |
(178) 0x230e19 VMOVQ %XMM24,%RBX |
(178) 0x230e1f VMOVHPD (%R11,%R15,8),%XMM20,%XMM20 |
(178) 0x230e26 VMOVQ %XMM21,%RSI |
(178) 0x230e2c VMOVQ %XMM19,%R15 |
(178) 0x230e32 MOV %R12,0xb8(%RSP) |
(178) 0x230e3a VMOVHPD (%R11,%R8,8),%XMM18,%XMM18 |
(178) 0x230e41 VPEXTRQ $0x1,%XMM24,%R8 |
(178) 0x230e48 VMOVHPD (%R9,%RAX,8),%XMM23,%XMM24 |
(178) 0x230e4f VMOVQ %XMM30,%RAX |
(178) 0x230e55 VMOVSD (%R11,%RDX,8),%XMM26 |
(178) 0x230e5c MOV %RDX,0xe8(%RSP) |
(178) 0x230e64 VMOVQ %XMM17,%RDX |
(178) 0x230e6a VMOVHPD (%R11,%R12,8),%XMM27,%XMM27 |
(178) 0x230e71 VEXTRACTI32X4 $0x3,%ZMM19,%XMM23 |
(178) 0x230e78 MOV %RSI,0xb0(%RSP) |
(178) 0x230e80 VPEXTRQ $0x1,%XMM19,%R12 |
(178) 0x230e87 MOV %R13,%R14 |
(178) 0x230e8a VMOVSD (%R10,%RAX,8),%XMM31 |
(178) 0x230e91 VMOVQ %XMM29,%RAX |
(178) 0x230e97 VMOVHPD (%R11,%R13,8),%XMM26,%XMM26 |
(178) 0x230e9e MOV 0x58(%RSP),%R13 |
(178) 0x230ea3 VMOVSD (%R10,%RAX,8),%XMM2 |
(178) 0x230ea9 VMOVQ %XMM28,%RAX |
(178) 0x230eaf VMOVSD (%R10,%RAX,8),%XMM3 |
(178) 0x230eb5 VPEXTRQ $0x1,%XMM17,%RAX |
(178) 0x230ebc VMOVSD (%R10,%RDX,8),%XMM17 |
(178) 0x230ec3 VPEXTRQ $0x1,%XMM30,%RDX |
(178) 0x230eca VMOVHPD (%R10,%RDX,8),%XMM31,%XMM30 |
(178) 0x230ed1 VPEXTRQ $0x1,%XMM28,%RDX |
(178) 0x230ed8 VPMULLQ %ZMM15,%ZMM8,%ZMM28 |
(178) 0x230ede VMOVHPD (%R10,%RAX,8),%XMM17,%XMM17 |
(178) 0x230ee5 VPEXTRQ $0x1,%XMM29,%RAX |
(178) 0x230eec VMOVHPD (%R10,%RAX,8),%XMM2,%XMM2 |
(178) 0x230ef2 VMOVHPD (%R10,%RDX,8),%XMM3,%XMM3 |
(178) 0x230ef8 VINSERTF32X4 $0x1,%XMM25,%YMM16,%YMM16 |
(178) 0x230eff VPADDQ %ZMM14,%ZMM28,%ZMM28 |
(178) 0x230f05 VEXTRACTI32X4 $0x3,%ZMM28,%XMM4 |
(178) 0x230f0c VEXTRACTI32X4 $0x2,%ZMM28,%XMM31 |
(178) 0x230f13 VEXTRACTI32X4 $0x1,%YMM28,%XMM29 |
(178) 0x230f1a VINSERTF32X4 $0x1,%XMM22,%YMM24,%YMM22 |
(178) 0x230f21 VINSERTF32X4 $0x1,%XMM20,%YMM18,%YMM18 |
(178) 0x230f28 VMOVQ %XMM4,%RAX |
(178) 0x230f2d VPEXTRQ $0x1,%XMM4,%RDX |
(178) 0x230f33 VINSERTF32X4 $0x1,%XMM26,%YMM27,%YMM26 |
(178) 0x230f3a VMOVSD (%R11,%RBX,8),%XMM27 |
(178) 0x230f41 VMOVSD (%R10,%RAX,8),%XMM4 |
(178) 0x230f47 VMOVQ %XMM31,%RAX |
(178) 0x230f4d VMOVHPD (%R11,%R8,8),%XMM27,%XMM20 |
(178) 0x230f54 VMOVSD (%R11,%RSI,8),%XMM27 |
(178) 0x230f5b VPEXTRQ $0x1,%XMM21,%RSI |
(178) 0x230f62 VMOVHPD (%R10,%RDX,8),%XMM4,%XMM4 |
(178) 0x230f68 VPEXTRQ $0x1,%XMM31,%RDX |
(178) 0x230f6f VMOVSD (%R10,%RAX,8),%XMM31 |
(178) 0x230f76 VMOVQ %XMM29,%RAX |
(178) 0x230f7c VMOVHPD (%R11,%RSI,8),%XMM27,%XMM21 |
(178) 0x230f83 VMOVSD (%R11,%R15,8),%XMM27 |
(178) 0x230f8a VINSERTF64X4 $0x1,%YMM26,%ZMM18,%ZMM18 |
(178) 0x230f91 VMOVHPD (%R10,%RDX,8),%XMM31,%XMM31 |
(178) 0x230f98 VPEXTRQ $0x1,%XMM29,%RDX |
(178) 0x230f9f VMOVSD (%R10,%RAX,8),%XMM29 |
(178) 0x230fa6 VMOVQ %XMM28,%RAX |
(178) 0x230fac VMOVHPD (%R11,%R12,8),%XMM27,%XMM19 |
(178) 0x230fb3 VMOVHPD (%R10,%RDX,8),%XMM29,%XMM29 |
(178) 0x230fba VMOVSD (%R10,%RAX,8),%XMM24 |
(178) 0x230fc1 VPEXTRQ $0x1,%XMM28,%RDX |
(178) 0x230fc8 VMOVQ %XMM23,%RAX |
(178) 0x230fce VINSERTF32X4 $0x1,%XMM30,%YMM2,%YMM2 |
(178) 0x230fd5 VINSERTF32X4 $0x1,%XMM3,%YMM17,%YMM3 |
(178) 0x230fdc VMOVHPD (%R10,%RDX,8),%XMM24,%XMM24 |
(178) 0x230fe3 VPEXTRQ $0x1,%XMM23,%RDX |
(178) 0x230fea VMOVSD (%R11,%RAX,8),%XMM23 |
(178) 0x230ff1 VMOVHPD (%R11,%RDX,8),%XMM23,%XMM23 |
(178) 0x230ff8 VINSERTF64X4 $0x1,%YMM2,%ZMM3,%ZMM2 |
(178) 0x230fff VINSERTF32X4 $0x1,%XMM21,%YMM19,%YMM19 |
(178) 0x231006 VINSERTF32X4 $0x1,%XMM4,%YMM31,%YMM3 |
(178) 0x23100d VINSERTF32X4 $0x1,%XMM29,%YMM24,%YMM4 |
(178) 0x231014 VINSERTF32X4 $0x1,%XMM23,%YMM20,%YMM20 |
(178) 0x23101b VINSERTF64X4 $0x1,%YMM3,%ZMM4,%ZMM3 |
(178) 0x231022 VINSERTF64X4 $0x1,%YMM22,%ZMM16,%ZMM4 |
(178) 0x231029 VINSERTF64X4 $0x1,%YMM20,%ZMM19,%ZMM19 |
(178) 0x231030 VSUBPD %ZMM19,%ZMM18,%ZMM18 |
(178) 0x231036 VADDPD %ZMM2,%ZMM18,%ZMM2 |
(178) 0x23103c VSUBPD %ZMM3,%ZMM2,%ZMM2 |
(178) 0x231042 VPMULLQ %ZMM15,%ZMM9,%ZMM3 |
(178) 0x231048 VADDPD %ZMM2,%ZMM4,%ZMM2 |
(178) 0x23104e VPADDQ %ZMM14,%ZMM3,%ZMM3 |
(178) 0x231054 VSCATTERQPD %ZMM2,(%R13,%ZMM3,8){%K1} |
(178) 0x23105c KXNORW %K0,%K0,%K1 |
(178) 0x231060 MOV 0xe8(%RSP),%R13 |
(178) 0x231068 VMOVSD (%R11,%RAX,8),%XMM18 |
(178) 0x23106f VMOVSD (%R11,%RBX,8),%XMM19 |
(178) 0x231076 VMOVSD (%R11,%R15,8),%XMM21 |
(178) 0x23107d MOV 0xf0(%RSP),%RAX |
(178) 0x231085 VMOVHPD (%R11,%RDX,8),%XMM18,%XMM18 |
(178) 0x23108c MOV 0xb0(%RSP),%RDX |
(178) 0x231094 VMOVHPD (%R11,%R8,8),%XMM19,%XMM19 |
(178) 0x23109b VMOVHPD (%R11,%R12,8),%XMM21,%XMM21 |
(178) 0x2310a2 VMOVSD (%R11,%R13,8),%XMM3 |
(178) 0x2310a8 MOV 0xe0(%RSP),%R13 |
(178) 0x2310b0 ADD $-0x8,%RAX |
(178) 0x2310b4 VMOVHPD (%R11,%R14,8),%XMM3,%XMM3 |
(178) 0x2310ba MOV 0xb8(%RSP),%R14 |
(178) 0x2310c2 VMOVSD (%R11,%RDX,8),%XMM20 |
(178) 0x2310c9 VMOVHPD (%R11,%RSI,8),%XMM20,%XMM20 |
(178) 0x2310d0 MOV 0x50(%RSP),%RSI |
(178) 0x2310d5 VMOVSD (%R11,%R13,8),%XMM4 |
(178) 0x2310db MOV 0xd8(%RSP),%R13 |
(178) 0x2310e3 VMOVHPD (%R11,%R14,8),%XMM4,%XMM4 |
(178) 0x2310e9 VMOVSD (%R11,%R13,8),%XMM16 |
(178) 0x2310f0 VMOVHPD (%R11,%RDI,8),%XMM16,%XMM16 |
(178) 0x2310f7 MOV 0xd0(%RSP),%RDI |
(178) 0x2310ff VINSERTF128 $0x1,%XMM3,%YMM4,%YMM3 |
(178) 0x231105 VMOVSD (%R11,%RDI,8),%XMM17 |
(178) 0x23110c MOV 0xc8(%RSP),%RDI |
(178) 0x231114 VMOVHPD (%R11,%RDI,8),%XMM17,%XMM17 |
(178) 0x23111b VINSERTF32X4 $0x1,%XMM16,%YMM17,%YMM4 |
(178) 0x231122 VINSERTF32X4 $0x1,%XMM18,%YMM19,%YMM17 |
(178) 0x231129 VINSERTF32X4 $0x1,%XMM20,%YMM21,%YMM16 |
(178) 0x231130 VINSERTF64X4 $0x1,%YMM3,%ZMM4,%ZMM3 |
(178) 0x231137 VINSERTF64X4 $0x1,%YMM17,%ZMM16,%ZMM4 |
(178) 0x23113e VSUBPD %ZMM4,%ZMM3,%ZMM3 |
(178) 0x231144 VPMULLQ %ZMM15,%ZMM10,%ZMM4 |
(178) 0x23114a VSUBPD %ZMM3,%ZMM2,%ZMM2 |
(178) 0x231150 VPADDQ %ZMM14,%ZMM4,%ZMM3 |
(178) 0x231156 VSCATTERQPD %ZMM2,(%RSI,%ZMM3,8){%K1} |
(178) 0x23115d JNE 230b60 |
0x231163 MOV 0x70(%RSP),%RAX |
0x231168 MOV 0x88(%RSP),%R15 |
0x231170 MOV 0x20(%RSP),%R13 |
0x231175 MOV 0x40(%RSP),%R12 |
0x23117a MOV 0x80(%RSP),%RBX |
0x231182 MOV 0x68(%RSP),%R14 |
0x231187 CMP %RAX,0x78(%RSP) |
0x23118c JE 231288 |
0x231192 NOPW %CS:(%RAX,%RAX,1) |
(179) 0x2311a0 MOV %RCX,%RAX |
(179) 0x2311a3 CQTO |
(179) 0x2311a5 IDIV %R12 |
(179) 0x2311a8 MOV 0x38(%RSP),%RSI |
(179) 0x2311ad MOV 0xa8(%RSP),%R8 |
(179) 0x2311b5 MOV 0xa0(%RSP),%R12 |
(179) 0x2311bd LEA (%RSI,%RAX,1),%EDI |
(179) 0x2311c0 MOV 0x48(%RSP),%RSI |
(179) 0x2311c5 ADD %R15D,%EAX |
(179) 0x2311c8 MOVSXD %EDI,%RDI |
(179) 0x2311cb CLTQ |
(179) 0x2311cd IMUL %RDI,%R8 |
(179) 0x2311d1 IMUL %R12,%RAX |
(179) 0x2311d5 ADD %EDX,%ESI |
(179) 0x2311d7 ADD %R13D,%EDX |
(179) 0x2311da MOVSXD %EDX,%RDX |
(179) 0x2311dd MOVSXD %ESI,%RSI |
(179) 0x2311e0 ADD %R8,%RDX |
(179) 0x2311e3 ADD %RSI,%R8 |
(179) 0x2311e6 ADD %RSI,%RAX |
(179) 0x2311e9 VMOVSD (%R11,%RDX,8),%XMM0 |
(179) 0x2311ef VSUBSD (%R11,%R8,8),%XMM0,%XMM0 |
(179) 0x2311f5 VADDSD (%R10,%RAX,8),%XMM0,%XMM0 |
(179) 0x2311fb MOV %R12,%RAX |
(179) 0x2311fe IMUL %RDI,%RAX |
(179) 0x231202 MOV %R14,%R12 |
(179) 0x231205 MOV %RBX,%R14 |
(179) 0x231208 ADD %RSI,%RAX |
(179) 0x23120b VSUBSD (%R10,%RAX,8),%XMM0,%XMM0 |
(179) 0x231211 MOV %RBX,%RAX |
(179) 0x231214 IMUL %RDI,%RAX |
(179) 0x231218 MOV %R13,%RBX |
(179) 0x23121b MOV %R15,%R13 |
(179) 0x23121e MOV 0x58(%RSP),%R15 |
(179) 0x231223 ADD %RSI,%RAX |
(179) 0x231226 VADDSD (%R9,%RAX,8),%XMM0,%XMM0 |
(179) 0x23122c MOV 0x98(%RSP),%RAX |
(179) 0x231234 IMUL %RDI,%RAX |
(179) 0x231238 ADD %RSI,%RAX |
(179) 0x23123b VMOVSD %XMM0,(%R15,%RAX,8) |
(179) 0x231241 MOV %R13,%R15 |
(179) 0x231244 MOV %RBX,%R13 |
(179) 0x231247 MOV %R14,%RBX |
(179) 0x23124a MOV %R12,%R14 |
(179) 0x23124d MOV 0x40(%RSP),%R12 |
(179) 0x231252 VMOVSD (%R11,%RDX,8),%XMM1 |
(179) 0x231258 VSUBSD (%R11,%R8,8),%XMM1,%XMM1 |
(179) 0x23125e MOV 0x90(%RSP),%R8 |
(179) 0x231266 IMUL %R8,%RDI |
(179) 0x23126a ADD %RSI,%RDI |
(179) 0x23126d MOV 0x50(%RSP),%RSI |
(179) 0x231272 VSUBSD %XMM1,%XMM0,%XMM0 |
(179) 0x231276 VMOVSD %XMM0,(%RSI,%RDI,8) |
(179) 0x23127b CMP %R14,%RCX |
(179) 0x23127e LEA 0x1(%RCX),%RCX |
(179) 0x231282 JL 2311a0 |
0x231288 MOV 0x30(%RSP),%ESI |
0x23128c LEA 0x2f74d(%RIP),%RDI |
0x231293 VZEROUPPER |
0x231296 CALL 25f750 <@plt_start@+0x540> |
0x23129b LEA -0x28(%RBP),%RSP |
0x23129f POP %RBX |
0x2312a0 POP %R12 |
0x2312a2 POP %R13 |
0x2312a4 POP %R14 |
0x2312a6 POP %R15 |
0x2312a8 POP %RBP |
0x2312a9 RET |
Path / |
Source file and lines | advec_cell.cpp:44-48 |
Module | exec |
nb instructions | 135 |
nb uops | 147 |
loop length | 639 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 8 |
nb stack references | 30 |
micro-operation queue | 24.50 cycles |
front end | 24.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 9.00 | 9.00 | 8.75 | 8.75 | 4.50 | 20.67 | 20.67 | 20.67 | 0.50 | 10.00 | 10.00 | 0.50 | 2.00 | 2.00 |
cycles | 9.00 | 9.00 | 8.75 | 8.75 | 4.50 | 21.00 | 21.00 | 21.00 | 0.50 | 10.00 | 10.00 | 0.50 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 24.50 |
Dispatch | 21.00 |
Overall L1 | 24.50 |
all | 11% |
load | 9% |
store | 12% |
mul | 0% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 15% |
load | 19% |
store | 18% |
mul | 12% |
add-sub | 22% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x1c0,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 23129b <.omp_outlined.+0x95b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EBX,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 23129b <.omp_outlined.+0x95b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RBX),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%R15),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EAX,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R13D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R12,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x3c(%RSP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x100(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2fff5(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x68(%RSP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x30(%RSP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R14,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R14,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 231288 <.omp_outlined.+0x948> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDI,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 230a9c <.omp_outlined.+0x15c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 2311a0 <.omp_outlined.+0x860> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x1f041(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VPBROADCASTQ -0x1f2c3(%RIP),%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
AND $-0x8,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R12,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDI,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R15D,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R13,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%ZMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R8,%ZMM10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %R14,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %RBX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
MOV %RBX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDQA %YMM2,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDQA %YMM2,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VPBROADCASTQ %R14,%ZMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VMOVDQA64 %ZMM2,0x140(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 4 | 2 |
VPBROADCASTD %EAX,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x88(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x80(%RSP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x68(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,0x78(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 231288 <.omp_outlined.+0x948> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x2f74d(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Source file and lines | advec_cell.cpp:44-48 |
Module | exec |
nb instructions | 135 |
nb uops | 147 |
loop length | 639 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 3 |
used zmm registers | 8 |
nb stack references | 30 |
micro-operation queue | 24.50 cycles |
front end | 24.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 9.00 | 9.00 | 8.75 | 8.75 | 4.50 | 20.67 | 20.67 | 20.67 | 0.50 | 10.00 | 10.00 | 0.50 | 2.00 | 2.00 |
cycles | 9.00 | 9.00 | 8.75 | 8.75 | 4.50 | 21.00 | 21.00 | 21.00 | 0.50 | 10.00 | 10.00 | 0.50 | 2.00 | 2.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 24.50 |
Dispatch | 21.00 |
Overall L1 | 24.50 |
all | 11% |
load | 9% |
store | 12% |
mul | 0% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
all | 15% |
load | 19% |
store | 18% |
mul | 12% |
add-sub | 22% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB $0x1c0,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RCX),%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDX),%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R15D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 23129b <.omp_outlined.+0x95b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV (%R9),%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
ADD $0x4,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %EBX,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JL 23129b <.omp_outlined.+0x95b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
LEA -0x1(%RBX),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x1(%R15),%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVQ $0,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ $0x1,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EAX,%R12D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SUB %R13D,%R14D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
IMUL %R12,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x3c(%RSP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x100(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x2fff5(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x68(%RSP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA 0x30(%RSP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 25f740 <@plt_start@+0x530> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x28(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R14,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVL %RAX,%R14 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP %R14,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JG 231288 <.omp_outlined.+0x948> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R13,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x20(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x18(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R12,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDX),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%R8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%R8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RSI),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x10(%RAX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RAX),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %R14,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDI,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R13,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RDX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RSI,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
CMP $0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
JAE 230a9c <.omp_outlined.+0x15c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
JMP 2311a0 <.omp_outlined.+0x860> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %RBX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VPBROADCASTQ %RCX,%ZMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPADDQ -0x1f041(%RIP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 |
VPBROADCASTQ -0x1f2c3(%RIP),%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 |
AND $-0x8,%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VPBROADCASTQ %R12,%ZMM1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDI,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD %R15D,%YMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R13,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %RDX,%ZMM9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTQ %R8,%ZMM10 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
MOV %R14,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %R15,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %RBX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 |
MOV %RBX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x48(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDQA %YMM2,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VPBROADCASTD %EAX,%YMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
VMOVDQA %YMM2,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 |
VPBROADCASTQ %R14,%ZMM2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 |
VMOVDQA64 %ZMM2,0x140(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 4 | 2 |
VPBROADCASTD %EAX,%YMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0x70(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x88(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x40(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x80(%RSP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x68(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %RAX,0x78(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
JE 231288 <.omp_outlined.+0x948> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
LEA 0x2f74d(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 25f750 <@plt_start@+0x540> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼.omp_outlined.#0x230940– | 1.47 | 0.75 |
○Loop 178 - advec_cell.cpp:45-48 - exec | 1.47 | 0.75 |
○Loop 179 - advec_cell.cpp:45-48 - exec | 0 | 0 |