Loop Id: 163 | Module: exec | Source: calc_dt.cpp:52-75 [...] | Coverage: 0.01% |
---|
Loop Id: 163 | Module: exec | Source: calc_dt.cpp:52-75 [...] | Coverage: 0.01% |
---|
0x430920 CMP %R13D,%R10D |
0x430923 CMOVBE %R10D,%R13D |
0x430927 LEA (%R14,%R13,1),%EBX |
0x43092b CMP %EBX,%R14D |
0x43092e JAE 43163d |
0x430934 LEA -0x1(%R13),%R10D |
0x430938 LEA (,%R15,8),%R12 |
0x430940 CMP $0xe,%R10D |
0x430944 JBE 431330 |
0x43094a MOV 0x98(%RSP),%R11 |
0x430952 MOV 0x90(%RSP),%RCX |
0x43095a VBROADCASTSD %XMM0,%ZMM8 |
0x430960 VBROADCASTSD %XMM2,%ZMM9 |
0x430966 KXNORB %K1,%K1,%K1 |
0x43096a MOV 0x88(%RSP),%RDI |
0x430972 MOV 0xa8(%RSP),%R8 |
0x43097a VBROADCASTSD %XMM3,%ZMM10 |
0x430980 VBROADCASTSD %XMM4,%ZMM11 |
0x430986 MOV (%R11),%RDX |
0x430989 MOV (%RCX),%RSI |
0x43098c VMOVAPD %ZMM8,0x300(%RSP) |
0x430994 MOV %R13D,%R9D |
0x430997 MOV (%RDI),%R10 |
0x43099a MOV 0xa0(%RSP),%RAX |
0x4309a2 VMOVAPD %ZMM9,0x240(%RSP) |
0x4309aa SHR $0x4,%R9D |
0x4309ae IMUL %R15,%RDX |
0x4309b2 VPBROADCASTD 0xb4(%RSP),%ZMM7 |
0x4309ba VPBROADCASTQ 0x8(%R8),%ZMM12 |
0x4309c1 VMOVAPD %ZMM10,0x180(%RSP) |
0x4309c9 IMUL %R15,%RSI |
0x4309cd VPBROADCASTQ 0x8(%RAX),%ZMM13 |
0x4309d4 MOV 0x80(%RSP),%R8 |
0x4309dc VMOVAPD %ZMM11,0x140(%RSP) |
0x4309e4 IMUL %R15,%R10 |
0x4309e8 VPBROADCASTQ 0x10(%R11),%ZMM14 |
0x4309ef VPBROADCASTQ 0x10(%RCX),%ZMM1 |
0x4309f6 VMOVDQA64 %ZMM12,0x500(%RSP) |
0x4309fe VPBROADCASTQ 0x10(%RDI),%ZMM5 |
0x430a05 MOV 0x78(%RSP),%RCX |
0x430a0a VPBROADCASTQ %RDX,%ZMM15 |
0x430a10 VMOVDQA64 %ZMM13,0x4c0(%RSP) |
0x430a18 VPADDD 0x31ede(%RIP),%ZMM7,%ZMM29 |
0x430a22 VPBROADCASTQ %RSI,%ZMM6 |
0x430a28 MOV 0xb8(%RSP),%RDX |
0x430a30 VMOVDQA64 %ZMM14,0x480(%RSP) |
0x430a38 VPBROADCASTQ %R10,%ZMM7 |
0x430a3e MOV (%R8),%RAX |
0x430a41 VPBROADCASTQ 0x10(%R8),%ZMM11 |
0x430a48 VMOVDQA64 %ZMM15,0x440(%RSP) |
0x430a50 VMOVDQA64 %ZMM1,0x400(%RSP) |
0x430a58 VMOVDQA64 %ZMM6,0x3c0(%RSP) |
0x430a60 MOV %RAX,%R11 |
0x430a63 IMUL %RDX,%RAX |
0x430a67 VMOVDQA64 %ZMM5,0x380(%RSP) |
0x430a6f IMUL %R15,%R11 |
0x430a73 VMOVDQA64 %ZMM7,0x340(%RSP) |
0x430a7b VPBROADCASTQ 0x10(%RCX),%ZMM25 |
0x430a82 MOV (%RCX),%RSI |
0x430a85 MOV 0x70(%RSP),%RDI |
0x430a8a MOV 0x60(%RSP),%RCX |
0x430a8f VPBROADCASTQ %RAX,%ZMM19 |
0x430a95 VMOVSD %XMM3,0x58(%RSP) |
0x430a9b IMUL %R15,%RSI |
0x430a9f MOV 0x68(%RSP),%R8 |
0x430aa4 VPBROADCASTQ %R11,%ZMM22 |
0x430aaa VBROADCASTSD 0x31e24(%RIP),%ZMM24 |
0x430ab4 MOV (%RDI),%R10 |
0x430ab7 VPBROADCASTQ 0x10(%RDI),%ZMM8 |
0x430abe VMOVSD %XMM4,0x50(%RSP) |
0x430ac4 MOV (%R8),%RAX |
0x430ac7 VPBROADCASTQ 0x10(%R8),%ZMM10 |
0x430ace VMOVSD %XMM0,0x48(%RSP) |
0x430ad4 VPBROADCASTQ %RSI,%ZMM21 |
0x430ada MOV (%RCX),%RSI |
0x430add IMUL %R15,%R10 |
0x430ae1 VPBROADCASTQ 0x10(%RCX),%ZMM31 |
0x430ae8 MOV %RAX,%R11 |
0x430aeb IMUL %RDX,%RAX |
0x430aef VBROADCASTSD 0x31de7(%RIP),%ZMM23 |
0x430af9 VMOVDQA64 %ZMM8,0x2c0(%RSP) |
0x430b01 MOV %RSI,%RDI |
0x430b04 IMUL %RDX,%RSI |
0x430b08 VMOVSD %XMM2,0x40(%RSP) |
0x430b0e XOR %EDX,%EDX |
0x430b10 IMUL %R15,%RDI |
0x430b14 VPBROADCASTQ %R10,%ZMM9 |
0x430b1a IMUL %R15,%R11 |
0x430b1e VMOVDQA64 %ZMM9,0x280(%RSP) |
0x430b26 VBROADCASTSD 0x31f50(%RIP),%ZMM9 |
0x430b30 VPBROADCASTQ %RAX,%ZMM18 |
0x430b36 VPBROADCASTQ %RSI,%ZMM13 |
0x430b3c VPBROADCASTQ %RDI,%ZMM12 |
0x430b42 VMOVDQA64 %ZMM13,0x1c0(%RSP) |
0x430b4a VPBROADCASTQ %R11,%ZMM20 |
0x430b50 VMOVDQA64 %ZMM12,0x200(%RSP) |
0x430b58 NOPL (%RAX,%RAX,1) |
(165) 0x430b60 VMOVDQA32 %ZMM29,%ZMM2 |
(165) 0x430b66 VMOVDQA64 0x500(%RSP),%ZMM14 |
(165) 0x430b6e VMOVDQA64 0x4c0(%RSP),%ZMM7 |
(165) 0x430b76 KMOVB %K1,%K4 |
(165) 0x430b7a KMOVB %K1,%K5 |
(165) 0x430b7e KMOVB %K1,%K3 |
(165) 0x430b82 VEXTRACTI32X8 $0x1,%ZMM2,%YMM4 |
(165) 0x430b89 VPMOVSXDQ %YMM2,%ZMM1 |
(165) 0x430b8f KMOVB %K1,%K6 |
(165) 0x430b93 VMOVDQA64 0x380(%RSP),%ZMM30 |
(165) 0x430b9b KMOVB %K1,%K7 |
(165) 0x430b9f KMOVB %K1,%K2 |
(165) 0x430ba3 VPMOVSXDQ %YMM4,%ZMM0 |
(165) 0x430ba9 VMOVDQA64 0x440(%RSP),%ZMM4 |
(165) 0x430bb1 VPSLLQ $0x3,%ZMM1,%ZMM3 |
(165) 0x430bb8 VGATHERQPD (%R12,%ZMM7,1),%ZMM12{%K5} |
(165) 0x430bbf KMOVB %K1,%K5 |
(165) 0x430bc3 VPSLLQ $0x3,%ZMM0,%ZMM6 |
(165) 0x430bca VPADDQ %ZMM14,%ZMM3,%ZMM15 |
(165) 0x430bd0 VPADDD 0xc0(%RSP),%ZMM2,%ZMM2 |
(165) 0x430bd8 INC %EDX |
(165) 0x430bda VPADDQ %ZMM14,%ZMM6,%ZMM5 |
(165) 0x430be0 VPADDQ %ZMM4,%ZMM1,%ZMM13 |
(165) 0x430be6 VMOVDQA64 0x480(%RSP),%ZMM14 |
(165) 0x430bee VGATHERQPD (,%ZMM15,1),%ZMM8{%K3} |
(165) 0x430bf9 KMOVB %K1,%K3 |
(165) 0x430bfd VGATHERQPD (,%ZMM5,1),%ZMM6{%K4} |
(165) 0x430c08 VPADDQ %ZMM4,%ZMM0,%ZMM5 |
(165) 0x430c0e KMOVB %K1,%K4 |
(165) 0x430c12 VMINPD %ZMM12,%ZMM8,%ZMM8 |
(165) 0x430c18 VPSLLQ $0x3,%ZMM13,%ZMM3 |
(165) 0x430c1f VPSLLQ $0x3,%ZMM5,%ZMM7 |
(165) 0x430c26 VMINPD %ZMM12,%ZMM6,%ZMM6 |
(165) 0x430c2c VPADDQ %ZMM14,%ZMM3,%ZMM15 |
(165) 0x430c32 VPADDQ %ZMM14,%ZMM7,%ZMM3 |
(165) 0x430c38 VMOVDQA64 0x3c0(%RSP),%ZMM14 |
(165) 0x430c40 VGATHERQPD (,%ZMM15,1),%ZMM13{%K6} |
(165) 0x430c4b VGATHERQPD (,%ZMM3,1),%ZMM4{%K7} |
(165) 0x430c56 KMOVB %K1,%K6 |
(165) 0x430c5a KMOVB %K1,%K7 |
(165) 0x430c5e VPADDD 0x100(%RSP),%ZMM29,%ZMM29 |
(165) 0x430c66 VPADDQ %ZMM14,%ZMM1,%ZMM15 |
(165) 0x430c6c VPADDQ %ZMM14,%ZMM0,%ZMM14 |
(165) 0x430c72 VPSLLQ $0x3,%ZMM15,%ZMM5 |
(165) 0x430c79 VMOVDQA64 0x400(%RSP),%ZMM15 |
(165) 0x430c81 VPADDQ %ZMM15,%ZMM5,%ZMM7 |
(165) 0x430c87 VPSLLQ $0x3,%ZMM14,%ZMM5 |
(165) 0x430c8e VPADDQ %ZMM15,%ZMM5,%ZMM15 |
(165) 0x430c94 VGATHERQPD (,%ZMM7,1),%ZMM3{%K2} |
(165) 0x430c9f KMOVB %K1,%K2 |
(165) 0x430ca3 VGATHERQPD (,%ZMM15,1),%ZMM5{%K3} |
(165) 0x430cae VMOVDQA64 0x340(%RSP),%ZMM15 |
(165) 0x430cb6 VADDPD %ZMM3,%ZMM3,%ZMM3 |
(165) 0x430cbc KMOVB %K1,%K3 |
(165) 0x430cc0 VADDPD %ZMM5,%ZMM5,%ZMM5 |
(165) 0x430cc6 VPADDQ %ZMM15,%ZMM1,%ZMM7 |
(165) 0x430ccc VPADDQ %ZMM15,%ZMM0,%ZMM15 |
(165) 0x430cd2 VPSLLQ $0x3,%ZMM7,%ZMM14 |
(165) 0x430cd9 VPADDQ %ZMM30,%ZMM14,%ZMM7 |
(165) 0x430cdf VGATHERQPD (,%ZMM7,1),%ZMM14{%K4} |
(165) 0x430cea VPSLLQ $0x3,%ZMM15,%ZMM7 |
(165) 0x430cf1 KMOVB %K1,%K4 |
(165) 0x430cf5 VPADDQ %ZMM30,%ZMM7,%ZMM7 |
(165) 0x430cfb VDIVPD %ZMM14,%ZMM3,%ZMM14 |
(165) 0x430d01 VFMADD231PD %ZMM13,%ZMM13,%ZMM14 |
(165) 0x430d07 VGATHERQPD (,%ZMM7,1),%ZMM15{%K5} |
(165) 0x430d12 KMOVB %K1,%K5 |
(165) 0x430d16 VDIVPD %ZMM15,%ZMM5,%ZMM7 |
(165) 0x430d1c VFMADD132PD %ZMM4,%ZMM7,%ZMM4 |
(165) 0x430d22 VMOVAPD 0x300(%RSP),%ZMM15 |
(165) 0x430d2a VMULPD %ZMM15,%ZMM8,%ZMM5 |
(165) 0x430d30 VMULPD %ZMM15,%ZMM6,%ZMM12 |
(165) 0x430d36 VSQRTPD %ZMM14,%ZMM13 |
(165) 0x430d3c VMAXPD %ZMM24,%ZMM13,%ZMM7 |
(165) 0x430d42 VSQRTPD %ZMM4,%ZMM4 |
(165) 0x430d48 VMAXPD %ZMM24,%ZMM4,%ZMM14 |
(165) 0x430d4e VPADDQ %ZMM22,%ZMM1,%ZMM4 |
(165) 0x430d54 VPSLLQ $0x3,%ZMM4,%ZMM8 |
(165) 0x430d5b VDIVPD %ZMM7,%ZMM5,%ZMM3 |
(165) 0x430d61 VPADDQ %ZMM11,%ZMM8,%ZMM15 |
(165) 0x430d67 VMOVAPD %ZMM3,0x580(%RSP) |
(165) 0x430d6f VPADDQ %ZMM22,%ZMM0,%ZMM5 |
(165) 0x430d75 VDIVPD %ZMM14,%ZMM12,%ZMM13 |
(165) 0x430d7b VPADDQ %ZMM19,%ZMM1,%ZMM14 |
(165) 0x430d81 VMOVAPD %ZMM13,0x540(%RSP) |
(165) 0x430d89 VPSLLQ $0x3,%ZMM5,%ZMM3 |
(165) 0x430d90 VGATHERQPD (,%ZMM15,1),%ZMM7{%K6} |
(165) 0x430d9b VPSLLQ $0x3,%ZMM14,%ZMM13 |
(165) 0x430da2 VPADDQ %ZMM19,%ZMM0,%ZMM15 |
(165) 0x430da8 KMOVB %K1,%K6 |
(165) 0x430dac VPADDQ %ZMM11,%ZMM13,%ZMM4 |
(165) 0x430db2 VPSLLQ $0x3,%ZMM15,%ZMM5 |
(165) 0x430db9 VPADDQ %ZMM11,%ZMM3,%ZMM12 |
(165) 0x430dbf VGATHERQPD (,%ZMM4,1),%ZMM8{%K2} |
(165) 0x430dca VPADDQ %ZMM11,%ZMM5,%ZMM3 |
(165) 0x430dd0 VGATHERQPD (,%ZMM12,1),%ZMM6{%K7} |
(165) 0x430ddb KMOVB %K1,%K7 |
(165) 0x430ddf KMOVB %K1,%K2 |
(165) 0x430de3 VGATHERQPD (,%ZMM3,1),%ZMM14{%K3} |
(165) 0x430dee VPADDQ %ZMM21,%ZMM1,%ZMM12 |
(165) 0x430df4 VPADDQ %ZMM21,%ZMM0,%ZMM5 |
(165) 0x430dfa KMOVB %K1,%K3 |
(165) 0x430dfe VADDPD %ZMM8,%ZMM7,%ZMM7 |
(165) 0x430e04 VEXTRACTI32X8 $0x1,%ZMM2,%YMM8 |
(165) 0x430e0b VADDPD %ZMM14,%ZMM6,%ZMM6 |
(165) 0x430e11 VPSLLQ $0x3,%ZMM12,%ZMM13 |
(165) 0x430e18 VPSLLQ $0x3,%ZMM5,%ZMM3 |
(165) 0x430e1f VPMOVSXDQ %YMM2,%ZMM5 |
(165) 0x430e25 VPMOVSXDQ %YMM8,%ZMM2 |
(165) 0x430e2b VPADDQ %ZMM25,%ZMM13,%ZMM4 |
(165) 0x430e31 VPADDQ %ZMM25,%ZMM3,%ZMM12 |
(165) 0x430e37 VPADDQ %ZMM5,%ZMM22,%ZMM14 |
(165) 0x430e3d VPADDQ %ZMM2,%ZMM22,%ZMM3 |
(165) 0x430e43 VGATHERQPD (,%ZMM4,1),%ZMM15{%K4} |
(165) 0x430e4e KMOVB %K1,%K4 |
(165) 0x430e52 VMULPD %ZMM15,%ZMM7,%ZMM7 |
(165) 0x430e58 VPSLLQ $0x3,%ZMM14,%ZMM4 |
(165) 0x430e5f VGATHERQPD (,%ZMM12,1),%ZMM13{%K5} |
(165) 0x430e6a KMOVB %K1,%K5 |
(165) 0x430e6e VPSLLQ $0x3,%ZMM3,%ZMM12 |
(165) 0x430e75 VPADDQ %ZMM5,%ZMM19,%ZMM14 |
(165) 0x430e7b VPADDQ %ZMM11,%ZMM4,%ZMM15 |
(165) 0x430e81 VMULPD %ZMM13,%ZMM6,%ZMM6 |
(165) 0x430e87 VPSLLQ $0x3,%ZMM14,%ZMM4 |
(165) 0x430e8e VPADDQ %ZMM11,%ZMM12,%ZMM13 |
(165) 0x430e94 VPADDQ %ZMM2,%ZMM19,%ZMM12 |
(165) 0x430e9a VGATHERQPD (,%ZMM15,1),%ZMM8{%K6} |
(165) 0x430ea5 VGATHERQPD (,%ZMM13,1),%ZMM3{%K7} |
(165) 0x430eb0 KMOVB %K1,%K6 |
(165) 0x430eb4 KMOVB %K1,%K7 |
(165) 0x430eb8 VPADDQ %ZMM11,%ZMM4,%ZMM15 |
(165) 0x430ebe VPSLLQ $0x3,%ZMM12,%ZMM13 |
(165) 0x430ec5 VGATHERQPD (,%ZMM15,1),%ZMM16{%K2} |
(165) 0x430ed0 VPADDQ %ZMM11,%ZMM13,%ZMM4 |
(165) 0x430ed6 VPADDQ %ZMM5,%ZMM21,%ZMM15 |
(165) 0x430edc KMOVB %K1,%K2 |
(165) 0x430ee0 VGATHERQPD (,%ZMM4,1),%ZMM14{%K3} |
(165) 0x430eeb VPSLLQ $0x3,%ZMM15,%ZMM12 |
(165) 0x430ef2 VPADDQ %ZMM2,%ZMM21,%ZMM4 |
(165) 0x430ef8 KMOVB %K1,%K3 |
(165) 0x430efc VADDPD %ZMM16,%ZMM8,%ZMM8 |
(165) 0x430f02 VPADDQ %ZMM25,%ZMM12,%ZMM13 |
(165) 0x430f08 VPSLLQ $0x3,%ZMM4,%ZMM12 |
(165) 0x430f0f VADDPD %ZMM14,%ZMM3,%ZMM3 |
(165) 0x430f15 VGATHERQPD (,%ZMM13,1),%ZMM15{%K4} |
(165) 0x430f20 VPADDQ %ZMM25,%ZMM12,%ZMM4 |
(165) 0x430f26 VMOVDQA64 0x280(%RSP),%ZMM14 |
(165) 0x430f2e KMOVB %K1,%K4 |
(165) 0x430f32 VGATHERQPD (,%ZMM4,1),%ZMM13{%K5} |
(165) 0x430f3d VMOVDQA64 0x200(%RSP),%ZMM27 |
(165) 0x430f45 KMOVB %K1,%K5 |
(165) 0x430f49 VMULPD %ZMM15,%ZMM8,%ZMM4 |
(165) 0x430f4f VPADDQ %ZMM14,%ZMM1,%ZMM15 |
(165) 0x430f55 VPADDQ %ZMM14,%ZMM0,%ZMM14 |
(165) 0x430f5b VMULPD %ZMM13,%ZMM3,%ZMM3 |
(165) 0x430f61 VMOVDQA64 0x2c0(%RSP),%ZMM13 |
(165) 0x430f69 VPSLLQ $0x3,%ZMM15,%ZMM12 |
(165) 0x430f70 VPSLLQ $0x3,%ZMM14,%ZMM15 |
(165) 0x430f77 VPADDQ %ZMM13,%ZMM12,%ZMM8 |
(165) 0x430f7d VPADDQ %ZMM13,%ZMM15,%ZMM12 |
(165) 0x430f83 VPADDQ %ZMM20,%ZMM1,%ZMM13 |
(165) 0x430f89 VGATHERQPD (,%ZMM8,1),%ZMM16{%K6} |
(165) 0x430f94 VGATHERQPD (,%ZMM12,1),%ZMM15{%K7} |
(165) 0x430f9f KMOVB %K1,%K6 |
(165) 0x430fa3 KMOVB %K1,%K7 |
(165) 0x430fa7 VMULPD %ZMM24,%ZMM16,%ZMM26 |
(165) 0x430fad VPSLLQ $0x3,%ZMM13,%ZMM8 |
(165) 0x430fb4 VPADDQ %ZMM20,%ZMM0,%ZMM12 |
(165) 0x430fba VPADDQ %ZMM10,%ZMM8,%ZMM14 |
(165) 0x430fc0 VPSLLQ $0x3,%ZMM12,%ZMM8 |
(165) 0x430fc7 VMULPD %ZMM24,%ZMM15,%ZMM17 |
(165) 0x430fcd VGATHERQPD (,%ZMM14,1),%ZMM13{%K2} |
(165) 0x430fd8 VPADDQ %ZMM10,%ZMM8,%ZMM14 |
(165) 0x430fde KMOVB %K1,%K2 |
(165) 0x430fe2 VPADDQ %ZMM20,%ZMM5,%ZMM8 |
(165) 0x430fe8 VGATHERQPD (,%ZMM14,1),%ZMM12{%K3} |
(165) 0x430ff3 VPADDQ %ZMM18,%ZMM5,%ZMM5 |
(165) 0x430ff9 KMOVB %K1,%K3 |
(165) 0x430ffd VPSLLQ $0x3,%ZMM8,%ZMM14 |
(165) 0x431004 VPADDQ %ZMM10,%ZMM14,%ZMM8 |
(165) 0x43100a VPADDQ %ZMM20,%ZMM2,%ZMM14 |
(165) 0x431010 VGATHERQPD (,%ZMM8,1),%ZMM30{%K4} |
(165) 0x43101b VPSLLQ $0x3,%ZMM14,%ZMM8 |
(165) 0x431022 VPADDQ %ZMM18,%ZMM2,%ZMM2 |
(165) 0x431028 KMOVB %K1,%K4 |
(165) 0x43102c VPADDQ %ZMM10,%ZMM8,%ZMM14 |
(165) 0x431032 VPADDQ %ZMM27,%ZMM1,%ZMM8 |
(165) 0x431038 VGATHERQPD (,%ZMM14,1),%ZMM28{%K5} |
(165) 0x431043 VPSLLQ $0x3,%ZMM8,%ZMM14 |
(165) 0x43104a VADDPD %ZMM30,%ZMM13,%ZMM13 |
(165) 0x431050 KMOVB %K1,%K5 |
(165) 0x431054 VPADDQ %ZMM31,%ZMM14,%ZMM14 |
(165) 0x43105a VADDPD %ZMM28,%ZMM12,%ZMM12 |
(165) 0x431060 VGATHERQPD (,%ZMM14,1),%ZMM8{%K6} |
(165) 0x43106b VPADDQ %ZMM27,%ZMM0,%ZMM14 |
(165) 0x431071 VPADDQ %ZMM18,%ZMM0,%ZMM30 |
(165) 0x431077 KMOVB %K1,%K6 |
(165) 0x43107b VPSLLQ $0x3,%ZMM14,%ZMM14 |
(165) 0x431082 VPSLLQ $0x3,%ZMM30,%ZMM28 |
(165) 0x431089 VPADDQ %ZMM31,%ZMM14,%ZMM14 |
(165) 0x43108f VMULPD %ZMM8,%ZMM13,%ZMM8 |
(165) 0x431095 VPADDQ %ZMM10,%ZMM28,%ZMM30 |
(165) 0x43109b VGATHERQPD (,%ZMM14,1),%ZMM27{%K7} |
(165) 0x4310a6 VPADDQ %ZMM18,%ZMM1,%ZMM14 |
(165) 0x4310ac KMOVB %K1,%K7 |
(165) 0x4310b0 VMULPD %ZMM27,%ZMM12,%ZMM13 |
(165) 0x4310b6 VPSLLQ $0x3,%ZMM14,%ZMM12 |
(165) 0x4310bd VPADDQ %ZMM10,%ZMM12,%ZMM14 |
(165) 0x4310c3 VGATHERQPD (,%ZMM30,1),%ZMM12{%K3} |
(165) 0x4310ce VMOVDQA64 0x1c0(%RSP),%ZMM30 |
(165) 0x4310d6 VGATHERQPD (,%ZMM14,1),%ZMM27{%K2} |
(165) 0x4310e1 VPSLLQ $0x3,%ZMM5,%ZMM14 |
(165) 0x4310e8 VPADDQ %ZMM10,%ZMM14,%ZMM5 |
(165) 0x4310ee VPSLLQ $0x3,%ZMM2,%ZMM14 |
(165) 0x4310f5 VPADDQ %ZMM30,%ZMM1,%ZMM1 |
(165) 0x4310fb VGATHERQPD (,%ZMM5,1),%ZMM28{%K4} |
(165) 0x431106 VPADDQ %ZMM10,%ZMM14,%ZMM2 |
(165) 0x43110c VPADDQ %ZMM30,%ZMM0,%ZMM0 |
(165) 0x431112 VMOVAPD 0x180(%RSP),%ZMM30 |
(165) 0x43111a VGATHERQPD (,%ZMM2,1),%ZMM5{%K5} |
(165) 0x431125 VPSLLQ $0x3,%ZMM1,%ZMM14 |
(165) 0x43112c VADDPD %ZMM28,%ZMM27,%ZMM27 |
(165) 0x431132 VMOVAPD 0x240(%RSP),%ZMM28 |
(165) 0x43113a VADDPD %ZMM5,%ZMM12,%ZMM12 |
(165) 0x431140 VPADDQ %ZMM31,%ZMM14,%ZMM1 |
(165) 0x431146 VPSLLQ $0x3,%ZMM0,%ZMM14 |
(165) 0x43114d VPADDQ %ZMM31,%ZMM14,%ZMM0 |
(165) 0x431153 VMULPD %ZMM28,%ZMM16,%ZMM5 |
(165) 0x431159 VGATHERQPD (,%ZMM1,1),%ZMM2{%K6} |
(165) 0x431164 VMULPD %ZMM2,%ZMM27,%ZMM27 |
(165) 0x43116a VGATHERQPD (,%ZMM0,1),%ZMM1{%K7} |
(165) 0x431175 VANDPD %ZMM9,%ZMM4,%ZMM2 |
(165) 0x43117b VMULPD %ZMM1,%ZMM12,%ZMM14 |
(165) 0x431181 VANDPD %ZMM9,%ZMM7,%ZMM0 |
(165) 0x431187 VADDPD %ZMM8,%ZMM7,%ZMM7 |
(165) 0x43118d VMAXPD %ZMM0,%ZMM2,%ZMM1 |
(165) 0x431193 VMULPD %ZMM30,%ZMM16,%ZMM0 |
(165) 0x431199 VMAXPD %ZMM26,%ZMM1,%ZMM12 |
(165) 0x43119f VANDPD %ZMM9,%ZMM27,%ZMM1 |
(165) 0x4311a5 VADDPD %ZMM27,%ZMM4,%ZMM4 |
(165) 0x4311ab VDIVPD %ZMM12,%ZMM5,%ZMM2 |
(165) 0x4311b1 VANDPD %ZMM9,%ZMM8,%ZMM5 |
(165) 0x4311b7 VSUBPD %ZMM7,%ZMM4,%ZMM8 |
(165) 0x4311bd VMOVAPD %ZMM23,%ZMM7 |
(165) 0x4311c3 VMAXPD %ZMM5,%ZMM1,%ZMM12 |
(165) 0x4311c9 VANDPD %ZMM9,%ZMM6,%ZMM5 |
(165) 0x4311cf VADDPD %ZMM13,%ZMM6,%ZMM6 |
(165) 0x4311d5 VMAXPD %ZMM26,%ZMM12,%ZMM1 |
(165) 0x4311db VANDPD %ZMM9,%ZMM13,%ZMM26 |
(165) 0x4311e1 VDIVPD %ZMM1,%ZMM0,%ZMM0 |
(165) 0x4311e7 VMINPD %ZMM0,%ZMM2,%ZMM12 |
(165) 0x4311ed VMULPD %ZMM28,%ZMM15,%ZMM1 |
(165) 0x4311f3 VANDPD %ZMM9,%ZMM3,%ZMM2 |
(165) 0x4311f9 VMAXPD %ZMM5,%ZMM2,%ZMM0 |
(165) 0x4311ff VADDPD %ZMM14,%ZMM3,%ZMM3 |
(165) 0x431205 VMULPD %ZMM30,%ZMM15,%ZMM5 |
(165) 0x43120b VMAXPD %ZMM17,%ZMM0,%ZMM2 |
(165) 0x431211 VSUBPD %ZMM6,%ZMM3,%ZMM13 |
(165) 0x431217 VANDPD %ZMM9,%ZMM14,%ZMM0 |
(165) 0x43121d VMOVAPD %ZMM23,%ZMM6 |
(165) 0x431223 VDIVPD %ZMM2,%ZMM1,%ZMM1 |
(165) 0x431229 VMAXPD %ZMM26,%ZMM0,%ZMM2 |
(165) 0x43122f VMULPD 0x317c7(%RIP),%ZMM13,%ZMM14 |
(165) 0x431239 VMINPD 0x600(%RSP),%ZMM23,%ZMM13 |
(165) 0x431241 VMAXPD %ZMM17,%ZMM2,%ZMM0 |
(165) 0x431247 VMULPD 0x317af(%RIP),%ZMM8,%ZMM2 |
(165) 0x431251 VMOVAPD 0x140(%RSP),%ZMM8 |
(165) 0x431259 VDIVPD %ZMM15,%ZMM14,%ZMM15 |
(165) 0x43125f VCMPPD $0x1,0x317d6(%RIP),%ZMM15,%K3 |
(165) 0x43126a VDIVPD %ZMM0,%ZMM5,%ZMM5 |
(165) 0x431270 VMINPD %ZMM5,%ZMM1,%ZMM1 |
(165) 0x431276 VBROADCASTSD 0x316c0(%RIP),%ZMM5 |
(165) 0x431280 VDIVPD %ZMM16,%ZMM2,%ZMM0 |
(165) 0x431286 VDIVPD %ZMM0,%ZMM5,%ZMM4 |
(165) 0x43128c VCMPPD $0x1,0x317a9(%RIP),%ZMM0,%K2 |
(165) 0x431297 VBROADCASTSD 0x3169f(%RIP),%ZMM0 |
(165) 0x4312a1 VDIVPD %ZMM15,%ZMM0,%ZMM3 |
(165) 0x4312a7 VMULPD %ZMM8,%ZMM3,%ZMM6{%K3} |
(165) 0x4312ad VMULPD %ZMM8,%ZMM4,%ZMM7{%K2} |
(165) 0x4312b3 VMINPD 0x540(%RSP),%ZMM6,%ZMM14 |
(165) 0x4312bb VMINPD 0x580(%RSP),%ZMM7,%ZMM2 |
(165) 0x4312c3 VMINPD %ZMM14,%ZMM13,%ZMM15 |
(165) 0x4312c9 VMINPD %ZMM1,%ZMM15,%ZMM5 |
(165) 0x4312cf VMINPD 0x5c0(%RSP),%ZMM23,%ZMM1 |
(165) 0x4312d7 VMINPD %ZMM2,%ZMM1,%ZMM4 |
(165) 0x4312dd VMOVAPD %ZMM5,0x600(%RSP) |
(165) 0x4312e5 VMINPD %ZMM12,%ZMM4,%ZMM12 |
(165) 0x4312eb VMOVAPD %ZMM12,0x5c0(%RSP) |
(165) 0x4312f3 CMP %EDX,%R9D |
(165) 0x4312f6 JNE 430b60 |
0x4312fc MOV %R13D,%R9D |
0x4312ff VMOVSD 0x58(%RSP),%XMM3 |
0x431305 VMOVSD 0x50(%RSP),%XMM4 |
0x43130b AND $-0x10,%R9D |
0x43130f VMOVSD 0x48(%RSP),%XMM0 |
0x431315 VMOVSD 0x40(%RSP),%XMM2 |
0x43131b ADD %R9D,0xb4(%RSP) |
0x431323 ADD %R9D,%R14D |
0x431326 TEST $0xf,%R13B |
0x43132a JE 43163a |
0x431330 MOV 0x68(%RSP),%R9 |
0x431335 MOV 0xa0(%RSP),%RDX |
0x43133d MOV 0xa8(%RSP),%R11 |
0x431345 MOVSXD 0xb4(%RSP),%RDI |
0x43134d MOV 0x10(%R9),%R13 |
0x431351 MOV 0x80(%RSP),%R8 |
0x431359 MOV 0x8(%R11),%RSI |
0x43135d MOV 0x60(%RSP),%R10 |
0x431362 MOV %RDI,0x4c0(%RSP) |
0x43136a MOV 0x90(%RSP),%RDI |
0x431372 MOV 0x10(%R8),%RCX |
0x431376 MOV %R13,0x500(%RSP) |
0x43137e MOV 0x8(%RDX),%R13 |
0x431382 MOV (%R8),%R8 |
0x431385 MOV %RSI,0x580(%RSP) |
0x43138d MOV 0x10(%RDI),%RDX |
0x431391 MOV 0x10(%R10),%RAX |
0x431395 VMOVSD (%R13,%R12,1),%XMM12 |
0x43139c MOV 0x98(%RSP),%R12 |
0x4313a4 SAL $0x3,%R8 |
0x4313a8 VMOVSD 0x5c0(%RSP),%XMM5 |
0x4313b1 MOV (%R12),%R11 |
0x4313b5 MOV 0x10(%R12),%RSI |
0x4313ba MOV (%RDI),%R12 |
0x4313bd IMUL %R15,%R11 |
0x4313c1 IMUL %R15,%R12 |
0x4313c5 LEA (%RSI,%R11,8),%R13 |
0x4313c9 MOV 0x88(%RSP),%R11 |
0x4313d1 LEA (%RDX,%R12,8),%R12 |
0x4313d5 MOV (%R11),%RSI |
0x4313d8 MOV 0x10(%R11),%RDI |
0x4313dc MOV 0xb8(%RSP),%R11 |
0x4313e4 IMUL %R15,%RSI |
0x4313e8 LEA (%RDI,%RSI,8),%RDX |
0x4313ec MOV %R8,%RDI |
0x4313ef MOV %RDX,0x540(%RSP) |
0x4313f7 MOV 0x78(%RSP),%RDX |
0x4313fc IMUL %R11,%R8 |
0x431400 IMUL %R15,%RDI |
0x431404 MOV (%RDX),%RSI |
0x431407 IMUL %R15,%RSI |
0x43140b ADD %RCX,%RDI |
0x43140e ADD %R8,%RCX |
0x431411 MOV 0x10(%RDX),%R8 |
0x431415 MOV 0x70(%RSP),%RDX |
0x43141a LEA (%R8,%RSI,8),%R8 |
0x43141e MOV (%RDX),%RSI |
0x431421 MOV 0x10(%RDX),%R11 |
0x431425 MOV %RAX,0x480(%RSP) |
0x43142d MOV (%R9),%RDX |
0x431430 MOV (%R10),%R10 |
0x431433 IMUL %R15,%RSI |
0x431437 MOV 0x500(%RSP),%R9 |
0x43143f VMOVSD 0x315b9(%RIP),%XMM11 |
0x431447 SAL $0x3,%RDX |
0x43144b VMOVSD 0x315ed(%RIP),%XMM10 |
0x431453 LEA (%R11,%RSI,8),%R11 |
0x431457 MOV %RDX,%RSI |
0x43145a IMUL %R15,%RSI |
0x43145e ADD %R9,%RSI |
0x431461 LEA (,%R10,8),%R9 |
0x431469 MOV %R9,%R10 |
0x43146c IMUL %R15,%R10 |
0x431470 ADD %RAX,%R10 |
0x431473 MOV 0xb8(%RSP),%RAX |
0x43147b IMUL %RAX,%RDX |
0x43147f MOV 0x500(%RSP),%RAX |
0x431487 MOV %R15,0x500(%RSP) |
0x43148f ADD %RAX,%RDX |
0x431492 MOV 0xb8(%RSP),%RAX |
0x43149a IMUL %RAX,%R9 |
0x43149e MOV 0x480(%RSP),%RAX |
0x4314a6 ADD %RAX,%R9 |
0x4314a9 MOV 0xb4(%RSP),%EAX |
0x4314b0 SUB %EAX,%R14D |
0x4314b3 MOV 0x4c0(%RSP),%RAX |
0x4314bb JMP 4314dd |
(164) 0x4314c0 VMINSD 0x31418(%RIP),%XMM5,%XMM5 |
(164) 0x4314c8 INC %RAX |
(164) 0x4314cb LEA (%R14,%RAX,1),%R15D |
(164) 0x4314cf VMINSD %XMM8,%XMM5,%XMM5 |
(164) 0x4314d4 CMP %EBX,%R15D |
(164) 0x4314d7 JAE 431629 |
(164) 0x4314dd VMOVSD (%R12,%RAX,8),%XMM7 |
(164) 0x4314e3 MOV 0x540(%RSP),%R15 |
(164) 0x4314eb VMOVSD (%R13,%RAX,8),%XMM9 |
(164) 0x4314f2 VMOVSD (%RDI,%RAX,8),%XMM14 |
(164) 0x4314f7 VADDSD %XMM7,%XMM7,%XMM8 |
(164) 0x4314fb VMOVSD 0x8(%RDI,%RAX,8),%XMM1 |
(164) 0x431501 VMOVSD (%RSI,%RAX,8),%XMM7 |
(164) 0x431506 VADDSD (%RCX,%RAX,8),%XMM14,%XMM15 |
(164) 0x43150b VMOVSD (%R11,%RAX,8),%XMM14 |
(164) 0x431511 VDIVSD (%R15,%RAX,8),%XMM8,%XMM6 |
(164) 0x431517 VMULSD %XMM14,%XMM3,%XMM25 |
(164) 0x43151d VFMADD132SD %XMM9,%XMM6,%XMM9 |
(164) 0x431522 VADDSD 0x8(%RSI,%RAX,8),%XMM7,%XMM8 |
(164) 0x431528 MOV 0x580(%RSP),%R15 |
(164) 0x431530 VMULSD (%R8,%RAX,8),%XMM15,%XMM6 |
(164) 0x431536 VMULSD 0x31398(%RIP),%XMM14,%XMM22 |
(164) 0x431540 VMULSD (%R10,%RAX,8),%XMM8,%XMM15 |
(164) 0x431546 VMULSD %XMM14,%XMM2,%XMM8 |
(164) 0x43154b VSQRTSD %XMM9,%XMM9,%XMM13 |
(164) 0x431550 VADDSD 0x8(%RCX,%RAX,8),%XMM1,%XMM9 |
(164) 0x431556 VANDPD 0x31520(%RIP),%XMM6,%XMM19 |
(164) 0x431560 VMULSD 0x8(%R8,%RAX,8),%XMM9,%XMM1 |
(164) 0x431567 VMOVSD (%RDX,%RAX,8),%XMM9 |
(164) 0x43156c VADDSD %XMM15,%XMM6,%XMM6 |
(164) 0x431571 VANDPD 0x31505(%RIP),%XMM15,%XMM21 |
(164) 0x43157b VMAXSD 0x31355(%RIP),%XMM13,%XMM13 |
(164) 0x431583 VADDSD 0x8(%RDX,%RAX,8),%XMM9,%XMM7 |
(164) 0x431589 VMULSD (%R9,%RAX,8),%XMM7,%XMM9 |
(164) 0x43158f VANDPD 0x314e9(%RIP),%XMM1,%XMM7 |
(164) 0x431597 VMAXSD %XMM19,%XMM7,%XMM7 |
(164) 0x43159d VMAXSD %XMM22,%XMM7,%XMM7 |
(164) 0x4315a3 VADDSD %XMM9,%XMM1,%XMM1 |
(164) 0x4315a8 VDIVSD %XMM7,%XMM8,%XMM8 |
(164) 0x4315ac VANDPD 0x314cc(%RIP),%XMM9,%XMM7 |
(164) 0x4315b4 VSUBSD %XMM6,%XMM1,%XMM15 |
(164) 0x4315b8 VMAXSD %XMM21,%XMM7,%XMM7 |
(164) 0x4315be VMULSD %XMM11,%XMM15,%XMM9 |
(164) 0x4315c3 VMAXSD %XMM22,%XMM7,%XMM7 |
(164) 0x4315c9 VDIVSD %XMM7,%XMM25,%XMM7 |
(164) 0x4315cf VDIVSD %XMM14,%XMM9,%XMM14 |
(164) 0x4315d4 VMINSD %XMM7,%XMM8,%XMM8 |
(164) 0x4315d8 VMINSD (%R15,%RAX,8),%XMM12,%XMM7 |
(164) 0x4315de VMULSD %XMM0,%XMM7,%XMM7 |
(164) 0x4315e2 VDIVSD %XMM13,%XMM7,%XMM7 |
(164) 0x4315e7 VCOMISD %XMM14,%XMM10 |
(164) 0x4315ec VMINSD 0x312ec(%RIP),%XMM7,%XMM13 |
(164) 0x4315f4 VMINSD %XMM13,%XMM8,%XMM8 |
(164) 0x4315f9 JBE 4314c0 |
(164) 0x4315ff VMOVSD 0x31339(%RIP),%XMM7 |
(164) 0x431607 INC %RAX |
(164) 0x43160a LEA (%R14,%RAX,1),%R15D |
(164) 0x43160e VDIVSD %XMM14,%XMM7,%XMM13 |
(164) 0x431613 VMULSD %XMM4,%XMM13,%XMM1 |
(164) 0x431617 VMINSD %XMM5,%XMM1,%XMM5 |
(164) 0x43161b VMINSD %XMM8,%XMM5,%XMM5 |
(164) 0x431620 CMP %EBX,%R15D |
(164) 0x431623 JB 4314dd |
0x431629 MOV 0x500(%RSP),%R15 |
0x431631 VMOVSD %XMM5,0x5c0(%RSP) |
0x43163a MOV %EBX,%R14D |
0x43163d MOV 0x30(%RSP),%ECX |
0x431641 MOV 0x28(%RSP),%EBX |
0x431645 INC %R15 |
0x431648 INCQ 0xb8(%RSP) |
0x431650 SUB %EBX,%ECX |
0x431652 ADD %R15D,%ECX |
0x431655 CMP %ECX,0xb0(%RSP) |
0x43165c JLE 431680 |
0x43165e MOV 0x34(%RSP),%R10D |
0x431663 MOV 0x3c(%RSP),%R12D |
0x431668 MOV 0x38(%RSP),%R13D |
0x43166d MOV %R12D,0xb4(%RSP) |
0x431675 SUB %R14D,%R10D |
0x431678 JMP 430920 |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/calc_dt.cpp: 52 - 75 |
-------------------------------------------------------------------------------- |
52: double dsx = celldx[i]; |
53: double dsy = celldy[j]; |
54: double cc = soundspeed(i, j) * soundspeed(i, j); |
55: cc = cc + 2.0 * viscosity_a(i, j) / density0(i, j); |
56: cc = std::fmax(std::sqrt(cc), g_small); |
57: double dtct = dtc_safe * std::fmin(dsx, dsy) / cc; |
58: double div = 0.0; |
59: double dv1 = (xvel0(i, j) + xvel0(i + 0, j + 1)) * xarea(i, j); |
60: double dv2 = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) * xarea(i + 1, j + 0); |
61: div = div + dv2 - dv1; |
62: double dtut = dtu_safe * 2.0 * volume(i, j) / std::fmax(std::fmax(std::fabs(dv1), std::fabs(dv2)), g_small * volume(i, j)); |
63: dv1 = (yvel0(i, j) + yvel0(i + 1, j + 0)) * yarea(i, j); |
64: dv2 = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) * yarea(i + 0, j + 1); |
65: div = div + dv2 - dv1; |
66: double dtvt = dtv_safe * 2.0 * volume(i, j) / std::fmax(std::fmax(std::fabs(dv1), std::fabs(dv2)), g_small * volume(i, j)); |
67: div = div / (2.0 * volume(i, j)); |
68: double dtdivt; |
69: if (div < -g_small) { |
70: dtdivt = dtdiv_safe * (-1.0 / div); |
71: } else { |
72: dtdivt = g_big; |
73: } |
74: double mins = std::fmin(dtct, std::fmin(dtut, std::fmin(dtvt, std::fmin(dtdivt, g_big)))); |
75: dt_min_val0 = std::fmin(mins, dt_min_val0); |
/scratch_na/users/xoserete/qaas_runs/171-415-4969/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.27 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.33 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.16 |
Bottlenecks | micro-operation queue, |
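Function | calc_dt_kernel(int, int, int, int, double, double, double, double, double, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, double&, int&, double&, double&, int&, int&, int&) |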
Source | calc_dt.cpp:52-53,calc_dt.cpp:60-60,calc_dt.cpp:63-63,calc_dt.cpp:75-75,context.h:46-46,context.h:69-69 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.00 |
CQA cycles if no scalar integer | 15.00 |
CQA cycles if FP arith vectorized | 34.00 |
CQA cycles if fully vectorized | 25.63 |
Front-end cycles | 34.00 |
DIV/SQRT cycles | 10.30 |
P0 cycles | 26.00 |
P1 cycles | 29.33 |
P2 cycles | 29.33 |
P3 cycles | 15.00 |
P4 cycles | 15.00 |
P5 cycles | 10.30 |
P6 cycles | 15.00 |
P7 cycles | 15.00 |
P8 cycles | 15.00 |
P9 cycles | 10.40 |
P10 cycles | 29.33 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 35.18 |
Stall cycles (UFS) | 0.84 |
Nb insns | 201.00 |
Nb uops | 204.00 |
Nb loads | 88.00 |
Nb stores | 30.00 |
Nb stack references | 41.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 54.35 |
Bytes prefetched | 0.00 |
Bytes loaded | 720.00 |
Bytes stored | 1128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 23.29 |
Vectorization ratio load | 3.70 |
Vectorization ratio store | 53.33 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 32.19 |
Vector-efficiency ratio load | 14.81 |
Vector-efficiency ratio store | 58.75 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.55 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.27 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.33 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.16 |
Bottlenecks | micro-operation queue, |
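Function | calc_dt_kernel(int, int, int, int, double, double, double, double, double, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer1D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, clover::Buffer2D<double>&, double&, int&, double&, double&, int&, int&, int&) |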
Source | calc_dt.cpp:52-53,calc_dt.cpp:60-60,calc_dt.cpp:63-63,calc_dt.cpp:75-75,context.h:46-46,context.h:69-69 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 34.00 |
CQA cycles if no scalar integer | 15.00 |
CQA cycles if FP arith vectorized | 34.00 |
CQA cycles if fully vectorized | 25.63 |
Front-end cycles | 34.00 |
DIV/SQRT cycles | 10.30 |
P0 cycles | 26.00 |
P1 cycles | 29.33 |
P2 cycles | 29.33 |
P3 cycles | 15.00 |
P4 cycles | 15.00 |
P5 cycles | 10.30 |
P6 cycles | 15.00 |
P7 cycles | 15.00 |
P8 cycles | 15.00 |
P9 cycles | 10.40 |
P10 cycles | 29.33 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 35.18 |
Stall cycles (UFS) | 0.84 |
Nb insns | 201.00 |
Nb uops | 204.00 |
Nb loads | 88.00 |
Nb stores | 30.00 |
Nb stack references | 41.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 54.35 |
Bytes prefetched | 0.00 |
Bytes loaded | 720.00 |
Bytes stored | 1128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 23.29 |
Vectorization ratio load | 3.70 |
Vectorization ratio store | 53.33 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 32.19 |
Vector-efficiency ratio load | 14.81 |
Vector-efficiency ratio store | 58.75 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.55 |
Path / |
nb instructions | 201 |
nb uops | 204 |
loop length | 1109 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 22 |
nb stack references | 41 |
micro-operation queue | 34.00 cycles |
front end | 34.00 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.30 | 22.00 | 29.33 | 29.33 | 15.00 | 15.00 | 10.30 | 15.00 | 15.00 | 15.00 | 10.40 | 29.33 |
cycles | 10.30 | 26.00 | 29.33 | 29.33 | 15.00 | 15.00 | 10.30 | 15.00 | 15.00 | 15.00 | 10.40 | 29.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 35.18 |
Stall cycles | 0.84 |
LM full (events) | 2.99 |
Front-end | 34.00 |
Dispatch | 29.33 |
Overall L1 | 34.00 |
all | 26% |
load | 6% |
store | 57% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 16% |
load | 0% |
store | 44% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 23% |
load | 3% |
store | 53% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 34% |
load | 16% |
store | 61% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 27% |
load | 12% |
store | 51% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 32% |
load | 14% |
store | 58% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %R13D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVBE %R10D,%R13D | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1-2 | 1 |
LEA (%R14,%R13,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43163d <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xeed> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%R13),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (,%R15,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0xe,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 431330 <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xbe0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x90(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM0,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x88(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %ZMM8,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %R13D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %ZMM9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
SHR $0x4,%R9D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %R15,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD 0xb4(%RSP),%ZMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8(%R8),%ZMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVAPD %ZMM10,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x8(%RAX),%ZMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x80(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %ZMM11,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x10(%R11),%ZMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x10(%RCX),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA64 %ZMM12,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x10(%RDI),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x78(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RDX,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM13,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPADDD 0x31ede(%RIP),%ZMM7,%ZMM29 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
VPBROADCASTQ %RSI,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0xb8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA64 %ZMM14,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ %R10,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x10(%R8),%ZMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA64 %ZMM15,0x440(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VMOVDQA64 %ZMM1,0x400(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VMOVDQA64 %ZMM6,0x3c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %RDX,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM5,0x380(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM7,0x340(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x10(%RCX),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x68(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R11,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x31e24(%RIP),%ZMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x10(%RDI),%ZMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVSD %XMM4,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x10(%R8),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVSD %XMM0,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x10(%RCX),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %RDX,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x31de7(%RIP),%ZMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA64 %ZMM8,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %RDX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM2,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R15,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM9,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x31f50(%RIP),%ZMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RAX,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDI,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM13,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ %R11,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM12,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD 0x58(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x50(%RSP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
AND $-0x10,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVSD 0x48(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R9D,0xb4(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST $0xf,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 43163a <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xeea> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x68(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0xb4(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R9),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x80(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x580(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R13,%R12,1),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVSD 0x5c0(%RSP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%R11,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x88(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RSI,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,0x540(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R11,%R8 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %R15,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RSI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R9),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x500(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x315b9(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVSD 0x315ed(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R11,%RSI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (,%R10,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RAX,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x500(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RAX,%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x480(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb4(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x4c0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4314dd <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xd8d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV 0x500(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM5,0x5c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
INCQ 0xb8(%RSP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
SUB %EBX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %ECX,0xb0(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 431680 <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xf30> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x34(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RSP),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,0xb4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R14D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 430920 <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0x1d0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
nb instructions | 201 |
nb uops | 204 |
loop length | 1109 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 0 |
used zmm registers | 22 |
nb stack references | 41 |
micro-operation queue | 34.00 cycles |
front end | 34.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.30 | 22.00 | 29.33 | 29.33 | 15.00 | 15.00 | 10.30 | 15.00 | 15.00 | 15.00 | 10.40 | 29.33 |
cycles | 10.30 | 26.00 | 29.33 | 29.33 | 15.00 | 15.00 | 10.30 | 15.00 | 15.00 | 15.00 | 10.40 | 29.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 35.18 |
Stall cycles | 0.84 |
LM full (events) | 2.99 |
Front-end | 34.00 |
Dispatch | 29.33 |
Overall L1 | 34.00 |
all | 26% |
load | 6% |
store | 57% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 16% |
load | 0% |
store | 44% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 23% |
load | 3% |
store | 53% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 34% |
load | 16% |
store | 61% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 27% |
load | 12% |
store | 51% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 32% |
load | 14% |
store | 58% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %R13D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVBE %R10D,%R13D | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1-2 | 1 |
LEA (%R14,%R13,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %EBX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43163d <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xeed> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%R13),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (,%R15,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0xe,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 431330 <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xbe0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x90(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM0,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
MOV 0x88(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %ZMM8,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %R13D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %ZMM9,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
SHR $0x4,%R9D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
IMUL %R15,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD 0xb4(%RSP),%ZMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x8(%R8),%ZMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVAPD %ZMM10,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x8(%RAX),%ZMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x80(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %ZMM11,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x10(%R11),%ZMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x10(%RCX),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA64 %ZMM12,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x10(%RDI),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x78(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RDX,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM13,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPADDD 0x31ede(%RIP),%ZMM7,%ZMM29 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
VPBROADCASTQ %RSI,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0xb8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA64 %ZMM14,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ %R10,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x10(%R8),%ZMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA64 %ZMM15,0x440(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VMOVDQA64 %ZMM1,0x400(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VMOVDQA64 %ZMM6,0x3c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %RDX,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM5,0x380(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM7,0x340(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ 0x10(%RCX),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RAX,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM3,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x68(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R11,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x31e24(%RIP),%ZMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV (%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x10(%RDI),%ZMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVSD %XMM4,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ 0x10(%R8),%ZMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVSD %XMM0,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RCX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x10(%RCX),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %RDX,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD 0x31de7(%RIP),%ZMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA64 %ZMM8,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %RDX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVSD %XMM2,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R15,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R10,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM9,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VBROADCASTSD 0x31f50(%RIP),%ZMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ %RAX,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RSI,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RDI,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM13,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VPBROADCASTQ %R11,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQA64 %ZMM12,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD 0x58(%RSP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x50(%RSP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
AND $-0x10,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
VMOVSD 0x48(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%RSP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R9D,0xb4(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
ADD %R9D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST $0xf,%R13B | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 43163a <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xeea> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x68(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa0(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xa8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0xb4(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R9),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x80(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x4c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x8(%RDX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x580(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R13,%R12,1),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVSD 0x5c0(%RSP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RSI,%R11,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x88(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R12,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xb8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RSI,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,0x540(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R11,%R8 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %R15,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV (%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x10(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RSI,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x480(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R9),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R10),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x500(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x315b9(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVSD 0x315ed(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R11,%RSI,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R15,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (,%R10,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R9,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RAX,%R10 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RAX,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x500(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb8(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %RAX,%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x480(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0xb4(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x4c0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4314dd <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xd8d> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV 0x500(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD %XMM5,0x5c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EBX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x30(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
INCQ 0xb8(%RSP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
SUB %EBX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %ECX,0xb0(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 431680 <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0xf30> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x34(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RSP),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,0xb4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R14D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JMP 430920 <_Z14calc_dt_kerneliiiidddddRN6clover8Buffer2DIdEES2_RNS_8Buffer1DIdEES5_S5_S5_S2_S2_S2_S2_S2_S2_S2_S2_RdRiS6_S6_S7_S7_S7_._omp_fn.0.lto_priv.0+0x1d0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |