Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.15% |
---|
Function: accelerate_kernel_.DIR.OMP.PARALLEL.2 | Module: exec | Source: accelerate_kernel.f90:57-79 | Coverage: 5.15% |
---|
/beegfs/hackathon/users/eoseret/qaas_runs/170-861-0321/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/accelerate_kernel.f90: 57 - 79 |
-------------------------------------------------------------------------------- |
57: !$OMP PARALLEL |
58: |
59: !$OMP DO PRIVATE(j,k,stepbymass_s) |
60: DO k=y_min,y_max+1 |
61: !$OMP SIMD |
62: DO j=x_min,x_max+1 |
63: stepbymass_s=halfdt/((density0(j-1,k-1)*volume(j-1,k-1) & |
64: +density0(j ,k-1)*volume(j ,k-1) & |
65: +density0(j ,k )*volume(j ,k ) & |
66: +density0(j-1,k )*volume(j-1,k )) & |
67: *0.25_8) |
68: |
69: xvel1(j,k)=xvel0(j,k)-stepbymass_s*(xarea(j ,k )*(pressure(j ,k )-pressure(j-1,k )) & |
70: +xarea(j ,k-1)*(pressure(j ,k-1)-pressure(j-1,k-1))) |
71: yvel1(j,k)=yvel0(j,k)-stepbymass_s*(yarea(j ,k )*(pressure(j ,k )-pressure(j ,k-1)) & |
72: +yarea(j-1,k )*(pressure(j-1,k )-pressure(j-1,k-1))) |
73: xvel1(j,k)=xvel1(j,k)-stepbymass_s*(xarea(j ,k )*(viscosity(j ,k )-viscosity(j-1,k )) & |
74: +xarea(j ,k-1)*(viscosity(j ,k-1)-viscosity(j-1,k-1))) |
75: yvel1(j,k)=yvel1(j,k)-stepbymass_s*(yarea(j ,k )*(viscosity(j ,k )-viscosity(j ,k-1)) & |
76: +yarea(j-1,k )*(viscosity(j-1,k )-viscosity(j-1,k-1))) |
77: ENDDO |
78: ENDDO |
79: !$OMP END DO |
0x42bb20 PUSH %RBP |
0x42bb21 MOV %RSP,%RBP |
0x42bb24 PUSH %R15 |
0x42bb26 PUSH %R14 |
0x42bb28 PUSH %R13 |
0x42bb2a PUSH %R12 |
0x42bb2c PUSH %RBX |
0x42bb2d SUB $0x148,%RSP |
0x42bb34 MOV %R9,-0x88(%RBP) |
0x42bb3b MOV 0x60(%RBP),%EBX |
0x42bb3e MOV 0x58(%RBP),%EAX |
0x42bb41 SUB %EBX,%EAX |
0x42bb43 INC %EAX |
0x42bb45 MOVL $0,-0x80(%RBP) |
0x42bb4c JS 42bbc4 |
0x42bb4e MOV %R8,%R15 |
0x42bb51 MOV %RCX,%R12 |
0x42bb54 MOV %RDX,%R13 |
0x42bb57 MOV %RDI,-0xd8(%RBP) |
0x42bb5e MOV (%RDI),%ESI |
0x42bb60 MOVL $0,-0x40(%RBP) |
0x42bb67 MOV %EAX,-0x3c(%RBP) |
0x42bb6a MOVL $0x1,-0x7c(%RBP) |
0x42bb71 SUB $0x8,%RSP |
0x42bb75 LEA -0x7c(%RBP),%RAX |
0x42bb79 LEA -0x80(%RBP),%RCX |
0x42bb7d LEA -0x40(%RBP),%R8 |
0x42bb81 LEA -0x3c(%RBP),%R9 |
0x42bb85 MOV $0x570f80,%EDI |
0x42bb8a MOV %ESI,-0x74(%RBP) |
0x42bb8d MOV $0x22,%EDX |
0x42bb92 PUSH $0x1 |
0x42bb94 PUSH $0x1 |
0x42bb96 PUSH %RAX |
0x42bb97 CALL 404670 <__kmpc_for_static_init_4@plt> |
0x42bb9c ADD $0x20,%RSP |
0x42bba0 MOV -0x40(%RBP),%EAX |
0x42bba3 MOV -0x3c(%RBP),%ECX |
0x42bba6 MOV %EAX,-0x2c(%RBP) |
0x42bba9 SUB %EAX,%ECX |
0x42bbab MOV %ECX,-0x78(%RBP) |
0x42bbae JAE 42bbe2 |
0x42bbb0 MOV $0x570fa0,%EDI |
0x42bbb5 MOV -0x74(%RBP),%ESI |
0x42bbb8 CALL 404230 <__kmpc_for_static_fini@plt> |
0x42bbbd MOV -0xd8(%RBP),%RDI |
0x42bbc4 MOV (%RDI),%ESI |
0x42bbc6 MOV $0x570fc0,%EDI |
0x42bbcb CALL 404740 <__kmpc_barrier@plt> |
0x42bbd0 ADD $0x148,%RSP |
0x42bbd7 POP %RBX |
0x42bbd8 POP %R12 |
0x42bbda POP %R13 |
0x42bbdc POP %R14 |
0x42bbde POP %R15 |
0x42bbe0 POP %RBP |
0x42bbe1 RET |
0x42bbe2 MOV 0x40(%RBP),%RCX |
0x42bbe6 MOV 0x38(%RBP),%RAX |
0x42bbea SAL $0x20,%R15 |
0x42bbee MOV $-0x200000000,%RDX |
0x42bbf8 LEA (%R15,%RDX,1),%RSI |
0x42bbfc SAL $0x20,%R12 |
0x42bc00 ADD %R12,%RDX |
0x42bc03 MOV %RDX,%R10 |
0x42bc06 SAR $0x20,%R10 |
0x42bc0a TEST %RSI,%RSI |
0x42bc0d MOV $-0x1,%R8 |
0x42bc14 CMOVNS %RSI,%R8 |
0x42bc18 TEST %R8,%R8 |
0x42bc1b MOV $0x1,%R9D |
0x42bc21 CMOVG %R9,%R8 |
0x42bc25 MOV $0x200000000,%R11 |
0x42bc2f MOV %R11,%R14 |
0x42bc32 SUB %R15,%R14 |
0x42bc35 CMP %R14,%RSI |
0x42bc38 CMOVG %RSI,%R14 |
0x42bc3c MOV $-0x1,%RDI |
0x42bc43 SHR $0x20,%R14 |
0x42bc47 IMUL %R8,%R14 |
0x42bc4b LEA (,%R14,8),%RSI |
0x42bc53 MOV %RCX,%R8 |
0x42bc56 SUB %RSI,%R8 |
0x42bc59 MOV %R8,-0x110(%RBP) |
0x42bc60 ADD %EBX,-0x2c(%RBP) |
0x42bc63 MOV %RAX,%R8 |
0x42bc66 SUB %RSI,%R8 |
0x42bc69 MOV %R8,-0x108(%RBP) |
0x42bc70 TEST %RDX,%RDX |
0x42bc73 CMOVNS %RDX,%RDI |
0x42bc77 TEST %RDI,%RDI |
0x42bc7a CMOVG %R9,%RDI |
0x42bc7e MOV %R14,%R8 |
0x42bc81 NOT %R8 |
0x42bc84 LEA (%RCX,%R8,8),%RCX |
0x42bc88 MOV %RCX,-0x128(%RBP) |
0x42bc8f LEA (%RAX,%R8,8),%RAX |
0x42bc93 MOV %RAX,-0x120(%RBP) |
0x42bc9a MOVQ %R13,%XMM0 |
0x42bc9f MOV 0x48(%RBP),%RAX |
0x42bca3 SUB %R12,%R11 |
0x42bca6 MOV 0x30(%RBP),%RCX |
0x42bcaa CMP %R11,%RDX |
0x42bcad CMOVG %RDX,%R11 |
0x42bcb1 MOV 0x28(%RBP),%RDX |
0x42bcb5 SHR $0x20,%R11 |
0x42bcb9 IMUL %RDI,%R11 |
0x42bcbd SUB %RSI,%RCX |
0x42bcc0 MOV %RCX,-0xa0(%RBP) |
0x42bcc7 LEA (%RAX,%R8,8),%RCX |
0x42bccb MOV %RCX,-0x118(%RBP) |
0x42bcd2 SUB %RSI,%RAX |
0x42bcd5 MOV %RAX,-0x100(%RBP) |
0x42bcdc SUB %RSI,%RDX |
0x42bcdf MOV %RDX,-0x98(%RBP) |
0x42bce6 MOV -0x88(%RBP),%RAX |
0x42bced SUB %RSI,%RAX |
0x42bcf0 MOV %RAX,-0xf8(%RBP) |
0x42bcf7 MOV 0x10(%RBP),%RAX |
0x42bcfb SUB %RSI,%RAX |
0x42bcfe MOV %RAX,-0xf0(%RBP) |
0x42bd05 MOV 0x20(%RBP),%RAX |
0x42bd09 SUB %RSI,%RAX |
0x42bd0c MOV %RAX,-0xe8(%RBP) |
0x42bd13 MOV 0x18(%RBP),%RAX |
0x42bd17 SUB %RSI,%RAX |
0x42bd1a MOV %RAX,-0xe0(%RBP) |
0x42bd21 MOV 0x50(%RBP),%RAX |
0x42bd25 SUB %RSI,%RAX |
0x42bd28 MOV %RAX,-0x90(%RBP) |
0x42bd2f NOT %R10 |
0x42bd32 MOV %R10,-0x140(%RBP) |
0x42bd39 NEG %R11 |
0x42bd3c MOV %R11,-0x138(%RBP) |
0x42bd43 NEG %R14 |
0x42bd46 MOV %R14,-0x130(%RBP) |
0x42bd4d MOVSD 0x101703(%RIP),%XMM1 |
0x42bd55 MOVAPD 0x101833(%RIP),%XMM2 |
0x42bd5d XOR %EAX,%EAX |
0x42bd5f JMP 42bd86 |
0x42bd61 NOPW %CS:(%RAX,%RAX,1) |
(186) 0x42bd70 MOV -0x168(%RBP),%RCX |
(186) 0x42bd77 LEA 0x1(%RCX),%EAX |
(186) 0x42bd7a INCL -0x2c(%RBP) |
(186) 0x42bd7d CMP -0x78(%RBP),%ECX |
(186) 0x42bd80 JE 42bbb0 |
(186) 0x42bd86 MOV %RAX,-0x168(%RBP) |
(186) 0x42bd8d MOV 0x70(%RBP),%RAX |
(186) 0x42bd91 MOVSXD (%RAX),%R10 |
(186) 0x42bd94 MOV 0x68(%RBP),%RAX |
(186) 0x42bd98 MOV (%RAX),%ECX |
(186) 0x42bd9a MOV %ECX,%EAX |
(186) 0x42bd9c SUB %R10D,%EAX |
(186) 0x42bd9f INC %EAX |
(186) 0x42bda1 JS 42bd70 |
(186) 0x42bda3 MOVSXD -0x2c(%RBP),%R11 |
(186) 0x42bda7 MOV -0x140(%RBP),%RAX |
(186) 0x42bdae LEA (%RAX,%R11,1),%R9 |
(186) 0x42bdb2 ADD -0x138(%RBP),%R11 |
(186) 0x42bdb9 MOV 0x78(%RBP),%RAX |
(186) 0x42bdbd MOV (%RAX),%RBX |
(186) 0x42bdc0 MOV 0x80(%RBP),%RAX |
(186) 0x42bdc7 MOV (%RAX),%R12 |
(186) 0x42bdca MOV 0x88(%RBP),%RAX |
(186) 0x42bdd1 MOV (%RAX),%RAX |
(186) 0x42bdd4 MOV %RAX,-0xc8(%RBP) |
(186) 0x42bddb MOV 0x90(%RBP),%RAX |
(186) 0x42bde2 MOV (%RAX),%RDX |
(186) 0x42bde5 MOV 0x98(%RBP),%RAX |
(186) 0x42bdec MOV (%RAX),%R15 |
(186) 0x42bdef MOV 0xa0(%RBP),%RAX |
(186) 0x42bdf6 MOV (%RAX),%R13 |
(186) 0x42bdf9 MOV 0xa8(%RBP),%RAX |
(186) 0x42be00 MOV (%RAX),%RSI |
(186) 0x42be03 MOV 0xb0(%RBP),%RAX |
(186) 0x42be0a MOV (%RAX),%R14 |
(186) 0x42be0d MOV 0xb8(%RBP),%RAX |
(186) 0x42be14 MOV (%RAX),%RAX |
(186) 0x42be17 MOV %RAX,-0xb8(%RBP) |
(186) 0x42be1e MOV 0xc0(%RBP),%RAX |
(186) 0x42be25 MOV (%RAX),%RDI |
(186) 0x42be28 SUB %R10D,%ECX |
(186) 0x42be2b ADD $0x2,%ECX |
(186) 0x42be2e CMP $0x2,%ECX |
(186) 0x42be31 MOV $0x1,%EAX |
(186) 0x42be36 CMOVL %EAX,%ECX |
(186) 0x42be39 MOV %RCX,%R8 |
(186) 0x42be3c AND $0x7ffffffe,%R8 |
(186) 0x42be43 MOV %RCX,-0x70(%RBP) |
(186) 0x42be47 MOV %RDX,-0xd0(%RBP) |
(186) 0x42be4e MOV %RSI,-0xc0(%RBP) |
(186) 0x42be55 MOV %R14,-0x160(%RBP) |
(186) 0x42be5c JE 42c190 |
(186) 0x42be62 MOV %R12,%RAX |
(186) 0x42be65 IMUL %R9,%RAX |
(186) 0x42be69 LEA (%RAX,%R10,8),%RCX |
(186) 0x42be6d MOV -0x110(%RBP),%RDX |
(186) 0x42be74 ADD %RDX,%RCX |
(186) 0x42be77 MOV %RBX,%RAX |
(186) 0x42be7a IMUL %R9,%RAX |
(186) 0x42be7e MOV %R13,-0x48(%RBP) |
(186) 0x42be82 MOV %R12,%R13 |
(186) 0x42be85 MOV %R9,%R12 |
(186) 0x42be88 MOV %R9,-0xb0(%RBP) |
(186) 0x42be8f MOV %R15,%R9 |
(186) 0x42be92 LEA (%RAX,%R10,8),%R15 |
(186) 0x42be96 MOV -0x108(%RBP),%RSI |
(186) 0x42be9d ADD %RSI,%R15 |
(186) 0x42bea0 MOV %R13,-0x58(%RBP) |
(186) 0x42bea4 MOV %R13,%RAX |
(186) 0x42bea7 IMUL %R11,%RAX |
(186) 0x42beab MOV %R8,-0x38(%RBP) |
(186) 0x42beaf LEA (%RAX,%R10,8),%R8 |
(186) 0x42beb3 ADD %RDX,%R8 |
(186) 0x42beb6 MOV %RBX,-0x60(%RBP) |
(186) 0x42beba MOV %RBX,%RAX |
(186) 0x42bebd IMUL %R11,%RAX |
(186) 0x42bec1 MOV %RDI,%R13 |
(186) 0x42bec4 MOV %R10,%RDI |
(186) 0x42bec7 LEA (%RAX,%R10,8),%R10 |
(186) 0x42becb ADD %RSI,%R10 |
(186) 0x42bece MOV %R9,%RAX |
(186) 0x42bed1 IMUL %R11,%RAX |
(186) 0x42bed5 MOV %R11,%RSI |
(186) 0x42bed8 LEA (%RAX,%RDI,8),%R11 |
(186) 0x42bedc MOV -0xa0(%RBP),%RDX |
(186) 0x42bee3 ADD %RDX,%R11 |
(186) 0x42bee6 MOV %R9,-0x148(%RBP) |
(186) 0x42beed IMUL %R12,%R9 |
(186) 0x42bef1 LEA (%R9,%RDI,8),%RBX |
(186) 0x42bef5 ADD %RDX,%RBX |
(186) 0x42bef8 IMUL %RSI,%R14 |
(186) 0x42befc LEA (%R14,%RDI,8),%R9 |
(186) 0x42bf00 ADD -0x100(%RBP),%R9 |
(186) 0x42bf07 MOV %R13,%RAX |
(186) 0x42bf0a IMUL %RSI,%RAX |
(186) 0x42bf0e MOV %RSI,%R14 |
(186) 0x42bf11 LEA (%RAX,%RDI,8),%RAX |
(186) 0x42bf15 MOV -0x98(%RBP),%RSI |
(186) 0x42bf1c ADD %RSI,%RAX |
(186) 0x42bf1f MOV %R13,-0x150(%RBP) |
(186) 0x42bf26 IMUL %R12,%R13 |
(186) 0x42bf2a LEA (%R13,%RDI,8),%R13 |
(186) 0x42bf2f ADD %RSI,%R13 |
(186) 0x42bf32 MOV -0xb8(%RBP),%RDX |
(186) 0x42bf39 MOV %R14,-0xa8(%RBP) |
(186) 0x42bf40 IMUL %R14,%RDX |
(186) 0x42bf44 LEA (%RDX,%RDI,8),%RDX |
(186) 0x42bf48 ADD -0xf8(%RBP),%RDX |
(186) 0x42bf4f MOV %RDX,-0x50(%RBP) |
(186) 0x42bf53 MOV -0x48(%RBP),%RDX |
(186) 0x42bf57 IMUL %R14,%RDX |
(186) 0x42bf5b LEA (%RDX,%RDI,8),%RDX |
(186) 0x42bf5f ADD -0xf0(%RBP),%RDX |
(186) 0x42bf66 MOV %RDX,-0x68(%RBP) |
(186) 0x42bf6a MOV -0xc8(%RBP),%RDX |
(186) 0x42bf71 IMUL %R14,%RDX |
(186) 0x42bf75 LEA (%RDX,%RDI,8),%RDX |
(186) 0x42bf79 ADD -0xe8(%RBP),%RDX |
(186) 0x42bf80 MOV %RDX,-0x170(%RBP) |
(186) 0x42bf87 MOV -0xc0(%RBP),%RDX |
(186) 0x42bf8e IMUL %R14,%RDX |
(186) 0x42bf92 LEA (%RDX,%RDI,8),%RSI |
(186) 0x42bf96 ADD -0xe0(%RBP),%RSI |
(186) 0x42bf9d MOV -0xd0(%RBP),%R12 |
(186) 0x42bfa4 MOV %R12,%RDX |
(186) 0x42bfa7 IMUL -0xb0(%RBP),%RDX |
(186) 0x42bfaf LEA (%RDX,%RDI,8),%R14 |
(186) 0x42bfb3 ADD -0x90(%RBP),%R14 |
(186) 0x42bfba MOV %R12,%RDX |
(186) 0x42bfbd IMUL -0xa8(%RBP),%RDX |
(186) 0x42bfc5 MOV %RDI,-0x158(%RBP) |
(186) 0x42bfcc LEA (%RDX,%RDI,8),%RDX |
(186) 0x42bfd0 ADD -0x90(%RBP),%RDX |
(186) 0x42bfd7 XOR %EDI,%EDI |
(186) 0x42bfd9 NOPL (%RAX) |
(188) 0x42bfe0 MOVUPD -0x8(%RCX,%RDI,8),%XMM3 |
(188) 0x42bfe6 MOVUPD (%RCX,%RDI,8),%XMM4 |
(188) 0x42bfeb MOVUPD -0x8(%R15,%RDI,8),%XMM5 |
(188) 0x42bff2 MULPD %XMM3,%XMM5 |
(188) 0x42bff6 MOVUPD (%R15,%RDI,8),%XMM3 |
(188) 0x42bffc MULPD %XMM4,%XMM3 |
(188) 0x42c000 ADDPD %XMM5,%XMM3 |
(188) 0x42c004 MOVUPD -0x8(%R8,%RDI,8),%XMM4 |
(188) 0x42c00b MOVUPD (%R8,%RDI,8),%XMM5 |
(188) 0x42c011 MOVUPD -0x8(%R10,%RDI,8),%XMM6 |
(188) 0x42c018 MULPD %XMM4,%XMM6 |
(188) 0x42c01c MOVUPD (%R10,%RDI,8),%XMM4 |
(188) 0x42c022 MULPD %XMM5,%XMM4 |
(188) 0x42c026 ADDPD %XMM6,%XMM4 |
(188) 0x42c02a ADDPD %XMM3,%XMM4 |
(188) 0x42c02e MULPD %XMM2,%XMM4 |
(188) 0x42c032 MOVDQA %XMM0,%XMM3 |
(188) 0x42c036 PUNPCKLQDQ %XMM0,%XMM3 |
(188) 0x42c03a DIVPD %XMM4,%XMM3 |
(188) 0x42c03e MOVUPD (%RDX,%RDI,8),%XMM5 |
(188) 0x42c043 MOVUPD -0x8(%R11,%RDI,8),%XMM7 |
(188) 0x42c04a MOVUPD (%R11,%RDI,8),%XMM9 |
(188) 0x42c050 MOVUPD -0x8(%RBX,%RDI,8),%XMM4 |
(188) 0x42c056 MOVAPD %XMM4,%XMM6 |
(188) 0x42c05a SUBPD %XMM7,%XMM4 |
(188) 0x42c05e SUBPD %XMM9,%XMM7 |
(188) 0x42c063 MULPD %XMM5,%XMM7 |
(188) 0x42c067 MOVUPD (%R14,%RDI,8),%XMM10 |
(188) 0x42c06d MOVUPD (%RBX,%RDI,8),%XMM11 |
(188) 0x42c073 SUBPD %XMM11,%XMM6 |
(188) 0x42c078 MULPD %XMM10,%XMM6 |
(188) 0x42c07d ADDPD %XMM7,%XMM6 |
(188) 0x42c081 MOVUPD -0x8(%R9,%RDI,8),%XMM7 |
(188) 0x42c088 MOVUPD (%R9,%RDI,8),%XMM8 |
(188) 0x42c08e SUBPD %XMM9,%XMM11 |
(188) 0x42c093 MULPD %XMM8,%XMM11 |
(188) 0x42c098 MULPD %XMM7,%XMM4 |
(188) 0x42c09c ADDPD %XMM11,%XMM4 |
(188) 0x42c0a1 MOVUPD -0x8(%RAX,%RDI,8),%XMM11 |
(188) 0x42c0a8 MOVUPD (%RAX,%RDI,8),%XMM12 |
(188) 0x42c0ae MOVUPD -0x8(%R13,%RDI,8),%XMM9 |
(188) 0x42c0b5 MOVAPD %XMM9,%XMM13 |
(188) 0x42c0ba SUBPD %XMM11,%XMM9 |
(188) 0x42c0bf SUBPD %XMM12,%XMM11 |
(188) 0x42c0c4 MULPD %XMM5,%XMM11 |
(188) 0x42c0c9 MOVUPD (%R13,%RDI,8),%XMM5 |
(188) 0x42c0d0 SUBPD %XMM5,%XMM13 |
(188) 0x42c0d5 MULPD %XMM10,%XMM13 |
(188) 0x42c0da ADDPD %XMM11,%XMM13 |
(188) 0x42c0df ADDPD %XMM6,%XMM13 |
(188) 0x42c0e4 MULPD %XMM3,%XMM13 |
(188) 0x42c0e9 MOV -0x170(%RBP),%R12 |
(188) 0x42c0f0 MOVUPD (%R12,%RDI,8),%XMM6 |
(188) 0x42c0f6 ADDPD %XMM13,%XMM6 |
(188) 0x42c0fb MOVUPD (%RSI,%RDI,8),%XMM10 |
(188) 0x42c101 MOV -0x68(%RBP),%R12 |
(188) 0x42c105 MOVUPD %XMM6,(%R12,%RDI,8) |
(188) 0x42c10b SUBPD %XMM12,%XMM5 |
(188) 0x42c110 MULPD %XMM8,%XMM5 |
(188) 0x42c115 MULPD %XMM7,%XMM9 |
(188) 0x42c11a ADDPD %XMM5,%XMM9 |
(188) 0x42c11f ADDPD %XMM4,%XMM9 |
(188) 0x42c124 MULPD %XMM3,%XMM9 |
(188) 0x42c129 ADDPD %XMM10,%XMM9 |
(188) 0x42c12e MOV -0x50(%RBP),%R12 |
(188) 0x42c132 MOVUPD %XMM9,(%R12,%RDI,8) |
(188) 0x42c138 ADD $0x2,%RDI |
(188) 0x42c13c CMP -0x38(%RBP),%RDI |
(188) 0x42c140 JB 42bfe0 |
(186) 0x42c146 MOV -0x38(%RBP),%R8 |
(186) 0x42c14a CMP -0x70(%RBP),%R8 |
(186) 0x42c14e MOV -0xb0(%RBP),%R9 |
(186) 0x42c155 MOV -0xa8(%RBP),%R11 |
(186) 0x42c15c MOV -0x60(%RBP),%RBX |
(186) 0x42c160 MOV -0x58(%RBP),%R12 |
(186) 0x42c164 MOV -0x48(%RBP),%R13 |
(186) 0x42c168 MOV -0x158(%RBP),%R10 |
(186) 0x42c16f MOV -0x150(%RBP),%RDI |
(186) 0x42c176 MOV -0x148(%RBP),%R15 |
(186) 0x42c17d JE 42bd70 |
(186) 0x42c183 JMP 42c193 |
0x42c185 NOPW %CS:(%RAX,%RAX,1) |
(186) 0x42c190 XOR %R8D,%R8D |
(186) 0x42c193 SUB %R8,-0x70(%RBP) |
(186) 0x42c197 MOV %R15,%RAX |
(186) 0x42c19a IMUL %R11,%RAX |
(186) 0x42c19e LEA (%RAX,%R8,8),%RCX |
(186) 0x42c1a2 IMUL %R9,%R15 |
(186) 0x42c1a6 LEA (%R15,%R8,8),%RDX |
(186) 0x42c1aa MOV %RDI,%RAX |
(186) 0x42c1ad IMUL %R11,%RAX |
(186) 0x42c1b1 LEA (%RAX,%R8,8),%RSI |
(186) 0x42c1b5 IMUL %R9,%RDI |
(186) 0x42c1b9 LEA (%RDI,%R8,8),%RDI |
(186) 0x42c1bd LEA (%R8,%R10,1),%RAX |
(186) 0x42c1c1 LEA (%RCX,%R10,8),%RCX |
(186) 0x42c1c5 LEA (%RDX,%R10,8),%R8 |
(186) 0x42c1c9 MOV %R13,%RDX |
(186) 0x42c1cc MOV %R9,%R13 |
(186) 0x42c1cf LEA (%RSI,%R10,8),%R9 |
(186) 0x42c1d3 LEA (%RDI,%R10,8),%R10 |
(186) 0x42c1d7 MOV %R12,%RDI |
(186) 0x42c1da IMUL %R13,%RDI |
(186) 0x42c1de MOV %RBX,%R15 |
(186) 0x42c1e1 IMUL %R13,%R15 |
(186) 0x42c1e5 IMUL %R11,%R12 |
(186) 0x42c1e9 MOV %R12,-0x58(%RBP) |
(186) 0x42c1ed IMUL %R11,%RBX |
(186) 0x42c1f1 MOV %RBX,-0x60(%RBP) |
(186) 0x42c1f5 MOV -0x160(%RBP),%RBX |
(186) 0x42c1fc IMUL %R11,%RBX |
(186) 0x42c200 MOV -0xb8(%RBP),%R14 |
(186) 0x42c207 IMUL %R11,%R14 |
(186) 0x42c20b IMUL %R11,%RDX |
(186) 0x42c20f MOV %RDX,-0x48(%RBP) |
(186) 0x42c213 MOV -0xc8(%RBP),%R12 |
(186) 0x42c21a IMUL %R11,%R12 |
(186) 0x42c21e MOV -0xc0(%RBP),%RSI |
(186) 0x42c225 IMUL %R11,%RSI |
(186) 0x42c229 MOV -0xd0(%RBP),%RDX |
(186) 0x42c230 IMUL %RDX,%R13 |
(186) 0x42c234 IMUL %R11,%RDX |
(186) 0x42c238 LEA (%RDI,%RAX,8),%RDI |
(186) 0x42c23c LEA (%R15,%RAX,8),%R11 |
(186) 0x42c240 MOV %R11,-0x50(%RBP) |
(186) 0x42c244 MOV -0x58(%RBP),%R11 |
(186) 0x42c248 LEA (%R11,%RAX,8),%R11 |
(186) 0x42c24c MOV %R11,-0x38(%RBP) |
(186) 0x42c250 MOV -0x60(%RBP),%R11 |
(186) 0x42c254 LEA (%R11,%RAX,8),%R11 |
(186) 0x42c258 LEA (%RBX,%RAX,8),%RBX |
(186) 0x42c25c ADD -0x130(%RBP),%RAX |
(186) 0x42c263 LEA (%R14,%RAX,8),%R14 |
(186) 0x42c267 MOV -0x48(%RBP),%R15 |
(186) 0x42c26b LEA (%R15,%RAX,8),%R15 |
(186) 0x42c26f LEA (%R12,%RAX,8),%R12 |
(186) 0x42c273 LEA (%RSI,%RAX,8),%RSI |
(186) 0x42c277 LEA (%R13,%RAX,8),%R13 |
(186) 0x42c27c LEA (%RDX,%RAX,8),%RAX |
(186) 0x42c280 MOV -0x128(%RBP),%RDX |
(186) 0x42c287 ADD %RDX,%RDI |
(186) 0x42c28a MOV %RDI,-0x68(%RBP) |
(186) 0x42c28e MOV -0x120(%RBP),%RDI |
(186) 0x42c295 ADD %RDI,-0x50(%RBP) |
(186) 0x42c299 ADD %RDX,-0x38(%RBP) |
(186) 0x42c29d ADD %RDI,%R11 |
(186) 0x42c2a0 MOV -0xa0(%RBP),%RDX |
(186) 0x42c2a7 ADD %RDX,%RCX |
(186) 0x42c2aa ADD %RDX,%R8 |
(186) 0x42c2ad ADD -0x118(%RBP),%RBX |
(186) 0x42c2b4 MOV -0x98(%RBP),%RDX |
(186) 0x42c2bb ADD %RDX,%R9 |
(186) 0x42c2be ADD %RDX,%R10 |
(186) 0x42c2c1 ADD -0x88(%RBP),%R14 |
(186) 0x42c2c8 ADD 0x10(%RBP),%R15 |
(186) 0x42c2cc ADD 0x20(%RBP),%R12 |
(186) 0x42c2d0 ADD 0x18(%RBP),%RSI |
(186) 0x42c2d4 MOV 0x50(%RBP),%RDX |
(186) 0x42c2d8 ADD %RDX,%R13 |
(186) 0x42c2db ADD %RDX,%RAX |
(186) 0x42c2de XOR %EDX,%EDX |
(187) 0x42c2e0 MOV -0x68(%RBP),%RDI |
(187) 0x42c2e4 MOVUPD (%RDI,%RDX,8),%XMM3 |
(187) 0x42c2e9 MOV -0x50(%RBP),%RDI |
(187) 0x42c2ed MOVUPD (%RDI,%RDX,8),%XMM4 |
(187) 0x42c2f2 MULPD %XMM3,%XMM4 |
(187) 0x42c2f6 MOV -0x38(%RBP),%RDI |
(187) 0x42c2fa MOVUPD (%RDI,%RDX,8),%XMM3 |
(187) 0x42c2ff MOVUPD (%R11,%RDX,8),%XMM5 |
(187) 0x42c305 MULPD %XMM3,%XMM5 |
(187) 0x42c309 MOVAPD %XMM4,%XMM3 |
(187) 0x42c30d UNPCKHPD %XMM4,%XMM3 |
(187) 0x42c311 ADDSD %XMM4,%XMM3 |
(187) 0x42c315 MOVAPD %XMM5,%XMM4 |
(187) 0x42c319 UNPCKHPD %XMM5,%XMM4 |
(187) 0x42c31d ADDSD %XMM3,%XMM4 |
(187) 0x42c321 ADDSD %XMM5,%XMM4 |
(187) 0x42c325 MULSD %XMM1,%XMM4 |
(187) 0x42c329 MOVDQA %XMM0,%XMM3 |
(187) 0x42c32d DIVSD %XMM4,%XMM3 |
(187) 0x42c331 MOVSD -0x8(%RCX,%RDX,8),%XMM5 |
(187) 0x42c337 MOVSD (%RCX,%RDX,8),%XMM4 |
(187) 0x42c33c MOVSD -0x8(%R8,%RDX,8),%XMM7 |
(187) 0x42c343 MOVSD (%R8,%RDX,8),%XMM6 |
(187) 0x42c349 MOVAPD %XMM4,%XMM9 |
(187) 0x42c34e UNPCKLPD %XMM6,%XMM9 |
(187) 0x42c353 MOVSD -0x8(%R9,%RDX,8),%XMM10 |
(187) 0x42c35a MOVSD (%R9,%RDX,8),%XMM12 |
(187) 0x42c360 MOVSD -0x8(%R10,%RDX,8),%XMM8 |
(187) 0x42c367 MOVAPD %XMM5,%XMM11 |
(187) 0x42c36c UNPCKLPD %XMM7,%XMM11 |
(187) 0x42c371 SUBPD %XMM11,%XMM9 |
(187) 0x42c376 MOVSD (%R10,%RDX,8),%XMM11 |
(187) 0x42c37c UNPCKLPD %XMM12,%XMM5 |
(187) 0x42c381 UNPCKLPD %XMM11,%XMM12 |
(187) 0x42c386 UNPCKLPD %XMM10,%XMM4 |
(187) 0x42c38b UNPCKLPD %XMM8,%XMM10 |
(187) 0x42c390 SUBPD %XMM10,%XMM12 |
(187) 0x42c395 MOVSD (%RAX,%RDX,8),%XMM10 |
(187) 0x42c39b MOVHPD (%R13,%RDX,8),%XMM10 |
(187) 0x42c3a2 MULPD %XMM10,%XMM9 |
(187) 0x42c3a7 MULPD %XMM10,%XMM12 |
(187) 0x42c3ac MOVSD (%RSI,%RDX,8),%XMM10 |
(187) 0x42c3b2 ADDPD %XMM9,%XMM12 |
(187) 0x42c3b7 MOVAPD %XMM12,%XMM9 |
(187) 0x42c3bc UNPCKHPD %XMM12,%XMM9 |
(187) 0x42c3c1 ADDSD %XMM12,%XMM9 |
(187) 0x42c3c6 MOVSD (%R12,%RDX,8),%XMM12 |
(187) 0x42c3cc MULSD %XMM3,%XMM9 |
(187) 0x42c3d1 SUBSD %XMM9,%XMM12 |
(187) 0x42c3d6 MOVUPD (%RBX,%RDX,8),%XMM9 |
(187) 0x42c3dc MOVSD %XMM12,(%R15,%RDX,8) |
(187) 0x42c3e2 UNPCKLPD %XMM11,%XMM7 |
(187) 0x42c3e7 SUBPD %XMM7,%XMM5 |
(187) 0x42c3eb UNPCKLPD %XMM8,%XMM6 |
(187) 0x42c3f0 SUBPD %XMM6,%XMM4 |
(187) 0x42c3f4 MULPD %XMM9,%XMM5 |
(187) 0x42c3f9 SHUFPD $0x1,%XMM9,%XMM9 |
(187) 0x42c3ff MULPD %XMM9,%XMM4 |
(187) 0x42c404 ADDPD %XMM5,%XMM4 |
(187) 0x42c408 MOVAPD %XMM4,%XMM5 |
(187) 0x42c40c UNPCKHPD %XMM4,%XMM5 |
(187) 0x42c410 ADDSD %XMM4,%XMM5 |
(187) 0x42c414 MULSD %XMM3,%XMM5 |
(187) 0x42c418 SUBSD %XMM5,%XMM10 |
(187) 0x42c41d MOVSD %XMM10,(%R14,%RDX,8) |
(187) 0x42c423 INC %RDX |
(187) 0x42c426 CMP %RDX,-0x70(%RBP) |
(187) 0x42c42a JNE 42c2e0 |
(186) 0x42c430 JMP 42bd70 |
0x42c435 NOPW %CS:(%RAX,%RAX,1) |
0x42c43f NOP |
Path / |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 144 |
nb uops | 145 |
loop length | 614 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 36 |
micro-operation queue | 24.17 cycles |
front end | 24.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 16.25 | 16.25 | 16.25 | 16.25 | 4.00 | 16.33 | 16.33 | 16.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
cycles | 16.25 | 16.25 | 16.25 | 16.25 | 4.00 | 16.33 | 16.33 | 16.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 24.17 |
Dispatch | 16.33 |
Overall L1 | 24.17 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 1% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 6% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 18% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 11% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x148,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %EBX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVL $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JS 42bbc4 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0x1,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x7c(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x3c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x570f80,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x74(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x3c(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EAX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JAE 42bbe2 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xc2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV $0x570fa0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x74(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xd8(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x570fc0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 404740 <__kmpc_barrier@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x148,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x20,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x200000000,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LEA (%R15,%RDX,1),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x20,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R12,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAR $0x20,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $-0x1,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVNS %RSI,%R8 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %R9,%R8 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %R11,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R15,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %RSI,%R14 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SHR $0x20,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R8,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (,%R14,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %EBX,-0x2c(%RBP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVNS %RDX,%RDI | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %R9,%RDI | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOT %R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R8,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x128(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ %R13,%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R11,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %RDX,%R11 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SHR $0x20,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %RDI,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RAX,%R8,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %RSI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOT %R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NEG %R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NEG %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSD 0x101703(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVAPD 0x101833(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42bd86 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x266> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Source file and lines | accelerate_kernel.f90:57-79 |
Module | exec |
nb instructions | 144 |
nb uops | 145 |
loop length | 614 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 36 |
micro-operation queue | 24.17 cycles |
front end | 24.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 16.25 | 16.25 | 16.25 | 16.25 | 4.00 | 16.33 | 16.33 | 16.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
cycles | 16.25 | 16.25 | 16.25 | 16.25 | 4.00 | 16.33 | 16.33 | 16.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 24.17 |
Dispatch | 16.33 |
Overall L1 | 24.17 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 50% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 1% |
load | 25% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 6% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 18% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 11% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SUB $0x148,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R9,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x60(%RBP),%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x58(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %EBX,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
INC %EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVL $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JS 42bbc4 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xa4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOVL $0,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV %EAX,-0x3c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVL $0x1,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x7c(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x80(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x40(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA -0x3c(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x570f80,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ESI,-0x74(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CALL 404670 <__kmpc_for_static_init_4@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV -0x3c(%RBP),%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV %EAX,-0x2c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %EAX,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %ECX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JAE 42bbe2 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0xc2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 |
MOV $0x570fa0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV -0x74(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CALL 404230 <__kmpc_for_static_fini@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0xd8(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV $0x570fc0,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CALL 404740 <__kmpc_barrier@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD $0x148,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SAL $0x20,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x200000000,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
LEA (%R15,%RDX,1),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SAL $0x20,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
ADD %R12,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAR $0x20,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RSI,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $-0x1,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVNS %RSI,%R8 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %R8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV $0x1,%R9D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %R9,%R8 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $0x200000000,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %R11,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R15,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMP %R14,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %RSI,%R14 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV $-0x1,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SHR $0x20,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R8,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (,%R14,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
ADD %EBX,-0x2c(%RBP) | 2 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R8,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVNS %RDX,%RDI | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %R9,%RDI | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %R14,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOT %R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LEA (%RCX,%R8,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x128(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RAX,%R8,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVQ %R13,%XMM0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 6 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV 0x30(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
CMP %R11,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
CMOVG %RDX,%R11 | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SHR $0x20,%R11 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %RDI,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
SUB %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
LEA (%RAX,%R8,8),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RCX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
SUB %RSI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RDX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 |
SUB %RSI,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NOT %R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R10,-0x140(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NEG %R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R11,-0x138(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
NEG %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOV %R14,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOVSD 0x101703(%RIP),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVAPD 0x101833(%RIP),%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 42bd86 <accelerate_kernel_module_mp_accelerate_kernel_.DIR.OMP.PARALLEL.2+0x266> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼accelerate_kernel_.DIR.OMP.PARALLEL.2– | 5.15 | 2.68 |
▼Loop 186 - accelerate_kernel.f90:60-76 - exec– | 0.04 | 0.02 |
○Loop 188 - accelerate_kernel.f90:62-76 - exec | 5.11 | 2.65 |
○Loop 187 - accelerate_kernel.f90:62-76 - exec | 0 | 0 |