Loop Id: 92 | Module: libparcsr_mv.so | Source: par_csr_matop.c:109-242 [...] | Coverage: 0.06% |
---|
Loop Id: 92 | Module: libparcsr_mv.so | Source: par_csr_matop.c:109-242 [...] | Coverage: 0.06% |
---|
0xc980 MOV -0x98(%RBP),%RAX |
0xc987 MOV -0x68(%RBP),%RCX |
0xc98b MOV %R8,(%RAX,%RCX,8) |
0xc98f MOV -0x90(%RBP),%RAX |
0xc996 MOV %R10,(%RAX,%RCX,8) |
0xc99a MOV %RDX,%R10 |
0xc99d MOV %RBX,%R8 |
0xc9a0 MOV -0xb8(%RBP),%RSI |
0xc9a7 CMP -0xa0(%RBP),%RSI |
0xc9ae LEA 0x1(%RSI),%RSI |
0xc9b2 JE c787 |
0xc9b8 MOV 0x78(%RBP),%RAX |
0xc9bc TEST %RAX,%RAX |
0xc9bf MOV -0xa8(%RBP),%RCX |
0xc9c6 JE c9d4 |
0xc9c8 LEA (%RCX,%RSI,1),%RAX |
0xc9cc MOV %R8,(%R14,%RAX,8) |
0xc9d0 LEA 0x1(%R8),%RBX |
0xc9d4 ADD %RSI,%RCX |
0xc9d7 CMPQ $0,0x70(%RBP) |
0xc9dc MOV %RSI,-0xb8(%RBP) |
0xc9e3 MOV %RCX,-0x68(%RBP) |
0xc9e7 JE ce80 |
0xc9ed MOV 0x10(%RBP),%RAX |
0xc9f1 MOV (%RAX,%RCX,8),%RDI |
0xc9f5 MOV 0x8(%RAX,%RCX,8),%RDX |
0xc9fa LEA 0x1(%RCX),%RSI |
0xc9fe CMP %RDX,%RDI |
0xca01 JGE ce84 |
0xca07 MOV %RSI,-0x88(%RBP) |
0xca0e MOV %RDI,%RAX |
0xca11 NOT %RAX |
0xca14 ADD %RAX,%RDX |
0xca17 MOV %RDX,-0x38(%RBP) |
0xca1b XOR %R12D,%R12D |
0xca1e MOV %R10,%RDX |
0xca21 MOV %RDI,-0x30(%RBP) |
0xca25 JMP ca56 |
(101) 0xca40 MOV -0x38(%RBP),%RSI |
(101) 0xca44 CMP %RSI,%R12 |
(101) 0xca47 LEA 0x1(%R12),%R12 |
(101) 0xca4c MOV -0x30(%RBP),%RDI |
(101) 0xca50 JE cec0 |
(101) 0xca56 LEA (%RDI,%R12,1),%RAX |
(101) 0xca5a MOV 0x18(%RBP),%RCX |
(101) 0xca5e MOV (%RCX,%RAX,8),%RAX |
(101) 0xca62 MOV 0x50(%RBP),%RCX |
(101) 0xca66 MOV (%RCX,%RAX,8),%R13 |
(101) 0xca6a MOV 0x8(%RCX,%RAX,8),%RDI |
(101) 0xca6f MOV %RDI,%R11 |
(101) 0xca72 SUB %R13,%R11 |
(101) 0xca75 JLE cb00 |
(101) 0xca7b CMP $0x8,%R11 |
(101) 0xca7f JAE cb80 |
(101) 0xca85 MOV %R11,%RCX |
(101) 0xca88 AND $-0x8,%RCX |
(101) 0xca8c CMP %R11,%RCX |
(101) 0xca8f JAE cb00 |
(101) 0xca91 ADD %RCX,%R13 |
(101) 0xca94 MOV 0x58(%RBP),%RSI |
(101) 0xca98 JMP cac8 |
(104) 0xcac0 INC %R13 |
(104) 0xcac3 CMP %R13,%RDI |
(104) 0xcac6 JE cb00 |
(104) 0xcac8 MOV (%RSI,%R13,8),%RCX |
(104) 0xcacc ADD %R15,%RCX |
(104) 0xcacf CMP %R10,(%R14,%RCX,8) |
(104) 0xcad3 JGE cac0 |
(104) 0xcad5 MOV %RDX,(%R14,%RCX,8) |
(104) 0xcad9 INC %RDX |
(104) 0xcadc JMP cac0 |
(101) 0xcb00 MOV 0x40(%RBP),%RCX |
(101) 0xcb04 MOV (%RCX,%RAX,8),%RDI |
(101) 0xcb08 MOV 0x8(%RCX,%RAX,8),%RAX |
(101) 0xcb0d MOV %RAX,%R9 |
(101) 0xcb10 SUB %RDI,%R9 |
(101) 0xcb13 JLE ca40 |
(101) 0xcb19 CMP $0x8,%R9 |
(101) 0xcb1d JAE cd40 |
(101) 0xcb23 MOV 0x48(%RBP),%R13 |
(101) 0xcb27 MOV %R9,%RCX |
(101) 0xcb2a AND $-0x8,%RCX |
(101) 0xcb2e CMP %R9,%RCX |
(101) 0xcb31 JAE ca40 |
(101) 0xcb37 ADD %RCX,%RDI |
(101) 0xcb3a MOV -0x38(%RBP),%RSI |
(101) 0xcb3e JMP cb4c |
(102) 0xcb40 INC %RDI |
(102) 0xcb43 CMP %RDI,%RAX |
(102) 0xcb46 JE ca44 |
(102) 0xcb4c MOV (%R13,%RDI,8),%RCX |
(102) 0xcb51 CMP %R8,(%R14,%RCX,8) |
(102) 0xcb55 JGE cb40 |
(102) 0xcb57 MOV %RBX,(%R14,%RCX,8) |
(102) 0xcb5b INC %RBX |
(102) 0xcb5e JMP cb40 |
(101) 0xcb80 MOV %R11,%R9 |
(101) 0xcb83 SHR $0x3,%R9 |
(101) 0xcb87 MOV -0xd0(%RBP),%RCX |
(101) 0xcb8e LEA (%RCX,%R13,8),%RCX |
(101) 0xcb92 JMP cbcd |
(105) 0xcbc0 ADD $0x40,%RCX |
(105) 0xcbc4 DEC %R9 |
(105) 0xcbc7 JE ca85 |
(105) 0xcbcd MOV -0x38(%RCX),%RSI |
(105) 0xcbd1 ADD %R15,%RSI |
(105) 0xcbd4 CMP %R10,(%R14,%RSI,8) |
(105) 0xcbd8 JGE cc80 |
(105) 0xcbde MOV %RDX,(%R14,%RSI,8) |
(105) 0xcbe2 INC %RDX |
(105) 0xcbe5 MOV -0x30(%RCX),%RSI |
(105) 0xcbe9 ADD %R15,%RSI |
(105) 0xcbec CMP %R10,(%R14,%RSI,8) |
(105) 0xcbf0 JL cc91 |
(105) 0xcbf6 MOV -0x28(%RCX),%RSI |
(105) 0xcbfa ADD %R15,%RSI |
(105) 0xcbfd CMP %R10,(%R14,%RSI,8) |
(105) 0xcc01 JGE cca9 |
(105) 0xcc07 MOV %RDX,(%R14,%RSI,8) |
(105) 0xcc0b INC %RDX |
(105) 0xcc0e MOV -0x20(%RCX),%RSI |
(105) 0xcc12 ADD %R15,%RSI |
(105) 0xcc15 CMP %R10,(%R14,%RSI,8) |
(105) 0xcc19 JL ccba |
(105) 0xcc1f MOV -0x18(%RCX),%RSI |
(105) 0xcc23 ADD %R15,%RSI |
(105) 0xcc26 CMP %R10,(%R14,%RSI,8) |
(105) 0xcc2a JGE ccd2 |
(105) 0xcc30 MOV %RDX,(%R14,%RSI,8) |
(105) 0xcc34 INC %RDX |
(105) 0xcc37 MOV -0x10(%RCX),%RSI |
(105) 0xcc3b ADD %R15,%RSI |
(105) 0xcc3e CMP %R10,(%R14,%RSI,8) |
(105) 0xcc42 JL cce3 |
(105) 0xcc48 MOV -0x8(%RCX),%RSI |
(105) 0xcc4c ADD %R15,%RSI |
(105) 0xcc4f CMP %R10,(%R14,%RSI,8) |
(105) 0xcc53 JGE ccfb |
(105) 0xcc59 MOV %RDX,(%R14,%RSI,8) |
(105) 0xcc5d INC %RDX |
(105) 0xcc60 MOV (%RCX),%RSI |
(105) 0xcc63 ADD %R15,%RSI |
(105) 0xcc66 CMP %R10,(%R14,%RSI,8) |
(105) 0xcc6a JGE cbc0 |
(105) 0xcc70 JMP cd0b |
(105) 0xcc80 MOV -0x30(%RCX),%RSI |
(105) 0xcc84 ADD %R15,%RSI |
(105) 0xcc87 CMP %R10,(%R14,%RSI,8) |
(105) 0xcc8b JGE cbf6 |
(105) 0xcc91 MOV %RDX,(%R14,%RSI,8) |
(105) 0xcc95 INC %RDX |
(105) 0xcc98 MOV -0x28(%RCX),%RSI |
(105) 0xcc9c ADD %R15,%RSI |
(105) 0xcc9f CMP %R10,(%R14,%RSI,8) |
(105) 0xcca3 JL cc07 |
(105) 0xcca9 MOV -0x20(%RCX),%RSI |
(105) 0xccad ADD %R15,%RSI |
(105) 0xccb0 CMP %R10,(%R14,%RSI,8) |
(105) 0xccb4 JGE cc1f |
(105) 0xccba MOV %RDX,(%R14,%RSI,8) |
(105) 0xccbe INC %RDX |
(105) 0xccc1 MOV -0x18(%RCX),%RSI |
(105) 0xccc5 ADD %R15,%RSI |
(105) 0xccc8 CMP %R10,(%R14,%RSI,8) |
(105) 0xcccc JL cc30 |
(105) 0xccd2 MOV -0x10(%RCX),%RSI |
(105) 0xccd6 ADD %R15,%RSI |
(105) 0xccd9 CMP %R10,(%R14,%RSI,8) |
(105) 0xccdd JGE cc48 |
(105) 0xcce3 MOV %RDX,(%R14,%RSI,8) |
(105) 0xcce7 INC %RDX |
(105) 0xccea MOV -0x8(%RCX),%RSI |
(105) 0xccee ADD %R15,%RSI |
(105) 0xccf1 CMP %R10,(%R14,%RSI,8) |
(105) 0xccf5 JL cc59 |
(105) 0xccfb MOV (%RCX),%RSI |
(105) 0xccfe ADD %R15,%RSI |
(105) 0xcd01 CMP %R10,(%R14,%RSI,8) |
(105) 0xcd05 JGE cbc0 |
(105) 0xcd0b MOV %RDX,(%R14,%RSI,8) |
(105) 0xcd0f INC %RDX |
(105) 0xcd12 JMP cbc0 |
(101) 0xcd40 MOV %R9,%RCX |
(101) 0xcd43 SHR $0x3,%RCX |
(101) 0xcd47 MOV -0xc8(%RBP),%RSI |
(101) 0xcd4e LEA (%RSI,%RDI,8),%R11 |
(101) 0xcd52 MOV 0x48(%RBP),%R13 |
(101) 0xcd56 JMP cd8d |
(103) 0xcd80 ADD $0x40,%R11 |
(103) 0xcd84 DEC %RCX |
(103) 0xcd87 JE cb27 |
(103) 0xcd8d MOV -0x38(%R11),%RSI |
(103) 0xcd91 CMP %R8,(%R14,%RSI,8) |
(103) 0xcd95 JGE ce00 |
(103) 0xcd97 MOV %RBX,(%R14,%RSI,8) |
(103) 0xcd9b INC %RBX |
(103) 0xcd9e MOV -0x30(%R11),%RSI |
(103) 0xcda2 CMP %R8,(%R14,%RSI,8) |
(103) 0xcda6 JL ce0a |
(103) 0xcda8 MOV -0x28(%R11),%RSI |
(103) 0xcdac CMP %R8,(%R14,%RSI,8) |
(103) 0xcdb0 JGE ce1b |
(103) 0xcdb2 MOV %RBX,(%R14,%RSI,8) |
(103) 0xcdb6 INC %RBX |
(103) 0xcdb9 MOV -0x20(%R11),%RSI |
(103) 0xcdbd CMP %R8,(%R14,%RSI,8) |
(103) 0xcdc1 JL ce25 |
(103) 0xcdc3 MOV -0x18(%R11),%RSI |
(103) 0xcdc7 CMP %R8,(%R14,%RSI,8) |
(103) 0xcdcb JGE ce36 |
(103) 0xcdcd MOV %RBX,(%R14,%RSI,8) |
(103) 0xcdd1 INC %RBX |
(103) 0xcdd4 MOV -0x10(%R11),%RSI |
(103) 0xcdd8 CMP %R8,(%R14,%RSI,8) |
(103) 0xcddc JL ce40 |
(103) 0xcdde MOV -0x8(%R11),%RSI |
(103) 0xcde2 CMP %R8,(%R14,%RSI,8) |
(103) 0xcde6 JGE ce51 |
(103) 0xcde8 MOV %RBX,(%R14,%RSI,8) |
(103) 0xcdec INC %RBX |
(103) 0xcdef MOV (%R11),%RSI |
(103) 0xcdf2 CMP %R8,(%R14,%RSI,8) |
(103) 0xcdf6 JGE cd80 |
(103) 0xcdf8 JMP ce5e |
(103) 0xce00 MOV -0x30(%R11),%RSI |
(103) 0xce04 CMP %R8,(%R14,%RSI,8) |
(103) 0xce08 JGE cda8 |
(103) 0xce0a MOV %RBX,(%R14,%RSI,8) |
(103) 0xce0e INC %RBX |
(103) 0xce11 MOV -0x28(%R11),%RSI |
(103) 0xce15 CMP %R8,(%R14,%RSI,8) |
(103) 0xce19 JL cdb2 |
(103) 0xce1b MOV -0x20(%R11),%RSI |
(103) 0xce1f CMP %R8,(%R14,%RSI,8) |
(103) 0xce23 JGE cdc3 |
(103) 0xce25 MOV %RBX,(%R14,%RSI,8) |
(103) 0xce29 INC %RBX |
(103) 0xce2c MOV -0x18(%R11),%RSI |
(103) 0xce30 CMP %R8,(%R14,%RSI,8) |
(103) 0xce34 JL cdcd |
(103) 0xce36 MOV -0x10(%R11),%RSI |
(103) 0xce3a CMP %R8,(%R14,%RSI,8) |
(103) 0xce3e JGE cdde |
(103) 0xce40 MOV %RBX,(%R14,%RSI,8) |
(103) 0xce44 INC %RBX |
(103) 0xce47 MOV -0x8(%R11),%RSI |
(103) 0xce4b CMP %R8,(%R14,%RSI,8) |
(103) 0xce4f JL cde8 |
(103) 0xce51 MOV (%R11),%RSI |
(103) 0xce54 CMP %R8,(%R14,%RSI,8) |
(103) 0xce58 JGE cd80 |
(103) 0xce5e MOV %RBX,(%R14,%RSI,8) |
(103) 0xce62 INC %RBX |
(103) 0xce65 JMP cd80 |
0xce80 LEA 0x1(%RCX),%RSI |
0xce84 MOV %R10,%RDX |
0xce87 JMP cecb |
0xcec0 MOV -0x68(%RBP),%RCX |
0xcec4 MOV -0x88(%RBP),%RSI |
0xcecb MOV -0xb0(%RBP),%RAX |
0xced2 MOV (%RAX,%RCX,8),%R13 |
0xced6 MOV (%RAX,%RSI,8),%R11 |
0xceda CMP %R11,%R13 |
0xcedd JGE c980 |
0xcee3 MOV %R13,%RAX |
0xcee6 NOT %RAX |
0xcee9 ADD %RAX,%R11 |
0xceec CMPQ $0,0x88(%RBP) |
0xcef4 MOV %R11,-0x30(%RBP) |
0xcef8 JE d2c0 |
0xcefe XOR %R12D,%R12D |
0xcf01 MOV %R13,-0x38(%RBP) |
0xcf05 JMP cf56 |
(96) 0xcf40 MOV -0x30(%RBP),%R11 |
(96) 0xcf44 CMP %R11,%R12 |
(96) 0xcf47 LEA 0x1(%R12),%R12 |
(96) 0xcf4c MOV -0x38(%RBP),%R13 |
(96) 0xcf50 JE c980 |
(96) 0xcf56 LEA (%R13,%R12,1),%RAX |
(96) 0xcf5b MOV -0x78(%RBP),%RCX |
(96) 0xcf5f MOV (%RCX,%RAX,8),%RDI |
(96) 0xcf63 MOV 0x20(%RBP),%RCX |
(96) 0xcf67 MOV (%RCX,%RDI,8),%RAX |
(96) 0xcf6b MOV 0x8(%RCX,%RDI,8),%R13 |
(96) 0xcf70 MOV %R13,%R9 |
(96) 0xcf73 SUB %RAX,%R9 |
(96) 0xcf76 JLE d144 |
(96) 0xcf7c CMP $0x8,%R9 |
(96) 0xcf80 JAE d000 |
(96) 0xcf86 MOV %R9,%RCX |
(96) 0xcf89 AND $-0x8,%RCX |
(96) 0xcf8d CMP %R9,%RCX |
(96) 0xcf90 JAE d140 |
(96) 0xcf96 ADD %RCX,%RAX |
(96) 0xcf99 MOV 0x28(%RBP),%RSI |
(96) 0xcf9d MOV -0x30(%RBP),%R11 |
(96) 0xcfa1 JMP cfcc |
(99) 0xcfc0 INC %RAX |
(99) 0xcfc3 CMP %RAX,%R13 |
(99) 0xcfc6 JE d144 |
(99) 0xcfcc MOV (%RSI,%RAX,8),%RCX |
(99) 0xcfd0 CMP %R8,(%R14,%RCX,8) |
(99) 0xcfd4 JGE cfc0 |
(99) 0xcfd6 MOV %RBX,(%R14,%RCX,8) |
(99) 0xcfda INC %RBX |
(99) 0xcfdd JMP cfc0 |
(96) 0xd000 MOV %R9,%RCX |
(96) 0xd003 SHR $0x3,%RCX |
(96) 0xd007 MOV -0x70(%RBP),%RSI |
(96) 0xd00b LEA (%RSI,%RAX,8),%R11 |
(96) 0xd00f JMP d04d |
(100) 0xd040 ADD $0x40,%R11 |
(100) 0xd044 DEC %RCX |
(100) 0xd047 JE cf86 |
(100) 0xd04d MOV -0x38(%R11),%RSI |
(100) 0xd051 CMP %R8,(%R14,%RSI,8) |
(100) 0xd055 JGE d0c0 |
(100) 0xd057 MOV %RBX,(%R14,%RSI,8) |
(100) 0xd05b INC %RBX |
(100) 0xd05e MOV -0x30(%R11),%RSI |
(100) 0xd062 CMP %R8,(%R14,%RSI,8) |
(100) 0xd066 JL d0ca |
(100) 0xd068 MOV -0x28(%R11),%RSI |
(100) 0xd06c CMP %R8,(%R14,%RSI,8) |
(100) 0xd070 JGE d0db |
(100) 0xd072 MOV %RBX,(%R14,%RSI,8) |
(100) 0xd076 INC %RBX |
(100) 0xd079 MOV -0x20(%R11),%RSI |
(100) 0xd07d CMP %R8,(%R14,%RSI,8) |
(100) 0xd081 JL d0e5 |
(100) 0xd083 MOV -0x18(%R11),%RSI |
(100) 0xd087 CMP %R8,(%R14,%RSI,8) |
(100) 0xd08b JGE d0f6 |
(100) 0xd08d MOV %RBX,(%R14,%RSI,8) |
(100) 0xd091 INC %RBX |
(100) 0xd094 MOV -0x10(%R11),%RSI |
(100) 0xd098 CMP %R8,(%R14,%RSI,8) |
(100) 0xd09c JL d100 |
(100) 0xd09e MOV -0x8(%R11),%RSI |
(100) 0xd0a2 CMP %R8,(%R14,%RSI,8) |
(100) 0xd0a6 JGE d111 |
(100) 0xd0a8 MOV %RBX,(%R14,%RSI,8) |
(100) 0xd0ac INC %RBX |
(100) 0xd0af MOV (%R11),%RSI |
(100) 0xd0b2 CMP %R8,(%R14,%RSI,8) |
(100) 0xd0b6 JGE d040 |
(100) 0xd0b8 JMP d11e |
(100) 0xd0c0 MOV -0x30(%R11),%RSI |
(100) 0xd0c4 CMP %R8,(%R14,%RSI,8) |
(100) 0xd0c8 JGE d068 |
(100) 0xd0ca MOV %RBX,(%R14,%RSI,8) |
(100) 0xd0ce INC %RBX |
(100) 0xd0d1 MOV -0x28(%R11),%RSI |
(100) 0xd0d5 CMP %R8,(%R14,%RSI,8) |
(100) 0xd0d9 JL d072 |
(100) 0xd0db MOV -0x20(%R11),%RSI |
(100) 0xd0df CMP %R8,(%R14,%RSI,8) |
(100) 0xd0e3 JGE d083 |
(100) 0xd0e5 MOV %RBX,(%R14,%RSI,8) |
(100) 0xd0e9 INC %RBX |
(100) 0xd0ec MOV -0x18(%R11),%RSI |
(100) 0xd0f0 CMP %R8,(%R14,%RSI,8) |
(100) 0xd0f4 JL d08d |
(100) 0xd0f6 MOV -0x10(%R11),%RSI |
(100) 0xd0fa CMP %R8,(%R14,%RSI,8) |
(100) 0xd0fe JGE d09e |
(100) 0xd100 MOV %RBX,(%R14,%RSI,8) |
(100) 0xd104 INC %RBX |
(100) 0xd107 MOV -0x8(%R11),%RSI |
(100) 0xd10b CMP %R8,(%R14,%RSI,8) |
(100) 0xd10f JL d0a8 |
(100) 0xd111 MOV (%R11),%RSI |
(100) 0xd114 CMP %R8,(%R14,%RSI,8) |
(100) 0xd118 JGE d040 |
(100) 0xd11e MOV %RBX,(%R14,%RSI,8) |
(100) 0xd122 INC %RBX |
(100) 0xd125 JMP d040 |
(96) 0xd140 MOV -0x30(%RBP),%R11 |
(96) 0xd144 MOV 0x30(%RBP),%RCX |
(96) 0xd148 MOV (%RCX,%RDI,8),%RAX |
(96) 0xd14c MOV 0x8(%RCX,%RDI,8),%RCX |
(96) 0xd151 MOV %RCX,%RDI |
(96) 0xd154 SUB %RAX,%RDI |
(96) 0xd157 JLE cf44 |
(96) 0xd15d CMP $0x4,%RDI |
(96) 0xd161 JAE d200 |
(96) 0xd167 MOV 0x60(%RBP),%R13 |
(96) 0xd16b MOV %RDI,%RSI |
(96) 0xd16e AND $-0x4,%RSI |
(96) 0xd172 CMP %RDI,%RSI |
(96) 0xd175 JAE cf40 |
(96) 0xd17b ADD %RSI,%RAX |
(96) 0xd17e MOV 0x38(%RBP),%RDI |
(96) 0xd182 MOV -0x30(%RBP),%R11 |
(96) 0xd186 JMP d1cc |
(97) 0xd1c0 INC %RAX |
(97) 0xd1c3 CMP %RAX,%RCX |
(97) 0xd1c6 JE cf44 |
(97) 0xd1cc MOV (%RDI,%RAX,8),%RSI |
(97) 0xd1d0 MOV (%R13,%RSI,8),%RSI |
(97) 0xd1d5 ADD %R15,%RSI |
(97) 0xd1d8 CMP %R10,(%R14,%RSI,8) |
(97) 0xd1dc JGE d1c0 |
(97) 0xd1de MOV %RDX,(%R14,%RSI,8) |
(97) 0xd1e2 INC %RDX |
(97) 0xd1e5 JMP d1c0 |
(96) 0xd200 MOV %RDI,%R9 |
(96) 0xd203 SHR $0x2,%R9 |
(96) 0xd207 MOV -0xc0(%RBP),%RSI |
(96) 0xd20e LEA (%RSI,%RAX,8),%R11 |
(96) 0xd212 MOV 0x60(%RBP),%R13 |
(96) 0xd216 JMP d24d |
(98) 0xd240 ADD $0x20,%R11 |
(98) 0xd244 DEC %R9 |
(98) 0xd247 JE d16b |
(98) 0xd24d MOV -0x18(%R11),%RSI |
(98) 0xd251 MOV (%R13,%RSI,8),%RSI |
(98) 0xd256 ADD %R15,%RSI |
(98) 0xd259 CMP %R10,(%R14,%RSI,8) |
(98) 0xd25d JGE d266 |
(98) 0xd25f MOV %RDX,(%R14,%RSI,8) |
(98) 0xd263 INC %RDX |
(98) 0xd266 MOV -0x10(%R11),%RSI |
(98) 0xd26a MOV (%R13,%RSI,8),%RSI |
(98) 0xd26f ADD %R15,%RSI |
(98) 0xd272 CMP %R10,(%R14,%RSI,8) |
(98) 0xd276 JGE d27f |
(98) 0xd278 MOV %RDX,(%R14,%RSI,8) |
(98) 0xd27c INC %RDX |
(98) 0xd27f MOV -0x8(%R11),%RSI |
(98) 0xd283 MOV (%R13,%RSI,8),%RSI |
(98) 0xd288 ADD %R15,%RSI |
(98) 0xd28b CMP %R10,(%R14,%RSI,8) |
(98) 0xd28f JGE d298 |
(98) 0xd291 MOV %RDX,(%R14,%RSI,8) |
(98) 0xd295 INC %RDX |
(98) 0xd298 MOV (%R11),%RSI |
(98) 0xd29b MOV (%R13,%RSI,8),%RSI |
(98) 0xd2a0 ADD %R15,%RSI |
(98) 0xd2a3 CMP %R10,(%R14,%RSI,8) |
(98) 0xd2a7 JGE d240 |
(98) 0xd2a9 MOV %RDX,(%R14,%RSI,8) |
(98) 0xd2ad INC %RDX |
(98) 0xd2b0 JMP d240 |
0xd2c0 XOR %R12D,%R12D |
0xd2c3 JMP d312 |
(93) 0xd300 MOV -0x30(%RBP),%R11 |
(93) 0xd304 CMP %R11,%R12 |
(93) 0xd307 LEA 0x1(%R12),%R12 |
(93) 0xd30c JE c980 |
(93) 0xd312 LEA (%R13,%R12,1),%RAX |
(93) 0xd317 MOV -0x78(%RBP),%RCX |
(93) 0xd31b MOV (%RCX,%RAX,8),%RAX |
(93) 0xd31f MOV 0x20(%RBP),%RCX |
(93) 0xd323 MOV (%RCX,%RAX,8),%RDI |
(93) 0xd327 MOV 0x8(%RCX,%RAX,8),%RAX |
(93) 0xd32c MOV %RAX,%R9 |
(93) 0xd32f SUB %RDI,%R9 |
(93) 0xd332 JLE d304 |
(93) 0xd334 CMP $0x8,%R9 |
(93) 0xd338 JAE d3c0 |
(93) 0xd33e MOV %R9,%RCX |
(93) 0xd341 AND $-0x8,%RCX |
(93) 0xd345 CMP %R9,%RCX |
(93) 0xd348 JAE d300 |
(93) 0xd34a ADD %RCX,%RDI |
(93) 0xd34d MOV 0x28(%RBP),%RSI |
(93) 0xd351 MOV -0x30(%RBP),%R11 |
(93) 0xd355 JMP d38c |
(94) 0xd380 INC %RDI |
(94) 0xd383 CMP %RDI,%RAX |
(94) 0xd386 JE d304 |
(94) 0xd38c MOV (%RSI,%RDI,8),%RCX |
(94) 0xd390 CMP %R8,(%R14,%RCX,8) |
(94) 0xd394 JGE d380 |
(94) 0xd396 MOV %RBX,(%R14,%RCX,8) |
(94) 0xd39a INC %RBX |
(94) 0xd39d JMP d380 |
(93) 0xd3c0 MOV %R9,%RCX |
(93) 0xd3c3 SHR $0x3,%RCX |
(93) 0xd3c7 MOV -0x70(%RBP),%RSI |
(93) 0xd3cb LEA (%RSI,%RDI,8),%R11 |
(93) 0xd3cf JMP d40d |
(95) 0xd400 ADD $0x40,%R11 |
(95) 0xd404 DEC %RCX |
(95) 0xd407 JE d33e |
(95) 0xd40d MOV -0x38(%R11),%RSI |
(95) 0xd411 CMP %R8,(%R14,%RSI,8) |
(95) 0xd415 JGE d480 |
(95) 0xd417 MOV %RBX,(%R14,%RSI,8) |
(95) 0xd41b INC %RBX |
(95) 0xd41e MOV -0x30(%R11),%RSI |
(95) 0xd422 CMP %R8,(%R14,%RSI,8) |
(95) 0xd426 JL d48a |
(95) 0xd428 MOV -0x28(%R11),%RSI |
(95) 0xd42c CMP %R8,(%R14,%RSI,8) |
(95) 0xd430 JGE d49b |
(95) 0xd432 MOV %RBX,(%R14,%RSI,8) |
(95) 0xd436 INC %RBX |
(95) 0xd439 MOV -0x20(%R11),%RSI |
(95) 0xd43d CMP %R8,(%R14,%RSI,8) |
(95) 0xd441 JL d4a5 |
(95) 0xd443 MOV -0x18(%R11),%RSI |
(95) 0xd447 CMP %R8,(%R14,%RSI,8) |
(95) 0xd44b JGE d4b6 |
(95) 0xd44d MOV %RBX,(%R14,%RSI,8) |
(95) 0xd451 INC %RBX |
(95) 0xd454 MOV -0x10(%R11),%RSI |
(95) 0xd458 CMP %R8,(%R14,%RSI,8) |
(95) 0xd45c JL d4c0 |
(95) 0xd45e MOV -0x8(%R11),%RSI |
(95) 0xd462 CMP %R8,(%R14,%RSI,8) |
(95) 0xd466 JGE d4d1 |
(95) 0xd468 MOV %RBX,(%R14,%RSI,8) |
(95) 0xd46c INC %RBX |
(95) 0xd46f MOV (%R11),%RSI |
(95) 0xd472 CMP %R8,(%R14,%RSI,8) |
(95) 0xd476 JGE d400 |
(95) 0xd478 JMP d4de |
(95) 0xd480 MOV -0x30(%R11),%RSI |
(95) 0xd484 CMP %R8,(%R14,%RSI,8) |
(95) 0xd488 JGE d428 |
(95) 0xd48a MOV %RBX,(%R14,%RSI,8) |
(95) 0xd48e INC %RBX |
(95) 0xd491 MOV -0x28(%R11),%RSI |
(95) 0xd495 CMP %R8,(%R14,%RSI,8) |
(95) 0xd499 JL d432 |
(95) 0xd49b MOV -0x20(%R11),%RSI |
(95) 0xd49f CMP %R8,(%R14,%RSI,8) |
(95) 0xd4a3 JGE d443 |
(95) 0xd4a5 MOV %RBX,(%R14,%RSI,8) |
(95) 0xd4a9 INC %RBX |
(95) 0xd4ac MOV -0x18(%R11),%RSI |
(95) 0xd4b0 CMP %R8,(%R14,%RSI,8) |
(95) 0xd4b4 JL d44d |
(95) 0xd4b6 MOV -0x10(%R11),%RSI |
(95) 0xd4ba CMP %R8,(%R14,%RSI,8) |
(95) 0xd4be JGE d45e |
(95) 0xd4c0 MOV %RBX,(%R14,%RSI,8) |
(95) 0xd4c4 INC %RBX |
(95) 0xd4c7 MOV -0x8(%R11),%RSI |
(95) 0xd4cb CMP %R8,(%R14,%RSI,8) |
(95) 0xd4cf JL d468 |
(95) 0xd4d1 MOV (%R11),%RSI |
(95) 0xd4d4 CMP %R8,(%R14,%RSI,8) |
(95) 0xd4d8 JGE d400 |
(95) 0xd4de MOV %RBX,(%R14,%RSI,8) |
(95) 0xd4e2 INC %RBX |
(95) 0xd4e5 JMP d400 |
/home/eoseret/qaas_runs_CPU_9468/171-716-5699/intel/AMG/build/AMG/AMG/parcsr_mv/par_csr_matop.c: 109 - 242 |
-------------------------------------------------------------------------------- |
109: if (ii < rest) |
[...] |
127: for (i1 = ns; i1 < ne; i1++) |
[...] |
135: if ( allsquare ) { |
136: B_marker[i1] = jj_count_diag; |
137: jj_count_diag++; |
[...] |
144: if (num_cols_offd_A) |
145: { |
146: for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) |
147: { |
148: i2 = A_offd_j[jj2]; |
[...] |
154: for (jj3 = B_ext_offd_i[i2]; jj3 < B_ext_offd_i[i2+1]; jj3++) |
[...] |
164: if (B_marker[i3] < jj_row_begin_offd) |
165: { |
166: B_marker[i3] = jj_count_offd; |
167: jj_count_offd++; |
168: } |
169: } |
170: for (jj3 = B_ext_diag_i[i2]; jj3 < B_ext_diag_i[i2+1]; jj3++) |
171: { |
172: i3 = B_ext_diag_j[jj3]; |
173: |
174: if (B_marker[i3] < jj_row_begin_diag) |
175: { |
176: B_marker[i3] = jj_count_diag; |
177: jj_count_diag++; |
[...] |
187: for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) |
188: { |
189: i2 = A_diag_j[jj2]; |
[...] |
195: for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++) |
196: { |
197: i3 = B_diag_j[jj3]; |
[...] |
205: if (B_marker[i3] < jj_row_begin_diag) |
206: { |
207: B_marker[i3] = jj_count_diag; |
208: jj_count_diag++; |
[...] |
216: if (num_cols_offd_B) |
217: { |
218: for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++) |
219: { |
220: i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]]; |
[...] |
228: if (B_marker[i3] < jj_row_begin_offd) |
229: { |
230: B_marker[i3] = jj_count_offd; |
231: jj_count_offd++; |
[...] |
241: (*C_diag_i)[i1] = jj_row_begin_diag; |
242: (*C_offd_i)[i1] = jj_row_begin_offd; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 11.80 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.74 |
Bottlenecks | micro-operation queue |
Function | hypre_ParMatmul_RowSizes.extracted |
Source | par_csr_matop.c:127-127,par_csr_matop.c:135-137,par_csr_matop.c:144-146,par_csr_matop.c:187-187,par_csr_matop.c:216-216,par_csr_matop.c:231-231,par_csr_matop.c:241-242 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.83 |
CQA cycles if no scalar integer | 9.83 |
CQA cycles if FP arith vectorized | 9.83 |
CQA cycles if fully vectorized | 0.83 |
Front-end cycles | 9.83 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 3.40 |
P1 cycles | 3.40 |
P2 cycles | 5.67 |
P3 cycles | 5.67 |
P4 cycles | 5.00 |
P5 cycles | 3.40 |
P6 cycles | 3.40 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 5.00 |
P10 cycles | 3.40 |
P11 cycles | 5.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.00 |
Stall cycles (UFS) | 0.00 |
Nb insns | 59.00 |
Nb uops | 59.00 |
Nb loads | 17.00 |
Nb stores | 10.00 |
Nb stack references | 14.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.97 |
Bytes prefetched | 0.00 |
Bytes loaded | 136.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.81 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 10.80 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 11.80 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.74 |
Bottlenecks | micro-operation queue |
Function | hypre_ParMatmul_RowSizes.extracted |
Source | par_csr_matop.c:127-127,par_csr_matop.c:135-137,par_csr_matop.c:144-146,par_csr_matop.c:187-187,par_csr_matop.c:216-216,par_csr_matop.c:231-231,par_csr_matop.c:241-242 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.83 |
CQA cycles if no scalar integer | 9.83 |
CQA cycles if FP arith vectorized | 9.83 |
CQA cycles if fully vectorized | 0.83 |
Front-end cycles | 9.83 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 3.40 |
P1 cycles | 3.40 |
P2 cycles | 5.67 |
P3 cycles | 5.67 |
P4 cycles | 5.00 |
P5 cycles | 3.40 |
P6 cycles | 3.40 |
P7 cycles | 5.00 |
P8 cycles | 5.00 |
P9 cycles | 5.00 |
P10 cycles | 3.40 |
P11 cycles | 5.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 10.00 |
Stall cycles (UFS) | 0.00 |
Nb insns | 59.00 |
Nb uops | 59.00 |
Nb loads | 17.00 |
Nb stores | 10.00 |
Nb stack references | 14.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.97 |
Bytes prefetched | 0.00 |
Bytes loaded | 136.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 11.81 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 10.80 |
Path / |
Function | hypre_ParMatmul_RowSizes.extracted |
Source file and lines | par_csr_matop.c:109-242 |
Module | libparcsr_mv.so |
nb instructions | 59 |
nb uops | 59 |
loop length | 252 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 14 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.40 | 3.40 | 5.67 | 5.67 | 5.00 | 3.40 | 3.40 | 5.00 | 5.00 | 5.00 | 3.40 | 5.67 |
cycles | 3.40 | 3.40 | 5.67 | 5.67 | 5.00 | 3.40 | 3.40 | 5.00 | 5.00 | 5.00 | 3.40 | 5.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.00 |
Stall cycles | 0.00 |
Front-end | 9.83 |
Dispatch | 5.67 |
Overall L1 | 9.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0xb8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP -0xa0(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JE c787 <hypre_ParMatmul_RowSizes.extracted+0xc7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE c9d4 <hypre_ParMatmul_RowSizes.extracted+0x314> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%RCX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,(%R14,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPQ $0,0x70(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %RSI,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE ce80 <hypre_ParMatmul_RowSizes.extracted+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE ce84 <hypre_ParMatmul_RowSizes.extracted+0x7c4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOT %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP ca56 <hypre_ParMatmul_RowSizes.extracted+0x396> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA 0x1(%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP cecb <hypre_ParMatmul_RowSizes.extracted+0x80b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RSI,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R11,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE c980 <hypre_ParMatmul_RowSizes.extracted+0x2c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOT %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPQ $0,0x88(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R11,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE d2c0 <hypre_ParMatmul_RowSizes.extracted+0xc00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP cf56 <hypre_ParMatmul_RowSizes.extracted+0x896> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP d312 <hypre_ParMatmul_RowSizes.extracted+0xc52> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
Function | hypre_ParMatmul_RowSizes.extracted |
Source file and lines | par_csr_matop.c:109-242 |
Module | libparcsr_mv.so |
nb instructions | 59 |
nb uops | 59 |
loop length | 252 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 14 |
micro-operation queue | 9.83 cycles |
front end | 9.83 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.40 | 3.40 | 5.67 | 5.67 | 5.00 | 3.40 | 3.40 | 5.00 | 5.00 | 5.00 | 3.40 | 5.67 |
cycles | 3.40 | 3.40 | 5.67 | 5.67 | 5.00 | 3.40 | 3.40 | 5.00 | 5.00 | 5.00 | 3.40 | 5.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 10.00 |
Stall cycles | 0.00 |
Front-end | 9.83 |
Dispatch | 5.67 |
Overall L1 | 9.83 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0xb8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP -0xa0(%RBP),%RSI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JE c787 <hypre_ParMatmul_RowSizes.extracted+0xc7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE c9d4 <hypre_ParMatmul_RowSizes.extracted+0x314> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%RCX,%RSI,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,(%R14,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPQ $0,0x70(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %RSI,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE ce80 <hypre_ParMatmul_RowSizes.extracted+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE ce84 <hypre_ParMatmul_RowSizes.extracted+0x7c4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOT %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP ca56 <hypre_ParMatmul_RowSizes.extracted+0x396> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA 0x1(%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP cecb <hypre_ParMatmul_RowSizes.extracted+0x80b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RSI,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %R11,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE c980 <hypre_ParMatmul_RowSizes.extracted+0x2c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
NOT %RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPQ $0,0x88(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV %R11,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE d2c0 <hypre_ParMatmul_RowSizes.extracted+0xc00> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP cf56 <hypre_ParMatmul_RowSizes.extracted+0x896> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP d312 <hypre_ParMatmul_RowSizes.extracted+0xc52> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |