Skip to content

Commit

Permalink
Merge pull request #3273 from austinpagan/sbgemm_gcc10_fix
Browse files Browse the repository at this point in the history
Power10: Fix for SBGEMM
  • Loading branch information
martin-frbg committed Jun 15, 2021
2 parents baf03a0 + e6dd44d commit c4b464c
Showing 1 changed file with 29 additions and 5 deletions.
34 changes: 29 additions & 5 deletions kernel/power/sbgemm_kernel_power10.c
Expand Up @@ -98,6 +98,30 @@ typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
rowC = (v2sf_t *) &CO[7* ldc+J]; \
rowC[0] += result[6] * alpha;

/*
 * SAVE4x2_ACC_SCALAR(ACC)
 *
 * Scale the contents of one MMA accumulator by alpha and add lane 0 of each
 * of its four vectors into C at rows 0..3 of the current column.
 *
 * ACC - pointer to the accumulator to store; the visible call sites pass
 *       &acc0, where acc0 is a __vector_quad.
 *
 * This macro is not self-contained. It expands against names that must be
 * in scope at the call site:
 *   result, res - v4sf_t[4] scratch arrays (declared next to the callers;
 *                 the v4sf_t typedef is not visible here, presumably a
 *                 4-float vector - confirm)
 *   alpha       - scale factor applied to every result vector
 *   CO          - pointer into the output matrix C
 *   ldc         - stride, in elements, between consecutive rows written
 *
 * Steps:
 *   1. __builtin_mma_disassemble_acc copies the accumulator out into
 *      result[0..3].
 *   2. Each result[i] is multiplied by alpha as a whole vector and kept
 *      in res[i].
 *   3. Only element [0] of each res[i] is used: it is added, as a scalar,
 *      to CO[i * ldc]. The remaining lanes are discarded, so exactly one
 *      element per row is read-modified-written.
 *
 * The expansion is a braced compound statement, not do { } while (0).
 * A trailing ';' at the call site therefore produces an extra empty
 * statement; do not use it as the unbraced body of an `if` that is
 * followed by `else`.
 */
#define SAVE4x2_ACC_SCALAR(ACC) { \
__builtin_mma_disassemble_acc ((void *)result, ACC); \
res[0] = result[0] * alpha; \
res[1] = result[1] * alpha; \
res[2] = result[2] * alpha; \
res[3] = result[3] * alpha; \
CO[0 * ldc] += res[0][0]; \
CO[1 * ldc] += res[1][0]; \
CO[2 * ldc] += res[2][0]; \
CO[3 * ldc] += res[3][0]; \
}

/*
 * SAVE4x2_ACC1_SCALAR(ACC)
 *
 * Scale the contents of one MMA accumulator by alpha and add lane 0 of each
 * of its four vectors into C at rows 4..7 of the current column. It performs
 * the same sequence as SAVE4x2_ACC_SCALAR, with the destination row indices
 * offset by four.
 *
 * ACC - pointer to the accumulator to store; the visible call site passes
 *       &acc1, where acc1 is a __vector_quad.
 *
 * This macro is not self-contained. It expands against names that must be
 * in scope at the call site:
 *   result, res - v4sf_t[4] scratch arrays (declared next to the caller;
 *                 the v4sf_t typedef is not visible here, presumably a
 *                 4-float vector - confirm)
 *   alpha       - scale factor applied to every result vector
 *   CO          - pointer into the output matrix C
 *   ldc         - stride, in elements, between consecutive rows written
 *
 * Steps:
 *   1. __builtin_mma_disassemble_acc copies the accumulator out into
 *      result[0..3], overwriting whatever a previous store macro left there.
 *   2. Each result[i] is multiplied by alpha as a whole vector and kept
 *      in res[i].
 *   3. Only element [0] of each res[i] is used: it is added, as a scalar,
 *      to CO[(4 + i) * ldc]. The remaining lanes are discarded.
 *
 * The expansion is a braced compound statement, not do { } while (0).
 * A trailing ';' at the call site therefore produces an extra empty
 * statement; do not use it as the unbraced body of an `if` that is
 * followed by `else`.
 */
#define SAVE4x2_ACC1_SCALAR(ACC) { \
__builtin_mma_disassemble_acc ((void *)result, ACC); \
res[0] = result[0] * alpha; \
res[1] = result[1] * alpha; \
res[2] = result[2] * alpha; \
res[3] = result[3] * alpha; \
CO[4 * ldc] += res[0][0]; \
CO[5 * ldc] += res[1][0]; \
CO[6 * ldc] += res[2][0]; \
CO[7 * ldc] += res[3][0]; \
}

/*
 * MMA: short alias for the GCC Power10 builtin __builtin_mma_xvbf16ger2pp,
 * the bfloat16 rank-2 outer-product update that adds its product into the
 * existing accumulator contents (the "pp" form).
 * The visible call sites invoke it as MMA (&acc, a, b): a pointer to a
 * __vector_quad accumulator followed by two vector operands, after the
 * accumulator has been cleared with __builtin_mma_xxsetaccz.
 */
#define MMA __builtin_mma_xvbf16ger2pp

#define SAVE2x4_ACC(ACC, J) \
Expand Down Expand Up @@ -313,7 +337,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
{
IFLOAT *BO = B;
v2sf_t *rowC;
v2sf_t result[8];
v4sf_t result[4], res[4];
__vector_quad acc0, acc1;
__builtin_mma_xxsetaccz (&acc0);
__builtin_mma_xxsetaccz (&acc1);
Expand All @@ -335,8 +359,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
MMA (&acc0, MERGE_HIGH (rowB[0], vzero), (vec_t) rowA);
MMA (&acc1, MERGE_LOW (rowB[0], vzero), (vec_t) rowA);
}
SAVE4x2_ACC (&acc0, 0);
SAVE4x2_ACC1 (&acc1, 0);
SAVE4x2_ACC_SCALAR (&acc0);
SAVE4x2_ACC1_SCALAR (&acc1);
CO += 1;
AO += k;
BO += (k << 3);
Expand Down Expand Up @@ -547,7 +571,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
{
IFLOAT *BO = B;
v2sf_t *rowC;
v2sf_t result[8];
v4sf_t result[4], res[4];
__vector_quad acc0;
BLASLONG l = 0;
__builtin_mma_xxsetaccz (&acc0);
Expand All @@ -571,7 +595,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
};
MMA (&acc0, (vec_t)(rowB_mrg), (vec_t) rowA);
}
SAVE4x2_ACC (&acc0, 0);
SAVE4x2_ACC_SCALAR (&acc0);
AO += k;
BO += (k << 2);
CO += 1;
Expand Down

0 comments on commit c4b464c

Please sign in to comment.