Skip to content

Commit

Permalink
Merge pull request #1914 from fenrus75/smallmatrix
Browse files Browse the repository at this point in the history
Add a "sgemm direct" mode for small matrixes
  • Loading branch information
martin-frbg committed Dec 13, 2018
2 parents 8771880 + cdc668d commit 78d877b
Show file tree
Hide file tree
Showing 4 changed files with 483 additions and 1 deletion.
8 changes: 8 additions & 0 deletions common_level3.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
extern "C" {
#endif

/*
 * "Direct" single-precision GEMM kernel intended for small matrices.
 *
 * The gemm interface calls this as
 *     sgemm_kernel_direct(m, n, k, a, lda, b, ldb, c, ldc)
 * so strideA/strideB/strideR receive the leading dimensions of the two
 * inputs and of the output, and R receives the caller's C matrix.
 *
 * That call is only made for row-major, non-transposed operands with
 * alpha == 1 and beta == 0, and only after
 * sgemm_kernel_direct_performant(M, N, K) returned non-zero.
 *
 * NOTE(review): presumably computes R = A * B and overwrites R without
 * reading it (consistent with the beta == 0 guard) -- confirm against the
 * kernel definition, which is not part of this header.
 */
extern void sgemm_kernel_direct(BLASLONG M, BLASLONG N, BLASLONG K,
float * A, BLASLONG strideA,
float * B, BLASLONG strideB,
float * R, BLASLONG strideR);

/*
 * Gate for the direct SGEMM path: the gemm interface evaluates the return
 * value as a truth value and only calls sgemm_kernel_direct() when it is
 * non-zero.
 *
 * NOTE(review): presumably a size heuristic on M, N and K that decides
 * whether the direct kernel is expected to beat the regular path -- the
 * actual criteria live in the definition and should be confirmed there.
 */
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);


int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
Expand Down
8 changes: 8 additions & 0 deletions interface/gemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,14 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS

PRINT_DEBUG_CNAME;

#if !defined(COMPLEX) && !defined(DOUBLE) && defined(USE_SGEMM_KERNEL_DIRECT)
if (beta == 0 && alpha == 1.0 && order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans && sgemm_kernel_direct_performant(m,n,k)) {
sgemm_kernel_direct(m, n, k, a, lda, b, ldb, c, ldc);
return;
}

#endif

#ifndef COMPLEX
args.alpha = (void *)&alpha;
args.beta = (void *)&beta;
Expand Down

0 comments on commit 78d877b

Please sign in to comment.