Permalink
Browse files

Ref #79 Added GEMM_MULTITHREAD_THRESHOLD flag to use single thread in…

… gemm function with small matrices.
1 parent 3afedbf commit 31c836ac255a1d23e5694ab85c760edc0c6e0214 @xianyi committed Mar 22, 2012
Showing with 17 additions and 1 deletion.
  1. +5 −0 Makefile.rule
  2. +5 −0 Makefile.system
  3. +1 −0 getarch_2nd.c
  4. +6 −1 interface/gemm.c
View
@@ -88,6 +88,11 @@ VERSION = 0.1alpha2.5
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
# CONSISTENT_FPCSR = 1
+# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be
+# executed with a single thread. You can use this flag to avoid the overhead of
+# multi-threading for small matrix sizes. The default value is 4.
+# GEMM_MULTITHREAD_THRESHOLD = 4
+
# If you need a sanity check by comparing against reference BLAS. It'll be very
# slow (Not implemented yet).
# SANITY_CHECK = 1
View
@@ -40,6 +40,11 @@ ifdef INTERFACE64
GETARCH_FLAGS += -DUSE64BITINT
endif
+ifndef GEMM_MULTITHREAD_THRESHOLD
+GEMM_MULTITHREAD_THRESHOLD=4
+endif
+GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)
+
# This operation is expensive, so execution should be once.
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
View
@@ -34,6 +34,7 @@ int main(int argc, char **argv) {
#ifdef USE64BITINT
printf("#define USE64BITINT\n");
#endif
+ printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", GEMM_MULTITHREAD_THRESHOLD);
}
return 0;
View
@@ -397,8 +397,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
mode |= (transb << BLAS_TRANSB_SHIFT);
args.common = NULL;
- args.nthreads = num_cpu_avail(3);
+ if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD
+ || args.k <=GEMM_MULTITHREAD_THRESHOLD){
+ args.nthreads = 1;
+ }else{
+ args.nthreads = num_cpu_avail(3);
+ }
if (args.nthreads == 1) {
#endif

0 comments on commit 31c836a

Please sign in to comment.