Permalink
Browse files

Merge branch 'develop'

  • Loading branch information...
2 parents 71d29fa + 3e87648 commit 48f075cfd52b73bf77d7627fcb425392c4d6bf24 @xianyi committed Aug 20, 2012
View
@@ -1,5 +1,18 @@
OpenBLAS ChangeLog
====================================================================
+Version 0.2.3
+20-Aug-2012
+common:
+ * Fixed an instability bug in LAPACK ?laswp. (#130)
+ * Fixed a shared library bug that occurred when unloading the
+ library on Linux. (#132)
+ * Fixed the compilation failure on BlueGene/P (TARGET=PPC440FP2).
+ Please use gcc and IBM xlf. (#134)
+x86/x86-64:
+ * Added support for the goto_set_num_threads and openblas_set_num_threads
+ APIs on Windows. They set the number of threads at runtime.
+
+====================================================================
Version 0.2.2
6-July-2012
common:
View
@@ -3,7 +3,7 @@ include ./Makefile.system
BLASDIRS = interface driver/level2 driver/level3 driver/others
-ifndef DYNAMIC_ARCH
+ifneq ($(DYNAMIC_ARCH), 1)
BLASDIRS += kernel
endif
@@ -99,11 +99,9 @@ ifeq ($(OSNAME), Darwin)
endif
ifeq ($(OSNAME), WINNT)
$(MAKE) -C exports dll
- -ln -fs $(LIBDLLNAME) $(LIBPREFIX).dll
endif
ifeq ($(OSNAME), CYGWIN_NT)
$(MAKE) -C exports dll
- -ln -fs $(LIBDLLNAME) $(LIBPREFIX).dll
endif
tests :
@@ -147,7 +145,7 @@ ifeq ($(EXPRECISION), 1)
echo "#define EXPRECISION">> config_last.h
endif
##
-ifdef DYNAMIC_ARCH
+ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonlibs || exit 1
for d in $(DYNAMIC_CORE) ; \
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
@@ -165,7 +163,7 @@ prof_blas :
$(MAKE) -C $$d prof || exit 1 ; \
fi; \
done
-ifdef DYNAMIC_ARCH
+ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonprof || exit 1
endif
@@ -184,7 +182,7 @@ hpl :
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
-ifdef DYNAMIC_ARCH
+ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonlibs || exit 1
for d in $(DYNAMIC_CORE) ; \
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
@@ -233,7 +231,7 @@ ifndef NOFORTRAN
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-# -@echo "CEXTRALIB = $(CEXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
+ -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif
View
@@ -3,7 +3,7 @@
#
# This library's version
-VERSION = 0.2.2
+VERSION = 0.2.3
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -108,19 +108,16 @@ VERSION = 0.2.2
# The installation directory.
# PREFIX = /opt/OpenBLAS
-# Common Optimization Flag; -O2 is enough.
-# DEBUG = 1
-
-ifeq ($(DEBUG), 1)
-COMMON_OPT += -g
-# -DDEBUG
-else
-COMMON_OPT += -O2
-endif
+# Common Optimization Flag;
+# The default -O2 is enough.
+# COMMON_OPT = -O2
# Profiling flags
COMMON_PROF = -pg
+# Build Debug version
+# DEBUG = 1
+
#
# End of user configuration
#
View
@@ -244,7 +244,7 @@ endif
endif
-ifdef DYNAMIC_ARCH
+ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
@@ -687,6 +687,15 @@ AWK = awk
REVISION = -r$(VERSION)
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
+ifeq ($(DEBUG), 1)
+COMMON_OPT += -g
+endif
+
+ifndef COMMON_OPT
+COMMON_OPT = -O2
+endif
+
+
CFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
PFLAGS = $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
@@ -705,7 +714,7 @@ ifndef LIBSUFFIX
LIBSUFFIX = a
endif
-ifndef DYNAMIC_ARCH
+ifneq ($(DYNAMIC_ARCH), 1)
ifndef SMP
LIBNAME = $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX)
LIBNAME_P = $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX)
@@ -724,8 +733,8 @@ endif
endif
+LIBDLLNAME = $(LIBPREFIX).dll
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
-LIBDLLNAME = $(LIBNAME:.$(LIBSUFFIX)=.dll)
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
View
@@ -100,6 +100,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
* The number of CPUs/Cores should be less than or equal to 256.
+* On Linux, OpenBLAS sets the processor affinity by default. This may cause [a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). To avoid it, you can build the library with NO_AFFINITY=1.
* On Loongson 3A, `make test` may fail because of a pthread_create error (the error code is EAGAIN). However, the same test case runs fine when you start it directly from the shell.
## Specification of Git Branches
View
@@ -45,7 +45,7 @@ extern "C" {
int BLASFUNC(xerbla)(char *, blasint *info, blasint);
-void BLASFUNC(openblas_set_num_threads)(int *);
+void openblas_set_num_threads_(int *);
FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *);
FLOATRET BLASFUNC(sdsdot)(blasint *, float *, float *, blasint *, float *, blasint *);
View
@@ -14,7 +14,7 @@ endif
# COMMONOBJS += info.$(SUFFIX)
-ifdef DYNAMIC_ARCH
+ifeq ($(DYNAMIC_ARCH), 1)
COMMONOBJS += dynamic.$(SUFFIX)
else
COMMONOBJS += parameter.$(SUFFIX)
@@ -70,7 +70,7 @@ ifndef BLAS_SERVER
BLAS_SERVER = blas_server.c
endif
-ifdef DYNAMIC_ARCH
+ifeq ($(DYNAMIC_ARCH), 1)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
@@ -435,7 +435,7 @@ static int blas_thread_server(void *arg){
blas_memory_free(buffer);
- pthread_exit(NULL);
+ //pthread_exit(NULL);
return 0;
}
@@ -63,13 +63,7 @@ static blas_pool_t pool;
static HANDLE blas_threads [MAX_CPU_NUMBER];
static DWORD blas_threads_id[MAX_CPU_NUMBER];
-void goto_set_num_threads(int num)
-{
-}
-void openblas_set_num_threads(int num)
-{
-}
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
@@ -187,7 +181,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
do {
action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
- } while ((action != WAIT_OBJECT_0) && (action == WAIT_OBJECT_0 + 1));
+ } while ((action != WAIT_OBJECT_0) && (action != WAIT_OBJECT_0 + 1));
if (action == WAIT_OBJECT_0 + 1) break;
@@ -271,7 +265,9 @@ static DWORD WINAPI blas_thread_server(void *arg){
} else {
legacy_exec(routine, queue -> mode, queue -> args, sb);
}
- }
+ }else{
+ continue; //if queue == NULL
+ }
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Finished!\n", cpu);
@@ -433,7 +429,7 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
/* Shutdown procedure, but the user doesn't have to call this routine. */
/* The kernel automatically kills the threads. */
-int blas_thread_shutdown_(void){
+int BLASFUNC(blas_thread_shutdown)(void){
int i;
@@ -445,7 +441,7 @@ int blas_thread_shutdown_(void){
SetEvent(pool.killed);
- for(i = 0; i < blas_cpu_number - 1; i++){
+ for(i = 0; i < blas_num_threads - 1; i++){
WaitForSingleObject(blas_threads[i], INFINITE);
}
@@ -456,3 +452,47 @@ int blas_thread_shutdown_(void){
return 0;
}
+
+void goto_set_num_threads(int num_threads)
+{
+ long i;
+
+ if (num_threads < 1) num_threads = blas_cpu_number;
+
+ if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
+
+ if (num_threads > blas_num_threads) {
+
+ LOCK_COMMAND(&server_lock);
+
+ //increased_threads = 1;
+ if (!blas_server_avail){
+
+ InitializeCriticalSection(&pool.lock);
+ pool.filled = CreateEvent(NULL, FALSE, FALSE, NULL);
+ pool.killed = CreateEvent(NULL, TRUE, FALSE, NULL);
+
+ pool.shutdown = 0;
+ pool.queue = NULL;
+ blas_server_avail = 1;
+ }
+
+ for(i = blas_num_threads - 1; i < num_threads - 1; i++){
+
+ blas_threads[i] = CreateThread(NULL, 0,
+ blas_thread_server, (void *)i,
+ 0, &blas_threads_id[i]);
+ }
+
+ blas_num_threads = num_threads;
+
+ UNLOCK_COMMAND(&server_lock);
+ }
+
+ blas_cpu_number = num_threads;
+}
+
+void openblas_set_num_threads(int num)
+{
+ goto_set_num_threads(num);
+}
View
@@ -60,6 +60,8 @@ extern gotoblas_t gotoblas_NEHALEM;
extern gotoblas_t gotoblas_OPTERON;
extern gotoblas_t gotoblas_OPTERON_SSE3;
extern gotoblas_t gotoblas_BARCELONA;
+extern gotoblas_t gotoblas_SANDYBRIDGE;
+extern gotoblas_t gotoblas_BOBCAT;
#define VENDOR_INTEL 1
#define VENDOR_AMD 2
@@ -122,15 +124,24 @@ static gotoblas_t *get_coretype(void){
if (model == 12) return &gotoblas_ATOM;
return NULL;
- case 2:
- //Intel Core (Clarkdale) / Core (Arrandale)
- // Pentium (Clarkdale) / Pentium Mobile (Arrandale)
- // Xeon (Clarkdale), 32nm
- if (model == 5) return &gotoblas_NEHALEM;
+ case 2:
+ //Intel Core (Clarkdale) / Core (Arrandale)
+ // Pentium (Clarkdale) / Pentium Mobile (Arrandale)
+ // Xeon (Clarkdale), 32nm
+ if (model == 5) return &gotoblas_NEHALEM;
- //Intel Xeon Processor 5600 (Westmere-EP)
- if (model == 12) return &gotoblas_NEHALEM;
- return NULL;
+ //Intel Xeon Processor 5600 (Westmere-EP)
+ //Xeon Processor E7 (Westmere-EX)
+ if (model == 12 || model == 15) return &gotoblas_NEHALEM;
+
+ //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
+ //Intel Core i7-3000 / Xeon E5
+ if (model == 10 || model == 13) return &gotoblas_SANDYBRIDGE;
+ return NULL;
+ case 3:
+ //Intel Sandy Bridge 22nm (Ivy Bridge?)
+ if (model == 10) return &gotoblas_SANDYBRIDGE;
+ return NULL;
}
case 0xf:
if (model <= 0x2) return &gotoblas_NORTHWOOD;
@@ -144,7 +155,9 @@ static gotoblas_t *get_coretype(void){
if ((exfamily == 0) || (exfamily == 2)) {
if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3;
else return &gotoblas_OPTERON;
- } else {
+ } else if (exfamily == 5) {
+ return &gotoblas_BOBCAT;
+ } else {
return &gotoblas_BARCELONA;
}
}
@@ -178,6 +191,8 @@ static char *corename[] = {
"Opteron(SSE3)",
"Barcelona",
"Nano",
+ "Sandybridge",
+ "Bobcat",
};
char *gotoblas_corename(void) {
@@ -197,7 +212,9 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_OPTERON) return corename[13];
if (gotoblas == &gotoblas_BARCELONA) return corename[14];
if (gotoblas == &gotoblas_NANO) return corename[15];
-
+ if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
+ if (gotoblas == &gotoblas_BOBCAT) return corename[17];
+
return corename[0];
}
Oops, something went wrong.

0 comments on commit 48f075c

Please sign in to comment.