Skip to content

Commit

Permalink
Merge branch 'develop' into bulldozer
Browse files Browse the repository at this point in the history
  • Loading branch information
xianyi committed Aug 5, 2013
2 parents 72b1eda + 79ba521 commit 143cca4
Show file tree
Hide file tree
Showing 13 changed files with 141 additions and 29 deletions.
12 changes: 12 additions & 0 deletions Changelog.txt
@@ -1,4 +1,16 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.8
01-Aug-2013
common:
* Support Open64 5.0. (#266)
* Add executable stack markings. (#262, Thanks to Sébastien Fabbro)
* Respect user's LDFLAGS (Thanks to Sébastien Fabbro)

x86/x86-64:
* Rollback bulldozer and piledriver kernels to barcelona kernels (#263)
We will fix the computational error bug in the bulldozer and piledriver kernels.

====================================================================
Version 0.2.7
20-Jul-2013
Expand Down
2 changes: 1 addition & 1 deletion Makefile.rule
Expand Up @@ -3,7 +3,7 @@
#

# This library's version
VERSION = 0.2.7
VERSION = 0.2.8

# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
Expand Down
7 changes: 5 additions & 2 deletions Makefile.system
Expand Up @@ -324,14 +324,16 @@ ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE
#BULLDOZER PILEDRIVER
endif
endif

ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE
#BULLDOZER PILEDRIVER
endif
endif

Expand Down Expand Up @@ -895,6 +897,7 @@ export CC
export FC
export BU
export FU
export NEED2UNDERSCORES
export USE_THREAD
export NUM_THREADS
export NUM_CORES
Expand Down
8 changes: 4 additions & 4 deletions cpuid.h
Expand Up @@ -105,8 +105,8 @@
#define CORE_NANO 19
#define CORE_SANDYBRIDGE 20
#define CORE_BOBCAT 21
#define CORE_BULLDOZER 22
#define CORE_PILEDRIVER 23
#define CORE_BULLDOZER CORE_BARCELONA
#define CORE_PILEDRIVER CORE_BARCELONA
#define CORE_HASWELL CORE_SANDYBRIDGE

#define HAVE_SSE (1 << 0)
Expand Down Expand Up @@ -198,8 +198,8 @@ typedef struct {
#define CPUTYPE_NANO 43
#define CPUTYPE_SANDYBRIDGE 44
#define CPUTYPE_BOBCAT 45
#define CPUTYPE_BULLDOZER 46
#define CPUTYPE_PILEDRIVER 47
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA
// this define is because BLAS doesn't have haswell specific optimizations yet
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE

Expand Down
8 changes: 5 additions & 3 deletions driver/others/dynamic.c
Expand Up @@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA;
extern gotoblas_t gotoblas_BOBCAT;
#ifndef NO_AVX
extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BULLDOZER;
extern gotoblas_t gotoblas_PILEDRIVER;
//extern gotoblas_t gotoblas_BULLDOZER;
//extern gotoblas_t gotoblas_PILEDRIVER;
#else
//Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#endif

#define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
#endif

//Use sandy bridge kernels for haswell.
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE

Expand Down
20 changes: 12 additions & 8 deletions exports/Makefile
Expand Up @@ -18,6 +18,10 @@ ifndef NO_LAPACKE
NO_LAPACKE = 0
endif

# Default NEED2UNDERSCORES to 0 when it is unset or empty, so that the
# gensymbol invocations in this Makefile always receive a value for their
# final argument.
ifndef NEED2UNDERSCORES
NEED2UNDERSCORES=0
endif

ifeq ($(OSNAME), WINNT)
ifeq ($(F_COMPILER), GFORTRAN)
EXTRALIB += -lgfortran
Expand Down Expand Up @@ -94,13 +98,13 @@ libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB)

libopenblas.def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)

libgoto2_shared.def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)

libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)

$(LIBDYNNAME) : ../$(LIBNAME) osx.def
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
Expand Down Expand Up @@ -187,23 +191,23 @@ static : ../$(LIBNAME)
rm -f goto.$(SUFFIX)

linux.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)

osx.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)

aix.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F)
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F)

symbol.S : gensymbol
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S

test : linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
rm -f linktest

linktest.c : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c

clean ::
@rm -f *.def *.dylib __.SYMDEF*
Expand Down
69 changes: 66 additions & 3 deletions exports/gensymbol
Expand Up @@ -114,8 +114,8 @@

# ALLAUX -- Auxiliary routines called from all precisions
# already provided by @blasobjs: xerbla, lsame
ilaenv, ieeeck, lsamen, xerbla_array, iparmq,
ilaprec, ilatrans, ilauplo, iladiag, chla_transtype,
ilaenv, ieeeck, lsamen, iparmq,
ilaprec, ilatrans, ilauplo, iladiag,
ilaver, slamch, slamc3,

# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX.
Expand Down Expand Up @@ -2672,12 +2672,25 @@
#LAPACKE_zlagsy_work,
);

#These functions may need 2 underscores.
@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,);

if ($ARGV[5] == 1) {
#NO_LAPACK=1
@underscore_objs = (@blasobjs, @misc_underscore_objs);
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
-d "../lapack-3.4.2" || -d "../lapack-netlib") {
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);

if ($ARGV[7] == 0){
# NEED2UNDERSCORES=0
# Don't need 2 underscores
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs);
}else{
# Need 2 underscores
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
@need_2underscore_objs = (@lapack_embeded_underscore_objs);
};

} else {
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
}
Expand Down Expand Up @@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){
print $objs, $bu, "\n";
}

foreach $objs (@need_2underscore_objs) {
print $objs, $bu, $bu, "\n";
}

# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, "\n";
Expand All @@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){
print "_", $objs, $bu, "\n";
}

foreach $objs (@need_2underscore_objs) {
print "_", $objs, $bu, $bu, "\n";
}

# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print "_", $objs, "\n";
Expand All @@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){
print $objs, $bu, "\n";
}

foreach $objs (@need_2underscore_objs) {
print $objs, $bu, $bu, "\n";
}

# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, "\n";
Expand All @@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){
print "\t$uppercase=$objs", "_ \@", $count, "\n";
$count ++;
}

foreach $objs (@need_2underscore_objs) {
$uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/;
print "\t$objs=$objs","__ \@", $count, "\n";
$count ++;
print "\t",$objs, "__=$objs","__ \@", $count, "\n";
$count ++;
print "\t$uppercase=$objs", "__ \@", $count, "\n";
$count ++;
}

#for misc_common_objs
foreach $objs (@misc_common_objs) {
Expand Down Expand Up @@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){
print "\t$uppercase\_ = $objs","_\n";
$count ++;
}

foreach $objs (@need_2underscore_objs) {
$uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/;
print "\t$objs=$objs","__ \@", $count, "\n";
$count ++;
print "\t",$objs, "__=$objs","__ \@", $count, "\n";
$count ++;
print "\t$uppercase=$objs", "__ \@", $count, "\n";
$count ++;
}

exit(0);
}

Expand All @@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){
print "_", $uppercase, "_:\n";
print "\tjmp\t_", $objs, "_\n";
}

foreach $objs (@need_2underscore_objs) {
$uppercase = $objs;
$uppercase =~ tr/[a-z]/[A-Z]/;
print "\t.align 16\n";
print "\t.globl _", $uppercase, "__\n";
print "_", $uppercase, "__:\n";
print "\tjmp\t_", $objs, "__\n";
}

exit(0);
}

Expand All @@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){
foreach $objs (@underscore_objs) {
print $objs, $bu, "();\n" if $objs ne "xerbla";
}

foreach $objs (@need_2underscore_objs) {
print $objs, $bu, $bu, "();\n";
}

# if ($ARGV[4] == 0) {
foreach $objs (@no_underscore_objs) {
print $objs, "();\n";
Expand Down
21 changes: 20 additions & 1 deletion f_check
Expand Up @@ -114,6 +114,12 @@ if ($compiler eq "") {
$vendor = IBM;
$openmp = "-openmp";
}

# For a name with an embedded underscore, e.g. zho_ge, the compiler may append 2 underscores.
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`;
if ($data =~ /zho_ge__/) {
$need2bu = 1;
}
}

if ($vendor eq "") {
Expand Down Expand Up @@ -245,6 +251,8 @@ if ($link ne "") {

$link =~ s/\-rpath\s+/\-rpath\@/g;

$link =~ s/\-rpath-link\s+/\-rpath-link\@/g;

@flags = split(/[\s\,\n]/, $link);
# remove leading and trailing quotes from each flag.
@flags = map {s/^['"]|['"]$//g; $_} @flags;
Expand All @@ -265,7 +273,15 @@ if ($link ne "") {
$linker_L .= "-Wl,". $flags . " ";
}

if ($flags =~ /^\-rpath/) {
if ($flags =~ /^\-rpath\@/) {
$flags =~ s/\@/\,/g;
if ($vendor eq "PGI") {
$flags =~ s/lib$/libso/;
}
$linker_L .= "-Wl,". $flags . " " ;
}

if ($flags =~ /^\-rpath-link\@/) {
$flags =~ s/\@/\,/g;
if ($vendor eq "PGI") {
$flags =~ s/lib$/libso/;
Expand Down Expand Up @@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1;

print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne "";
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne "";
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne "";

print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne "";

if (($linker_l ne "") || ($linker_a ne "")) {
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n";
Expand Down
6 changes: 6 additions & 0 deletions ftest3.f
@@ -0,0 +1,6 @@
! Probe function whose name contains an embedded underscore (zho_ge).
! It takes no arguments and returns the double complex value zero.
! NOTE(review): presumably compiled by the build's Fortran check to see
! how the compiler decorates such a name -- confirm against f_check.
double complex function zho_ge()

zho_ge = (0.0d0,0.0d0)

return
end
6 changes: 3 additions & 3 deletions getarch.c
Expand Up @@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "OPTERON"
#endif

#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
Expand Down Expand Up @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "BOBCAT"
#endif

#if defined (FORCE_BULLDOZER)
#if 0
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
Expand All @@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "BULLDOZER"
#endif

#if defined (FORCE_PILEDRIVER)
#if 0
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
Expand Down
2 changes: 1 addition & 1 deletion getarch_2nd.c
Expand Up @@ -8,7 +8,7 @@

int main(int argc, char **argv) {

if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) {
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
Expand Down

0 comments on commit 143cca4

Please sign in to comment.