Skip to content

Commit

Permalink
Merge pull request #1221 from ashwinyes/develop_arm_softfp
Browse files Browse the repository at this point in the history
arm: add support for softfp in arm vfp assembly files
  • Loading branch information
xianyi committed Jul 10, 2017
2 parents 3db2adf + 4239dd6 commit a590e61
Show file tree
Hide file tree
Showing 39 changed files with 683 additions and 311 deletions.
23 changes: 3 additions & 20 deletions Makefile.arm
@@ -1,5 +1,4 @@
#ifeq logical or
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -mfpu=neon -march=armv7-a
FCOMMON_OPT += -mfpu=neon -march=armv7-a
Expand All @@ -9,28 +8,12 @@ FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
endif
endif

ifeq ($(CORE), ARMV7)
ifeq ($(OSNAME), Android)
ifeq ($(ARM_SOFTFP_ABI), 1)
CCOMMON_OPT += -mfpu=neon -march=armv7-a
FCOMMON_OPT += -mfpu=neon -march=armv7-a
else
CCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch
FCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch
endif
else
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
endif
endif

ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -mfpu=vfp -march=armv6
FCOMMON_OPT += -mfpu=vfp -march=armv6
endif


ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -marm -march=armv5
FCOMMON_OPT += -marm -march=armv5
CCOMMON_OPT += -march=armv5
FCOMMON_OPT += -march=armv5
endif
12 changes: 7 additions & 5 deletions Makefile.system
Expand Up @@ -242,6 +242,10 @@ EXTRALIB += -lm
NO_EXPRECISION = 1
endif

ifeq ($(OSNAME), Android)
EXTRALIB += -lm
endif

ifeq ($(OSNAME), AIX)
EXTRALIB += -lm
endif
Expand Down Expand Up @@ -486,12 +490,10 @@ BINARY_DEFINED = 1
CCOMMON_OPT += -marm
FCOMMON_OPT += -marm

# If softfp abi is mentioned on the command line, force it.
ifeq ($(ARM_SOFTFP_ABI), 1)
CCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI
FCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI
else
CCOMMON_OPT += -mfloat-abi=hard
FCOMMON_OPT += -mfloat-abi=hard
CCOMMON_OPT += -mfloat-abi=softfp
FCOMMON_OPT += -mfloat-abi=softfp
endif

ifeq ($(OSNAME), Android)
Expand Down
5 changes: 0 additions & 5 deletions common_arm.h
Expand Up @@ -111,11 +111,6 @@ static inline int blas_quickdivide(blasint x, blasint y){

#define PROFCODE

#ifdef __ARM_PCS
//-mfloat-abi=softfp
#define SOFT_FLOAT_ABI
#endif

#endif


Expand Down
63 changes: 16 additions & 47 deletions kernel/arm/KERNEL.ARMV6
@@ -1,7 +1,5 @@
include $(KERNELDIR)/KERNEL.ARMV5



###############################################################################
SAMAXKERNEL = iamax_vfp.S
DAMAXKERNEL = iamax_vfp.S
CAMAXKERNEL = iamax_vfp.S
Expand Down Expand Up @@ -44,10 +42,10 @@ DAXPYKERNEL = axpy_vfp.S
CAXPYKERNEL = axpy_vfp.S
ZAXPYKERNEL = axpy_vfp.S

SCOPYKERNEL = copy.c
DCOPYKERNEL = copy.c
CCOPYKERNEL = zcopy.c
ZCOPYKERNEL = zcopy.c
SROTKERNEL = rot_vfp.S
DROTKERNEL = rot_vfp.S
CROTKERNEL = rot_vfp.S
ZROTKERNEL = rot_vfp.S

SDOTKERNEL = sdot_vfp.S
DDOTKERNEL = ddot_vfp.S
Expand All @@ -59,16 +57,6 @@ DNRM2KERNEL = nrm2_vfp.S
CNRM2KERNEL = nrm2_vfp.S
ZNRM2KERNEL = nrm2_vfp.S

SROTKERNEL = rot_vfp.S
DROTKERNEL = rot_vfp.S
CROTKERNEL = rot_vfp.S
ZROTKERNEL = rot_vfp.S

SSCALKERNEL = scal.c
DSCALKERNEL = scal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c

SSWAPKERNEL = swap_vfp.S
DSWAPKERNEL = swap_vfp.S
CSWAPKERNEL = swap_vfp.S
Expand All @@ -84,26 +72,25 @@ DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S

SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
SGEMMITCOPY = sgemm_tcopy_4_vfp.S
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
endif
SGEMMONCOPY = sgemm_ncopy_2_vfp.S
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o

DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
endif
DGEMMONCOPY = dgemm_ncopy_2_vfp.S
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
Expand All @@ -121,26 +108,8 @@ ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o

STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c



STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S

114 changes: 5 additions & 109 deletions kernel/arm/KERNEL.ARMV7
@@ -1,91 +1,12 @@

#################################################################################
SAMAXKERNEL = iamax_vfp.S
DAMAXKERNEL = iamax_vfp.S
CAMAXKERNEL = iamax_vfp.S
ZAMAXKERNEL = iamax_vfp.S

SAMINKERNEL = iamax_vfp.S
DAMINKERNEL = iamax_vfp.S
CAMINKERNEL = iamax_vfp.S
ZAMINKERNEL = iamax_vfp.S

SMAXKERNEL = iamax_vfp.S
DMAXKERNEL = iamax_vfp.S

SMINKERNEL = iamax_vfp.S
DMINKERNEL = iamax_vfp.S

ISAMAXKERNEL = iamax_vfp.S
IDAMAXKERNEL = iamax_vfp.S
ICAMAXKERNEL = iamax_vfp.S
IZAMAXKERNEL = iamax_vfp.S

ISAMINKERNEL = iamax_vfp.S
IDAMINKERNEL = iamax_vfp.S
ICAMINKERNEL = iamax_vfp.S
IZAMINKERNEL = iamax_vfp.S

ISMAXKERNEL = iamax_vfp.S
IDMAXKERNEL = iamax_vfp.S

ISMINKERNEL = iamax_vfp.S
IDMINKERNEL = iamax_vfp.S

SSWAPKERNEL = swap_vfp.S
DSWAPKERNEL = swap_vfp.S
CSWAPKERNEL = swap_vfp.S
ZSWAPKERNEL = swap_vfp.S

SASUMKERNEL = asum_vfp.S
DASUMKERNEL = asum_vfp.S
CASUMKERNEL = asum_vfp.S
ZASUMKERNEL = asum_vfp.S

SAXPYKERNEL = axpy_vfp.S
DAXPYKERNEL = axpy_vfp.S
CAXPYKERNEL = axpy_vfp.S
ZAXPYKERNEL = axpy_vfp.S

SCOPYKERNEL = copy.c
DCOPYKERNEL = copy.c
CCOPYKERNEL = zcopy.c
ZCOPYKERNEL = zcopy.c

SDOTKERNEL = sdot_vfp.S
DDOTKERNEL = ddot_vfp.S
CDOTKERNEL = cdot_vfp.S
ZDOTKERNEL = zdot_vfp.S
include $(KERNELDIR)/KERNEL.ARMV6

SNRM2KERNEL = nrm2_vfpv3.S
DNRM2KERNEL = nrm2_vfpv3.S
CNRM2KERNEL = nrm2_vfpv3.S
ZNRM2KERNEL = nrm2_vfpv3.S

SROTKERNEL = rot_vfp.S
DROTKERNEL = rot_vfp.S
CROTKERNEL = rot_vfp.S
ZROTKERNEL = rot_vfp.S

SSCALKERNEL = scal.c
DSCALKERNEL = scal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c

SGEMVNKERNEL = gemv_n_vfpv3.S
DGEMVNKERNEL = gemv_n_vfpv3.S
CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S

SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S

SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
Expand All @@ -100,35 +21,10 @@ DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o

ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o

STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S

8 changes: 8 additions & 0 deletions kernel/arm/asum_vfp.S
Expand Up @@ -475,6 +475,14 @@ asum_kernel_L999:
vadd.f32 s0 , s0, s1 // set return value
#endif

#if !defined(__ARM_PCS_VFP)
#if !defined(DOUBLE)
vmov r0, s0
#else
vmov r0, r1, d0
#endif
#endif

bx lr

EPILOGUE
Expand Down

0 comments on commit a590e61

Please sign in to comment.