Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide an inline asm fallback for the ARMv8 intrinsics #1697

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,12 @@ jobs:
packages: qemu qemu-user gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc-dev-armel-cross
codecov: ubuntu_gcc_armhf

- name: Ubuntu GCC ARM HF No ACLE ASAN
- name: Ubuntu GCC ARM HF No ARMv8 ASAN
os: ubuntu-latest
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-armhf.cmake -DWITH_ACLE=OFF -DWITH_SANITIZER=Address
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-armhf.cmake -DWITH_ARMV8=OFF -DWITH_SANITIZER=Address
asan-options: detect_leaks=0
packages: qemu qemu-user gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc-dev-armel-cross
codecov: ubuntu_gcc_armhf_no_acle
codecov: ubuntu_gcc_armhf_no_armv8

- name: Ubuntu GCC ARM HF No NEON ASAN
os: ubuntu-latest
Expand All @@ -194,11 +194,11 @@ jobs:
packages: qemu qemu-user gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc-dev-arm64-cross
codecov: ubuntu_gcc_aarch64

- name: Ubuntu GCC AARCH64 No ACLE UBSAN
- name: Ubuntu GCC AARCH64 No ARMv8 UBSAN
os: ubuntu-latest
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_ACLE=OFF -DWITH_SANITIZER=Undefined
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_ARMV8=OFF -DWITH_SANITIZER=Undefined
packages: qemu qemu-user gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc-dev-arm64-cross
codecov: ubuntu_gcc_aarch64_no_acle
codecov: ubuntu_gcc_aarch64_no_armv8

- name: Ubuntu GCC AARCH64 No NEON UBSAN
os: ubuntu-latest
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/configure.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ jobs:
chost: arm-linux-gnueabihf
packages: qemu qemu-user gcc-arm-linux-gnueabihf libc-dev-armel-cross

- name: Ubuntu GCC ARM HF No ACLE
- name: Ubuntu GCC ARM HF No ARMv8
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
configure-args: --warn --without-acle
configure-args: --warn --without-armv8
chost: arm-linux-gnueabihf
packages: qemu qemu-user gcc-arm-linux-gnueabihf libc-dev-armel-cross

Expand All @@ -82,10 +82,10 @@ jobs:
chost: aarch64-linux-gnu
packages: qemu qemu-user gcc-aarch64-linux-gnu libc-dev-arm64-cross

- name: Ubuntu GCC AARCH64 No ACLE
- name: Ubuntu GCC AARCH64 No ARMv8
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
configure-args: --warn --without-acle
configure-args: --warn --without-armv8
chost: aarch64-linux-gnu
packages: qemu qemu-user gcc-aarch64-linux-gnu libc-dev-arm64-cross

Expand Down
29 changes: 16 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ set(WITH_SANITIZER AUTO CACHE STRING "Enable sanitizer support")
set_property(CACHE WITH_SANITIZER PROPERTY STRINGS "Memory" "Address" "Undefined" "Thread")

if(BASEARCH_ARM_FOUND)
option(WITH_ACLE "Build with ACLE" ON)
option(WITH_ARMV8 "Build with ARMv8 CRC32 intrinsics" ON)
option(WITH_NEON "Build with NEON intrinsics" ON)
cmake_dependent_option(WITH_ARMV6 "Build with ARMv6 SIMD" ON "NOT ARCH STREQUAL \"aarch64\"" OFF)
elseif(BASEARCH_PPC_FOUND)
Expand Down Expand Up @@ -130,7 +130,7 @@ option(INSTALL_UTILS "Copy minigzip and minideflate during install" OFF)
mark_as_advanced(FORCE
ZLIB_SYMBOL_PREFIX
WITH_REDUCED_MEM
WITH_ACLE WITH_NEON
WITH_ARMV8 WITH_NEON
WITH_ARMV6
WITH_DFLTCC_DEFLATE
WITH_DFLTCC_INFLATE
Expand Down Expand Up @@ -649,19 +649,22 @@ if(WITH_OPTIM)
endif()
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h ${ARCHDIR}/arm_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
if(WITH_ACLE)
check_acle_compiler_flag()
if(HAVE_ACLE_FLAG)
add_definitions(-DARM_ACLE)
set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c)
set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}")
list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS})
add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
if(WITH_ARMV8)
check_armv8_compiler_flag()
if(HAVE_ARMV8_INLINE_ASM OR HAVE_ARMV8_INTRIN)
add_definitions(-DARM_CRC32)
set(ARMV8_SRCS ${ARCHDIR}/crc32_armv8.c)
set_property(SOURCE ${ARMV8_SRCS} PROPERTY COMPILE_FLAGS "${ARMV8FLAG} ${NOLTOFLAG}")
list(APPEND ZLIB_ARCH_SRCS ${ARMV8_SRCS})
add_feature_info(ARMV8_CRC 1 "Support ARMv8 optimized CRC hash generation, using \"${ARMV8FLAG}\"")
if(HAVE_ARMV8_INTRIN)
add_definitions(-DARM_CRC32_INTRIN)
endif()
else()
set(WITH_ACLE OFF)
set(WITH_ARMV8 OFF)
endif()
else()
set(WITH_ACLE OFF)
set(WITH_ARMV8 OFF)
endif()
if(WITH_NEON)
check_neon_compiler_flag()
Expand Down Expand Up @@ -1308,7 +1311,7 @@ add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict infl
add_feature_info(WITH_INFLATE_ALLOW_INVALID_DIST WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances")

if(BASEARCH_ARM_FOUND)
add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE")
add_feature_info(WITH_ARMV8 WITH_ARMV8 "Build with ARMv8 CRC32 intrinsics")
add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics")
add_feature_info(WITH_ARMV6 WITH_ARMV6 "Build with ARMv6 SIMD")
elseif(BASEARCH_PPC_FOUND)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Features
* Deflate medium and quick algorithms based on Intel’s zlib fork
* Support for CPU intrinsics when available
* Adler32 implementation using SSSE3, AVX2, AVX512, AVX512-VNNI, Neon, VMX & VSX
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ARMv8, & IBM Z
* Slide hash implementations using SSE2, AVX2, ARMv6, Neon, VMX & VSX
* Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
Expand Down Expand Up @@ -203,7 +203,7 @@ Advanced Build Options
| WITH_SSE42 | | Build with SSE42 intrinsics | ON |
| WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON |
| WITH_VPCLMULQDQ | --without-vpclmulqdq | Build with VPCLMULQDQ intrinsics | ON |
| WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON |
| WITH_ARMV8 | --without-armv8 | Build with ARMv8 CRC32 intrinsics | ON |
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
| WITH_ARMV6 | --without-armv6 | Build with ARMv6 intrinsics | ON |
| WITH_ALTIVEC | --without-altivec | Build with AltiVec (VMX) intrinsics | ON |
Expand Down
12 changes: 6 additions & 6 deletions arch/arm/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ SFLAGS=
INCLUDES=
SUFFIX=

ACLEFLAG=
ARMV8FLAG=
NEONFLAG=
ARMV6FLAG=
NOLTOFLAG=
Expand All @@ -22,7 +22,7 @@ all: \
arm_features.o arm_features.lo \
chunkset_neon.o chunkset_neon.lo \
compare256_neon.o compare256_neon.lo \
crc32_acle.o crc32_acle.lo \
crc32_armv8.o crc32_armv8.lo \
slide_hash_neon.o slide_hash_neon.lo \
slide_hash_armv6.o slide_hash_armv6.lo \

Expand Down Expand Up @@ -50,11 +50,11 @@ compare256_neon.o:
compare256_neon.lo:
$(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_neon.c

crc32_acle.o:
$(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
crc32_armv8.o:
$(CC) $(CFLAGS) $(ARMV8FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_armv8.c

crc32_acle.lo:
$(CC) $(SFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
crc32_armv8.lo:
$(CC) $(SFLAGS) $(ARMV8FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_armv8.c

slide_hash_neon.o:
$(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_neon.c
Expand Down
52 changes: 51 additions & 1 deletion arch/arm/acle_intrins.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,62 @@
# include <arm_acle.h>
#endif

#ifdef ARM_ACLE
#ifdef ARM_CRC32
#if defined(__aarch64__)
# define Z_TARGET_CRC Z_TARGET("+crc")
#else
# define Z_TARGET_CRC
#endif

#if !defined(ARM_CRC32_INTRIN) && !defined(_MSC_VER)
#ifdef __aarch64__
/* Inline-asm stand-in for the __crc32b intrinsic (AArch64 branch).
 * Issues a single `crc32b` instruction with __a and __b as register inputs
 * and returns the value of the output register.
 * The %w operand modifiers make the template print the 32-bit register names.
 */
static inline uint32_t __crc32b(uint32_t __a, uint8_t __b) {
uint32_t __c;
/* "=r" binds __c to the output register; __a and __b are read-only "r" inputs. */
__asm__ __volatile__("crc32b %w0, %w1, %w2" : "=r" (__c) : "r"(__a), "r"(__b));
return __c;
}

/* Inline-asm stand-in for the __crc32h intrinsic (AArch64 branch).
 * Issues a single `crc32h` instruction with __a and the 16-bit value __b as
 * register inputs and returns the value of the output register.
 * The %w operand modifiers make the template print the 32-bit register names.
 */
static inline uint32_t __crc32h(uint32_t __a, uint16_t __b) {
uint32_t __c;
/* "=r" binds __c to the output register; __a and __b are read-only "r" inputs. */
__asm__ __volatile__("crc32h %w0, %w1, %w2" : "=r" (__c) : "r"(__a), "r"(__b));
return __c;
}

/* Inline-asm stand-in for the __crc32w intrinsic (AArch64 branch).
 * Issues a single `crc32w` instruction with __a and the 32-bit value __b as
 * register inputs and returns the value of the output register.
 * The %w operand modifiers make the template print the 32-bit register names.
 */
static inline uint32_t __crc32w(uint32_t __a, uint32_t __b) {
uint32_t __c;
/* "=r" binds __c to the output register; __a and __b are read-only "r" inputs. */
__asm__ __volatile__("crc32w %w0, %w1, %w2" : "=r" (__c) : "r"(__a), "r"(__b));
return __c;
}

/* Inline-asm stand-in for the __crc32d intrinsic (AArch64 branch).
 * Issues a single `crc32x` instruction with __a and the 64-bit value __b as
 * register inputs and returns the value of the output register.
 * Operands 0 and 1 use the %w modifier (32-bit register names); the 64-bit
 * data operand uses %x so the template prints the 64-bit register name.
 */
static inline uint32_t __crc32d(uint32_t __a, uint64_t __b) {
uint32_t __c;
/* "=r" binds __c to the output register; __a and __b are read-only "r" inputs. */
__asm__ __volatile__("crc32x %w0, %w1, %x2" : "=r" (__c) : "r"(__a), "r"(__b));
return __c;
}
#else
/* Inline-asm stand-in for the __crc32b intrinsic (non-AArch64 branch).
 * Issues a single `crc32b` instruction with __a and __b as register inputs
 * and returns the value of the output register.
 * Unlike the __aarch64__ branch, the operands are written without a width
 * modifier (%0, %1, %2).
 */
static inline uint32_t __crc32b(uint32_t __a, uint8_t __b) {
uint32_t __c;
/* "=r" binds __c to the output register; __a and __b are read-only "r" inputs. */
__asm__ __volatile__("crc32b %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b));
return __c;
}

/* Inline-asm stand-in for the __crc32h intrinsic (non-AArch64 branch).
 * Issues a single `crc32h` instruction with __a and the 16-bit value __b as
 * register inputs and returns the value of the output register.
 * Unlike the __aarch64__ branch, the operands are written without a width
 * modifier (%0, %1, %2).
 */
static inline uint32_t __crc32h(uint32_t __a, uint16_t __b) {
uint32_t __c;
/* "=r" binds __c to the output register; __a and __b are read-only "r" inputs. */
__asm__ __volatile__("crc32h %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b));
return __c;
}

/* Inline-asm stand-in for the __crc32w intrinsic (non-AArch64 branch).
 * Issues a single `crc32w` instruction with __a and the 32-bit value __b as
 * register inputs and returns the value of the output register.
 * Unlike the __aarch64__ branch, the operands are written without a width
 * modifier (%0, %1, %2).
 */
static inline uint32_t __crc32w(uint32_t __a, uint32_t __b) {
uint32_t __c;
/* "=r" binds __c to the output register; __a and __b are read-only "r" inputs. */
__asm__ __volatile__("crc32w %0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b));
return __c;
}

/* Stand-in for the __crc32d intrinsic (non-AArch64 branch).
 * This branch does not issue a single 64-bit instruction; it chains two
 * __crc32w steps over the halves of __b, low 32 bits first and then the
 * high 32 bits, and returns the result of the second step.
 */
static inline uint32_t __crc32d(uint32_t __a, uint64_t __b) {
    /* First step: fold the low word of __b into the running value. */
    uint32_t __lo_step = __crc32w(__a, __b & 0xffffffffULL);
    /* Second step: fold the high word into the result of the first step. */
    return __crc32w(__lo_step, __b >> 32);
}
#endif
#endif
#endif

#ifdef ARM_SIMD
Expand Down
2 changes: 1 addition & 1 deletion arch/arm/arm_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static int arm_has_crc32() {
&& hascrc32 == 1;
#elif defined(_WIN32)
return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
#elif defined(ARM_NOCHECK_ACLE)
#elif defined(ARM_NOCHECK_CRC32)
return 1;
#else
return 0;
Expand Down
4 changes: 2 additions & 2 deletions arch/arm/arm_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ void slide_hash_neon(deflate_state *s);
void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
#endif

#ifdef ARM_ACLE
uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
#ifdef ARM_CRC32
uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len);
#endif

#ifdef ARM_SIMD
Expand Down
4 changes: 2 additions & 2 deletions arch/arm/crc32_acle.c → arch/arm/crc32_armv8.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
*
*/

#ifdef ARM_ACLE
#if defined(ARM_CRC32)
#include "acle_intrins.h"
#include "zbuild.h"

Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) {
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
Z_REGISTER uint32_t c;
Z_REGISTER const uint16_t *buf2;
Z_REGISTER const uint32_t *buf4;
Expand Down
82 changes: 55 additions & 27 deletions cmake/detect-intrinsics.cmake
Original file line number Diff line number Diff line change
@@ -1,36 +1,59 @@
# detect-intrinsics.cmake -- Detect compiler intrinsics support
# Licensed under the Zlib license, see LICENSE.md for details

macro(check_acle_compiler_flag)
if(MSVC)
# Both ARM and ARM64-targeting msvc support intrinsics, but
# ARM msvc is missing some intrinsics introduced with ARMv8, e.g. crc32
if(MSVC_C_ARCHITECTURE_ID STREQUAL "ARM64")
set(HAVE_ACLE_FLAG TRUE)
endif()
else()
if(NOT NATIVEFLAG)
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support")
macro(check_armv8_compiler_flag)
if(NOT NATIVEFLAG)
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
check_c_compiler_flag("-march=armv8-a+crc" HAVE_MARCH_ARMV8_CRC)
if(HAVE_MARCH_ARMV8_CRC)
set(ARMV8FLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ARMv8 support")
else()
check_c_compiler_flag("-march=armv8-a+crc+simd" HAVE_MARCH_ARMV8_CRC_SIMD)
if(HAVE_MARCH_ARMV8_CRC_SIMD)
set(ARMV8FLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ARMv8 support")
else()
check_c_compiler_flag("-Wa,-march=armv8-a+crc" HAVE_WA_MARCH_ARMV8_CRC)
if(HAVE_WA_MARCH_ARMV8_CRC)
set(ARMV8FLAG "-Wa,-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ARMv8 support")
else()
check_c_compiler_flag("-Wa,-march=armv8-a+crc+simd" HAVE_WA_MARCH_ARMV8_CRC_SIMD)
if(HAVE_WA_MARCH_ARMV8_CRC_SIMD)
set(ARMV8FLAG "-Wa,-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ARMv8 support")
endif()
endif()
endif()
endif()
endif()
# Check whether compiler supports ACLE flag
set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
check_c_source_compiles(
"int main() { return 0; }"
HAVE_ACLE_FLAG FAIL_REGEX "not supported")
if(NOT NATIVEFLAG AND NOT HAVE_ACLE_FLAG)
set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support" FORCE)
# Check whether compiler supports ACLE flag
set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}")
check_c_source_compiles(
"int main() { return 0; }"
HAVE_ACLE_FLAG2 FAIL_REGEX "not supported")
set(HAVE_ACLE_FLAG ${HAVE_ACLE_FLAG2} CACHE INTERNAL "Have compiler option to enable ACLE intrinsics" FORCE)
unset(HAVE_ACLE_FLAG2 CACHE) # Don't cache this internal variable
endif()
set(CMAKE_REQUIRED_FLAGS)
endif()
# Check whether compiler supports ARMv8 inline asm
set(CMAKE_REQUIRED_FLAGS "${ARMV8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
check_c_source_compiles(
"unsigned int f(unsigned int a, unsigned int b) {
unsigned int c;
#ifdef __aarch64__
__asm__ __volatile__ ( \"crc32w %w0, %w1, %w2\" : \"=r\" (c) : \"r\" (a), \"r\" (b));
#else
__asm__ __volatile__ ( \"crc32w %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b));
#endif
return (int)c;
}
int main(void) { return f(1,2); }"
HAVE_ARMV8_INLINE_ASM
)
# Check whether compiler supports ARMv8 intrinsics
check_c_source_compiles(
"#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <arm_acle.h>
#endif
unsigned int f(unsigned int a, unsigned int b) {
return __crc32w(a, b);
}
int main(void) { return 0; }"
HAVE_ARMV8_INTRIN
)
set(CMAKE_REQUIRED_FLAGS)
endmacro()

macro(check_armv6_compiler_flag)
Expand All @@ -39,6 +62,11 @@ macro(check_armv6_compiler_flag)
check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6)
if(HAVE_MARCH_ARMV6)
set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
else()
check_c_compiler_flag("-Wa,-march=armv6" HAVE_WA_MARCH_ARMV6)
if(HAVE_WA_MARCH_ARMV6)
set(ARMV6FLAG "-Wa,-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
endif()
endif()
endif()
endif()
Expand Down
Loading