Skip to content

Commit

Permalink
Clean up SSE4.2 detection
Browse files Browse the repository at this point in the history
  • Loading branch information
ccawley2011 authored and Dead2 committed Apr 15, 2023
1 parent 7a60d93 commit b1aafe5
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 53 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,12 @@ jobs:
cmake-args: -DWITH_SSE2=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_no_sse2

- name: Ubuntu GCC No SSE4 UBSAN
- name: Ubuntu GCC No SSE4.2 UBSAN
os: ubuntu-latest
compiler: gcc
cxx-compiler: g++
cmake-args: -DWITH_SSE4=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_no_sse4
cmake-args: -DWITH_SSE42=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_no_sse42

- name: Ubuntu GCC No PCLMULQDQ UBSAN
os: ubuntu-latest
Expand Down
5 changes: 2 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -810,9 +810,8 @@ if(WITH_OPTIM)
if(HAVE_SSE42CRC_INTRIN)
add_definitions(-DX86_SSE42_CRC_INTRIN)
endif()
endif()
if(NOT HAVE_SSE42CRC_INLINE_ASM AND NOT HAVE_SSE42CRC_INTRIN AND NOT HAVE_SSE42CMPSTR_INTRIN)
set(WITH_SSE4 OFF)
else()
set(WITH_SSE42 OFF)
endif()
endif()
if(WITH_SSE2)
Expand Down
6 changes: 3 additions & 3 deletions arch/x86/insert_string_sse42.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@
#define HASH_CALC_VAR h
#define HASH_CALC_VAR_INIT uint32_t h = 0

#define UPDATE_HASH update_hash_sse4
#define INSERT_STRING insert_string_sse4
#define QUICK_INSERT_STRING quick_insert_string_sse4
#define UPDATE_HASH update_hash_sse42
#define INSERT_STRING insert_string_sse42
#define QUICK_INSERT_STRING quick_insert_string_sse42

#ifdef X86_SSE42
# include "../../insert_string_tpl.h"
Expand Down
17 changes: 2 additions & 15 deletions cmake/detect-intrinsics.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ macro(check_sse42_intrinsics)
set(SSE42FLAG "-msse4.2")
endif()
endif()
# Check whether compiler supports SSE4 CRC inline asm
# Check whether compiler supports SSE4.2 CRC inline asm
set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}")
check_c_source_compile_or_run(
"int main(void) {
Expand All @@ -461,7 +461,7 @@ macro(check_sse42_intrinsics)
}"
HAVE_SSE42CRC_INLINE_ASM
)
# Check whether compiler supports SSE4 CRC intrinsics
# Check whether compiler supports SSE4.2 CRC intrinsics
check_c_source_compile_or_run(
"#include <immintrin.h>
int main(void) {
Expand All @@ -477,19 +477,6 @@ macro(check_sse42_intrinsics)
}"
HAVE_SSE42CRC_INTRIN
)
# Check whether compiler supports SSE4.2 compare string intrinsics
check_c_source_compile_or_run(
"#include <immintrin.h>
int main(void) {
unsigned char a[64] = { 0 };
unsigned char b[64] = { 0 };
__m128i xmm_src0, xmm_src1;
xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
}"
HAVE_SSE42CMPSTR_INTRIN
)
set(CMAKE_REQUIRED_FLAGS)
endmacro()

Expand Down
24 changes: 1 addition & 23 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@ if test $native -eq 1; then
avx2flag=""
sse2flag=""
ssse3flag=""
sse4flag=""
sse42flag=""
pclmulflag=""
vpclmulflag=""
Expand Down Expand Up @@ -1415,7 +1414,7 @@ EOF
}

check_sse42_intrinsics() {
# Check whether compiler supports SSE4 CRC inline asm
# Check whether compiler supports SSE4.2 CRC inline asm
cat > $test.c << EOF
int main(void) {
unsigned val = 0, h = 0;
Expand Down Expand Up @@ -1448,27 +1447,6 @@ EOF
echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log
HAVE_SSE42CRC_INTRIN=0
fi

# Check whether compiler supports SSE4.2 compare string intrinsics
cat > $test.c << EOF
#include <immintrin.h>
int main(void)
{
unsigned char a[64] = { 0 };
unsigned char b[64] = { 0 };
__m128i xmm_src0, xmm_src1;
xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
}
EOF
if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log
HAVE_SSE42CMPSTR_INTRIN=1
else
echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log
HAVE_SSE42CMPSTR_INTRIN=0
fi
}

check_ssse3_intrinsics() {
Expand Down
6 changes: 3 additions & 3 deletions cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
/* insert_string */
extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
#ifdef X86_SSE42
extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count);
#elif defined(ARM_ACLE)
extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
#endif
Expand Down Expand Up @@ -235,7 +235,7 @@ extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match)
/* quick_insert_string */
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
#ifdef X86_SSE42
extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str);
#elif defined(ARM_ACLE)
extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
#endif
Expand All @@ -261,7 +261,7 @@ extern void slide_hash_avx2(deflate_state *s);
/* update_hash */
extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
#ifdef X86_SSE42
extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val);
extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val);
#elif defined(ARM_ACLE)
extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
#endif
Expand Down
6 changes: 3 additions & 3 deletions functable.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ static void init_functable(void) {
#ifdef X86_SSE42
if (cf.x86.has_sse42) {
ft.adler32_fold_copy = &adler32_fold_copy_sse42;
ft.insert_string = &insert_string_sse4;
ft.quick_insert_string = &quick_insert_string_sse4;
ft.update_hash = &update_hash_sse4;
ft.insert_string = &insert_string_sse42;
ft.quick_insert_string = &quick_insert_string_sse42;
ft.update_hash = &update_hash_sse42;
}
#endif
// X86 - PCLMUL
Expand Down

0 comments on commit b1aafe5

Please sign in to comment.