Skip to content

Commit

Permalink
Move unaligned access detection into zutil.h
Browse files Browse the repository at this point in the history
Provide zmemcmp and zmemcpy.
  • Loading branch information
nmoinvaz committed Jan 10, 2022
1 parent d2f3136 commit 9e31ad4
Show file tree
Hide file tree
Showing 14 changed files with 87 additions and 106 deletions.
42 changes: 0 additions & 42 deletions CMakeLists.txt
Expand Up @@ -251,46 +251,6 @@ if(NOT MSVC AND NOT CMAKE_C_FLAGS MATCHES "([\\/\\-]O)3")
CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
endif()

# Set architecture alignment requirements
if(WITH_UNALIGNED)
if((BASEARCH_ARM_FOUND AND NOT "${ARCH}" MATCHES "armv[2-7]") OR (BASEARCH_PPC_FOUND AND "${ARCH}" MATCHES "powerpc64le") OR BASEARCH_X86_FOUND)
if(NOT DEFINED UNALIGNED_OK)
set(UNALIGNED_OK TRUE)
endif()
endif()
if(UNALIGNED_OK)
add_definitions(-DUNALIGNED_OK)
message(STATUS "Architecture supports unaligned reads")
endif()
if(BASEARCH_ARM_FOUND)
if(NOT DEFINED UNALIGNED64_OK)
if("${ARCH}" MATCHES "armv[2-7]")
set(UNALIGNED64_OK FALSE)
elseif("${ARCH}" MATCHES "(arm(v[8-9])?|aarch64)")
set(UNALIGNED64_OK TRUE)
endif()
endif()
endif()
if(BASEARCH_PPC_FOUND)
if(NOT DEFINED UNALIGNED64_OK)
if("${ARCH}" MATCHES "powerpc64le")
set(UNALIGNED64_OK TRUE)
endif()
endif()
endif()
if(BASEARCH_X86_FOUND)
if(NOT DEFINED UNALIGNED64_OK)
set(UNALIGNED64_OK TRUE)
endif()
endif()
if(UNALIGNED64_OK)
add_definitions(-DUNALIGNED64_OK)
message(STATUS "Architecture supports unaligned reads of > 4 bytes")
endif()
else()
message(STATUS "Unaligned reads manually disabled")
endif()

# Apply warning compiler flags
if(WITH_MAINTAINER_WARNINGS)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE}")
Expand Down Expand Up @@ -1454,8 +1414,6 @@ add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with p
add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking")
add_feature_info(WITH_INFLATE_ALLOW_INVALID_DIST WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances")
add_feature_info(WITH_UNALIGNED UNALIGNED_OK "Support unaligned reads on platforms that support it")
add_feature_info(WITH_UNALIGNED64 UNALIGNED64_OK "Support unaligned 64-bit reads on platforms that support it")

if(BASEARCH_ARM_FOUND)
add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE")
Expand Down
1 change: 0 additions & 1 deletion README.md
Expand Up @@ -194,7 +194,6 @@ Advanced Build Options
| CMake | configure | Description | Default |
|:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------|
| ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF |
| UNALIGNED_OK | | Allow unaligned reads | ON (x86, arm) |
| | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) |
| WITH_AVX2 | | Build with AVX2 intrinsics | ON |
| WITH_AVX512 | | Build with AVX512 intrinsics | ON |
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/compare258_avx.c
Expand Up @@ -15,7 +15,7 @@
# include <nmmintrin.h>
#endif

/* UNALIGNED_OK, AVX2 intrinsic comparison */
/* AVX2 unaligned intrinsic comparison */
static inline uint32_t compare256_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
uint32_t len = 0;

Expand Down Expand Up @@ -48,7 +48,7 @@ static inline uint32_t compare256_unaligned_avx2_static(const unsigned char *src
}

static inline uint32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return (*src0 == *src1);

return compare256_unaligned_avx2_static(src0+2, src1+2) + 2;
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/compare258_sse.c
Expand Up @@ -25,7 +25,7 @@
# include <nmmintrin.h>
#endif

/* UNALIGNED_OK, SSE4.2 intrinsic comparison */
/* SSE4.2 unaligned intrinsic comparison */
static inline uint32_t compare256_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
uint32_t len = 0;

Expand Down Expand Up @@ -55,7 +55,7 @@ static inline uint32_t compare256_unaligned_sse4_static(const unsigned char *src
}

static inline uint32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return (*src0 == *src1);

return compare256_unaligned_sse4_static(src0+2, src1+2) + 2;
Expand Down
8 changes: 4 additions & 4 deletions arch/x86/x86.c
Expand Up @@ -70,9 +70,9 @@ void Z_INTERNAL x86_check_features(void) {

/* NULL terminate the string */
memset(cpu_identity, 0, 13);
memcpy(cpu_identity, (char*)&ebx, sizeof(int));
memcpy(cpu_identity + 4, (char*)&edx, sizeof(int));
memcpy(cpu_identity + 8, (char*)&ecx, sizeof(int));
zmemcpy_4(cpu_identity, (char *)&ebx);
zmemcpy_4(cpu_identity + 4, (char *)&edx);
zmemcpy_4(cpu_identity + 8, (char *)&ecx);

intel_cpu = strncmp(cpu_identity, "GenuineIntel", 12) == 0;

Expand Down Expand Up @@ -119,7 +119,7 @@ void Z_INTERNAL x86_check_features(void) {
x86_cpu_well_suited_avx512 = 1;
} else if (model == 0xa && extended_model == 0x6) {
/* Icelake server */
x86_cpu_well_suited_avx512 = 1;
x86_cpu_well_suited_avx512 = 1;
} else if (model == 0xf && extended_model == 0x8) {
/* Saphire rapids */
x86_cpu_well_suited_avx512 = 1;
Expand Down
20 changes: 10 additions & 10 deletions compare258.c
Expand Up @@ -69,21 +69,21 @@ Z_INTERNAL uint32_t compare258_c(const unsigned char *src0, const unsigned char
#include "match_tpl.h"

#ifdef UNALIGNED_OK
/* UNALIGNED_OK, 16-bit integer comparison */
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
uint32_t len = 0;

do {
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return len + (*src0 == *src1);
src0 += 2, src1 += 2, len += 2;
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return len + (*src0 == *src1);
src0 += 2, src1 += 2, len += 2;
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return len + (*src0 == *src1);
src0 += 2, src1 += 2, len += 2;
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return len + (*src0 == *src1);
src0 += 2, src1 += 2, len += 2;
} while (len < 256);
Expand All @@ -92,7 +92,7 @@ static inline uint32_t compare256_unaligned_16_static(const unsigned char *src0,
}

static inline uint32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return (*src0 == *src1);

return compare256_unaligned_16_static(src0+2, src1+2) + 2;
Expand All @@ -114,7 +114,7 @@ Z_INTERNAL uint32_t compare258_unaligned_16(const unsigned char *src0, const uns
#include "match_tpl.h"

#ifdef HAVE_BUILTIN_CTZ
/* UNALIGNED_OK, 32-bit integer comparison */
/* 32-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
uint32_t len = 0;

Expand All @@ -137,7 +137,7 @@ static inline uint32_t compare256_unaligned_32_static(const unsigned char *src0,
}

static inline uint32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return (*src0 == *src1);

return compare256_unaligned_32_static(src0+2, src1+2) + 2;
Expand All @@ -161,7 +161,7 @@ Z_INTERNAL uint32_t compare258_unaligned_32(const unsigned char *src0, const uns
#endif

#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
/* UNALIGNED64_OK, 64-bit integer comparison */
/* 64-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
uint32_t len = 0;

Expand All @@ -184,7 +184,7 @@ static inline uint32_t compare256_unaligned_64_static(const unsigned char *src0,
}

static inline uint32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
if (*(uint16_t *)src0 != *(uint16_t *)src1)
if (zmemcmp_2(src0, src1) == 0)
return (*src0 == *src1);

return compare256_unaligned_64_static(src0+2, src1+2) + 2;
Expand Down
16 changes: 0 additions & 16 deletions configure
Expand Up @@ -1422,9 +1422,6 @@ case "${ARCH}" in
i386 | i486 | i586 | i686 |x86_64)
ARCHDIR=arch/x86

CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"

# Enable arch-specific optimizations
if test $without_optimizations -eq 0; then
CFLAGS="${CFLAGS} -DX86_FEATURES"
Expand Down Expand Up @@ -1646,8 +1643,6 @@ EOF
fi
;;
armv6l | armv6hl)
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"

if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then
Expand All @@ -1660,8 +1655,6 @@ EOF
fi
;;
arm | armv7*)
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"

if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then
Expand All @@ -1682,8 +1675,6 @@ EOF
fi
;;
armv8-a | armv8-a+simd)
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"

if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then
Expand All @@ -1704,8 +1695,6 @@ EOF
fi
;;
armv8-a+crc | armv8-a+crc+simd | armv8.[1234]-a | armv8.[1234]-a+simd)
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"

acleflag="-march=${ARCH}"

Expand Down Expand Up @@ -1787,9 +1776,6 @@ EOF

neonflag="-march=${ARCH}"
acleflag="-march=${ARCH}"

CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
;;
powerpc*)
case "${ARCH}" in
Expand All @@ -1801,8 +1787,6 @@ EOF
;;
powerpc64le)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64le
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
;;
esac

Expand Down
1 change: 1 addition & 0 deletions functable.c
Expand Up @@ -4,6 +4,7 @@
*/

#include "zbuild.h"
#include "zutil.h"
#include "zendian.h"
#include "crc32_p.h"
#include "deflate.h"
Expand Down
4 changes: 2 additions & 2 deletions insert_string_tpl.h
Expand Up @@ -29,9 +29,9 @@
# define HASH_CALC_MASK HASH_MASK
#endif
#ifndef HASH_CALC_READ
# ifdef UNALIGNED_OK
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val));
zmemcpy_4(&val, strstart);
# else
# define HASH_CALC_READ \
val = ((uint32_t)(strstart[0])); \
Expand Down
37 changes: 24 additions & 13 deletions match_tpl.h
Expand Up @@ -85,11 +85,16 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
}
#endif

memcpy(&scan_end, scan+offset, sizeof(bestcmp_t));
#ifndef UNALIGNED_OK
memcpy(&scan_end0, scan+offset+1, sizeof(bestcmp_t));
#ifdef UNALIGNED64_OK
zmemcpy_8(&scan_start, scan);
zmemcpy_8(&scan_end, scan+offset);
#elif defined(UNALIGNED_OK)
zmemcpy_4(&scan_start, scan);
zmemcpy_4(&scan_end, scan+offset);
#else
memcpy(&scan_start, scan, sizeof(bestcmp_t));
*scan_start = *(scan);
*scan_end = *(scan+offset);
*scan_end0 = *(scan+offset+1);
#endif
mbase_end = (mbase_start+offset);

Expand Down Expand Up @@ -153,24 +158,24 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#ifdef UNALIGNED_OK
if (best_len < sizeof(uint32_t)) {
for (;;) {
if (*(uint16_t *)(mbase_end+cur_match) == (uint16_t)scan_end &&
*(uint16_t *)(mbase_start+cur_match) == (uint16_t)scan_start)
if (zmemcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
zmemcmp_2(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# ifdef UNALIGNED64_OK
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
if (*(uint64_t *)(mbase_end+cur_match) == (uint64_t)scan_end &&
*(uint64_t *)(mbase_start+cur_match) == (uint64_t)scan_start)
if (zmemcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
zmemcmp_8(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# endif
} else {
for (;;) {
if (*(uint32_t *)(mbase_end+cur_match) == (uint32_t)scan_end &&
*(uint32_t *)(mbase_start+cur_match) == (uint32_t)scan_start)
if (zmemcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
zmemcmp_4(mbase_start+cur_match, &scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
Expand Down Expand Up @@ -207,10 +212,16 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif
}
#endif
memcpy(&scan_end, scan+offset, sizeof(bestcmp_t));
#ifndef UNALIGNED_OK
memcpy(&scan_end0, scan+offset+1, sizeof(bestcmp_t));

#ifdef UNALIGNED64_OK
zmemcpy_8(&scan_end, scan+offset);
#elif defined(UNALIGNED_OK)
zmemcpy_4(&scan_end, scan+offset);
#else
*scan_end = *(scan+offset);
*scan_end0 = *(scan+offset+1);
#endif

#ifdef LONGEST_MATCH_SLOW
/* Look for a better string offset */
if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
Expand Down
2 changes: 0 additions & 2 deletions win32/Makefile.a64
Expand Up @@ -27,8 +27,6 @@ CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC)
WFLAGS = \
-D_CRT_SECURE_NO_DEPRECATE \
-D_CRT_NONSTDC_NO_DEPRECATE \
-DUNALIGNED_OK \
-DUNALIGNED64_OK \
-D_ARM64_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \
-DARM_FEATURES \
#
Expand Down
1 change: 0 additions & 1 deletion win32/Makefile.arm
Expand Up @@ -27,7 +27,6 @@ CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC)
WFLAGS = \
-D_CRT_SECURE_NO_DEPRECATE \
-D_CRT_NONSTDC_NO_DEPRECATE \
-DUNALIGNED_OK \
-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \
-DARM_FEATURES \
#
Expand Down
2 changes: 0 additions & 2 deletions win32/Makefile.msc
Expand Up @@ -35,8 +35,6 @@ WFLAGS = \
-DX86_AVX2 \
-DX86_AVX_CHUNKSET \
-DX86_SSE2_CHUNKSET \
-DUNALIGNED_OK \
-DUNALIGNED64_OK \
#
LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest
ARFLAGS = -nologo
Expand Down

0 comments on commit 9e31ad4

Please sign in to comment.