Skip to content

Commit

Permalink
Introduce zmemcmp to use unaligned access for architectures we know s…
Browse files Browse the repository at this point in the history
…upport unaligned access, otherwise use memcmp.
  • Loading branch information
nmoinvaz committed Feb 1, 2022
1 parent 75c1e97 commit f5d680f
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 15 deletions.
2 changes: 1 addition & 1 deletion deflate_quick.c
Expand Up @@ -92,7 +92,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
const uint8_t *str_start = s->window + s->strstart;
const uint8_t *match_start = s->window + hash_head;

if (*(uint16_t *)str_start == *(uint16_t *)match_start) {
if (zmemcmp_2(str_start, match_start) == 0) {
match_len = functable.compare256(str_start+2, match_start+2) + 2;

if (match_len >= WANT_MIN_MATCH) {
Expand Down
12 changes: 6 additions & 6 deletions match_tpl.h
Expand Up @@ -145,24 +145,24 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#ifdef UNALIGNED_OK
if (best_len < sizeof(uint32_t)) {
for (;;) {
if (*(uint16_t *)(mbase_end+cur_match) == *(uint16_t *)scan_end &&
*(uint16_t *)(mbase_start+cur_match) == *(uint16_t *)scan_start)
if (zmemcmp_2(mbase_end+cur_match, scan_end) == 0 &&
zmemcmp_2(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# ifdef UNALIGNED64_OK
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
if (*(uint64_t *)(mbase_end+cur_match) == *(uint64_t *)scan_end &&
*(uint64_t *)(mbase_start+cur_match) == *(uint64_t *)scan_start)
if (zmemcmp_8(mbase_end+cur_match, scan_end) == 0 &&
zmemcmp_8(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# endif
} else {
for (;;) {
if (*(uint32_t *)(mbase_end+cur_match) == *(uint32_t *)scan_end &&
*(uint32_t *)(mbase_start+cur_match) == *(uint32_t *)scan_start)
if (zmemcmp_4(mbase_end+cur_match, scan_end) == 0 &&
zmemcmp_4(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
Expand Down
29 changes: 21 additions & 8 deletions zbuild.h
Expand Up @@ -194,19 +194,32 @@
# define Tracecv(c, x)
#endif

/* Force the compiler to emit unaligned memory accesses when unaligned access is known
to be supported on the architecture; otherwise, do not assume it is supported. Older
compilers do not optimize memcpy and memcmp calls into unaligned access instructions
even when the architecture supports them, resulting in a significant performance impact.
Newer compilers might optimize memcpy, but not all of them optimize memcmp for all integer types. */
#ifdef UNALIGNED_OK
# define zmemcpy_2(dest, src) *((uint16_t *)dest) = *((uint16_t *)src)
# define zmemcpy_4(dest, src) *((uint32_t *)dest) = *((uint32_t *)src)
# define zmemcpy_2(dest, src) (*((uint16_t *)(dest)) = *((uint16_t *)(src)))
# define zmemcmp_2(str1, str2) (*((uint16_t *)(str1)) != *((uint16_t *)(str2)))
# define zmemcpy_4(dest, src) (*((uint32_t *)(dest)) = *((uint32_t *)(src)))
# define zmemcmp_4(str1, str2) (*((uint32_t *)(str1)) != *((uint32_t *)(str2)))
# if UINTPTR_MAX == UINT64_MAX
# define zmemcpy_8(dest, src) *((uint64_t *)dest) = *((uint64_t *)src)
# define zmemcpy_8(dest, src) (*((uint64_t *)(dest)) = *((uint64_t *)(src)))
# define zmemcmp_8(str1, str2) (*((uint64_t *)(str1)) != *((uint64_t *)(str2)))
# else
# define zmemcpy_8(dest, src) ((uint32_t *)dest)[0] = *((uint32_t *)src)[0] \
((uint32_t *)dest)[1] = *((uint32_t *)src)[1]
# define zmemcpy_8(dest, src) (((uint32_t *)(dest))[0] = ((uint32_t *)(src))[0], \
((uint32_t *)(dest))[1] = ((uint32_t *)(src))[1])
# define zmemcmp_8(str1, str2) (((uint32_t *)(str1))[0] != ((uint32_t *)(str2))[0] || \
((uint32_t *)(str1))[1] != ((uint32_t *)(str2))[1])
# endif
#else
# define zmemcpy_2(dest, src) memcpy(dest, src, 2)
# define zmemcpy_4(dest, src) memcpy(dest, src, 4)
# define zmemcpy_8(dest, src) memcpy(dest, src, 8)
# define zmemcpy_2(dest, src) memcpy(dest, src, 2)
# define zmemcmp_2(str1, str2) memcmp(str1, str2, 2)
# define zmemcpy_4(dest, src) memcpy(dest, src, 4)
# define zmemcmp_4(str1, str2) memcmp(str1, str2, 4)
# define zmemcpy_8(dest, src) memcpy(dest, src, 8)
# define zmemcmp_8(str1, str2) memcmp(str1, str2, 8)
#endif

#endif

0 comments on commit f5d680f

Please sign in to comment.