Skip to content

Commit

Permalink
add SSE4.2 optimized hash function
Browse files Browse the repository at this point in the history
For systems supporting SSE4.2, use the crc32 instruction as a fast
hash function. Also, provide a better fallback hash.

For both new hash functions, we hash 4 bytes, instead of 3, for certain
levels. This shortens the hash chains, and also improves the quality
of each hash entry.
  • Loading branch information
jtkukunas committed Jun 3, 2014
1 parent 5640481 commit d306c75
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 15 deletions.
6 changes: 6 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,9 @@ case "${ARCH}" in
FILL_WINDOW_SSE_o=""
FILL_WINDOW_SSE_lo=""
fi

CFLAGS="${CFLAGS} -DUSE_SSE4_2_CRC_HASH"
SFLAGS="${SFLAGS} -DUSE_SSE4_2_CRC_HASH"
;;
i386 | i486 | i586 | i686)
OBJC="${OBJC} x86.o"
Expand All @@ -828,6 +831,9 @@ case "${ARCH}" in
FILL_WINDOW_SSE_o=""
FILL_WINDOW_SSE_lo=""
fi

CFLAGS="${CFLAGS} -DUSE_SSE4_2_CRC_HASH"
SFLAGS="${SFLAGS} -DUSE_SSE4_2_CRC_HASH"
;;
esac

Expand Down
76 changes: 61 additions & 15 deletions deflate.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,17 +169,56 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
#ifdef USE_SSE4_2_CRC_HASH
#include "x86.h"
local inline Pos insert_string_sse(deflate_state *z_const s, z_const Pos str)
{
Pos ret;
unsigned *ip, val, h = 0;

ip = (unsigned *)&s->window[str];
val = *ip;

if (s->level >= 6)
val &= 0xFFFFFF;

__asm__ __volatile__ (
"crc32 %1,%0\n\t"
: "+r" (h)
: "r" (val)
);

ret = s->head[h & s->hash_mask];
s->head[h & s->hash_mask] = str;
s->prev[str & s->w_mask] = ret;
return ret;
}
#endif

local inline Pos insert_string_c(deflate_state *z_const s, z_const Pos str)
{
Pos ret;

UPDATE_HASH(s, s->ins_h, str);
#ifdef FASTEST
#define INSERT_STRING(s, str, match_head) \
(UPDATE_HASH(s, s->ins_h, (str)), \
match_head = s->head[s->ins_h], \
s->head[s->ins_h] = (Pos)(str))
ret = s->head[s->ins_h];
#else
#define INSERT_STRING(s, str, match_head) \
(UPDATE_HASH(s, s->ins_h, (str)), \
match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
s->head[s->ins_h] = (Pos)(str))
ret = s->prev[str & s->w_mask] = s->head[s->ins_h];
#endif
s->head[s->ins_h] = str;

return ret;
}

local inline Pos insert_string(deflate_state *z_const s, z_const Pos str)
{
#ifdef USE_SSE4_2_CRC_HASH
if (x86_cpu_has_sse42)
return insert_string_sse(s, str);
#endif
return insert_string_c(s, str);
}


/* ===========================================================================
* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
Expand Down Expand Up @@ -226,7 +265,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
* output size for (length,distance) codes is <= 24 bits.
*/

#ifdef CHECK_SSE2
#if defined(CHECK_SSE2) || defined(USE_SSE4_2_CRC_HASH)
x86_check_features();
#endif

Expand Down Expand Up @@ -285,7 +324,13 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
s->w_size = 1 << s->w_bits;
s->w_mask = s->w_size - 1;

#ifdef USE_SSE4_2_CRC_HASH
if (x86_cpu_has_sse42)
s->hash_bits = 15;
else
#endif
s->hash_bits = memLevel + 7;

s->hash_size = 1 << s->hash_bits;
s->hash_mask = s->hash_size - 1;
s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
Expand Down Expand Up @@ -1278,7 +1323,8 @@ local void fill_window_c(s)
if (s->lookahead + s->insert >= MIN_MATCH) {
uInt str = s->strstart - s->insert;
s->ins_h = s->window[str];
UPDATE_HASH(s, s->ins_h, str + 1 - (MIN_MATCH-1));
if (str >= 1)
UPDATE_HASH(s, s->ins_h, str + 1 - (MIN_MATCH-1));
#if MIN_MATCH != 3
Call UPDATE_HASH() MIN_MATCH-3 more times
#endif
Expand Down Expand Up @@ -1458,7 +1504,7 @@ local block_state deflate_fast(s, flush)
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
INSERT_STRING(s, s->strstart, hash_head);
hash_head = insert_string(s, s->strstart);
}

/* Find the longest match, discarding those <= prev_length.
Expand Down Expand Up @@ -1489,7 +1535,7 @@ local block_state deflate_fast(s, flush)
s->match_length--; /* string at strstart already in table */
do {
s->strstart++;
INSERT_STRING(s, s->strstart, hash_head);
hash_head = insert_string(s, s->strstart);
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
* always MIN_MATCH bytes ahead.
*/
Expand All @@ -1501,7 +1547,7 @@ local block_state deflate_fast(s, flush)
s->strstart += s->match_length;
s->match_length = 0;
s->ins_h = s->window[s->strstart];
UPDATE_HASH(s, s->ins_h, s->strstart+1 - (MIN_MATCH-1));
UPDATE_HASH(s, s->ins_h, s->strstart+2 - (MIN_MATCH));
#if MIN_MATCH != 3
Call UPDATE_HASH() MIN_MATCH-3 more times
#endif
Expand Down Expand Up @@ -1561,7 +1607,7 @@ local block_state deflate_slow(s, flush)
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
INSERT_STRING(s, s->strstart, hash_head);
hash_head = insert_string(s, s->strstart);
}

/* Find the longest match, discarding those <= prev_length.
Expand Down Expand Up @@ -1612,7 +1658,7 @@ local block_state deflate_slow(s, flush)
s->prev_length -= 2;
do {
if (++s->strstart <= max_insert) {
INSERT_STRING(s, s->strstart, hash_head);
hash_head = insert_string(s, s->strstart);
}
} while (--s->prev_length != 0);
s->match_available = 0;
Expand Down
15 changes: 15 additions & 0 deletions deflate.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,21 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
* input characters, so that a running hash key can be computed from the
* previous key instead of complete recalculation each time.
*/
#ifdef USE_SSE4_2_CRC_HASH
#define UPDATE_HASH(s,h,i) (\
{\
if (s->level < 6) \
h = (3483 * (s->window[i]) +\
23081* (s->window[i+1]) +\
6954 * (s->window[i+2]) +\
20947* (s->window[i+3])) & s->hash_mask;\
else\
h = (25881* (s->window[i]) +\
24674* (s->window[i+1]) +\
25811* (s->window[i+2])) & s->hash_mask;\
})
#else
#define UPDATE_HASH(s,h,i) (h = (((h)<<s->hash_shift) ^ (s->window[i + (MIN_MATCH-1)])) & s->hash_mask)
#endif

#endif /* DEFLATE_H */

0 comments on commit d306c75

Please sign in to comment.