Skip to content

Commit

Permalink
Introduce zmemcpy to use unaligned access for architectures we know support unaligned access, otherwise use memcpy.
Browse files Browse the repository at this point in the history
  • Loading branch information
nmoinvaz committed Feb 1, 2022
1 parent f414727 commit 75c1e97
Show file tree
Hide file tree
Showing 10 changed files with 47 additions and 32 deletions.
6 changes: 3 additions & 3 deletions arch/arm/chunkset_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,19 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {

/* Fill *chunk with the 16-bit value read from `from`, duplicated into every
 * 16-bit lane of the vector.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_2 line that
 * follows it -- confirm against the real file. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
/* Copy the 2 source bytes into a local integer rather than dereferencing
 * `from` as a uint16_t directly. */
memcpy(&tmp, from, 2);
zmemcpy_2(&tmp, from);
/* Broadcast tmp to all 16-bit lanes, then reinterpret the result as bytes. */
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
}

/* Fill *chunk with the 32-bit value read from `from`, duplicated into every
 * 32-bit lane of the vector.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_4 line that
 * follows it -- confirm against the real file. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
/* Copy the 4 source bytes into a local integer rather than dereferencing
 * `from` as a uint32_t directly. */
memcpy(&tmp, from, 4);
zmemcpy_4(&tmp, from);
/* Broadcast tmp to all 32-bit lanes, then reinterpret the result as bytes. */
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
}

/* Fill *chunk with the 64-bit value read from `from`, duplicated into every
 * 64-bit lane of the vector.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_8 line that
 * follows it -- confirm against the real file. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
/* Copy the 8 source bytes into a local integer rather than dereferencing
 * `from` as a uint64_t directly. */
memcpy(&tmp, from, 8);
zmemcpy_8(&tmp, from);
/* Broadcast tmp to all 64-bit lanes, then reinterpret the result as bytes. */
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
}

Expand Down
6 changes: 3 additions & 3 deletions arch/power/chunkset_power8.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {

/* Fill *chunk with the 16-bit value read from `from`, splatted across the
 * whole vector.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_2 line that
 * follows it -- confirm against the real file. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
/* Copy the 2 source bytes into a local integer rather than dereferencing
 * `from` as a uint16_t directly. */
memcpy(&tmp, from, 2);
zmemcpy_2(&tmp, from);
/* Replicate tmp into every element, then view the vector as unsigned chars. */
*chunk = (vector unsigned char)vec_splats(tmp);
}

/* Fill *chunk with the 32-bit value read from `from`, splatted across the
 * whole vector.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_4 line that
 * follows it -- confirm against the real file. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
/* Copy the 4 source bytes into a local integer rather than dereferencing
 * `from` as a uint32_t directly. */
memcpy(&tmp, from, 4);
zmemcpy_4(&tmp, from);
/* Replicate tmp into every element, then view the vector as unsigned chars. */
*chunk = (vector unsigned char)vec_splats(tmp);
}

/* Fill *chunk with the 64-bit value read from `from`, splatted across the
 * whole vector.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_8 line that
 * follows it -- confirm against the real file. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
/* Copy the 8 source bytes into a local integer rather than dereferencing
 * `from` as a uint64_t directly. */
memcpy(&tmp, from, 8);
zmemcpy_8(&tmp, from);
/* Replicate tmp into every element, then view the vector as unsigned chars. */
*chunk = (vector unsigned char)vec_splats(tmp);
}

Expand Down
10 changes: 5 additions & 5 deletions chunkset.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,20 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {

/* Write the 4 bytes at `from` twice, back to back, into the first 8 bytes of
 * *chunk (presumably chunk_t is 8 bytes wide in this file -- TODO confirm).
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the two memcpy lines are presumably the removed versions of the two
 * zmemcpy_4 lines that follow them -- confirm against the real file. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
/* Byte view of the chunk so the second copy can be placed at offset 4. */
uint8_t *dest = (uint8_t *)chunk;
memcpy(dest, from, sizeof(uint32_t));
memcpy(dest+4, from, sizeof(uint32_t));
zmemcpy_4(dest, from);
/* NOTE(review): `dest+4` is passed unparenthesized; this addresses byte
 * offset 4 only if zmemcpy_4 parenthesizes its parameters (a cast binds
 * tighter than '+') -- verify the macro definition. */
zmemcpy_4(dest+4, from);
}

/* Copy the 8 bytes at `from` into *chunk.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_8 line that
 * follows it -- confirm against the real file. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
memcpy(chunk, from, sizeof(uint64_t));
zmemcpy_8(chunk, from);
}

/* Load 8 bytes from `s` into *chunk.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the chunkmemset_8 call is presumably the removed version of the zmemcpy_8
 * line that follows it -- confirm against the real file. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
/* The const qualifier on `s` is cast away; chunkmemset_8 takes a non-const
 * uint8_t pointer. */
chunkmemset_8((uint8_t *)s, chunk);
zmemcpy_8(chunk, (uint8_t *)s);
}

/* Store 8 bytes from *chunk to `out`.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the memcpy line is presumably the removed version of the zmemcpy_8 line that
 * follows it -- confirm against the real file. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
memcpy(out, chunk, sizeof(uint64_t));
zmemcpy_8(out, chunk);
}

#define CHUNKSIZE chunksize_c
Expand Down
6 changes: 3 additions & 3 deletions chunkset_tpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,20 @@ Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned l
#endif
#if CHUNK_SIZE >= 8
while (len >= 8) {
memcpy(out, from, 8);
zmemcpy_8(out, from);
out += 8;
from += 8;
len -= 8;
}
#endif
if (len >= 4) {
memcpy(out, from, 4);
zmemcpy_4(out, from);
out += 4;
from += 4;
len -= 4;
}
if (len >= 2) {
memcpy(out, from, 2);
zmemcpy_2(out, from);
out += 2;
from += 2;
len -= 2;
Expand Down
8 changes: 4 additions & 4 deletions compare256.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
do {
uint32_t sv, mv, diff;

memcpy(&sv, src0, sizeof(sv));
memcpy(&mv, src1, sizeof(mv));
zmemcpy_4(&sv, src0);
zmemcpy_4(&mv, src1);

diff = sv ^ mv;
if (diff) {
Expand Down Expand Up @@ -141,8 +141,8 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
do {
uint64_t sv, mv, diff;

memcpy(&sv, src0, sizeof(sv));
memcpy(&mv, src1, sizeof(mv));
zmemcpy_8(&sv, src0);
zmemcpy_8(&mv, src1);

diff = sv ^ mv;
if (diff) {
Expand Down
10 changes: 5 additions & 5 deletions deflate.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ static inline void put_short(deflate_state *s, uint16_t w) {
#if BYTE_ORDER == BIG_ENDIAN
w = ZSWAP16(w);
#endif
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
zmemcpy_2(&s->pending_buf[s->pending], &w);
s->pending += 2;
}

Expand All @@ -317,7 +317,7 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
#if BYTE_ORDER == LITTLE_ENDIAN
w = ZSWAP16(w);
#endif
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
zmemcpy_2(&s->pending_buf[s->pending], &w);
s->pending += 2;
}

Expand All @@ -329,7 +329,7 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
#if BYTE_ORDER == BIG_ENDIAN
dw = ZSWAP32(dw);
#endif
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
zmemcpy_4(&s->pending_buf[s->pending], &dw);
s->pending += 4;
}

Expand All @@ -341,7 +341,7 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
#if BYTE_ORDER == LITTLE_ENDIAN
dw = ZSWAP32(dw);
#endif
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
zmemcpy_4(&s->pending_buf[s->pending], &dw);
s->pending += 4;
}

Expand All @@ -353,7 +353,7 @@ static inline void put_uint64(deflate_state *s, uint64_t lld) {
#if BYTE_ORDER == BIG_ENDIAN
lld = ZSWAP64(lld);
#endif
memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
zmemcpy_8(&s->pending_buf[s->pending], &lld);
s->pending += 8;
}

Expand Down
2 changes: 1 addition & 1 deletion inffast.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
uint64_t chunk;
memcpy(&chunk, in, sizeof(chunk));
zmemcpy_8(&chunk, in);

#if BYTE_ORDER == LITTLE_ENDIAN
return chunk << bits;
Expand Down
4 changes: 2 additions & 2 deletions insert_string_tpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
# define HASH_CALC_MASK HASH_MASK
#endif
#ifndef HASH_CALC_READ
# ifdef UNALIGNED_OK
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val));
zmemcpy_4(&val, strstart);
# else
# define HASH_CALC_READ \
val = ((uint32_t)(strstart[0])); \
Expand Down
12 changes: 6 additions & 6 deletions match_tpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif

#ifdef UNALIGNED64_OK
memcpy(scan_start, scan, sizeof(uint64_t));
memcpy(scan_end, scan+offset, sizeof(uint64_t));
zmemcpy_8(scan_start, scan);
zmemcpy_8(scan_end, scan+offset);
#elif defined(UNALIGNED_OK)
memcpy(scan_start, scan, sizeof(uint32_t));
memcpy(scan_end, scan+offset, sizeof(uint32_t));
zmemcpy_4(scan_start, scan);
zmemcpy_4(scan_end, scan+offset);
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);
Expand Down Expand Up @@ -201,9 +201,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif

#ifdef UNALIGNED64_OK
memcpy(scan_end, scan+offset, sizeof(uint64_t));
zmemcpy_8(scan_end, scan+offset);
#elif defined(UNALIGNED_OK)
memcpy(scan_end, scan+offset, sizeof(uint32_t));
zmemcpy_4(scan_end, scan+offset);
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);
Expand Down
15 changes: 15 additions & 0 deletions zbuild.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,19 @@
# define Tracecv(c, x)
#endif

/* zmemcpy_N(dest, src): copy exactly N bytes (N = 2, 4 or 8) from src to dest.
 *
 * When UNALIGNED_OK is defined the copy is a direct integer load/store through
 * cast pointers; otherwise it is a memcpy() with a constant size.
 *
 * Every macro parameter is parenthesized so that expression arguments such as
 * zmemcpy_4(dest+4, from) address the intended bytes: a cast binds tighter
 * than '+', so (uint32_t *)dest+4 would point 16 bytes past dest, not 4.
 * Each expansion is a single parenthesized expression, so the macros can be
 * used as the body of an unbraced if/else.
 *
 * The two-word zmemcpy_8 variant evaluates dest and src twice; do not pass
 * arguments with side effects.
 *
 * NOTE(review): the UNALIGNED_OK variants access memory through uintN_t
 * lvalues regardless of the pointed-to object's declared type or alignment.
 * This relies on the target and compiler tolerating such accesses -- confirm
 * for every architecture that defines UNALIGNED_OK.
 */
#ifdef UNALIGNED_OK
#  define zmemcpy_2(dest, src)    (*((uint16_t *)(dest)) = *((const uint16_t *)(src)))
#  define zmemcpy_4(dest, src)    (*((uint32_t *)(dest)) = *((const uint32_t *)(src)))
#  if UINTPTR_MAX == UINT64_MAX
#    define zmemcpy_8(dest, src)  (*((uint64_t *)(dest)) = *((const uint64_t *)(src)))
#  else
/* Pointers are narrower than 64 bits: copy as two 32-bit words. */
#    define zmemcpy_8(dest, src)  (((uint32_t *)(dest))[0] = ((const uint32_t *)(src))[0], \
                                   ((uint32_t *)(dest))[1] = ((const uint32_t *)(src))[1])
#  endif
#else
#  define zmemcpy_2(dest, src)    memcpy((dest), (src), 2)
#  define zmemcpy_4(dest, src)    memcpy((dest), (src), 4)
#  define zmemcpy_8(dest, src)    memcpy((dest), (src), 8)
#endif

#endif

0 comments on commit 75c1e97

Please sign in to comment.