Skip to content

Commit

Permalink
Merge pull request #24 from zendesk/ggrossman/zstd
Browse files Browse the repository at this point in the history
Add option to enable zstd compression
  • Loading branch information
ggrossman committed Jun 22, 2023
2 parents cb237bf + 798f568 commit 8c8dee7
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 51 deletions.
1 change: 0 additions & 1 deletion .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ jobs:
fail-fast: false
matrix:
ruby:
- '2.5'
- '2.6'
- '2.7'
- '3.0'
Expand Down
6 changes: 4 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
PATH
remote: .
specs:
large_object_store (1.5.0)
large_object_store (1.6.0)
zstd-ruby (~> 1.5.5)

GEM
remote: https://rubygems.org/
Expand Down Expand Up @@ -36,6 +37,7 @@ GEM
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
zeitwerk (2.6.7)
zstd-ruby (1.5.5.0)

PLATFORMS
ruby
Expand All @@ -49,4 +51,4 @@ DEPENDENCIES
rspec (~> 3)

BUNDLED WITH
2.3.26
2.3.20
13 changes: 13 additions & 0 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ store.write("a" * 1000, compress: true, compress_limit: 100) # compress when gre
store.write("a", "a" * 1000, raw: true) # store as string to avoid marshaling overhead
```

zstd
====

[zstd compression](https://engineering.fb.com/2016/08/31/core-data/smaller-and-faster-data-compression-with-zstandard/), a modern improvement over the venerable zlib compression algorithm, is supported by passing the `zstd` flag when writing items:

```
store.write("a", "a" * 10_000_000, compress: true, zstd: true)
```

For backwards compatibility and to enable safe roll-out of the change in working systems, the `zstd` flag defaults to `false`.

zstd decompression is used when the zstd magic number is detected at the beginning of compressed data, so `zstd: true` does not need to be passed when reading/fetching items.

Author
======
[Ana Martinez](https://github.com/anamartinez)<br/>
Expand Down
3 changes: 2 additions & 1 deletion large_object_store.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ Gem::Specification.new name, LargeObjectStore::VERSION do |s|
s.homepage = "https://github.com/anamartinez/#{name}"
s.files = `git ls-files lib Readme.md`.split("\n")
s.license = "MIT"
s.required_ruby_version = '>= 2.5'
s.required_ruby_version = '>= 2.6'
s.add_runtime_dependency('zstd-ruby', '~> 1.5.5')
end
48 changes: 38 additions & 10 deletions lib/large_object_store.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
require "large_object_store/version"
require "zlib"
require "zstd-ruby"
require "securerandom"

module LargeObjectStore
UUID_BYTES = 16
UUID_SIZE = UUID_BYTES * 2
CACHE_VERSION = 3
CACHE_VERSION = 4
MAX_OBJECT_SIZE = 1024**2
ITEM_HEADER_SIZE = 100
DEFAULT_COMPRESS_LIMIT = 16*1024
NORMAL = 0
COMPRESSED = 1
RAW = 2
FLAG_RADIX = 32 # we can store 32 different states
ZSTD_MAGIC = "\x28\xB5\x2F\xFD".force_encoding('ASCII-8BIT')
ZSTD_COMPRESS_LEVEL = 3 # Default level recommended by zstd authors

def self.wrap(*args)
RailsWrapper.new(*args)
Expand All @@ -31,7 +34,7 @@ def initialize(store, serializer: Marshal)

def write(key, value, **options)
options = options.dup
value = serialize(value, **options)
value = serialize(value, options)

# calculate slice size; note that key length is a factor because
# the key is stored on the same slab page as the value
Expand Down Expand Up @@ -107,7 +110,7 @@ def delete(key)

# convert a object to a string
# modifies options
def serialize(value, **options)
def serialize(value, options)
flag = NORMAL

if options.delete(:raw)
Expand All @@ -117,28 +120,53 @@ def serialize(value, **options)
value = @serializer.dump(value)
end

if compress?(value, **options)
if compress?(value, options)
flag |= COMPRESSED
value = Zlib::Deflate.deflate(value)
value = compress(value, options)
end

value.prepend(flag.to_s(FLAG_RADIX))
end

# Compresses an already-serialized string with the codec the caller asked for.
#
# @param value [String] the payload to compress
# @param options [Hash] write options; a truthy :zstd entry selects zstd,
#   anything else falls back to zlib
# @return [String] the compressed bytes
def compress(value, options)
  return Zlib::Deflate.deflate(value) unless options[:zstd]

  Zstd.compress(value, ZSTD_COMPRESS_LEVEL)
end

# Reverses #compress, picking the codec by inspecting the payload itself.
#
# Data whose leading bytes equal ZSTD_MAGIC is handed to Zstd; everything
# else is treated as zlib data.
#
# @param data [String] compressed bytes (any encoding tag)
# @return [String] the decompressed payload
def decompress(data)
  # Compare the leading bytes as binary instead of calling String#start_with?:
  # that method raises Encoding::CompatibilityError when data is tagged with a
  # non-binary encoding (e.g. UTF-8) and holds non-ASCII bytes, which
  # compressed data typically does. Only the short prefix is copied.
  prefix = data.byteslice(0, ZSTD_MAGIC.bytesize).b

  if prefix == ZSTD_MAGIC.b
    Zstd.decompress(data)
  else
    Zlib::Inflate.inflate(data)
  end
end

# opposite operations and order of serialize
# Takes a stored string, strips the one-character flag prefix (parsed in base
# FLAG_RADIX) and then undoes compression and serialization as the flag bits
# dictate. Works on a copy, so raw_data itself is not modified.
# NOTE(review): this span is rendered from a diff; the Zlib::Inflate line is the
# removed version and the decompress(data) line is its replacement, so only one
# of the two is live in the committed file.
def deserialize(raw_data)
data = raw_data.dup
# the first character is the flag that serialize prepended
flag = data.slice!(0, 1).to_i(FLAG_RADIX)
data = Zlib::Inflate.inflate(data) if flag & COMPRESSED == COMPRESSED
data = decompress(data) if flag & COMPRESSED == COMPRESSED
# anything not flagged RAW goes back through the serializer
data = @serializer.load(data) if flag & RAW != RAW
data
end

# Don't pass compression on to Rails, we're doing it ourselves.
# Decides whether value is compressed by this library: requires a truthy
# :compress option and a bytesize strictly greater than :compress_limit
# (DEFAULT_COMPRESS_LIMIT when the option is absent). Returns nil when
# :compress is not set, otherwise true or false.
# NOTE(review): this span is rendered from a diff; the **options definition
# directly below (four lines) is the removed version, and the definition taking
# a plain options hash after it is the replacement.
def compress?(value, **options)
return unless options.delete(:compress)
compress_limit = options.delete(:compress_limit) || DEFAULT_COMPRESS_LIMIT
value.bytesize > compress_limit
def compress?(value, options)
return unless options[:compress]

compress_limit = options[:compress_limit] || DEFAULT_COMPRESS_LIMIT
should_compress = value.bytesize > compress_limit

# The passed-in options hash is only modified when compression will happen;
# otherwise :compress and :compress_limit are left in it untouched.
if should_compress
# Pass compress: false to Rails in case the default is true
options[:compress] = false
options.delete(:compress_limit)
end

should_compress
end

def key(key, i)
Expand Down
2 changes: 1 addition & 1 deletion lib/large_object_store/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module LargeObjectStore
VERSION = "1.5.0"
VERSION = "1.6.0"
end
94 changes: 58 additions & 36 deletions spec/large_object_store_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ def type(data, kind)
expect(store.read("a")).to eq("a")
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:raw_compressed)
end

# A raw (unmarshaled) value written with zstd compression round-trips through
# the store and is recorded with the :raw_compressed type.
it "can read and write compressed zstd raw" do
  written = store.write("a", "a", raw: true, zstd: true, compress: true, compress_limit: 0)
  expect(written).to eq(true)

  expect(store.read("a")).to eq("a")

  stored = store.store.read("a_#{version}_0")
  expect(type(stored, :single)).to eq(:raw_compressed)
end
end

describe "compression" do
Expand All @@ -151,42 +157,58 @@ def type(data, kind)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:normal)
end

it "can read/write compressed non-string objects" do
s = ["x"] * 10000
expect(store.write("a", s, :compress => true)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:compressed)
end

it "compresses large objects" do
s = "x" * 25000
expect(store.write("a", s, :compress => true)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:compressed)
end

it "compresses objects larger than optional compress_limit" do
s = "compress me"
len = s.length
expect(store.write("a", s, :compress => true, :compress_limit => len-1)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:compressed)
end

it "does not compress objects smaller than optional compress limit" do
s = "don't compress me"
len = s.length
expect(store.write("a", s, :compress => true, :compress_limit => len*2)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:normal)
end

it "can read/write giant compressed objects" do
s = SecureRandom.hex(5_000_000)
expect(store.write("a", s, :compress => true)).to eq(true)
expect(store.store.read("a_#{version}_0").first).to eq(6)
expect(type(store.store.read("a_#{version}_1"), :multi)).to eq(:compressed)
expect(store.read("a").size).to eq(s.size)
# Runs the same compression examples once per codec: zstd=false exercises the
# zlib path and zstd=true exercises the zstd path.
[false, true].each do |zstd|
describe "with zstd=#{zstd}" do
# Guard against the wrong codec being used: whichever codec is NOT selected
# must never be invoked while an example runs.
before do
if zstd
# Zlib shouldn't be called
expect(Zlib::Deflate).not_to receive(:deflate)
expect(Zlib::Inflate).not_to receive(:inflate)
else
# Zstd shouldn't be called
expect(Zstd).not_to receive(:compress)
expect(Zstd).not_to receive(:decompress)
end
end

it "can read/write compressed non-string objects" do
s = ["x"] * 10000
expect(store.write("a", s, compress: true, zstd: zstd)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:compressed)
end

it "compresses large objects" do
s = "x" * 25000
expect(store.write("a", s, compress: true, zstd: zstd)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:compressed)
end

it "compresses objects larger than optional compress_limit" do
s = "compress me"
len = s.length
expect(store.write("a", s, compress: true, zstd: zstd, compress_limit: len-1)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:compressed)
end

it "does not compress objects smaller than optional compress limit" do
s = "don't compress me"
len = s.length
expect(store.write("a", s, compress: true, zstd: zstd, compress_limit: len*2)).to eq(true)
expect(store.read("a")).to eq(s)
expect(type(store.store.read("a_#{version}_0"), :single)).to eq(:normal)
end

it "can read/write giant compressed objects" do
s = SecureRandom.hex(5_000_000)
expect(store.write("a", s, compress: true, zstd: zstd)).to eq(true)
# presumably the first element of the key-0 entry is the slice count, and a
# range is accepted because the two codecs compress to different sizes — TODO confirm
expect(store.store.read("a_#{version}_0").first).to be_between(5, 6)
expect(type(store.store.read("a_#{version}_1"), :multi)).to eq(:compressed)
expect(store.read("a").size).to eq(s.size)
end
end
end
end

Expand Down

0 comments on commit 8c8dee7

Please sign in to comment.