-
Notifications
You must be signed in to change notification settings - Fork 271
/
lzw.rb
126 lines (111 loc) · 3.81 KB
/
lzw.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# coding: utf-8
module PDF
  class Reader
    # A general class for decoding LZW compressed data. LZW can be
    # used in PDF files to compress streams, usually for image data sourced
    # from a TIFF file.
    #
    # See the following links for more information:
    #
    # ref http://www.fileformat.info/format/tiff/corion-lzw.htm
    # ref http://marknelson.us/1989/10/01/lzw-data-compression/
    #
    # The PDF spec also has some data on the algorithm.
    #
    class LZW # :nodoc:
      # Reads fixed-width chunks of bits, most significant bit first, from an
      # LZW encoded string. The string is only read byte-wise and is never
      # modified, so frozen strings and strings in any encoding are accepted.
      class BitStream # :nodoc:
        # data          - String holding the encoded bytes
        # bits_in_chunk - Integer number of bits returned by each call to #read
        def initialize(data, bits_in_chunk)
          @data = data
          @bits_in_chunk = bits_in_chunk
          @current_pos = 0       # index of the byte currently being consumed
          @bits_left_in_byte = 8 # unread (low order) bits of that byte
        end

        # Changes the number of bits returned by subsequent calls to #read.
        def set_bits_in_chunk(bits_in_chunk)
          @bits_in_chunk = bits_in_chunk
        end

        # Returns the next chunk as an Integer, or nil when the stream does not
        # hold a complete chunk any more (end of data, or only padding bits
        # left).
        def read
          return nil unless @bits_in_chunk > 0 && bits_available >= @bits_in_chunk

          chunk = 0
          bits_needed = @bits_in_chunk
          while bits_needed > 0
            # mask off the high order bits that were consumed by earlier reads
            unread = @data.getbyte(@current_pos) & ((1 << @bits_left_in_byte) - 1)
            take = [bits_needed, @bits_left_in_byte].min
            # append the topmost +take+ unread bits to the chunk
            chunk = (chunk << take) | (unread >> (@bits_left_in_byte - take))
            bits_needed -= take
            @bits_left_in_byte -= take
            if @bits_left_in_byte.zero? # move on to the next byte
              @current_pos += 1
              @bits_left_in_byte = 8
            end
          end
          chunk
        end

        private

        # Number of bits that have not been consumed yet.
        def bits_available
          (@data.bytesize - @current_pos) * 8 - (8 - @bits_left_in_byte)
        end
      end

      CODE_EOD = 257 # end of data
      CODE_CLEAR_TABLE = 256 # clear table

      # Code width (in bits) to switch to once the string table's next free
      # position reaches the given value.
      CODE_WIDTH_AT_TABLE_POS = { 511 => 10, 1023 => 11, 2047 => 12 }.freeze

      # Stores the pairs code => string
      class StringTable < Hash # :nodoc:
        # The code that will be assigned to the next string passed to #add.
        attr_reader :string_table_pos

        def initialize
          super
          @string_table_pos = 258 # first code available for table entries
        end

        # Codes below 258 are not stored: they map to the single character with
        # that byte value. Returns nil for a code that has not been added yet.
        def [](key)
          if key > 257
            super
          else
            key.chr
          end
        end

        # Stores string under the next free code and advances the position.
        def add(string)
          store(@string_table_pos, string)
          @string_table_pos += 1
        end
      end

      # Decompresses a LZW compressed string.
      #
      # data - the compressed bytes; anything responding to #to_s. It is not
      #        modified.
      #
      # Returns the decompressed String. Decoding stops at the end-of-data code
      # or, if that code is missing, when the input runs out of complete codes.
      # A stream that does not start with a clear-table code is decoded as if
      # it did. Code sequences are not validated beyond that, so corrupt input
      # may still raise.
      def self.decode(data)
        stream = BitStream.new(data.to_s, 9) # size of codes between 9 and 12 bits
        string_table = StringTable.new
        result = ''.dup
        old_code = nil # previous code; nil right after the table was (re)started

        while (code = stream.read)
          break if code == CODE_EOD

          if code == CODE_CLEAR_TABLE
            stream.set_bits_in_chunk(9)
            string_table = StringTable.new
            old_code = nil
            next
          end

          if old_code.nil?
            # The first code of a fresh table has no predecessor to extend, so
            # it is only emitted and nothing is added to the table.
            result << string_table[code]
          else
            known = string_table[code]
            if known
              result << known
              string_table.add create_new_string(string_table, old_code, code)
            else
              # The code is not in the table yet: it is the entry about to be
              # created, i.e. the previous string plus its own first character.
              fresh = create_new_string(string_table, old_code, old_code)
              result << fresh
              string_table.add fresh
            end
            # increase the size of the codes when a limit is reached
            next_width = CODE_WIDTH_AT_TABLE_POS[string_table.string_table_pos]
            stream.set_bits_in_chunk(next_width) if next_width
          end
          old_code = code
        end
        result
      end

      # Builds a table entry: the string for some_code followed by the first
      # character of the string for other_code.
      def self.create_new_string(string_table, some_code, other_code)
        string_table[some_code] + string_table[other_code][0]
      end
      private_class_method :create_new_string
    end
  end
end