-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathLCPCompressor.hpp
159 lines (123 loc) · 5.17 KB
/
LCPCompressor.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#pragma once
#include <tudocomp/util.hpp>
#include <tudocomp/Compressor.hpp>
#include <tudocomp/compressors/lzss/LZSSCoding.hpp>
#include <tudocomp/compressors/lzss/LZSSFactors.hpp>
#include <tudocomp/compressors/lzss/LZSSLiterals.hpp>
#include <tudocomp/ds/TextDS.hpp>
#include <tudocomp_stat/StatPhase.hpp>
// For default params
#include <tudocomp/compressors/lcpcomp/decompress/ScanDec.hpp>
#include <tudocomp/compressors/lcpcomp/compress/ArraysComp.hpp>
namespace tdc {
namespace lcpcomp {
class MaxLCPStrategy;
class CompactDec;
/// \brief Reads an lcpcomp-encoded stream from \p decoder, replays it into a
/// decode buffer and writes the reconstructed text to \p outs.
///
/// The stream is consumed in this order:
///  -# the text length (range `len_r`),
///  -# the shortest and the longest factor length,
///  -# the upper bound of the range used for the literal-run counters,
///  -# until the decoder reports end of input: one flag bit; if the bit is
///     set, a counter followed by that many literals; then, unless the input
///     is exhausted, one factor given as (source position, length).
///
/// Afterwards the buffer is asked to resolve what it collected
/// (`decode_lazy()`, then `decode_eagerly()`) before it is written out.
///
/// \tparam coder_t         type of \p decoder; must offer
///                         `template decode<T>(range)` and `eof()`.
/// \tparam decode_buffer_t buffer type constructed from `(Env&&, text_len)`;
///                         must offer `decode_literal`, `decode_factor`,
///                         `decode_lazy`, `decode_eagerly`, `longest_chain`
///                         and `write_to`.
/// \param env      environment that is moved into the decode buffer.
/// \param decoder  source of the encoded symbols.
/// \param outs     stream receiving the decoded text.
template<typename coder_t, typename decode_buffer_t>
inline void decode_text_internal(Env&& env, coder_t& decoder, std::ostream& outs) {
    // Phase object covering the whole function body.
    StatPhase phase("Decoding");

    // The very first symbol is the length of the original text.
    auto text_len = decoder.template decode<len_t>(len_r);

    // The buffer takes ownership of the environment.
    decode_buffer_t dec_buf(std::move(env), text_len);

    StatPhase::wrap("Starting Decoding", [&]() {
        // Positions and header values are coded relative to the text length.
        Range position_range(text_len);

        // Bounds for factor lengths: shortest first, longest second.
        auto shortest_factor = decoder.template decode<len_t>(position_range);
        auto longest_factor = decoder.template decode<len_t>(position_range);
        MinDistributedRange factor_len_range(shortest_factor, longest_factor);

        // Longest distance between factors; bounds the literal-run counters.
        auto longest_gap = decoder.template decode<len_t>(position_range);
        Range gap_range(longest_gap);

        while(!decoder.eof()) {
            // A set flag bit announces a run of literals and its counter.
            len_t literal_count;
            auto has_literals = decoder.template decode<bool>(bit_r);
            if(has_literals) {
                literal_count = decoder.template decode<len_t>(gap_range);
            } else {
                literal_count = 0;
            }

            // Feed the announced literals to the buffer one by one.
            while(literal_count--) {
                auto symbol = decoder.template decode<uliteral_t>(literal_r);
                dec_buf.decode_literal(symbol);
            }

            // A factor follows unless the literals were the end of the input.
            if(!decoder.eof()) {
                auto factor_src = decoder.template decode<len_t>(position_range);
                auto factor_len = decoder.template decode<len_t>(factor_len_range);
                dec_buf.decode_factor(factor_src, factor_len);
            }
        }
    });

    StatPhase::wrap("Scan Decoding", [&]() {
        dec_buf.decode_lazy();
    });

    StatPhase::wrap("Eager Decoding", [&]() {
        dec_buf.decode_eagerly();
        IF_STATS(StatPhase::log("longest_chain", dec_buf.longest_chain()));
    });

    StatPhase::wrap("Output Text", [&]() {
        dec_buf.write_to(outs);
    });
}
}//ns
/// Factorizes the input by finding redundant phrases in a re-ordered version
/// of the LCP table.
template<typename coder_t, typename strategy_t, typename dec_t, typename text_t = TextDS<>>
class LCPCompressor : public Compressor {
public:
inline static Meta meta() {
Meta m("compressor", "lcpcomp");
m.option("coder").templated<coder_t>("coder");
m.option("comp").templated<strategy_t, lcpcomp::ArraysComp>("lcpcomp_comp");
m.option("dec").templated<dec_t, lcpcomp::ScanDec>("lcpcomp_dec");
m.option("textds").templated<text_t, TextDS<>>("textds");
m.option("threshold").dynamic(5);
m.option("flatten").dynamic(1); // 0 or 1
m.uses_textds<text_t>(strategy_t::textds_flags());
return m;
}
/// Construct the class with an environment.
inline LCPCompressor(Env&& env) : Compressor(std::move(env)) {}
inline virtual void compress(Input& input, Output& output) override {
auto in = input.as_view();
DCHECK(in.ends_with(uint8_t(0)));
auto text = StatPhase::wrap("Construct Text DS", [&]{
return text_t(env().env_for_option("textds"), in, strategy_t::textds_flags());
});
// read options
const len_t threshold = env().option("threshold").as_integer(); //factor threshold
lzss::FactorBuffer factors;
StatPhase::wrap("Factorize", [&]{
// Factorize
strategy_t strategy(env().env_for_option("comp"));
strategy.factorize(text, threshold, factors);
StatPhase::log("threshold", threshold);
StatPhase::log("factors", factors.size());
});
// sort factors
StatPhase::wrap("Sort Factors", [&]{ factors.sort(); });
if(env().option("flatten").as_integer()) {
// flatten factors
StatPhase::wrap("Flatten Factors", [&]{ factors.flatten(); });
}
// encode
StatPhase::wrap("Encode Factors", [&]{
typename coder_t::Encoder coder(
env().env_for_option("coder"),
output,
lzss::TextLiterals<text_t>(text, factors));
lzss::encode_text(coder, text, factors); //TODO is this correct?
});
}
inline virtual void decompress(Input& input, Output& output) override {
//TODO: tell that forward-factors are allowed
typename coder_t::Decoder decoder(env().env_for_option("coder"), input);
auto outs = output.as_stream();
//lzss::decode_text_internal<coder_t, dec_t>(decoder, outs);
// if(lazy == 0)
// lzss::decode_text_internal<coder_t, dec_t>(decoder, outs);
// else
lcpcomp::decode_text_internal<typename coder_t::Decoder, dec_t>(env().env_for_option("dec"), decoder, outs);
}
};
/// \brief Contains factorization and decoding strategies for
/// the \ref LCPCompressor.
///
/// Members referenced from this file include `lcpcomp::ArraysComp` and
/// `lcpcomp::ScanDec`, the defaults named in `LCPCompressor::meta()`, as well
/// as the `lcpcomp::decode_text_internal` helper.
///
/// NOTE(review): the namespace is re-opened here with an empty body; this
/// appears to exist only to carry the description above — confirm.
namespace lcpcomp {
}
}