# Demo

In [1]:
from compression import LZW
from compression import Metrics
from compression import RLE
from compression import Huffman
from compression import Golomb
from compression import Arithmetic

## LZW

In [2]:
file = "abbfcsdfdddfadfafafa"
lzw = LZW()
encoded_file = lzw.LZW_encoder(file)
print(encoded_file)

[97, 98, 98, 102, 99, 115, 100, 102, 100, 136, 102, 97, 134, 97, 138, 138]


In [3]:
lzw.LZW_decoder(encoded_file)

'abbfcsdfdddfadfafafa'

In [4]:
bins = Metrics.binarify(encoded_file)
print(bins)

['01100001', '01100010', '01100010', '01100110', '01100011', '01110011', '01100100', '01100110', '01100100', '10001000', '01100110', '01100001', '10000110', '01100001', '10001010', '10001010']


In [5]:
l_avg = Metrics.Avg_length(bins, [1/len(bins)]*len(bins))

In [6]:
print(l_avg)

8.0


In [7]:
H,d= Metrics.entropy(file)

In [8]:
print(H)

2.2854752972273342


In [9]:
print(d)

{'a': 0.25, 'b': 0.1, 'f': 0.3, 'c': 0.05, 's': 0.05, 'd': 0.25}


In [10]:
before,after = Metrics.No_bits(file,bits_array=bins)

In [11]:
# compression ratio
print(before/after)
print((before/after).as_integer_ratio())

1.25
(5, 4)


In [12]:
# effiecency
(H/l_avg)*100

28.56844121534168

## RLE

In [13]:
rle = RLE()

In [14]:
file         = "aaabbcab"
encoded_file = rle.run_length_encoding(file)

In [15]:
print(encoded_file)

a3b2c1a1b1


In [16]:
before,after = Metrics.No_bits(file,encoded_file)

In [17]:
# compression ratio
print(before/after)
print((before /after).as_integer_ratio())

1.28
(5764607523034235, 4503599627370496)


In [18]:
H,d= Metrics.entropy(file)

In [19]:
print(H)

1.4056390622295662


In [20]:
print(d)

{'a': 0.5, 'b': 0.375, 'c': 0.125}


## Huffman


In [21]:
huffman = Huffman()

In [22]:
text = "hello world compression test"
encoded,d = huffman.encode(text)
print("Encoded:", encoded)
print("d",d)
print(huffman.compressionRatio(text,encoded))

root = huffman.build_tree(text)
decoded = huffman.decode(encoded, root)
print("Decoded:", decoded)

print("Original matches decoded:", text == decoded)


Encoded: 11111011010010101000111101011110010110010001100010111011110101110011001001100111011001000010000110011000
d {' ': '000', 's': '001', 'l': '010', 'e': '011', 't': '1000', 'n': '10010', 'i': '10011', 'o': '101', 'c': '11000', 'd': '11001', 'p': '11010', 'm': '11011', 'r': '1110', 'w': '11110', 'h': '11111'}
2.1538461538461537
Decoded: hello world compression test
Original matches decoded: True


In [23]:
Metrics.entropy(text)

(3.699513850319966,
 {'h': 0.03571428571428571,
  'e': 0.10714285714285714,
  'l': 0.10714285714285714,
  'o': 0.14285714285714285,
  ' ': 0.10714285714285714,
  'w': 0.03571428571428571,
  'r': 0.07142857142857142,
  'd': 0.03571428571428571,
  'c': 0.03571428571428571,
  'm': 0.03571428571428571,
  'p': 0.03571428571428571,
  's': 0.10714285714285714,
  'i': 0.03571428571428571,
  'n': 0.03571428571428571,
  't': 0.07142857142857142})

In [24]:
#Metrics.Avg_length()

# Golomb

In [25]:
file = [2,5,1500,3,3,2]

In [26]:
encoded_file = []
m = 1000
for integer in file:
    encoded_file.append(Golomb.golomb_encode(integer,m))
print(encoded_file)

['0000000010', '0000000101', '101000001100', '0000000011', '0000000011', '0000000010']


In [27]:
decoded_value = Golomb.golomb_decode(encoded_file[0], m)
print(decoded_value)

2


In [28]:
bins = Metrics.binarify(file)
print(bins)

['00000000010', '00000000101', '10111011100', '00000000011', '00000000011', '00000000010']


In [29]:
before  = len(bins) * len(bins[0])
_,after = Metrics.No_bits("",bits_array=encoded_file)

In [30]:
# compression ratio
print(before/after)
print((before /after).as_integer_ratio())

1.064516129032258
(599269305254945, 562949953421312)


In [31]:
before,after

(66, 62)

In [32]:
#l_avg = Metrics.Avg_length(encoded_file, [1/len(encoded_file)]*len(encoded_file))

In [33]:
#print(l_avg)

# Arithmetic

In [34]:
file = "aabbddaabcc"

In [35]:
H,table = Metrics.entropy(file)

In [36]:
table

{'a': 0.36363636363636365,
 'b': 0.2727272727272727,
 'd': 0.18181818181818182,
 'c': 0.18181818181818182}

In [37]:
symbols = table.keys()

# Get probabilities from user input
probabilities = table.values()

# Get sequence from user input
sequence = "abc"

# Encode the sequence
encoded_value = Arithmetic.encode_sequence(sequence, symbols, probabilities)

# Output the encoded value
print("Encoded value:", encoded_value)

Encoded value: 0.222389181066867
