# LZW Compression

In [16]:
def lzw_enc(source):
    """Compress a string with LZW and return the list of emitted codes.

    Single characters are emitted as themselves (one-character strings).
    Every multi-character phrase learned while encoding is emitted as an
    integer code; these are assigned sequentially starting at 256.

    Args:
        source: The text to compress.

    Returns:
        A list mixing one-character strings (literals) and integers
        (codes of learned phrases). An empty ``source`` yields ``[]``.
    """
    if not source:
        # Nothing to encode; indexing source[0] below would raise IndexError.
        return []

    # Seed table: the 256 single characters chr(0)..chr(255) map to themselves.
    lzw_dict_enc = {chr(i): chr(i) for i in range(256)}
    cur_size = len(lzw_dict_enc)
    compressed = []

    s = source[0]
    for c in source[1:]:
        candidate = s + c
        if candidate in lzw_dict_enc:
            # Keep extending the current phrase while it is already known.
            s = candidate
            continue

        # `s` is either a known phrase or a single character.  A single
        # character outside the seeded range is not in the table, so fall
        # back to emitting it as a literal, exactly like seeded characters.
        compressed.append(lzw_dict_enc.get(s, s))
        lzw_dict_enc[candidate] = cur_size
        cur_size += 1
        s = c

    # Flush the phrase still pending when the input ends.
    compressed.append(lzw_dict_enc.get(s, s))
    return compressed



def lzw_dec(compressed):
    """Decode a sequence of LZW codes back into the phrases they stand for.

    Args:
        compressed: A sequence of codes.  A one-character string is a
            literal and decodes to itself; an integer refers to a phrase
            in the table that is rebuilt during decoding (codes start
            at 256).

    Returns:
        A list of decoded string fragments, one per input code; join them
        with ``''.join(...)`` to obtain the text.  An empty input yields
        ``[]``.

    Raises:
        ValueError: If a code is neither a literal, a known table entry,
            nor the next code about to be assigned.
    """
    if not compressed:
        # Nothing to decode; indexing compressed[0] below would raise IndexError.
        return []

    def is_literal(code):
        # Single characters stand for themselves, including characters
        # outside the seeded chr(0)..chr(255) range.
        return isinstance(code, str) and len(code) == 1

    # Seed table: the 256 single characters chr(0)..chr(255) map to themselves.
    lzw_dict_dec = {chr(i): chr(i) for i in range(256)}
    cur_size = len(lzw_dict_dec)

    s = compressed[0]
    if not is_literal(s):
        # No phrase has been learned yet, so the first code must be a literal.
        raise ValueError('Bad compressed key: %s' % (s,))
    uncompressed = [s]

    for k in compressed[1:]:
        if k in lzw_dict_dec:
            decoded_symbol = lzw_dict_dec[k]
        elif is_literal(k):
            decoded_symbol = k
        elif k == cur_size:
            # The code refers to the entry that is about to be created
            # (pattern cScSc): it must be the previous phrase plus its own
            # first character.
            decoded_symbol = s + s[0]
        else:
            raise ValueError('Bad compressed key: %s' % (k,))

        uncompressed.append(decoded_symbol)
        # Register the next phrase: previous phrase plus the first
        # character of the phrase just decoded.
        lzw_dict_dec[cur_size] = s + decoded_symbol[0]
        cur_size += 1
        s = decoded_symbol
    return uncompressed


In [17]:
# Round-trip a short, highly repetitive string through the encoder and decoder.
text = 'abcabcabcabcabcabcabcabcabc'
text_enc = lzw_enc(text)
text_dec = lzw_dec(text_enc)

# Show the input, the emitted codes, and the text reassembled from the
# decoded fragments.
print(f"\nText:\n {text}")
print(f"\nText_encoded:\n {text_enc}")
print(f"\nText_decoded:\n {''.join(text_dec)}")


Text:
 abcabcabcabcabcabcabcabcabc

Text_encoded:
 ['a', 'b', 'c', 256, 258, 257, 259, 262, 261, 264, 260, 'c']

Text_decoded:
 abcabcabcabcabcabcabcabcabc


In [21]:
# Longer sample: compress a paragraph, report the sizes, then decode it again.
text = 'It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only.'
print(f"\nText Length =  {len(text)}")
print(text)

# Number of emitted codes versus number of input characters.
compressed = lzw_enc(text)
print(f"\nCompressed code Length =  {len(compressed)}")
print(compressed)

# Decoding yields a list of fragments; joining them gives the text.
uncompressed = lzw_dec(compressed)
print("\nUncompressed Result:", uncompressed, sep="\n")
print("\nDecoded Text:", ''.join(uncompressed), sep="\n")
   


Text Length =  613
It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only.

Compressed code Length =  335
['I', 't', ' ', 'w', 'a', 's', ' ', 't', 'h', 'e', ' ', 'b', 'e', 's', 257, 'o', 'f', 262, 'i', 'm', 268, ',', ' ', 'i', 257, 259, 261, 263, 265, 'w', 'o', 'r', 269, ' ', 271, 273, 275, 's', 277, 279, 258, 260, 262, 264, ' ', 'a', 'g', 265, 290, 'w', 'i', 's', 'd', 'o', 'm', 294, 280, 297, 283, 300, 302, 289, 272, 'f', 'o', 'o', 'l',

In [22]:
text = 'It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way – in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only.'

# Inventory of the distinct characters in the text, paired with their code
# points and printed in code-point order.
L = list({(ord(ch), ch) for ch in text})
print(sorted(L))

[(32, ' '), (44, ','), (46, '.'), (68, 'D'), (72, 'H'), (73, 'I'), (76, 'L'), (97, 'a'), (98, 'b'), (99, 'c'), (100, 'd'), (101, 'e'), (102, 'f'), (103, 'g'), (104, 'h'), (105, 'i'), (107, 'k'), (108, 'l'), (109, 'm'), (110, 'n'), (111, 'o'), (112, 'p'), (114, 'r'), (115, 's'), (116, 't'), (117, 'u'), (118, 'v'), (119, 'w'), (121, 'y'), (8211, '–')]
