## Notebook Principal

Contém os testes com a árvore trie (Radix Tree), além de exemplos de compressão e descompressão com LZW.

In [23]:
from trie import RadixTree
from lzw import *
from Str_functions import *
import pandas as pd
import os

# Testes com Radix Tree

In [24]:
# Build a radix tree from a small word list whose entries share prefixes
# ("word", "word1", "word12", ...), so the tree has nested nodes to show.
tree = RadixTree()
tree.insert_many(["word", "word1", "word2", "word3", "word12", "abc", "a", "bc"])
# Printing the tree shows each word mapped to an integer; in the recorded
# output those integers match each word's position in the list above.
print(tree)
# print_tree() prints the node structure, one line per node, with deeper
# nodes shown under the prefix they extend.
tree.print_tree()

{'word': 0, 'word1': 1, 'word12': 4, 'word2': 2, 'word3': 3, 'a': 6, 'abc': 5, 'bc': 7}
--- word:  	(0)
------ 1:  	(1)
--------- 2:  	(4)
------ 2:  	(2)
------ 3:  	(3)
--- a:  	(6)
------ bc:  	(5)
--- bc:  	(7)


# Função para escrever os códigos criados num arquivo de texto

In [25]:
def write_codes(codes, compressed_codes, n_bits, infile, by="ascii"):
    """
    Write an LZW compression report for one input file.

    The input is read from ``tests/<infile>`` and the report is written to
    ``encoded/<name>_codes.txt``, where ``<name>`` is the part of ``infile``
    before its first dot. The ``encoded`` directory is created if missing and
    an existing report is overwritten.

    Args:
        codes: Code sequence to record; it is written through its string
            representation.
        compressed_codes: Compressed representation of the codes. Its length
            is reported as the compressed size in bits.
        n_bits (int): Number of bits per code.
        infile (str): Name of the input file inside ``tests/``.
        by (str): Label written on the report's "Formato" line.

    Raises:
        FileNotFoundError: If ``tests/<infile>`` does not exist.
    """
    # Read the input and release its handle before touching the output file.
    with open("tests/" + infile, "r", encoding="utf-8") as source:
        # Number of decoded characters; the report labels it as bytes, which
        # only holds when every character occupies a single byte.
        size = len(source.read())

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("encoded", exist_ok=True)
    file_name = "encoded/" + infile.split(".")[0] + "_codes.txt"

    original_bits = size * 8
    compressed_bits = len(compressed_codes)
    # An empty compressed stream has no meaningful ratio; report "N/A"
    # instead of failing with ZeroDivisionError.
    ratio = original_bits / compressed_bits if compressed_bits else "N/A"

    # Mode "w" creates the file when needed, so no separate "x" open is
    # required, and the with-block closes the handle on exit.
    with open(file_name, "w", encoding="utf-8") as report:
        report.write(f"Arquivo de entrada: {infile} ({size} bytes)\n")
        report.write(f"Formato: {by}\n\n")
        report.write("LZW Codes:\n")
        report.write(f"{codes}\n\n")
        report.write("Compressed LZW Codes:\n")
        report.write(f"{compressed_codes}\n")
        report.write(f"Number of bits per code: {n_bits}\n\n")
        report.write(f"Original Size:     {original_bits} bits\n")
        report.write(f"Compressed Size:   {compressed_bits} bits\n")
        report.write(f"Compression Ratio: {ratio}")

In [26]:
def main():
    """
    Usage example: encode each sample file and write its report.

    Iterates over ``tests/ex (1).txt`` .. ``tests/ex (4).txt``; for each one it
    prints a header, encodes the text with ``encoder(..., by="ascii")`` and
    passes the decompressed codes, the compressed output and the code width
    to ``write_codes``.
    """
    n_files = 4
    path = "tests"
    separator = "-" * 50
    for index in range(1, n_files + 1):
        file_name = f"ex ({index}).txt"
        with open(f"{path}/{file_name}", "r", encoding="utf-8") as handle:
            print(separator)
            print(f"File: {file_name}")
            text = handle.read()
            print(separator)
            compressed_ascii, n_bits = encoder(text, by="ascii")
            # The report records the codes recovered from the compressed output.
            recovered_codes = decompress(compressed_ascii, n_bits)
            write_codes(
                recovered_codes, compressed_ascii, n_bits, file_name, by="ascii"
            )

In [27]:
# Run the example when this code executes as the main module; the recorded
# output below shows the guard passes when the cell is run in the notebook.
if __name__ == "__main__":
    main()

--------------------------------------------------
File: ex (1).txt
--------------------------------------------------
Encoding ascii text...
Original Size:     144 bits
Compressed Size:   144 bits
Compression Ratio: 1.0
--------------------------------------------------
File: ex (2).txt
--------------------------------------------------
Encoding ascii text...
Original Size:     93944 bits
Compressed Size:   36336 bits
Compression Ratio: 2.5854249229414354
--------------------------------------------------
File: ex (3).txt
--------------------------------------------------
Encoding ascii text...
Original Size:     71768 bits
Compressed Size:   40092 bits
Compression Ratio: 1.7900828095380625
--------------------------------------------------
File: ex (4).txt
--------------------------------------------------
Encoding ascii text...
Original Size:     16416 bits
Compressed Size:   1377 bits
Compression Ratio: 11.92156862745098


> Exemplo:

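Para a entrada contida no arquivo `tests/ex (3).txt` (lido pela célula abaixo; a frase curta "A ASA ESTÁ EM CASA" não é o conteúdo desse arquivo):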

In [28]:
# Load the example text; the file is only needed for the read itself.
f = "tests/ex (3).txt"
with open(f, "r", encoding="utf-8") as file:
    s2 = file.read()

# Encode the text into integer codes, then pack them; compress also returns
# the number of bits used per code.
Codes2 = lzw_encoder.encode(s2)
bin_codes2, n_bits2 = compress(Codes2)

# Tabulate every distinct emitted code next to its entry in the encoder's
# dictionary.
unique_codes2 = list(set(Codes2))
d = {c: lzw_encoder.dictionary[c] for c in unique_codes2}
df2 = pd.DataFrame(list(d.items()), columns=["Code", "String"])

KeyboardInterrupt: 

In [None]:
# Raw encoder output: the integer codes and their packed binary form.
print(f"Lista de códigos em int:\n{Codes2}\n")
print(f"Lista de códigos em binário: (códigos de {n_bits2} bits)\n{bin_codes2}\n")

# Size comparison: the input is counted at 8 bits per character, and the
# length of bin_codes2 is taken as the compressed size in bits.
print(f"Tamanho da string: {len(s2)*8} bits ({len(s2)} bytes)")
print(f"Tamanho dos códigos em binário: {len(bin_codes2)} bits ({len(bin_codes2)/8} bytes)")
print(f"Tamanho do dicionário: {len(lzw_encoder.dictionary)} itens")
print(f"Taxa de compressão: {(len(s2)*8)/len(bin_codes2)}")

Lista de códigos em int:
[5898, 3610, 5901, 3613, 285, 5905, 3617, 292, 294, 5909, 3623, 302, 5913, 3627, 2903, 3640, 3631, 5919, 5720, 330, 5923, 5398, 7932, 5743, 347, 3644, 352, 478, 5931, 3649, 5934, 4189, 5937, 3657, 5940, 3660, 387, 5944, 1825, 3665, 7013, 5949, 5972, 5951, 409, 5953, 3673, 5956, 3828, 5958, 5850, 5960, 6012, 3683, 7035, 5965, 5445, 2187, 5968, 1569, 3714, 464, 7959, 469, 1982, 7963, 5977, 1559, 5979, 1877, 5982, 994, 1463, 3708, 5986, 3711, 7835, 459, 5970, 967, 5048, 7137, 899, 4898, 7096, 3722, 6000, 1504, 6116, 3846, 5007, 7475, 6098, 3732, 2891, 6011, 3737, 6014, 564, 6016, 2888, 6018, 4984, 640, 3747, 3115, 3749, 315, 6026, 6109, 7118, 3754, 4871, 6031, 856, 6033, 604, 6035, 3761, 1631, 3763, 6040, 650, 6043, 687, 6080, 6046, 3336, 3775, 4905, 6050, 8019, 673, 6053, 673, 6055, 3783, 649, 3766, 651, 3788, 3585, 1864, 6064, 661, 3873, 7683, 6069, 5706, 2814, 3800, 676, 6074, 680, 7506, 6497, 846, 6044, 6080, 3475, 6799, 7580, 3813, 6086, 1361, 6088, 6881, 381

In [None]:
# Display the Code/String table as this cell's output.
df2

Unnamed: 0,Code,String
0,10,\n
1,32,
2,37,%
3,40,(
4,41,)
...,...,...
1482,3543,tp
1483,3545,s:/
1484,3547,/w
1485,3560,/ww


In [None]:
# Recover the list of codes from the binary string, given the code width.
codes2 = decompress(bin_codes2, n_bits2)

print(f"Códigos:\n{codes2}\n")
# Decode the recovered codes back into text with the LZW decoder.
decoded_string2 = lzw_decoder.decode(codes2)
print(f"String decodificada:\n{decoded_string2}")

Códigos:
[85, 78, 73, 86, 69, 82, 83, 73, 68, 65, 68, 69, 32, 70, 69, 281, 82, 65, 76, 32, 281, 32, 77, 73, 78, 258, 32, 71, 275, 65, 73, 83, 10, 73, 110, 115, 116, 105, 116, 117, 116, 111, 32, 100, 101, 268, 105, 234, 110, 99, 105, 97, 115, 265, 120, 97, 116, 322, 10, 68, 101, 112, 97, 114, 327, 109, 101, 110, 311, 313, 315, 67, 317, 319, 321, 313, 97, 268, 111, 109, 112, 310, 97, 231, 227, 111, 10, 329, 67, 67, 50, 48, 55, 32, 45, 45, 257, 108, 103, 111, 114, 308, 109, 111, 323, 50, 10, 80, 114, 111, 102, 46, 32, 82, 337, 326, 312, 86, 105, 109, 105, 101, 105, 379, 357, 84, 114, 97, 98, 97, 108, 104, 312, 378, 225, 307, 99, 312, 49, 364, 366, 77, 97, 110, 105, 351, 108, 353, 355, 340, 32, 115, 101, 113, 117, 318, 320, 328, 10, 79, 98, 106, 101, 307, 118, 374, 357, 78, 101, 115, 422, 32, 116, 397, 399, 401, 312, 422, 114, 419, 398, 370, 100, 97, 100, 374, 32, 456, 322, 112, 101, 99, 311, 323, 112, 114, 405, 105, 407, 323, 314, 32, 109, 413, 415, 117, 417, 354, 312, 471, 422, 424, 426,