# Quantifying Shakespeare 
Analyzing the frequencies of different letters in Shakespeare's works.

1. Read all of Shakespeare's works into a list of strings 
2. Count the frequencies of letters used 
3. Visualize the frequencies with a bar graph 


In [1]:
from io import TextIOWrapper
def read_lines(filename: str) -> list[str]:
    """Read a text file into a list of normalized lines.

    Each line is stripped of leading and trailing whitespace (including the
    line terminator) and converted to lower case, e.g. "  A b \n" -> "a b".

    Args:
        filename: Path of the text file to read.

    Returns:
        One string per line of the file, in file order. Blank lines are kept
        as empty strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if reading fails part-way,
    # which a manual open()/close() pair does not guarantee.
    with open(filename, "r") as file_handle:
        return [line.strip().lower() for line in file_handle]

# Load the corpus once; read_lines returns every line stripped and lower-cased.
shakespeare_lines: list[str] = read_lines("../data/t8.shakespeare.txt")
# Report how many lines were read.
print(len(shakespeare_lines))


124456


In [2]:
def tally(counts: dict[str, int], key: str) -> dict[str, int]:
    """Mutate counts by increasing the value stored at key by 1.

    A key that is not yet present is added with a count of 1.

    Args:
        counts: Mapping from key to occurrence count; modified in place.
        key: The key whose count should be incremented.

    Returns:
        The same ``counts`` dictionary that was passed in, after the update.
    """
    # get() supplies 0 for an unseen key, so one statement covers both cases.
    counts[key] = counts.get(key, 0) + 1
    return counts

# Quick check: "c" is not a key yet, so tally adds it with a count of 1.
tally({"a": 1, "b": 2}, "c")

{'a': 1, 'b': 2, 'c': 1}

In [3]:
# Tally the same key three times, printing after each call, to show that
# tally mutates the dictionary it is given.
d: dict = {}
for _round in range(3):
    tally(d, "a")
    print(d)

{'a': 1}
{'a': 2}
{'a': 3}


In [6]:
def count_letters(lines: list[str]) -> dict[str, int]:
    """Count how often each character occurs across all of the given lines.

    Every character of every line is tallied, not only alphabetic ones.
    Keys appear in the order in which each character is first encountered.
    """
    frequencies: dict[str, int] = {}
    # Flatten the lines into one stream of characters, tallying each in turn.
    for symbol in (ch for text in lines for ch in text):
        tally(frequencies, symbol)
    return frequencies

# Count every character once and keep the result for the cells below.
shakes_letters = count_letters(shakespeare_lines)

# Print the stored tally rather than scanning the whole corpus a second time.
print(shakes_letters)


{'t': 329775, 'h': 236868, 'i': 253990, 's': 248989, ' ': 823058, 'e': 447204, '1': 928, '0': 299, 'x': 5294, 'f': 80516, 'l': 170019, 'p': 58464, 'r': 237864, 'n': 243262, 'd': 149462, 'b': 61956, 'y': 94370, 'o': 314600, 'j': 4779, 'c': 88185, 'g': 68199, 'u': 128947, ',': 83174, 'a': 289150, 'w': 89390, '.': 78025, 'm': 111452, 'k': 35408, '!': 8844, '*': 63, '<': 468, 'v': 37569, '9': 948, '-': 8074, '3': 330, '(': 628, ')': 629, '2': 366, '>': 441, '7': 41, '4': 93, '[': 2085, '#': 1, ']': 2077, 'z': 1631, '8': 40, '@': 8, ':': 1827, '=': 1, '%': 1, '"': 470, '/': 5, '6': 63, '5': 82, "'": 31069, ';': 17199, '~': 1, '_': 71, 'q': 3582, '?': 10476, '|': 33, '&': 21, '`': 1, '}': 2}


# Sorting Dictionaries 

In [7]:
# items() yields (character, count) pairs; unpack them into a list
list_version_shakes: list[tuple[str,int]] = [*shakes_letters.items()]
print(list_version_shakes)

[('t', 329775), ('h', 236868), ('i', 253990), ('s', 248989), (' ', 823058), ('e', 447204), ('1', 928), ('0', 299), ('x', 5294), ('f', 80516), ('l', 170019), ('p', 58464), ('r', 237864), ('n', 243262), ('d', 149462), ('b', 61956), ('y', 94370), ('o', 314600), ('j', 4779), ('c', 88185), ('g', 68199), ('u', 128947), (',', 83174), ('a', 289150), ('w', 89390), ('.', 78025), ('m', 111452), ('k', 35408), ('!', 8844), ('*', 63), ('<', 468), ('v', 37569), ('9', 948), ('-', 8074), ('3', 330), ('(', 628), (')', 629), ('2', 366), ('>', 441), ('7', 41), ('4', 93), ('[', 2085), ('#', 1), (']', 2077), ('z', 1631), ('8', 40), ('@', 8), (':', 1827), ('=', 1), ('%', 1), ('"', 470), ('/', 5), ('6', 63), ('5', 82), ("'", 31069), (';', 17199), ('~', 1), ('_', 71), ('q', 3582), ('?', 10476), ('|', 33), ('&', 21), ('`', 1), ('}', 2)]
