In [7]:
import random
import pandas
import matplotlib.pyplot as plt

MIN_SIZE = 4
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
DATA_FILE = 'names.txt'

def read_names(file_name: str) -> list:
    """Read one name per line and wrap each in start/end markers.

    Every non-empty line becomes ``'^' + name + '$'``, where ``'^'`` marks
    the start of a name and ``'$'`` marks its end.

    Args:
        file_name: Path of the text file holding one name per line.

    Returns:
        A list of delimited names in file order.
    """
    names = []
    with open(file_name, 'r') as file:
        for line in file:
            name = line.rstrip('\n')
            # Skip blank lines: an empty name would produce the bigram '^$',
            # i.e. a name that ends before it has any character.
            if not name:
                continue
            # Append the terminator explicitly so the last line of the file
            # is terminated even when it has no trailing newline.
            names.append('^' + name + '$')
    return names

class BigramLanguageModel:
    """Character-level bigram model trained on '^'/'$'-delimited names.

    Attributes are plain dictionaries:
        count: maps each two-character bigram to its number of occurrences.
        context: maps a character to the list of every character observed
            directly after it (duplicates kept, so a uniform random pick
            from the list follows the observed frequencies).
        char_number: total number of characters seen, excluding the two
            delimiter characters of each name.
    """

    # Upper bound on restarts in generate_name, so that training data which
    # cannot produce a long enough name raises instead of looping forever.
    MAX_GENERATION_ATTEMPTS = 1000

    def __init__(self):
        self.count: dict = {}
        self.context: dict = {}
        self.char_number = 0

    def update(self, name: str) -> None:
        """Add the bigrams of one delimited name to the model.

        Args:
            name: A name already wrapped as ``'^' + name + '$'``.
        """
        bigrams = self.all_bigrams(name)
        # The leading '^' and trailing '$' are not counted as characters.
        self.char_number += len(name) - 2

        for bigram in bigrams:
            self.count[bigram] = self.count.get(bigram, 0) + 1
            self.context.setdefault(bigram[0], []).append(bigram[1])

    def all_bigrams(self, word: str) -> list:
        """Return every pair of adjacent characters of ``word`` in order."""
        return [word[i:i+2] for i in range(len(word)-1)]

    def getting_next(self, context: str) -> str:
        """Pick a random character among those observed after ``context``.

        Raises:
            KeyError: If ``context`` was never seen as a first character.
        """
        variants = self.context[context]
        return random.choice(variants)

    def generate_name(self, min_size=None) -> str:
        """Generate a random name of at least ``min_size`` characters.

        Characters are drawn one at a time from the followers of the previous
        character, starting from the '^' marker. The name ends when the '$'
        marker is drawn and the name is long enough.

        Args:
            min_size: Minimum length of the result; defaults to the
                module-level ``MIN_SIZE``.

        Returns:
            The generated name, without delimiter characters.

        Raises:
            KeyError: If the model has not been trained (no '^' context).
            RuntimeError: If no long enough name could be produced within
                ``MAX_GENERATION_ATTEMPTS`` attempts.
        """
        if min_size is None:
            min_size = MIN_SIZE

        for _ in range(self.MAX_GENERATION_ATTEMPTS):
            word = ''
            context = '^'
            while True:
                next_char = self.getting_next(context)
                if next_char == '$':
                    if len(word) >= min_size:
                        # '$' is never appended, so the whole word is the name.
                        return word
                    # Too short to stop: draw among the non-terminating
                    # followers instead (same distribution as re-drawing
                    # until the result is not '$').
                    candidates = [c for c in self.context[context] if c != '$']
                    if not candidates:
                        # Dead end: this character only ever ends a name.
                        break
                    next_char = random.choice(candidates)
                word += next_char
                context = next_char

        raise RuntimeError(
            'could not generate a name of at least %d characters' % min_size
        )

    def get_probability(self, char: str) -> dict:
        """Count which characters follow ``char`` in the training data.

        Despite the name, the values are raw occurrence counts, not
        normalised probabilities. The end marker '$' is excluded.

        Args:
            char: The preceding character.

        Returns:
            A dict mapping each following character to its count; empty when
            ``char`` never appeared in the training data.
        """
        data = {}
        for follower in self.context.get(char, []):
            if follower != '$':
                data[follower] = data.get(follower, 0) + 1
        return data

    def get_all_probabilities(self, alphabet=None) -> dict:
        """Collect follower counts for every character of the alphabet.

        Args:
            alphabet: Iterable of characters to report on; defaults to the
                module-level ``ALPHABET``.

        Returns:
            A dict mapping each alphabet character to a list of
            ``(follower, count)`` tuples sorted by follower. Every alphabet
            character is present in each list (count 0 when never observed);
            observed followers outside the alphabet are kept as well.
        """
        if alphabet is None:
            alphabet = ALPHABET

        result = {}
        for char in alphabet:
            data = self.get_probability(char)
            for letter in alphabet:
                data.setdefault(letter, 0)
            # Keys are unique, so sorting the pairs orders them by follower.
            result[char] = sorted(data.items())
        return result

def create_language_model(data: list) -> BigramLanguageModel:
    """Build a bigram model by feeding it every entry of ``data`` in order.

    Args:
        data: Names to train on; each one is passed to
            ``BigramLanguageModel.update`` unchanged.

    Returns:
        The populated ``BigramLanguageModel`` instance.
    """
    language_model = BigramLanguageModel()
    for entry in data:
        language_model.update(entry)
    return language_model

def table():
    """Print the follower counts of the global ``model`` as a table.

    One column per preceding character; each cell is a
    ``(follower, count)`` tuple as produced by
    ``model.get_all_probabilities()``.

    Returns:
        The ``pandas.DataFrame`` that was printed.
    """
    frame = pandas.DataFrame(model.get_all_probabilities())
    # A DataFrame that is merely constructed inside a function is never
    # displayed, so print it explicitly; to_string() shows every column
    # instead of the abbreviated default representation.
    print(frame.to_string())
    return frame

def graph():
    """Show one bar chart per alphabet letter with its follower counts.

    Uses the global ``model``. Each chart plots, for the letter in its
    title, how often each character follows it in the training data.
    Letters with no follower data are skipped.
    """
    for letter in ALPHABET:
        try:
            counts = model.get_probability(letter)
        except KeyError:
            # The letter never occurs in the training data.
            continue
        if not counts:
            # Nothing to plot for this letter.
            continue

        followers = list(counts.keys())
        occurrences = list(counts.values())
        plt.figure(figsize=(10, 5))
        plt.bar(followers, occurrences, color='black', width=0.5)

        plt.ylabel("Occurrence in file")
        plt.title(letter)
        plt.show()

# Train the model once on the names file, then serve an interactive menu.
data = read_names(DATA_FILE)
model = create_language_model(data)
name = model.generate_name()

while True:
    print("\n[1] Enter 1 to generate new Name.")
    print("[2] Enter 2 to get all bigram probabilities in table.")
    print("[3] Enter 3 to get bigram probabilities in picture, graph.")
    print("[q] Enter q to quit.")
    # Strip surrounding whitespace so that e.g. "1 " is still recognised.
    select = input('Please enter a value: \n').strip()
    if select == '1':
        # Generate a name
        name = model.generate_name()
        print("Generated name:", name)
    elif select == '2':
        # Visualize bigram counts as a table
        table()
    elif select == '3':
        # Visualize bigram counts as bar charts
        graph()
    elif select == 'q':
        break
    else:
        # Tell the user instead of silently showing the menu again.
        print("Unknown option:", repr(select))




[1] Enter 1 to generate new Name.
[2] Enter 2 to get all bigram probabilities in table.
[3] Enter 3 to get bigram probabilities in picture, graph.
[q] Enter q to quit.
Generated name: aydarcheluns

[1] Enter 1 to generate new Name.
[2] Enter 2 to get all bigram probabilities in table.
[3] Enter 3 to get bigram probabilities in picture, graph.
[q] Enter q to quit.


In [6]:
# Display the bigram table directly: choosing option 2 in the menu loop did not
# show anything, whereas a DataFrame left as the last expression of a notebook
# cell is rendered as that cell's output.
pandas.DataFrame(model.get_all_probabilities())

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,q,r,s,t,u,v,w,x,y,z
0,"(a, 556)","(a, 321)","(a, 815)","(a, 1303)","(a, 679)","(a, 242)","(a, 330)","(a, 2244)","(a, 2445)","(a, 1473)",...,"(a, 13)","(a, 2356)","(a, 1201)","(a, 1027)","(a, 163)","(a, 642)","(a, 280)","(a, 103)","(a, 2143)","(a, 860)"
1,"(b, 541)","(b, 38)","(b, 0)","(b, 1)","(b, 121)","(b, 0)","(b, 3)","(b, 8)","(b, 110)","(b, 1)",...,"(b, 0)","(b, 41)","(b, 21)","(b, 1)","(b, 103)","(b, 1)","(b, 1)","(b, 1)","(b, 27)","(b, 4)"
2,"(c, 470)","(c, 1)","(c, 42)","(c, 3)","(c, 153)","(c, 0)","(c, 0)","(c, 2)","(c, 509)","(c, 4)",...,"(c, 0)","(c, 99)","(c, 60)","(c, 17)","(c, 103)","(c, 0)","(c, 0)","(c, 4)","(c, 115)","(c, 2)"
3,"(d, 1042)","(d, 65)","(d, 1)","(d, 149)","(d, 384)","(d, 0)","(d, 19)","(d, 24)","(d, 440)","(d, 4)",...,"(d, 0)","(d, 187)","(d, 9)","(d, 0)","(d, 136)","(d, 1)","(d, 8)","(d, 5)","(d, 272)","(d, 2)"
4,"(e, 692)","(e, 655)","(e, 551)","(e, 1283)","(e, 1271)","(e, 123)","(e, 334)","(e, 674)","(e, 1653)","(e, 440)",...,"(e, 1)","(e, 1697)","(e, 884)","(e, 716)","(e, 169)","(e, 568)","(e, 149)","(e, 36)","(e, 301)","(e, 373)"
5,"(f, 134)","(f, 0)","(f, 0)","(f, 5)","(f, 82)","(f, 44)","(f, 1)","(f, 2)","(f, 101)","(f, 0)",...,"(f, 0)","(f, 9)","(f, 2)","(f, 2)","(f, 19)","(f, 0)","(f, 2)","(f, 3)","(f, 12)","(f, 0)"
6,"(g, 168)","(g, 0)","(g, 2)","(g, 25)","(g, 125)","(g, 1)","(g, 25)","(g, 2)","(g, 428)","(g, 0)",...,"(g, 0)","(g, 76)","(g, 2)","(g, 2)","(g, 47)","(g, 0)","(g, 1)","(g, 0)","(g, 30)","(g, 1)"
7,"(h, 2332)","(h, 41)","(h, 664)","(h, 118)","(h, 152)","(h, 1)","(h, 360)","(h, 1)","(h, 95)","(h, 45)",...,"(h, 0)","(h, 121)","(h, 1285)","(h, 647)","(h, 58)","(h, 1)","(h, 23)","(h, 1)","(h, 22)","(h, 43)"
8,"(i, 1650)","(i, 217)","(i, 271)","(i, 674)","(i, 818)","(i, 160)","(i, 190)","(i, 729)","(i, 82)","(i, 119)",...,"(i, 13)","(i, 3033)","(i, 684)","(i, 532)","(i, 121)","(i, 911)","(i, 148)","(i, 102)","(i, 192)","(i, 364)"
9,"(j, 175)","(j, 1)","(j, 3)","(j, 9)","(j, 55)","(j, 0)","(j, 3)","(j, 9)","(j, 76)","(j, 2)",...,"(j, 0)","(j, 25)","(j, 2)","(j, 3)","(j, 14)","(j, 0)","(j, 0)","(j, 0)","(j, 23)","(j, 2)"
