In [None]:
import random

def get_ngrams(n, text):
    words = text.split()
    
    for i in range(len(words)-(n - 1)):
        word = words[i + n-1]
        context = tuple(words[i:i + n-1])
        yield (word, context)
    return


class  NgramModel():
    def __init__(self, n):
        self.n = n
        self.ngram_counts = dict()
        self.context_counts = dict()
        self.vocabulary =[]
        
       
    def update(self, text):
        # this function will update the class/model internal counters
        self.text=text
        res = get_ngrams(self.n, text)
        no_words = len(text.split())
        # below 'n' is the number of SGML tags
        for _ in range(no_words):
            try:
                gen = next(res)
                word = gen[0]
                context = gen[1]
                if word not in self.vocabulary:
                    self.vocabulary.append(word)
                if (word,context) not in self.ngram_counts:
                    self.ngram_counts[(word, context)] = 1
                else:
                    self.ngram_counts[(word, context)] += 1
                if context not in self.context_counts:
                    self.context_counts[context] = 1
                else:
                    self.context_counts[context] += 1
            except StopIteration:
                break
        
    def get_vocab(self):
        return self.vocabulary
        
    def size_vocab(self):
        self.size_vocab=len(self.vocabulary)
    def prob(self, context, word):
        ngram = (word, context)
        if ngram not in self.context_counts and word in self.vocabulary:
            prob = 1 / len(self.vocabulary)
            return prob
        if word not in self.vocabulary:
            prob = 1 / (1+len(self.vocabulary))
            return prob
        else:
          prob = (self.ngram_counts[ngram] / self.context_counts[ngram[1]])
          return prob

    def len_text(self):
        self.len_text= len(self.text)
    
    def len_ngram(self):
        self.len_ngram=len(self.ngram_counts)
    def word_freq(self, word):
        if word not in self.vocabulary:
            self.word_freq=1/(1+self.size_vocab)
        else:
            self.word_freq=self.vocabulary[word]
    def ngram_freq(self, gram):
        if gram not in self.ngram_counts:
            self.ngram_freq=1/(1+self.size_vocab)
        else:  
            self.ngram_freq=self.ngram_counts[gram]
    def generate_text (self, context, minlength,maxlength):
        length=random.randint(minlength,maxlength)
        y=[]
        for i in context:
            y.append(i)
        if len(context)<self.n-1:
            gram=random.sample(list(self.ngram_counts.keys()), 1) 
            context=gram[0][1]
        else:
            context=context[-(self.n-1):]
       
        for i in range(length-(self.n-1)):
            m={}
            list1=[]
            for word in self.vocabulary:
                prob=self.prob(context,word)
                m[word]=prob
                
            for key,value in m.items():
                if(value == max(m.values())):
                    list1.append(key)
            gen_word=random.choice(list1)
            y.append(gen_word)
            a=(gen_word,)
            context=context+a
            context=context[-(self.n-1):]
        print(' '.join(y))
    def perplexity(self, test_text):
        res = get_ngrams(self.n, test_text)
        no_words = len(test_text.split())
        p=1
        for _ in range(no_words):
            try:
                gen = next(res)
                word = gen[0]
                context = gen[1]
                p=p*self.prob(context,word)
            except StopIteration:
                break
        length=no_words
        a=-(1/length)
        pp=p**(a)
        print (pp)
        
import requests
url = "https://storm.cis.fordham.edu/~yli/data/MyShakespeare.txt"
response=requests.get(url)
text=response.text
from string import punctuation
process_dicts={i:'' for i in punctuation}
#print(process_dicts)
punc_table = str.maketrans(process_dicts)
text= text.translate(punc_table)

In [None]:
a=NgramModel(3)
a.update(text)
#print(a.ngram_counts)

context=('our', 'business')
word='I'
print(a.prob(context,word))

context=('our','business')
minlength=maxlength=30
a.generate_text ( context, minlength,maxlength)

test_text='make you a sword for me'
a.perplexity(test_text)

0.0006153846153846154
our business particularise Your Tickled general conducted worse get fought agued quarterd advanced curse tremble remember much troth in disdain noon flayd Virgilia considering Tell Resolved Thither She wherein whereof
138.21937034197177


In [None]:
a=NgramModel(2)
a.update(text)
#print(a.ngram_counts)

context=('our', 'business')
word='I'
print(a.prob(context,word))

context=('our','business')
minlength=maxlength=30
a.generate_text ( context, minlength,maxlength)

test_text='make you a sword for me'
a.perplexity(test_text)

0.0006153846153846154
our business east countrymen serve impediment chain remain manchild dogs All Agrippa profess fit stand chain where valour prevaild any inventory Hecuba ladyship Till belly flour miles Opinion doom Thats Messenger
473.92665762299555


In [None]:
a=NgramModel(1)
a.update(text)
#print(a.ngram_counts)

context=('our', 'business')
word='I'
print(a.prob(context,word))

context=('our','business')
minlength=maxlength=30
a.generate_text ( context, minlength,maxlength)

test_text='make you a sword for me'
a.perplexity(test_text)

0.0006153846153846154
our business liking fill dined pricking noble and Brutus Those From forced Jupiter last hearts hearts Would name Your fine knee usury braggd oer they hither live covetous sword dozen spoons wonder
1624.9999999999993
