##### Name: Usama Khalid
##### Email: i191236@nu.edu.pk

### Install SpaCy and Urdu model for SpaCy
#### This notebook uses the Urdu model for spaCy built by https://github.com/mirfan899

In [None]:
!pip install spacy==2.2.4
!pip install https://github.com/mirfan899/Urdu/raw/master/spacy/ur_model-0.0.0.tar.gz

### 1. Importing SpaCy and loading the urdu model

In [93]:
import re
import spacy
import random
from collections import Counter

# No explicit seed is given, so the generated poetry differs from run to run.
random.seed()
# Load the Urdu spaCy pipeline by its installed package name.
nlp = spacy.load("ur_model")
# Corpus files, read from the current working directory.
nazmain = ['ghalib.txt','faiz.txt','iqbal.txt']
# Matches a token that contains one or more line breaks (used as the verse boundary).
next_line_regex = re.compile(r'\n+')

### 2. Tokenize, Clean and Tag poetry
##### - All three poetry files are read.
##### - Special characters are removed.
##### - Start-of-sentence (<s\>) and end-of-sentence (<\/s>) tags are added at every line break.
##### - Verses are tokenized using SpaCy

In [110]:
# Build one flat token stream for the whole corpus. Every line break closes
# the current verse with '</s>' and opens the next one with '<s>'.
nazam_tokens = ['<s>']
for nazam in nazmain:
    # Read with an explicit encoding (the corpus is Urdu text) and close the
    # file deterministically instead of leaking the handle.
    with open(nazam, 'r', encoding='utf-8') as nazam_file:
        nazam_text = nazam_file.read()
    # Strip punctuation, diacritics, Latin letters and other special characters.
    nazam_cleaned = re.sub(r'[\u200c.ٕٕ۔"_ٰٰٰٰٰٰؑA-Za-z!()%٪‘\'،ُِِّٔٔٔٔؔٓٓٓٓ:َََ؟’*۔“ـٴ]','',nazam_text)
    for token in nlp(nazam_cleaned):
        if next_line_regex.search(token.text):
            nazam_tokens.extend(['</s>','<s>'])
            continue
        word = token.text.strip()
        # Whitespace-only tokens strip down to '' and would otherwise enter the
        # n-gram tables as an empty "word" (visible as double spaces in output).
        if word:
            nazam_tokens.append(word)
nazam_tokens.append('</s>')

nazam_tokens[:8]

['<s>', 'بازیچۂ', 'اطفال', 'ہے', 'دنیا', 'مرے', 'آگے', '</s>']

### 3. Ngram model generation function
##### A generalized ngram model generation function. It takes in a list of tokens and outputs a dictionary containing ngrams and their frequencies.

In [95]:
def Ngrams(tokens,n=1):
    """Count every run of ``n`` consecutive tokens.

    Args:
        tokens: Indexable sequence of string tokens.
        n: Size of the sliding window (1 = unigrams, 2 = bigrams, ...).

    Returns:
        A ``Counter`` mapping each n-gram, written as its tokens joined by a
        single space, to the number of times it occurs in ``tokens``.
    """
    window_starts = range(len(tokens) - n + 1)
    return Counter(
        ' '.join(tokens[start + offset] for offset in range(n))
        for start in window_starts
    )

##### Initialize Uni, Bi and Tri grams

In [None]:
# Frequency tables for window sizes 1, 2 and 3 over the full token stream.
unigrams, bigrams, trigrams = (Ngrams(nazam_tokens, size) for size in (1, 2, 3))

### 4. Define Ngram word generation functions.
##### These functions predict the next/previous word given an ngram model and a starting word. They randomly select and return a word from the top probable matches for an ngram model.

In [97]:
def unigram_model(unigram):
    """Return a random word from the 20 most frequent real words.

    The sentence markers ``<s>`` and ``</s>`` are filtered out by name rather
    than by assuming they occupy the two top ranks, and the choice is made
    among however many candidates exist, so small models work too.

    Args:
        unigram: ``Counter`` mapping single tokens to their frequencies.

    Returns:
        One word picked uniformly at random from the top candidates.

    Raises:
        IndexError: If the model contains no word other than the markers.
    """
    markers = ('<s>', '</s>')
    candidates = [word for word, _ in unigram.most_common() if word not in markers][:20]
    if not candidates:
        raise IndexError('unigram model contains no words to choose from')
    return random.choice(candidates)

# Also works for backward bigrams
def bigram_model(bigram,word='<s>',backward=False):
    """Pick a random neighbour of ``word`` from the most frequent bigrams.

    Args:
        bigram: ``Counter`` whose keys are two space-separated tokens.
        word: The known token of the pair.
        backward: When false, ``word`` is the first token and the second is
            predicted; when true, ``word`` is the second token and the first
            is predicted.

    Returns:
        A token chosen at random among the (at most 20) most frequent
        matching bigrams. Pairs whose predicted token is the boundary marker
        (``</s>`` forward, ``<s>`` backward) are never candidates; that
        marker is returned only when no candidate exists.
    """
    given_pos = 1 if backward else 0
    picked_pos = 1 - given_pos
    boundary = '<s>' if backward else '</s>'

    matches = Counter()
    for key in bigram:
        parts = key.split(' ')
        if parts[given_pos] != word or parts[picked_pos] == boundary:
            continue
        matches[key] = bigram[key]

    top_probable = matches.most_common(20)
    if not top_probable:
        return boundary

    chosen_key, _count = top_probable[random.randint(0, len(top_probable) - 1)]
    return chosen_key.split(' ')[picked_pos]

def trigram_model(trigram,words,default):
    """Extend ``words`` by one word predicted from its last two tokens.

    Args:
        trigram: ``Counter`` whose keys are three space-separated tokens.
        words: Text generated so far; only its last two space-separated
            tokens are used as context.
        default: Word appended when the model offers no continuation.

    Returns:
        ``words`` followed by a space and either a word chosen at random from
        the (at most 5) most frequent continuations, or ``default``.
    """
    context = words.split(' ')[-2:]
    matches = Counter()
    # With fewer than two context tokens nothing can match; fall through to
    # ``default`` instead of failing on the missing second token.
    if len(context) == 2:
        for key in trigram:
            parts = key.split(' ')
            # Neither sentence marker is a real word: '</s>' would end the
            # verse and '<s>' would leak a tag into the generated text.
            if (len(parts) >= 3 and parts[:2] == context
                    and parts[2] not in ('<s>', '</s>')):
                matches[parts[2]] = trigram[key]

    top_probable = matches.most_common(5)
    if not top_probable:
        return words + ' ' + default
    return words + ' ' + random.choice(top_probable)[0]


### 5. Generate the first word
##### First word can also be generated from a unigram model but here we used the bigram model. Top words that occur with start of sentence tag are picked randomly.

In [104]:
# The opening word is drawn from the words that follow the start-of-sentence
# tag; unigram_model(unigrams) can be used here instead.
first_word = bigram_model(bigrams, word='<s>', backward=False)
first_word

'پھر'

### 6. Generate Sonnet using Bigram (Task 1)
##### Here we take the previously generated first word and use the bigram model to predict the following words until the verse reaches its required length or the end-of-sentence tag (<\/s>) is encountered.
##### Bonus rhyme: We randomly rhyme a verse if end of sentence is prematurely encountered. To rhyme a verse we keep track of the last word generated in previous verses and append it to current verse.

In [122]:
# Bigram sonnet: 3 stanzas of 4 verses. The text starts with first_word and
# next_word always holds the context for the next prediction.
next_word = first_word
bi_sonnet = first_word
rhyme = ''
for _stanza in range(3):
    for _verse in range(4):
        budget = random.randint(3, 7)  # number of predictions for this verse
        while budget:
            budget -= 1
            next_word = bigram_model(bigrams, next_word)
            if next_word != '</s>':
                bi_sonnet = bi_sonnet + ' ' + next_word
                continue
            # The verse ended early: close it with the remembered rhyme word.
            if rhyme:
                bi_sonnet = bi_sonnet + ' ' + rhyme
            break
        bi_sonnet = bi_sonnet + '\n'
        # Remember the last real word of a verse that ran its full length.
        rhyme = rhyme if next_word == '</s>' else next_word
        # Context for the following verse; this word itself is not written out.
        next_word = bigram_model(bigrams, first_word)
    bi_sonnet = bi_sonnet + '\n\n'

print(bi_sonnet)

کہ دل نہ کرنے میں تو
 اسد کس درمیاں کیوں اندھیری ہے اسد
 لاش بے پناہ اسد
 دن گزارا اسد


 ہے یہ لاش بے نیازی
 تم کہتے ہو چکا میں
 ہم ملیں کہاں کچھ ایسی
 اے نو بہار نہ پوچھ


 خواب گران خسرو
 عشق بادۂ گلفام مشک بو ذر و
 گئی جیسے خوشبوئے زلف سے تو
 جائے کچھ خیال کہ جس





### 7. Generate Sonnet using Trigram (Task 2)
##### Initially generate two words using bigram and pass them to trigram to generate the third. This process is repeated as described for bigram generation.
##### Rhyme is performed similarly as before.

In [10]:
# Trigram sonnet: 3 stanzas of 4 verses. Each verse is opened with two words
# from the bigram model and then extended through the trigram model.
tri_sonnet = ''
for _stanza in range(3):
    # Every stanza starts after a blank gap and without a rhyme word.
    tri_sonnet, rhyme = tri_sonnet + '\n\n', ''
    for _verse in range(4):
        next_word = bigram_model(bigrams, first_word)
        second_word = bigram_model(bigrams, next_word)
        tri_sonnet += '\n ' + ' '.join((next_word, second_word))
        remaining = random.randint(3, 7)  # number of trigram predictions
        while remaining > 0:
            remaining -= 1
            tri_sonnet = trigram_model(trigrams, tri_sonnet, rhyme)
            # Stop as soon as the verse ends on the current rhyme word.
            if tri_sonnet.rsplit(' ', 1)[-1] == rhyme:
                break
        # Whatever follows the last space becomes the rhyme for the next verse.
        rhyme = tri_sonnet.rsplit(' ', 1)[-1]

print(tri_sonnet)






 مر جائیں سو وہ بھی نہ ہوا 
 غزل سرا نہ ہوا 
 باج خواہ تسکیں ہو 
 خستہ جاں کی ہے جس فلک پہ تقدیر ء


 کیا کریں فرض ہے ادائے نماز 
 گنبد بے در کھلا 
 تیرے زمان و مکاں تک 
 ہمیں راندۂ زمانہ کیا 


 ہمیں حاصل نہیں رہا 
 اور جیتے رہتے یہی انتظار ہوتا
 کیا برا تھا مرنا اگر ایک
 اور ماتم یک شہر آرزو ایک


### 8. Generate Sonnet using Backward Bigram (Task 3)
##### A similar approach to the bigram sonnet, but using the bigram model backwards. The last word is first generated from the bigrams that contain the end-of-sentence tag, and the whole sonnet is then generated backwards.
##### Rhyme logic is similar as in bigrams.

In [107]:
# Backward bigram sonnet: every verse is built from its last word towards its
# first, and finished verses are prepended so the text grows back to front.
back_bi_sonnet = ''
rhyme = ''
for stanza in range(3):
    for verse in range(4):
        # Choose the word this verse ends on. Once a rhyme word exists it is
        # reused about half of the time; otherwise a fresh verse-final word is
        # sampled from the words that precede '</s>'. The text built so far
        # ends with a separator, so its last space-separated piece is not a
        # word and cannot serve as the seed.
        if rhyme and random.randint(0,1):
            previous_word = rhyme
        else:
            previous_word = bigram_model(bigrams,'</s>',True)
        # '<s>' means the model had nothing to offer; never keep it as a rhyme.
        if previous_word != '<s>':
            rhyme = previous_word

        verse_words = []
        for i in range(random.randint(3,7)):
            # Reaching the start-of-sentence tag ends the verse early.
            if previous_word == '<s>':
                break
            verse_words.append(previous_word)
            previous_word = bigram_model(bigrams,previous_word,True)
        # Words were collected last-to-first, so restore reading order.
        back_bi_sonnet = '\n' + ' '.join(reversed(verse_words)) + back_bi_sonnet
    back_bi_sonnet = '\n\n' + back_bi_sonnet

print(back_bi_sonnet)




روئیں گے 
شیخ ہماری جو آ بسیں گے 
نے دیکھا جھکا دیتا 
بدقسمت ملک کو دیتا 


ہوا کیا بنی 
ہے یہ انداز بہ پا جاتی ہے 
صحرا مرے رازداں تیرا اگر 
عبادت برق خرمن جلے اگر 


برہنہ پائی درد کی گویا 
آتا ہے طوطئ آئینۂ دل افسردہ گویا 
وہ خدا کے مانند گویا 
کے پیتے تھے  


### 9. Generate Sonnet using Bidirectional Bigram (Task 4)
##### Initially generate a middle word using unigram and predict the right and left words recursively to build a verse.
##### Rhyme logic is similar as in bigrams.

In [109]:
# Bidirectional sonnet: one middle word is drawn from the unigram model and
# every verse grows outwards from it, to the left with the backward bigram
# model and to the right with the forward one.
middle_word = unigram_model(unigrams)
bi_dir_sonnet = ''
# Start without a rhyme word so this cell does not depend on a value left
# behind by an earlier cell.
rhyme = ''

for stanza in range(3):
    for verse in range(4):
        # phrases[0] is the text left of the middle word, phrases[1] the text
        # right of it.
        phrases=['','']

        previous_word = middle_word
        for i in range(random.randint(1,3)):
            previous_word = bigram_model(bigrams,previous_word,True)
            if previous_word == '<s>': break
            phrases[0] = previous_word +' '+phrases[0]

        next_word = middle_word
        for i in range(random.randint(1,3)):
            next_word = bigram_model(bigrams,next_word)
            if next_word == '</s>':
                # Append the rhyme to the right-hand phrase. Using `+=` on the
                # list itself would add one list element per character.
                if rhyme: phrases[1] += ' '+rhyme
                break
            phrases[1] += ' '+next_word
        bi_dir_sonnet += middle_word.join(phrases)+'\n'

        # Remember the last real word of a verse that did not end early.
        if next_word!='</s>':
            rhyme = next_word

    bi_dir_sonnet+='\n\n'

print(bi_dir_sonnet)

یار نے اپنا دل
چرایا زخم نے مجھ میں
میں نے  نوکر
اہل جہاں زخم نے کیا


ہونے نے مجھ سے
نیاز عشق نے زیست کا
ناگہاں اس نے نہ مانگ لیتے
دل نے داد


اور یاں خدا نے یہ کیا کیا
عشق نے وہ
طبیعت نے کہ کبھی فرصت
کہاں کہ ہم نے نقش



