<img src="https://drive.google.com/uc?export=view&amp;id=1zSJwAUxWv5bxyYLmYPNi-s6M_Wq5iWXh">

## Importing tools

In [2]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [3]:
story_path = "C:/Users/Windula Dissanayake/Documents/NLP Project/sherlock-homes-nlp-app/sherlock/"

def read_all_stories(story_path):
    txt = []
    for _, _, files in os.walk(story_path):
        for file in files:
            with open(story_path+file) as f:
                for line in f:
                    line = line.strip()
                    if line=='----------': break
                    if line!='':txt.append(line)
    return txt
        
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

number of lines =  430042


## Cleaning the text

In [4]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt+=words
    return cleaned_txt

cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  4664220


## Creating the Markov Model

In [6]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [7]:
markov_model = make_markov_model(cleaned_stories)

In [8]:
print("number of states = ", len(markov_model.keys()))

number of states =  208670


In [9]:
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'your letter': 0.02702702702702703, 'was up': 0.09009009009009009, 'is afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'was in': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.027027027027027

## Generating Sherlock Holmes stories!

In [10]:
def generate_story(markov_model, limit=100, start='my god'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story+=curr_state+" "
    while n<limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story+=curr_state+" "
        n+=1
    return story

In [11]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="dear holmes", limit=8))

0.  dear holmes if i thought you would say is a man you will realize that among your extensive 
1.  dear holmes i ejaculated commonplace said holmes though how you get on your theories i remarked that of 
2.  dear holmes i ejaculated well really this is recent quite recent see how the details of this inconceivable 
3.  dear holmes i exclaimed it is difficult to place him you wear a short walk of a couple 
4.  dear holmes that i was in the loft at the little railway arms and fondled it most tenderly 
5.  dear holmes am i he gazed from sir henry to stay with you until your reason breaks down 
6.  dear holmes i fear lest i bias my judgment i really had the effect could be more hopelessly 
7.  dear holmes am i then you do me a good hundred miles off by foreman blaker its been 
8.  dear holmes what do they care for my own sake said he but my own affairs needed a 
9.  dear holmes you are their employer is as you say is true did barrymore profit at all by 
10.  dear holmes what do you make of

In [12]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="my dear", limit=8))

0.  my dear watson said holmes when the elm must mean the farther end of the coffin shall remain 
1.  my dear holmes i called about that beggarman boone the one who was charged with being concerned in 
2.  my dear watson theres genius in that for some few halting words of congratulation and then burst out 
3.  my dear watson but this is a man he cursed and slashed at everyone who came sketching on 
4.  my dear fellow i congratulate you upon the rug and looking keenly at her in open lodge god 
5.  my dear fellow it is nearly midnight watson and i saw at once from the reverential way in 
6.  my dear young lady was shown straight into the study with my visit to the north of detroit 
7.  my dear mr grant munro to the door had opened and a look of fear upon her face 
8.  my dear fellow said he holmes with all our strength for the inspector it was torn out of 
9.  my dear fellow you exaggerate i have some cold supper had been brought to the notice of the 
10.  my dear watson that i shall mos

In [13]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="i would", limit=8))

0.  i would not wish us to very fashionable epistle i remarked as he sat up with her the 
1.  i would have spared you when you were at the brambletye hotel on the neck neal outfitter vermissa 
2.  i would draw your chair up to the north we thought that it must have been it ended 
3.  i would have shown him that his lad should step into the house you are certainly a model 
4.  i would do nothing with him since his shirt and socks were lying on his face to the 
5.  i would retire to my he always apologized to me it was stuffed with pennies and it was 
6.  i would have you been able to gather the details i warn you in touch with some of 
7.  i would do nothing more unless i have not promised to marry a better view talking all the 
8.  i would ask you all to be gathered from a town hospital to a of the pilgrims women 
9.  i would not have been the victim of some absurd practical joke nothing more to be of a 
10.  i would suggest that mr cubitts body may now be too late to overtake the majo

In [14]:
print(generate_story(markov_model, start="the case", limit=100))

the case as far as i stepped across had lit his pipe for example the and then appeared again coming slowly in our direction holmes gave an enigmatic note slipped into our modest good morning mr holmes i had not the nerve to go he had ceased to enter into long and dark a passion may be known to all about him you know not whether for good men are is a pretty one one of my men has gone and the whole front was draped in ivy with a small window between us unfortunately madam i had a letter arrived for my father i shall go down to the lady who introduced herself as miss stapleton is in reality i was still more miserable ways of our old lovers are the one fixed point in that direction which we can only mean said i that the royal munsters which is the man who said them at last our intimacy turned to lead in my breast or the wife of sir eustace brackenstall i have been a comic figure if he had tossed it across with a laugh passed his handkerchief over his brow clouded however as he sat down opp