In [1]:
# Read in the corpus, including punctuation!
import pandas as pd

data = pd.read_pickle('corpus.pkl')
data

Unnamed: 0,transcripts
John,Title: The Gospel of John for Readers Editor: ...
Luke,Title: The Gospel of Luke for Readers Editor: ...
Mark,Title: The Gospel of Mark for Readers Editor: ...
Matthew,Title: The Gospel of Matthew for Readers Edito...


In [2]:
# Extract only John's gospel
john_text = data.transcripts.loc['John']
john_text[:200]

'Title: The Gospel of John for Readers Editor: Lightheart Release date: May 1, 2004 [eBook #5656]\r\n                Most recently updated: August 21, 2012 Language: English Copyright (C) 2002 by Lighthe'

In [3]:
from collections import defaultdict

def markov_chain(text):
    '''The input is a string of text and the output will be a dictionary with each word as
       a key and each value as the list of words that come after the key in the text.'''
    
    # Tokenize the text by word, though including punctuation
    words = text.split(' ')
    
    # Initialize a default dictionary to hold all of the words and next words
    m_dict = defaultdict(list)
    
    # Create a zipped list of all of the word pairs and put them in word: list of next words format
    for current_word, next_word in zip(words[0:-1], words[1:]):
        m_dict[current_word].append(next_word)

    # Convert the default dict back into a dictionary
    m_dict = dict(m_dict)
    return m_dict

In [4]:
# Create the dictionary for John's gospel, take a look at it
john_dict = markov_chain(john_text)
john_dict

{'Title:': ['The'],
 'The': ['Gospel',
  'Gospel',
  'Practice',
  'way',
  'gospel',
  'Gospel',
  'Gospel',
  'same',
  'light',
  'only',
  'next',
  'two',
  'passover',
  'zeal',
  'same',
  'Father',
  'Jews',
  'woman',
  'water',
  'woman',
  'woman',
  'woman',
  'woman',
  'woman',
  'man',
  'impotent\r\nman',
  'Jews',
  'Son',
  'hour',
  'Father\r\nHimself',
  'passover,\r\na',
  'day',
  'Jews',
  'Jews',
  'living',
  'flesh\r\nprofiteth',
  'words',
  'world',
  'Jews',
  'people',
  'Pharisees',
  'Holy',
  'officers',
  'Pharisees',
  'Father',
  'same',
  'servant',
  'Jews',
  'night',
  'neighbours',
  'man',
  'sheep',
  'good',
  'hireling',
  'Father',
  'Jews',
  'Master',
  'chief',
  'house',
  'chief',
  'people',
  'Pharisees',
  'people',
  'people',
  'Son',
  'devil',
  'servant',
  'words',
  'word',
  'Comforter,',
  'servant',
  'servant',
  'world',
  "servant's",
  'cup\r\nwhich',
  'high',
  'Jews',
  'soldiers\r\nplatted',
  'Jews',
  'chief',
  

In [5]:
import random

def generate_sentence(chain, count=15):
    '''Input a dictionary in the format of key = current word, value = list of next words
       along with the number of words you would like to see in your generated sentence.'''

    # Capitalize the first word
    word1 = random.choice(list(chain.keys()))
    sentence = word1.capitalize()

    # Generate the second word from the value list. Set the new word as the first word. Repeat.
    for i in range(count-1):
        word2 = random.choice(chain[word1])
        word1 = word2
        sentence += ' ' + word2

    # End it with a period
    sentence += '.'
    return(sentence)

In [7]:
generate_sentence(john_dict)

'Miracles, but\r\nbecause ye shall have said unto him from the cross. The servant is flesh.'

In [8]:
matthew_text = data.transcripts.loc['Matthew']
# Create the dictionary for Matthew's gospel, take a look at it
matthew_dict = markov_chain(matthew_text)
matthew_dict

{'Title:': ['The'],
 'The': ['Gospel',
  'Gospel',
  'answer',
  'Gospel',
  'first',
  'wise',
  'angel',
  'voice',
  'same',
  'axe',
  'devil',
  'light',
  'centurion',
  'foxes',
  'disciples',
  'devils',
  'herders',
  'day',
  'woman',
  'maid',
  'multitude',
  'disciple',
  'very',
  'blind',
  'lame',
  'lepers',
  'deaf',
  'man',
  'men',
  'queen',
  'disciples',
  'understanding',
  'kingdom',
  'householder\r\nanswered,',
  'servants',
  'kingdom',
  'kingdom',
  'good',
  'enemy',
  'harvest',
  'reapers',
  'Son',
  'good',
  'angels',
  'people\r\nwere',
  'multitude',
  'disciples',
  'gates',
  'Son',
  'disciples',
  'Son',
  'disciples',
  'Son',
  'kingdom',
  'servant',
  'fellowservant',
  'Pharisees',
  'two',
  'young',
  'young',
  'mother',
  'Lord',
  'disciples',
  'multitude',
  'blind',
  'fig',
  'things',
  'chief',
  'son',
  'second',
  'first',
  'publicans',
  'husbandsmen',
  'stone',
  'kingdom',
  "king's\r\noxen",
  'wedding',
  'king',
  'm

In [9]:
generate_sentence(matthew_dict)

'Next morning all His mother and the blind, dumb, maimed, and sells\r\nall that the way.'

In [10]:
mark_text = data.transcripts.loc['Mark']
# Create the dictionary for Matthew's gospel, take a look at it
mark_dict = markov_chain(mark_text)
mark_dict

{'Title:': ['The'],
 'The': ['Gospel',
  'beginning',
  'voice',
  'spirit',
  'time',
  'kingdom',
  'people',
  'next',
  'people',
  'disciples',
  'Pharisees',
  'sabbath',
  'Pharisees',
  'sons',
  'multitude',
  'fowls',
  'thorns',
  'sower',
  'wind',
  'man\r\nhad',
  'man\r\ncried',
  'man',
  'unclean',
  'herd',
  'swine',
  'people',
  'people',
  'woman,',
  'disciples',
  'daughter',
  'king\r\nsaid',
  'head',
  'king',
  'king',
  'apostles',
  'devil',
  'number',
  'disciples\r\nreasoned',
  'man',
  'man',
  'boy',
  'Son',
  'Pharisees',
  'two',
  'people',
  'young',
  'disciples\r\nwere',
  'disciples',
  'blind',
  'next',
  'disciples',
  'scribes',
  'man',
  'husbandmen',
  'man',
  'stone',
  'chief',
  'second',
  'third',
  'seven',
  'first',
  'second',
  'scribe',
  'Lord',
  'common',
  'gospel',
  'feast',
  'chief',
  'two',
  'spirit',
  'third',
  'Son',
  'one',
  'chief',
  'chief',
  'multitude',
  'soldiers\r\nled',
  'superscription',
  'Kin

In [11]:
generate_sentence(mark_dict)

'Scourged, he arose. After Jesus because it I? Is this thing. The man\r\ncried with you.'

In [12]:
# ok that one starts off ok but then goes off the rails
luke_text = data.transcripts.loc['Luke']
# Create the dictionary for Matthew's gospel, take a look at it
luke_dict = markov_chain(luke_text)
luke_dict

{'Title:': ['The'],
 'The': ['Gospel',
  'multitude',
  'angel',
  'people',
  'angel',
  'Lord',
  'Lord',
  'Holy',
  'child',
  'shepherds',
  'shepherds\r\nreturned,',
  'Holy',
  'word',
  'voice',
  'people',
  'publicans',
  'soldiers,',
  'devil',
  'devil,',
  'devil',
  'devil',
  'Spirit',
  'eyes',
  'people\r\nwere',
  'people\r\nlooked',
  'fishermen',
  'scribes',
  'scribes',
  'days\r\nwill',
  'old',
  'Son',
  'scribes',
  'man',
  'whole',
  'disciple',
  'next',
  'young',
  'Lord',
  'Son',
  'seed',
  'seed',
  'seed',
  'ship',
  'storm',
  'devils',
  'man',
  'Christ',
  'Son',
  'next',
  'Son',
  'Son',
  'seventy',
  'lawyer',
  'lawyer',
  'door',
  'crowd',
  'queen',
  'men',
  'light',
  'wisdom',
  'blood',
  'Holy',
  'ground',
  'Lord',
  'father',
  'ruler',
  'Lord',
  'kingdom',
  'host',
  'first',
  'servant',
  'servant',
  'lord',
  'younger',
  'father',
  'son',
  'elder',
  'servant',
  'elder',
  'father',
  'master',
  'steward',
  'first

In [14]:
generate_sentence(luke_dict)

'Espoused wife, Elisabeth, conceived, and he asks\r\nfor an account of the body, they were amazed.'

In [None]:
#considering we're basing this off of KJV this isn't that bad