In [6]:
import nltk, re, random

from nltk.tokenize import word_tokenize
from collections import defaultdict, deque
from document1 import training_doc1
from document2 import training_doc2
from document3 import training_doc3


nltk.download('punkt')



class MarkovChain:
  def __init__(self):
    self.lookup_dict = defaultdict(list)
    self._seeded = False
    self.__seed_me()

  def __seed_me(self, rand_seed=None):
    if self._seeded is not True:
      try:
        if rand_seed is not None:
          random.seed(rand_seed)
        else:
          random.seed()
        self._seeded = True
      except NotImplementedError:
        self._seeded = False
    
  def add_document(self, str):
    preprocessed_list = self._preprocess(str)
    pairs = self.__generate_tuple_keys(preprocessed_list)
    for pair in pairs:
      self.lookup_dict[pair[0]].append(pair[1])
  
  def _preprocess(self, str):
    cleaned = re.sub(r'\W+', ' ', str).lower()
    tokenized = word_tokenize(cleaned)
    return tokenized

  def __generate_tuple_keys(self, data):
    if len(data) < 1:
      return

    for i in range(len(data) - 1):
      yield [ data[i], data[i + 1] ]
      
  def generate_text(self, max_length=50):
    context = deque()
    output = []
    if len(self.lookup_dict) > 0:
      self.__seed_me(rand_seed=len(self.lookup_dict))
      chain_head = [list(self.lookup_dict)[0]]
      context.extend(chain_head)
      
      while len(output) < (max_length - 1):
        next_choices = self.lookup_dict[context[-1]]
        if len(next_choices) > 0:
          next_word = random.choice(next_choices)
          context.append(next_word)
          output.append(context.popleft())
        else:
          break
      output.extend(list(context))
    return " ".join(output)

my_markov = MarkovChain()
print(training_doc1)
my_markov.add_document(training_doc1)
my_markov.add_document(training_doc2)
my_markov.add_document(training_doc3)
generated_text = my_markov.generate_text()
print(generated_text)

[nltk_data] Downloading package punkt to /home/codespace/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.



From fairest creatures we desire increase,
That thereby beauty’s rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed’st thy light’s flame with self-substantial fuel,
Making a famine where abundance lies,
Thyself thy foe, to thy sweet self too cruel:
Thou that art now the world’s fresh ornament,
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And, tender churl, mak’st waste in niggarding:
  Pity the world, or else this glutton be,
  To eat the world’s due, by the grave and thee.

from fairest creatures we desire increase that face should form another whose fresh ornament and thine this glutton be die but as the tillage of thine own deep sunken eyes were to eat the tillage of thy light s fresh repair if thou not to thy mother s glass and


Create a sonnet formatter


In [7]:
def format_sonnet(text):
  """Formats a string into a sonnet structure without a rhyme scheme.

  Args:
      text: The string to format.

  Returns:
      A string formatted as a sonnet with 14 lines, 
      without any specific rhyme scheme.
  """
  # Split the text into words
  words = text.split()

  # Define the sonnet line structure
  line_length = 14
  lines = []
  current_line = []

  # Loop through the words and add them to lines
  for word in words:
    if len(current_line) + len(word) <= line_length:
      current_line.append(word)
    else:  
      lines.append(" ".join(current_line))
      current_line = [word]

  # Add the last line (if necessary)
  if current_line:
    lines.append(" ".join(current_line))

  # Ensure we have 14 lines (pad with empty strings if needed)
  lines += [""] * (14 - len(lines))

  return "\n".join(lines)

# Example usage
text = "When I consider how my light is spent, Ere half my days in this dark world be past, And that within a fading globe I waste Breathless hours of precious time, misspent."
sonnet = format_sonnet(text)
print(sonnet)


When I consider how my light is spent, Ere half my
days in this dark world be past, And that
within a fading globe I waste
Breathless hours of precious time, misspent.












Generate with the sonnet formatter


In [8]:
my_markov2 = MarkovChain()
my_markov2.add_document(training_doc1)
my_markov2.add_document(training_doc2)
my_markov2.add_document(training_doc3)
generated_text = my_markov2.generate_text()
newSonnet = format_sonnet(generated_text)
print(newSonnet)

from fairest creatures we desire increase that face thou feel st thy
glass and she so thou
contracted to be die but if thou art old and see thy
foe to be to be a tattered weed of mine
shall besiege thy mother s field thy brow and
thriftless praise










Example output: 

> from fairest creatures we desire increase that face thou feel st thy
> glass and she so thou
> contracted to be die but if thou art old and see thy
> foe to be to be a tattered weed of mine
> shall besiege thy mother s field thy brow and
> thriftless praise










more sonnets

In [16]:
from sonnet.sonnets4_10 import *

my_markov.add_document(sonnet4)
my_markov.add_document(sonnet5)
my_markov.add_document(sonnet6)
my_markov.add_document(sonnet7)
my_markov.add_document(sonnet8)
my_markov.add_document(sonnet9)

generated_text = my_markov2.generate_text()
newSonnet = format_sonnet(generated_text)
print(newSonnet)

from fairest creatures we desire increase that thereby beauty lies
where abundance lies thyself thy blood warm when forty
winters shall sum my count and tell the world s
glass and only herald to eat the world s glass and she so
gazed on now the riper should by time









