In [154]:
import sys
import markovify
import spacy
import random


In [155]:
# Input corpus read by load_text_from_file() further down in this cell.
test_corpus_path = 'corpus/treasure.txt' 
# NOTE(review): not referenced by any code visible in this notebook (main() writes to 'output.txt') -- confirm whether it is still needed.
test_output_file_path = 'output_poem.txt'
#test_num_stanzas = 5  
# Load the spaCy model for English; the functions in later cells use this module-level `nlp`.
nlp = spacy.load("en_core_web_sm")
def load_text_from_file(file_path):
    """Return the entire contents of a UTF-8 text file as one string.

    Args:
        file_path: Path of the file to read.

    Returns:
        The text stored in the file.

    If the file does not exist, an error message is printed and
    ``sys.exit(1)`` is called (which raises ``SystemExit``).
    """
    try:
        handle = open(file_path, 'r', encoding='utf-8')
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
        sys.exit(1)

    # The context manager closes the handle once the text has been read.
    with handle:
        contents = handle.read()
    return contents

# Read the whole corpus into memory; load_text_from_file() prints an error and
# calls sys.exit(1) if the file is missing.
corpus_text = load_text_from_file(test_corpus_path)

# Initialize the Markovify model with the loaded corpus text
text_model = markovify.Text(corpus_text)


In [156]:
def generate_poem_lines(text_model, stanza_length=5, long_line_length=20, short_line_length=10,
                        max_attempts=1000):
    """Build one stanza of short lines with a single long line at a random position.

    Candidate lines are drawn from ``text_model.make_sentence`` and accepted or
    rejected by their spaCy token count (``len(nlp(line))``; note that spaCy
    tokens include punctuation, so this is not a pure word count).

    Args:
        text_model: Object exposing ``make_sentence(tries=...)`` that returns a
            string or a falsy value on failure.
        stanza_length: Number of lines to produce. Values <= 0 yield ``[]``.
        long_line_length: Minimum token count for the one long line.
        short_line_length: Maximum token count for every other line.
        max_attempts: Upper bound on candidates drawn per stanza line before
            giving up. Prevents an endless loop when the corpus cannot
            produce a line of the requested length.

    Returns:
        A list of ``stanza_length`` generated lines.

    Raises:
        RuntimeError: If no acceptable candidate is found for some line
            within ``max_attempts`` draws.
    """
    if stanza_length <= 0:
        # random.randint(0, -1) would raise; an empty stanza is the sensible answer.
        return []

    poem_lines = []
    long_line_index = random.randint(0, stanza_length - 1)  # Randomly choose the long line index

    for i in range(stanza_length):
        wants_long_line = (i == long_line_index)

        for _ in range(max_attempts):
            line = text_model.make_sentence(tries=100)
            if not line:
                # The model failed to produce a sentence; counts as one attempt.
                continue

            token_count = len(nlp(line))
            if wants_long_line:
                accepted = token_count >= long_line_length
            else:
                accepted = token_count <= short_line_length

            if accepted:
                poem_lines.append(line)
                break
        else:
            # Loop finished without a break: every candidate was rejected.
            wanted = (f"at least {long_line_length}" if wants_long_line
                      else f"at most {short_line_length}")
            raise RuntimeError(
                f"Could not generate line {i + 1} of {stanza_length} with {wanted} "
                f"tokens after {max_attempts} attempts."
            )

    return poem_lines


In [157]:
def generate_poem_lines_debug(text_model, stanza_length=6, max_words=20, repeatable_line=None):
    """Generate a stanza while printing each step, and pick a line to repeat next time.

    Args:
        text_model: Object exposing ``make_short_sentence``; it is called with
            100 as the first positional argument (presumably markovify's
            character limit -- confirm), ``max_words`` and ``tries=100``.
        stanza_length: Target number of lines, counting ``repeatable_line``
            when one is supplied.
        max_words: Forwarded to ``make_short_sentence``.
        repeatable_line: Optional line placed first in the stanza.

    Returns:
        ``(poem_lines, next_repeatable_line)`` where ``next_repeatable_line``
        is the first stanza line with 4 or 5 whitespace-separated words, or
        ``None`` when no line qualifies. Failed generations are skipped, so
        the stanza may be shorter than ``stanza_length``.
    """
    stanza = []

    # A carried-over line, when given, opens the stanza.
    if repeatable_line:
        stanza.append(repeatable_line)
        print(f"Repeated Line: {repeatable_line}")  # Debugging print

    # Only the slots not taken by the carried-over line are generated.
    slots_to_fill = stanza_length - len(stanza)
    for _ in range(slots_to_fill):
        candidate = text_model.make_short_sentence(100, max_words=max_words, tries=100)
        if not candidate:
            print("Failed to generate a line.")  # Debugging print
            continue
        print(f"Generated Line: {candidate}")  # Debugging print
        stanza.append(candidate)

    # First line of 4-5 words becomes the next repeatable line, if there is one.
    next_line = next(
        (entry for entry in stanza if 4 <= len(entry.split()) <= 5),
        None,
    )

    return stanza, next_line


In [158]:
def generate_decreasing_lines(text_model):
    """Generate a sentence and a series of progressively shorter versions of it.

    Starting from one sentence produced by ``text_model.make_sentence``, 2-3
    randomly chosen words are removed per step (never leaving fewer than two)
    until at most three words remain.

    Args:
        text_model: Object exposing ``make_sentence(tries=...)`` that returns a
            string or a falsy value on failure.

    Returns:
        ``(lines, removed_words_all_lines)``. ``lines`` starts with the full
        sentence followed by each shortened version; ``removed_words_all_lines``
        holds, per step, the list of words taken out in that step. Both are
        empty lists when the model fails to produce the initial sentence.
    """
    initial_line = text_model.make_sentence(tries=100)
    if not initial_line:
        print("Failed to generate an initial long line.")
        return [], []

    lines = [initial_line]  # Store the lines for the decreasing poem
    removed_words_all_lines = []  # Store the words removed at each step

    words = initial_line.split()

    while len(words) > 3:
        # Remove 2-3 words, capped so that at least two words are left.
        num_words_to_remove = min(random.randint(2, 3), len(words) - 2)

        # Sample positions rather than values: removing by value would delete
        # the first equal word, which is the wrong occurrence whenever the
        # sentence repeats a word such as "the" or "and".
        positions_to_remove = random.sample(range(len(words)), num_words_to_remove)
        removed_words_all_lines.append([words[pos] for pos in positions_to_remove])

        dropped = set(positions_to_remove)
        words = [word for pos, word in enumerate(words) if pos not in dropped]

        lines.append(' '.join(words))

    return lines, removed_words_all_lines


In [159]:
def reorder_words_with_spacy(words):
    """Reorder words by part of speech to roughly resemble a sentence.

    The words are joined with spaces, run through the module-level spaCy
    pipeline ``nlp``, and the resulting tokens are grouped by coarse
    part-of-speech tag in a fixed priority order. Tokens keep their relative
    order within each group, and tokens whose tag is not in the priority list
    come last, also in their original order.

    Args:
        words: Iterable of strings to reorder.

    Returns:
        A list of token texts. Because spaCy re-tokenizes the joined text, the
        result can contain more items than ``words`` (for example when
        punctuation attached to a word is split off).
    """
    # Process the combined text of the words to analyze their part of speech
    doc = nlp(" ".join(words))

    # Simple heuristic ordering: nouns/pronouns, verbs, modifiers, function words.
    # 'CCONJ' is the Universal POS tag for coordinating conjunctions; 'CONJ' is
    # retained only for older tag sets and is not expected from current models.
    pos_order = ['NOUN', 'PRON', 'VERB', 'ADJ', 'ADV', 'DET', 'ADP', 'CONJ', 'CCONJ', 'PUNCT']
    rank_by_pos = {pos: rank for rank, pos in enumerate(pos_order)}
    unranked = len(pos_order)  # Tags outside pos_order sort after every listed tag

    # sorted() is stable, so tokens sharing a rank stay in document order.
    ordered_tokens = sorted(doc, key=lambda token: rank_by_pos.get(token.pos_, unranked))

    return [token.text for token in ordered_tokens]

In [160]:
def generate_increasing_lines(removed_words_all_lines):
    """Build progressively longer lines from batches of previously removed words.

    The batches are consumed in reverse order. After each batch is added, the
    accumulated words are shuffled in place and passed to
    ``reorder_words_with_spacy``; the reordered words joined by spaces form
    the next line.

    Args:
        removed_words_all_lines: Sequence of word lists, one per removal step.

    Returns:
        A list with one line per batch.
    """
    grown_lines = []
    word_pool = []

    for batch in reversed(removed_words_all_lines):
        # Grow the pool with the next batch of removed words.
        word_pool += batch
        # Scramble the pool before handing it to the part-of-speech reordering.
        random.shuffle(word_pool)
        sentence_like = reorder_words_with_spacy(word_pool)
        grown_lines.append(' '.join(sentence_like))

    return grown_lines

In [161]:
def main(text_model, output_path='output.txt'):
    """Generate the decreasing and increasing poems and write them to a file.

    Args:
        text_model: Markov model passed through to the generator functions.
        output_path: File to (over)write with the poem lines, one per line.
            Defaults to ``'output.txt'``.

    Side effects:
        Prints the debug stanza produced by ``generate_poem_lines_debug`` and
        writes ``output_path`` in UTF-8.
    """
    # Debug stanza: the helper prints each generated line; its return value is
    # not used here.
    generate_poem_lines_debug(text_model)

    # Generate the first poem with decreasing lines and capture removed words
    decreasing_lines, removed_words_all_lines = generate_decreasing_lines(text_model)

    # Generate the second poem with increasing lines using the removed words.
    # Done before opening the file so a failure here cannot leave a truncated file.
    increasing_lines = generate_increasing_lines(removed_words_all_lines)

    with open(output_path, 'w', encoding='utf-8') as f:
        for line in decreasing_lines:
            f.write(line + '\n')
        for line in increasing_lines:
            f.write(line + '\n')


In [162]:



# Run the pipeline with the module-level Markov model; prints the debug stanza and writes the poem file.
main(text_model)


Generated Line: I Iron ring which does not melt, add to the crane.
Generated Line: That this is three feet in length.
Generated Line: Even upon such lands, however, he had done it unwillingly.
Generated Line: More than one method for treating them.
Generated Line: At the season when there is no small mistake.
Generated Line: , for the right is granted, the name auripigmentum , and the latter the metals.
