<a href="https://colab.research.google.com/github/zerlake/zerlake.github.io/blob/master/AI_Book_Writer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
!pip install transformers torch numpy pandas tqdm

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from tqdm import tqdm
import numpy as np
import json

class TextGenerator:
    """Sampled text generation with the pretrained ``gpt2-medium`` model.

    The model and tokenizer are loaded once at construction time and the model
    is moved to the GPU when CUDA is available, otherwise it stays on the CPU.
    """

    def __init__(self):
        self.model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)

    def generate_text(self, prompt, max_length=1000):
        """Return ``prompt`` followed by a sampled continuation.

        Args:
            prompt: Text the model should continue.
            max_length: Upper bound on the total sequence length in tokens
                (prompt plus generated tokens). Values larger than the
                model's context window are clamped to that window.

        Returns:
            The decoded sequence (prompt included) with special tokens removed.
        """
        inputs = self.tokenizer.encode(prompt, return_tensors='pt').to(self.device)

        # GPT-2 only has position embeddings for `n_positions` tokens; asking
        # generate() for a longer sequence indexes past that table and raises
        # IndexError, so never request more than the model can represent.
        max_length = min(max_length, self.model.config.n_positions)

        outputs = self.model.generate(
            inputs,
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            # GPT-2 has no dedicated pad token; reuse EOS for padding.
            pad_token_id=self.tokenizer.eos_token_id,
        )

        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

class BookWriterAgent:
    """Collects story metadata and drafts chapters with a text generator.

    All state lives in ``self.story_data``, a JSON-serialisable dict with the
    keys ``title``, ``chapters``, ``characters``, ``plot_points`` and
    ``world_details``.
    """

    # GPT-2's context window (prompt + generated tokens). Requesting more than
    # this from the default generator fails inside the model.
    DEFAULT_CHAPTER_MAX_LENGTH = 1024

    def __init__(self, text_generator=None):
        """Create an agent with an empty story.

        Args:
            text_generator: Object exposing
                ``generate_text(prompt, max_length=...)``. When omitted, a
                ``TextGenerator`` is constructed (which loads the model).
        """
        self.text_generator = TextGenerator() if text_generator is None else text_generator
        self.story_data = self._empty_story()

    @staticmethod
    def _empty_story():
        """Return a fresh story dict containing every expected key."""
        return {
            'title': '',
            'chapters': [],
            'characters': [],
            'plot_points': [],
            'world_details': {},
        }

    def set_story_title(self, title):
        """Set the story title used in chapter prompts."""
        self.story_data['title'] = title

    def add_character(self, name, description, role):
        """Register a character; its ``arc`` list starts empty."""
        self.story_data['characters'].append({
            'name': name,
            'description': description,
            'role': role,
            'arc': [],
        })

    def add_plot_point(self, chapter, description, type='major'):
        """Attach a plot point to chapter number ``chapter``.

        ``type`` shadows the builtin but is kept because callers may pass it
        by keyword.
        """
        self.story_data['plot_points'].append({
            'chapter': chapter,
            'description': description,
            'type': type,
        })

    def add_world_detail(self, location, description):
        """Store (or overwrite) the description of ``location``."""
        self.story_data['world_details'][location] = description

    def generate_chapter(self, chapter_num, theme, max_length=DEFAULT_CHAPTER_MAX_LENGTH):
        """Generate a chapter, record it in the story, and return its text.

        Args:
            chapter_num: Chapter number; also selects the plot points to use.
            theme: Theme line inserted into the prompt.
            max_length: Token budget forwarded to the text generator.

        Returns:
            The text returned by the generator for this chapter.
        """
        prompt = self._create_chapter_prompt(chapter_num, theme)
        chapter_content = self.text_generator.generate_text(prompt, max_length=max_length)

        self.story_data['chapters'].append({
            'number': chapter_num,
            'theme': theme,
            'content': chapter_content,
        })

        return chapter_content

    def _create_chapter_prompt(self, chapter_num, theme):
        """Build the prompt from the title, all characters, and this chapter's plot points."""
        characters = ', '.join(c['name'] for c in self.story_data['characters'])
        plot_desc = '; '.join(
            p['description']
            for p in self.story_data['plot_points']
            if p['chapter'] == chapter_num
        )

        return f"""
Write Chapter {chapter_num} of "{self.story_data['title']}"
Theme: {theme}
Characters: {characters}
Plot points: {plot_desc}

Chapter {chapter_num}:
"""

    def save_story(self, filename):
        """Write the story data to ``filename`` as indented JSON."""
        # Explicit encoding so the file does not depend on the platform default.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(self.story_data, f, indent=2)

    def load_story(self, filename):
        """Replace the current story with the JSON document in ``filename``.

        Keys missing from the file are filled with empty defaults so the other
        methods can rely on them being present.

        Raises:
            ValueError: If the file's top-level JSON value is not an object.
        """
        with open(filename, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
        if not isinstance(loaded, dict):
            raise ValueError(f"Story file {filename!r} must contain a JSON object")
        self.story_data = {**self._empty_story(), **loaded}

# Example usage: outline a short story, draft its first chapter, and persist
# everything to a JSON file.
writer = BookWriterAgent()

# Story outline
writer.set_story_title("The Midnight Mystery")
writer.add_character(
    name="John Smith",
    description="A curious detective",
    role="protagonist",
)
writer.add_character(
    name="Emma Wilson",
    description="A mysterious client",
    role="deuteragonist",
)
writer.add_plot_point(chapter=1, description="John receives a cryptic letter")
writer.add_world_detail(
    location="Victorian London",
    description="Foggy streets and gaslit lamps",
)

# Draft chapter one and show it
chapter1 = writer.generate_chapter(1, "Mystery begins")
print("\nGenerated Chapter 1:", chapter1, sep="\n")

# Persist the story state
writer.save_story("midnight_mystery.json")

In [1]:
def generate_text(self, prompt, max_length=1000):
    """Sample a continuation of ``prompt``, returning "" if generation fails.

    Written as a replacement for ``TextGenerator.generate_text``; ``self`` must
    provide ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt: Text the model should continue.
        max_length: Upper bound on the total sequence length in tokens
            (prompt plus generated tokens). Clamped to the model's context
            window.

    Returns:
        The decoded sequence with special tokens removed, or an empty string
        when generation or decoding raised.
    """
    inputs = self.tokenizer.encode(prompt, return_tensors='pt').to(self.device)

    # Requests beyond the model's position-embedding table are what trigger
    # the IndexError in the first place, so prevent them up front.
    max_length = min(max_length, self.model.config.n_positions)

    try:
        outputs = self.model.generate(
            inputs,
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
    except IndexError as e:
        # `outputs` may not exist here (generate() itself can be what raised),
        # so there is nothing to salvage; fall back to the empty default.
        print(f"IndexError encountered during generation or decoding: {e}")
        return ""
    except Exception as e:
        # Deliberate best-effort: report the problem and return a default.
        print(f"Error during text generation: {e}")
        return ""

In [None]:
# Abridged sketch of an error-handling variant of generate_text. The prompt
# encoding and the generate() arguments are elided, so this cell is
# illustrative rather than a drop-in replacement.
def generate_text(self, prompt, max_length=1000):
    # ... (rest of the method) ...
    try:
        outputs = self.model.generate(
            # ... (input parameters) ...
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
    except IndexError as e:
        print(f"IndexError encountered during generation or decoding: {e}")
        # Fallback: decode at most the first `vocab_size` elements of outputs[0].
        # NOTE(review): `outputs` is unbound here if generate() was the call
        # that raised, and the slice limits sequence positions rather than
        # token-id values - confirm this is the intended truncation.
        return self.tokenizer.decode(outputs[0][:self.tokenizer.vocab_size], skip_special_tokens=True)
    except Exception as e:  # Catch other potential exceptions
        print(f"Error during text generation: {e}")
        # No return statement follows, so this path yields None as written.

In [None]:
# Excerpt showing a catch-all wrapper around the generate() call. It is not
# runnable on its own: `self` is not defined in this cell and the generate()
# arguments are elided.
try:
    outputs = self.model.generate(
        # ... (input parameters) ...
    )
except Exception as e:
    # Only reports the failure; `outputs` stays unassigned on this path.
    print(f"Error during text generation: {e}")
    # Handle the error, e.g., log it, retry with different parameters, or return a default value

In [None]:
# Abridged sketch that guards only the decode step. The code producing
# `outputs` is elided, so this cell is illustrative rather than runnable.
def generate_text(self, prompt, max_length=1000):
    # ... (rest of the method) ...

    try:
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
    except IndexError:
        print("Warning: IndexError encountered during decoding. Returning truncated text.")
        # Fallback: decode at most the first `vocab_size` elements of outputs[0].
        # NOTE(review): the slice limits sequence positions, not token-id
        # values - confirm this is the intended truncation.
        return self.tokenizer.decode(outputs[0][:self.tokenizer.vocab_size], skip_special_tokens=True)

In [None]:
# Abridged variant of generate_chapter that asks the generator for 1000
# tokens. The part that records and returns the chapter is elided, so as
# written this function returns None.
# Presumably meant to replace BookWriterAgent.generate_chapter - TODO confirm.
def generate_chapter(self, chapter_num, theme):
    prompt = self._create_chapter_prompt(chapter_num, theme)
    # Reducing max_length to 1000
    chapter_content = self.text_generator.generate_text(prompt, max_length=1000)
    # ... (rest of the method remains the same) ...

In [None]:
class TextGenerator:
    """Sampled text generation with the pretrained ``gpt2-medium`` model.

    The usable sequence length is fixed by the pretrained position-embedding
    table. Editing ``config.max_position_embeddings`` after loading does not
    enlarge that table, and ``resize_token_embeddings`` only affects the
    vocabulary embeddings, so over-long requests are clamped in
    ``generate_text`` instead.
    """

    def __init__(self):
        self.model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)

    def generate_text(self, prompt, max_length=1000):
        """Return ``prompt`` followed by a sampled continuation.

        Args:
            prompt: Text the model should continue.
            max_length: Upper bound on the total sequence length in tokens
                (prompt plus generated tokens). Values larger than the
                model's context window are clamped to that window.

        Returns:
            The decoded sequence (prompt included) with special tokens removed.
        """
        inputs = self.tokenizer.encode(prompt, return_tensors='pt').to(self.device)

        # Never request more positions than the pretrained model provides;
        # doing so indexes past the position-embedding table.
        max_length = min(max_length, self.model.config.n_positions)

        outputs = self.model.generate(
            inputs,
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            # GPT-2 has no dedicated pad token; reuse EOS for padding.
            pad_token_id=self.tokenizer.eos_token_id,
        )

        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
class TextGenerator:
    """Sampled text generation with the pretrained ``gpt2-medium`` model.

    The usable sequence length is fixed by the pretrained weights. Overwriting
    ``config.n_positions`` / ``config.max_position_embeddings`` after loading
    does not resize the position-embedding table, so the config is left as
    loaded and over-long requests are clamped in ``generate_text``.
    """

    def __init__(self):
        self.model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)

    def generate_text(self, prompt, max_length=1000):
        """Return ``prompt`` followed by a sampled continuation.

        Args:
            prompt: Text the model should continue.
            max_length: Upper bound on the total sequence length in tokens
                (prompt plus generated tokens). Values larger than the
                model's context window are clamped to that window.

        Returns:
            The decoded sequence (prompt included) with special tokens removed.
        """
        inputs = self.tokenizer.encode(prompt, return_tensors='pt').to(self.device)

        # The untouched config still reports the real context size; stay
        # within it so generation cannot index past the position embeddings.
        max_length = min(max_length, self.model.config.n_positions)

        outputs = self.model.generate(
            inputs,
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            # GPT-2 has no dedicated pad token; reuse EOS for padding.
            pad_token_id=self.tokenizer.eos_token_id,
        )

        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
from IPython import get_ipython
from IPython.display import display
# %%
# Install dependencies
!pip install transformers torch numpy pandas tqdm

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from tqdm import tqdm
import numpy as np
import json

class TextGenerator:
    """Sampled text generation with the pretrained ``gpt2-medium`` model.

    Loads the model and tokenizer once and moves the model to the GPU when
    CUDA is available, otherwise it stays on the CPU.
    """

    def __init__(self):
        self.model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)

    def generate_text(self, prompt, max_length=1000):
        """Return ``prompt`` followed by a sampled continuation.

        Args:
            prompt: Text the model should continue.
            max_length: Upper bound on the total sequence length in tokens
                (prompt plus generated tokens). Values larger than the
                model's context window are clamped to that window.

        Returns:
            The decoded sequence (prompt included) with special tokens removed.
        """
        inputs = self.tokenizer.encode(prompt, return_tensors='pt').to(self.device)

        # The failure comes from asking for a sequence longer than the model's
        # position-embedding table. Writing `max_length` into the config does
        # not resize that table (and would overwrite the recorded context
        # size), so the request is capped at the real limit instead.
        max_length = min(max_length, self.model.config.n_positions)

        outputs = self.model.generate(
            inputs,
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            # GPT-2 has no dedicated pad token; reuse EOS for padding.
            pad_token_id=self.tokenizer.eos_token_id,
        )

        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

# Rest of the code remains the same
# ...

In [8]:
def generate_text(self, prompt, max_length=1000):
    """Sample a continuation of ``prompt``, returning "" if generation fails.

    Written as a replacement for ``TextGenerator.generate_text``; ``self`` must
    provide ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt: Text the model should continue.
        max_length: Upper bound on the total sequence length in tokens
            (prompt plus generated tokens). Clamped to the model's context
            window.

    Returns:
        The decoded sequence with special tokens removed, or an empty string
        when generation or decoding raised.
    """
    inputs = self.tokenizer.encode(prompt, return_tensors='pt').to(self.device)

    # Cap the request at the model's context window. Without this, any
    # over-long request fails inside generate() and ends up as "" below.
    max_length = min(max_length, self.model.config.n_positions)

    try:
        outputs = self.model.generate(
            inputs,
            max_length=max_length,
            num_return_sequences=1,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate best-effort: report the problem and return a default.
        # The former retry that sliced the ids by `vocab_size` re-decoded the
        # same tokens, so it could not recover anything and was dropped.
        print(f"Error during text generation: {e}")
        return ""