In [12]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel rather than whatever `pip` the shell finds.
%pip install python-dotenv langchain openai pypdf python-docx langchain-community

Collecting langchain-community
  Obtaining dependency information for langchain-community from https://files.pythonhosted.org/packages/45/a7/b9f3cd12510fe9a5fe2dcd7f12d095b0d5bd95fb2cd9c5362de45ebc18f9/langchain_community-0.3.13-py3-none-any.whl.metadata
  Downloading langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Obtaining dependency information for httpx-sse<0.5.0,>=0.4.0 from https://files.pythonhosted.org/packages/e1/9b/a181f281f65d776426002f330c31849b86b31fc9d848db62e16f03ff739f/httpx_sse-0.4.0-py3-none-any.whl.metadata
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Obtaining dependency information for pydantic-settings<3.0.0,>=2.4.0 from https://files.pythonhosted.org/packages/b4/46/93416fdae86d40879714f72956ac14df9c7b76f7d41a4d68aa9f71a0028b/pydantic_settings-2.7.1-py3-none-any.whl.metadata
  Downloading pydantic

In [13]:
import os
from kaggle_secrets import UserSecretsClient
# Fetch the OpenAI key from Kaggle's secret store and publish it as an
# environment variable, so code that looks up OPENAI_API_KEY can find it.
secret_store = UserSecretsClient()
os.environ['OPENAI_API_KEY'] = secret_store.get_secret('OPENAI_API_KEY')

**Utils**

In [14]:
%%writefile utils.py
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI

class BaseStructureChain:
    """Common setup for the chains that build the novel's structure.

    Subclasses supply their own ``PROMPT`` template; the constructor wraps it
    in a verbose ``LLMChain`` backed by a ``ChatOpenAI`` model created with
    default arguments.

    Attributes:
        llm: the ``ChatOpenAI`` instance driving the chain.
        chain: the ``LLMChain`` built from ``PROMPT``.
    """

    # Prompt template, overridden by each concrete chain.
    PROMPT = ''

    def __init__(self) -> None:
        self.llm = ChatOpenAI()

        llm_chain = LLMChain.from_string(template=self.PROMPT, llm=self.llm)
        # Verbose mode is switched on for every chain built from this base.
        llm_chain.verbose = True
        self.chain = llm_chain


class BaseEventChain:
    """Common setup for the chains that work on chapter plots, events and text.

    Same construction as a plain prompt chain, except that the chat model is
    created with ``model_name='gpt-3.5-turbo-16k'``.

    Attributes:
        llm: the ``ChatOpenAI`` instance driving the chain.
        chain: the verbose ``LLMChain`` built from ``PROMPT``.
    """

    # Prompt template, overridden by each concrete chain.
    PROMPT = ''

    def __init__(self) -> None:
        self.llm = ChatOpenAI(model_name='gpt-3.5-turbo-16k')

        llm_chain = LLMChain.from_string(template=self.PROMPT, llm=self.llm)
        # Verbose mode is switched on for every chain built from this base.
        llm_chain.verbose = True
        self.chain = llm_chain

Overwriting utils.py


**Characters**

In [15]:
%%writefile characters.py
import os
from langchain.document_loaders import PyPDFLoader
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain

class MainCharacterChain:
    """Turn a resume PDF into a short profile of the novel's main character."""

    PROMPT = """
    You are provided with the resume of a person. 
    Describe the person's profile in a few sentences and include that person's name.

    Resume: {text}

    Profile:"""

    def __init__(self) -> None:
        self.llm = ChatOpenAI()

        llm_chain = LLMChain.from_string(template=self.PROMPT, llm=self.llm)
        llm_chain.verbose = True
        self.chain = llm_chain

    def load_resume(self, file_name):
        """Load ``file_name`` from the resume input folder.

        Args:
            file_name: name of the PDF, joined onto ``/kaggle/input/resume``.

        Returns:
            Whatever ``PyPDFLoader.load_and_split()`` returns for that file.
        """
        file_path = os.path.join('/kaggle/input/resume', file_name)
        return PyPDFLoader(file_path).load_and_split()

    def run(self, file_name):
        """Load the resume and ask the chain to summarise it.

        Args:
            file_name: name of the resume PDF to load.

        Returns:
            The result of running the chain on the concatenated resume text.
        """
        pages = self.load_resume(file_name)
        # Stitch the loaded pieces back into one text, separated by blank lines.
        resume_text = '\n\n'.join(page.page_content for page in pages)
        return self.chain.run(resume_text)


class CharactersChain:
    """Invent the full cast of a novel from its subject and genre."""

    PROMPT = """
    You are writer and your job is to generate the profiles for the characters of a novel. 
    You are provided with a the subject and the genre of the book. 
    Create all the characters for that novel.
    
    Generate the characters' names followed by their profiles.
    Describe the characters' profiles in a few sentences.

    Follow this template:

    [Character 1's name]: 
    [Character 1's profile]

    [Character 2's name]: 
    [Character 2's profile]

    ...

    Subject: {subject}
    Genre: {genre}

    Characters:"""

    def __init__(self) -> None:
        self.llm = ChatOpenAI()

        llm_chain = LLMChain.from_string(template=self.PROMPT, llm=self.llm)
        llm_chain.verbose = True
        self.chain = llm_chain

    def run(self, subject, genre):
        """Fill the prompt with ``subject`` and ``genre`` and return the chain's prediction."""
        prompt_inputs = {'subject': subject, 'genre': genre}
        return self.chain.predict(**prompt_inputs)

Overwriting characters.py


**Structure**

In [16]:
%%writefile structure.py
from utils import BaseStructureChain, ChatOpenAI

class TitleChain(BaseStructureChain):
    """Chain that asks the model for the novel's title."""

    PROMPT = """
    Your job is to generate the title for a novel about the following subject and characters. 
    Return a title and only a title!
    The title should be consistent with the genre of the novel.
    The title should be consistent with the style of the author.

    Subject: {subject}
    Genre: {genre}
    Author: {author}

    Novel's characters: {profile}

    Title:"""

    def run(self, subject, genre, author, profile):
        """Fill the title prompt with the given values and return the chain's prediction."""
        prompt_inputs = {
            'subject': subject,
            'genre': genre,
            'author': author,
            'profile': profile,
        }
        return self.chain.predict(**prompt_inputs)
    

class PlotChain(BaseStructureChain):
    """Chain that asks the model for the overall plot of the novel."""

    PROMPT = """
    Your job is to generate the plot for a novel. Return a plot and only a plot!
    Describe the full plot of the story.
    You are provided the following subject, title and characters.
    The plot should be consistent with the genre of the novel.
    The plot should be consistent with the style of the author.

    Consider the following attributes to write an exciting story:
    {features}

    subject: {subject}
    Genre: {genre}
    Author: {author}

    Title: {title}
    Novel's characters: {profile}

    DON'T refer to the author in the plot!
    Return a detailled plot and only the plot of the story
    Plot:"""

    # Auxiliary prompt whose answer is injected into PROMPT as {features}.
    HELPER_PROMPT = """
    Generate a list of attributes that characterized an exciting story.

    List of attributes:"""

    def run(self, subject, genre, author, profile, title):
        """Generate the plot.

        A separate, freshly created ``ChatOpenAI`` model first answers
        ``HELPER_PROMPT``; its answer is passed to the main prompt as
        ``features`` together with the other arguments.

        Returns:
            The chain's prediction for the filled-in plot prompt.
        """
        story_features = ChatOpenAI().predict(self.HELPER_PROMPT)

        prompt_inputs = {
            'features': story_features,
            'subject': subject,
            'genre': genre,
            'author': author,
            'profile': profile,
            'title': title,
        }
        return self.chain.predict(**prompt_inputs)
    

class ChaptersChain(BaseStructureChain):
    """Chain that asks the model for the chapter outline of the novel."""

    PROMPT = """
    Your job is to generate a list of chapters. 
    ONLY the list and nothing more!
    You are provided with a title, a plot and the characters for a novel.
    Generate a list of chapters describing the plot of that novel.
    Make sure the chapters are consistent with the plot.
    The chapters should be consistent with the genre of the novel. 
    The chapters should be consistent with the style of the author. 

    Follow this template: 

    Prologue: [description of prologue]
    Chapter 1: [description of chapter 1]
    ...
    Epilogue: [description of epilogue]

    Make sure the chapter is followed by the character `:` and its description. For example: `Chapter 1: [description of chapter 1]`
    
    subject: {subject}
    Genre: {genre}
    Author: {author}

    Title: {title}
    Novel's characters: {profile}

    Plot: {plot}
    
    Return the chapter list and only the chapter list
    Chapters list:"""
    
    def run(self, subject, genre, author, profile, title, plot):
        """Generate the chapter outline.

        Returns:
            dict mapping each chapter name to its description, as produced by
            :meth:`parse` from the chain's raw prediction.
        """
        response = self.chain.predict(
            subject=subject,
            genre=genre,
            author=author,
            profile=profile,
            title=title,
            plot=plot
        )

        return self.parse(response)

    def parse(self, response):
        """Parse ``"<chapter>: <description>"`` lines into a dict.

        Lines without a colon are ignored. Each remaining line is split on its
        FIRST colon only, so a description that itself contains a colon
        (e.g. ``"Chapter 3: The clock strikes 12:00"``) is kept whole instead
        of making the dict construction fail. Chapter names and descriptions
        are stripped of surrounding whitespace.

        Args:
            response: raw text returned by the model.

        Returns:
            dict of chapter name -> description, in the order the lines appear;
            if a name repeats, the last description wins.
        """
        chapter_dict = {}
        for line in response.strip().split('\n'):
            name, separator, description = line.partition(':')
            if not separator:
                # Not a "name: description" line (blank line, stray text, ...).
                continue
            chapter_dict[name.strip()] = description.strip()

        return chapter_dict
    

def get_structure(subject, genre, author, profile):
    """Generate the title, plot and chapter outline of the novel.

    The three chains are created up front and then run in sequence, each one
    receiving the results of the previous steps.

    Returns:
        A ``(title, plot, chapter_dict)`` tuple holding the results of
        ``TitleChain``, ``PlotChain`` and ``ChaptersChain`` respectively.
    """
    title_chain, plot_chain, chapters_chain = (
        TitleChain(),
        PlotChain(),
        ChaptersChain(),
    )

    title = title_chain.run(
        subject=subject, genre=genre, author=author, profile=profile
    )
    plot = plot_chain.run(
        subject=subject, genre=genre, author=author, profile=profile,
        title=title,
    )
    chapter_dict = chapters_chain.run(
        subject=subject, genre=genre, author=author, profile=profile,
        title=title, plot=plot,
    )

    return title, plot, chapter_dict

Overwriting structure.py


**Events**

In [17]:
%%writefile events.py
from utils import BaseEventChain, ChatOpenAI

class ChapterPlotChain(BaseEventChain):
    """Chain that asks the model for the detailed plot of a single chapter."""

    # Auxiliary prompt whose answer is injected into PROMPT as {features}.
    HELPER_PROMPT = """
    Generate a list of attributes that characterized an exciting story.

    List of attributes:"""

    PROMPT = """
    You are a writer and your job is to generate the plot for one and only one chapter of a novel. 
    You are provided with the title, the main plot of the novel and the characters.
    Additionally, you are provided with the plots of the previous chapters and the outline of the novel.
    Make sure to generate a plot that describe accurately the story of the chapter. 
    Each chapter should have its own arc, but should be consistent with the other chapters and the overall story.
    The chapter's plot should be consistent with the genre of the novel.
    The chapter's plot should be consistent with the style of the author. 

    Consider the following attributes to write an exciting story:
    {features}

    subject: {subject}
    Genre: {genre}
    Author: {author}

    Title: {title}
    Novel's characters: {profile}

    Novel's Plot: {plot}

    Outline:
    {outline}

    Chapter Plots:
    {summaries}

    Return a detailled plot. DON'T refer to the author nor the chapter's name in the plot!
    Return the plot and only the plot of the story in the chapter
    Plot of {chapter}:"""

    def run(self, subject, genre, author, profile, title,
            plot, summaries_dict, chapter_dict, chapter):
        """Generate the plot of ``chapter``.

        Args:
            summaries_dict: chapter name -> plot, rendered into the prompt as
                the "Chapter Plots" section.
            chapter_dict: chapter name -> description, rendered into the prompt
                as the "Outline" section.
            chapter: name of the chapter to write the plot for.

        Returns:
            The chain's prediction for the filled-in prompt.
        """
        # A separate, freshly created chat model answers the helper prompt.
        story_features = ChatOpenAI().predict(self.HELPER_PROMPT)

        outline_lines = []
        for name, description in chapter_dict.items():
            outline_lines.append('{} - {}'.format(name, description))
        outline = '\n'.join(outline_lines)

        summary_blocks = []
        for name, summary in summaries_dict.items():
            summary_blocks.append('Plot of {}: {}'.format(name, summary))
        summaries = '\n\n'.join(summary_blocks)

        prompt_inputs = {
            'subject': subject,
            'genre': genre,
            'author': author,
            'profile': profile,
            'title': title,
            'plot': plot,
            'features': story_features,
            'outline': outline,
            'summaries': summaries,
            'chapter': chapter,
        }
        return self.chain.predict(**prompt_inputs)
    

class EventsChain(BaseEventChain):
    """Chain that asks the model for the list of events of one chapter."""

    PROMPT = """
    You are a writer and your job is to come up with a detailled list of events happens in the current chapter of a novel.
    Be very specific about the actions of the different characters.
    Those events describes the plot of that chapter and the actions of the different characters in chronological order. 
    You are provided with the title, the main plot of the novel, the characters, and the plot of that chapter.
    Additionally, you are provided with the list of the events that were outlined in the previous chapters.
    The event list should be consistent with the genre of the novel.
    The event list should be consistent with the style of the author.

    The each element of that list should be returned on different lines. Follow this template:

    Event 1
    Event 2
    ...
    Final event

    subject: {subject}
    Genre: {genre}
    Author: {author}

    Title: {title}
    Novel's characters: {profile}

    Novel's Plot: {plot}

    Events you outlined for previous chapters: {previous_events}

    Plot of the current chapter:
    {summary}

    Don't hesitate to create the necessary events to generate a meaningful story
    Return the events and only the events that capture the story!
    Event list for that chapter:"""

    def run(self, subject, genre, author, profile,
            title, plot, summary, event_dict):
        """Generate the events of the chapter described by ``summary``.

        Args:
            summary: plot of the current chapter.
            event_dict: chapter name -> list of events; flattened into the
                prompt's "previous chapters" section, one entry per line with
                every chapter name preceding its own events.

        Returns:
            list of event strings, as produced by :meth:`parse`.
        """
        history_lines = []
        for chapter_name, chapter_events in event_dict.items():
            history_lines.append(chapter_name)
            history_lines.extend(chapter_events)
        # Every entry starts on a new line (the text is empty when there is no history).
        previous_events = ''.join('\n' + line for line in history_lines)

        prompt_inputs = {
            'subject': subject,
            'genre': genre,
            'author': author,
            'profile': profile,
            'title': title,
            'plot': plot,
            'summary': summary,
            'previous_events': previous_events,
        }
        return self.parse(self.chain.predict(**prompt_inputs))

    def parse(self, response):
        """Split ``response`` into stripped, non-empty lines."""
        stripped_lines = (line.strip() for line in response.strip().split('\n'))
        return [line for line in stripped_lines if line]
    

def get_events(subject, genre, author, profile, title, plot, chapter_dict):
    """Generate a plot summary and an event list for every chapter.

    Chapters are processed in the order of ``chapter_dict``. For each one the
    chapter plot is generated first (seeing the summaries of the chapters
    handled so far), then its events (seeing the events of the chapters
    handled so far).

    Returns:
        A ``(summaries_dict, event_dict)`` tuple, both keyed by chapter name.
    """
    chapter_plot_chain = ChapterPlotChain()
    events_chain = EventsChain()

    # Arguments that are identical for every call of both chains.
    shared_inputs = dict(
        subject=subject,
        genre=genre,
        author=author,
        profile=profile,
        title=title,
        plot=plot,
    )

    summaries_dict, event_dict = {}, {}

    for chapter in chapter_dict:
        # The accumulating dicts are passed as-is: at call time they only
        # contain the chapters processed before this one.
        chapter_summary = chapter_plot_chain.run(
            summaries_dict=summaries_dict,
            chapter_dict=chapter_dict,
            chapter=chapter,
            **shared_inputs
        )
        summaries_dict[chapter] = chapter_summary

        event_dict[chapter] = events_chain.run(
            summary=summaries_dict[chapter],
            event_dict=event_dict,
            **shared_inputs
        )

    return summaries_dict, event_dict

Overwriting events.py


**Writing**

In [18]:
%%writefile writing.py
from utils import BaseEventChain

class WriterChain(BaseEventChain):
    """Chain that writes the paragraphs of the novel for one event."""

    PROMPT = """
    You are a novel writer. The novel is described by a list of events. 
    You have already written the novel up to the last event. 
    Your job is to generate the paragraphs of the novel about the new event.
    You are provided with a the title, the novel's plot, the characters and a plot of the current chapter.
    Make sure the paragraphs are consistent with the plot of the chapter.
    Additionally you are provided with the list of events you have already written about.
    The paragraphs should be consistent with the genre of the novel.
    The paragraphs should be consistent with the style of the author.

    Genre: {genre}
    Author: {author}

    Title: {title}

    Novel's characters: {profile}

    Novel's Plot: {plot}

    Previous events:
    {previous_events}

    Current Chapter summary: {summary}

    Previous paragraphs:
    {previous_paragraphs}

    New event you need to write about now: 
    {current_event}

    You are the author and write the paragraphs as if they were part of the novel.
    DON'T refer to the author nor the chapters in the paragraphs!
    Only write the story related to that event with the necessary dialogues between the characters!
    Paragraphs of the novel describing that event:"""

    def run(self, genre, author, title, profile, plot,
            previous_events, summary, previous_paragraphs, current_event):
        """Write the paragraphs for ``current_event``.

        Args:
            previous_events: iterable of event strings already written about;
                joined with newlines before being placed in the prompt.
            summary: plot of the current chapter.
            previous_paragraphs: text placed under "Previous paragraphs".
            current_event: the event to write about now.

        Returns:
            The chain's prediction for the filled-in prompt.
        """
        prompt_inputs = {
            'genre': genre,
            'author': author,
            'title': title,
            'profile': profile,
            'plot': plot,
            'previous_events': '\n'.join(previous_events),
            'summary': summary,
            'previous_paragraphs': previous_paragraphs,
            'current_event': current_event,
        }
        return self.chain.predict(**prompt_inputs)
    
def write_book(genre, author, title, profile, plot, summaries_dict, event_dict):
    """Write the whole book, one event at a time.

    Args:
        summaries_dict: chapter name -> plot of that chapter.
        event_dict: chapter name -> list of events, in writing order.

    Returns:
        dict mapping each chapter name to the list of texts written for its
        events, in order.
    """
    writer = WriterChain()
    book = {}
    written_events = []   # every event handled so far, across chapters
    latest_text = ''      # text produced for the previous event ('' at the start)

    for chapter, chapter_events in event_dict.items():
        chapter_texts = []
        book[chapter] = chapter_texts

        for event in chapter_events:
            latest_text = writer.run(
                genre=genre,
                author=author,
                title=title,
                profile=profile,
                plot=plot,
                previous_events=written_events,
                summary=summaries_dict[chapter],
                previous_paragraphs=latest_text,
                current_event=event,
            )

            written_events.append(event)
            chapter_texts.append(latest_text)

    return book

Overwriting writing.py


**Publishing**

In [19]:
%%writefile publishing.py
import docx


class DocWriter:
    """Assemble the generated book into a Word document and save it."""

    # Path separators would make the title point into another directory and
    # control characters (newlines, tabs, ...) have no place in a file name;
    # each of them is replaced by an underscore when building the file name.
    _UNSAFE_FILENAME_CHARS = str.maketrans(
        {char: '_' for char in '/\\' + ''.join(map(chr, range(32)))}
    )

    def __init__(self) -> None:
        self.doc = docx.Document()

    @classmethod
    def _safe_filename(cls, title):
        """Return ``title`` as a file-name stem that is safe to save under.

        Surrounding whitespace is removed, path separators and control
        characters become ``_``, and an empty result falls back to
        ``'untitled'``. Titles without such characters are returned unchanged.
        """
        stem = '{}'.format(title).strip().translate(cls._UNSAFE_FILENAME_CHARS)
        return stem or 'untitled'

    def write_doc(self, book, chapter_dict, title):
        """Write the book into the document and save it as ``<title>.docx``.

        Args:
            book: chapter name -> list of texts written for that chapter.
            chapter_dict: chapter name -> chapter description; must contain
                every key of ``book``.
            title: title of the novel. Used verbatim for the document heading
                and, after sanitising, for the file name (the title is
                model-generated, so it may contain characters such as ``/``
                that would otherwise make saving fail).

        Raises:
            KeyError: if a chapter of ``book`` is missing from ``chapter_dict``.
        """
        self.doc.add_heading(title, 0)

        for chapter, paragraphs_list in book.items():

            description = chapter_dict[chapter]
            chapter_name = '{}: {}'.format(
                chapter.strip(), description.strip()
            )

            self.doc.add_heading(chapter_name, 1)

            # All texts of the chapter go into one block, separated by blank lines.
            text = '\n\n'.join(paragraphs_list)
            self.doc.add_paragraph(text)

        self.doc.save('{}.docx'.format(self._safe_filename(title)))

Overwriting publishing.py


**Main**

In [20]:
import json
from characters import MainCharacterChain, CharactersChain
from structure import get_structure
from events import get_events
from writing import write_book
from publishing import DocWriter

# --- Inputs: what the novel is about and whose style to imitate -------------
subject = """
Embroiled in the excruciating war between human and machines, the main character is in a moral dilema to choose which side.
"""

author = 'Dan Brown'
genre = 'Thriller and Adventure'

# --- Characters: derive the main character's profile from a resume PDF ------
main_character_chain = MainCharacterChain()
profiles = main_character_chain.run('Profile.pdf')
print('Main character profile: ', profiles)

# Alternative kept for reference: let the model invent the whole cast instead.
#characters_chain = CharactersChain()
#profiles = characters_chain.run(subject, genre)

doc_writer = DocWriter()

# --- Structure: title, overall plot and chapter outline ---------------------
title, plot, chapter_dict = get_structure(
    subject=subject,
    genre=genre,
    author=author,
    profile=profiles,
)

# Title, plot and outline, each separated by a blank line.
print(title, plot, chapter_dict, sep='\n\n')

# --- Events: per-chapter plot summaries and event lists ---------------------
summaries_dict, event_dict = get_events(
    subject=subject,
    genre=genre,
    author=author,
    profile=profiles,
    title=title,
    plot=plot,
    chapter_dict=chapter_dict,
)

# Checkpoint everything generated so far before the long writing step.
d = [title, plot, chapter_dict, summaries_dict, event_dict]

with open(f"save_buffer_{title}.json", 'w') as outfile:
    json.dump(d, outfile)


# --- Writing and publishing -------------------------------------------------
book = write_book(
    genre=genre,
    author=author,
    title=title,
    profile=profiles,
    plot=plot,
    summaries_dict=summaries_dict,
    event_dict=event_dict,
)

doc_writer.write_doc(book=book, chapter_dict=chapter_dict, title=title)

  self.llm = ChatOpenAI()


ValueError: File path /kaggle/input/resume/Profile.pdf is not a valid file or url