Use Retrieval Q&A to write a bio summary

In [None]:
# langchain
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.document_loaders import TextLoader
from langchain.llms import OpenAI

# base python
import os
from dotenv import load_dotenv
import openai

# ibm genai
from genai.credentials import Credentials
from genai.model import Model
from genai.schemas import GenerateParams, ModelType
from genai.extensions.langchain import LangChainInterface  # langchain support

# authentication
# load_dotenv() runs before any lookup so that values it adds to the process
# environment (presumably from a local .env file) are visible below.
load_dotenv()

# IBM GenAI: both values fall back to None when the variable is not set.
api_key = os.environ.get("GENAI_KEY")
api_url = os.environ.get("GENAI_API")
creds = Credentials(api_key, api_endpoint=api_url)

# OpenAI: set the key on the openai module (None if the variable is missing).
openai.api_key = os.environ.get('OPENAI_API_KEY')

Load the LLM (OpenAI is active; the IBM GenAI model is kept as a disabled alternative)

In [None]:
# load model

# Alternative (disabled): IBM GenAI model through the LangChain extension.
# Kept as comments, like the other disabled alternatives in this notebook,
# instead of a bare string literal. To try it, uncomment this block and
# remove the OpenAI assignment below.
# llm = LangChainInterface(
#         model=ModelType.FLAN_UL2,  # choice of model is not necessarily optimal
#         credentials=Credentials(os.environ.get("GENAI_KEY")),
#         params=GenerateParams(
#             decoding_method="greedy",
#             max_new_tokens=1500,
#             min_new_tokens=1,
#             repetition_penalty=2,
#         ).dict()
#     )

# Active model: OpenAI LLM wrapper from LangChain. temperature=0 asks for the
# least-random sampling so repeated runs stay as stable as possible.
llm = OpenAI(temperature=0)

Custom Prompt Template

In [None]:
# Prompt text for the retrieval chain. It has two placeholders:
#   {context}  - filled with the retrieved document text
#   {question} - filled with the query passed to the chain
prompt_template = """Use the following pieces of context about the same entity
to write a one-paragraph biography. 

{context}

The response should have the following format:
1. First state the person's name and current job.
2. Provide a summary of their work experience.
If the person is a board member or holds other very senior roles,
be sure to include that information. 
3. Then summarize the schools they attended and what degrees were earned.

Question: {question}
Answer: """

# Wrap the text in a PromptTemplate, declaring the two placeholders it expects.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

Load document

In [None]:
#loader = TextLoader("../../state_of_the_union.txt")
#documents = loader.load()

# Read the PDF with load(), which yields one Document per page.
# load_and_split() is deliberately not used here: it would first run
# LangChain's default recursive splitter, and the CharacterTextSplitter below
# would then re-chunk text that had already been chunked.
loader = PyPDFLoader("data/Profile.pdf")
documents = loader.load()  # each page is a different document

# Single, explicit chunking step: pieces of up to 1000 characters, no overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Alternative (disabled): local instructor embeddings instead of OpenAI.
#embeddings = HuggingFaceInstructEmbeddings(
#            model_name="hkunlp/instructor-large",
#            model_kwargs={"device": "cpu"}
#        )
embeddings = OpenAIEmbeddings()

# Embed every chunk and index it in a Chroma vector store for retrieval.
docsearch = Chroma.from_documents(texts, embeddings)

Retrieval

In [None]:
# Hand the custom biography prompt to the underlying chain.
chain_type_kwargs = dict(prompt=PROMPT)  # use the above custom prompt

# Retrieval Q&A chain: the Chroma store supplies the retriever, and the
# "stuff" chain type passes the retrieved text to the LLM in a single prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
)

In [None]:
# One-shot attempt: ask the retrieval chain for the whole biography at once.
query = "Write me a biography about Subbu Iyer"
# Last expression of the cell, so the notebook displays the chain's answer.
qa.run(query)

In [None]:
# Focused question 1 of 3: current employer and job title.
# The original wording ("... and is the job title?") was missing "what";
# the question is now grammatical before it is sent to the LLM.
query = 'Where does Subbu Iyer work currently and what is the job title?'
# Keep the answer: it is interpolated into the summary prompt further down.
current_work = qa.run(query)
current_work

In [None]:
# Focused question: past roles only; the current position is asked about in
# its own cell.
query = "Describe the work history of Subbu Iyer, excluding the current position."
# Keep the answer: it is interpolated into the summary prompt further down.
work_experience = qa.run(query)
work_experience

In [None]:
# Focused question: education history.
query = "Describe the education of Subbu Iyer"
# Keep the answer: it is interpolated into the summary prompt further down.
education = qa.run(query)
education

Separate call to write the summary:

In [None]:
# Build the final prompt by splicing the three answers collected above
# (current_work, work_experience, education) into one instruction block.
summary_prompt = f"""Given the following information about work and education
history, write a biography about the person. Use complete and grammatically correct sentences.

Current work:
{current_work}

Past work:
{work_experience}

Education:
{education}

Be sure to include at least one sentence
for each of these categories: current work, past work, and education.
Your response here.
"""

# Show the fully rendered prompt so it can be inspected before it is sent.
print(summary_prompt)

In [None]:
# Call the LLM directly (not the retrieval chain) with the assembled prompt;
# generate() is given a one-element list of prompts.
summary = llm.generate([summary_prompt])
# Display the text of the first entry for the first (and only) prompt --
# presumably the first candidate completion; confirm against the LLMResult API.
summary.generations[0][0].text