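Write a function that generates an answer to a user's question.
Input variables:
1. question: the user's question
2. include_prompt: flag indicating whether to use the custom prompt template
3. structured_result: flag indicating whether the answer must follow the defined structure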

In [106]:
import os
import openai
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from langchain.prompts import PromptTemplate
from langchain.callbacks import get_openai_callback
import json 

# System instructions that qachatbot() prepends to its custom prompt templates
# (used only when include_prompt=True).
sysmessage = """
You are a company information extractor.
You will receive questions from users, give detailed answers (include data if available) based on the below context, and generate 3 related questions that users might be interested in.
Answer I don't know if the information is not in the context.
"""

# Directory of the persisted Chroma collection:
# <parent of the current working directory>/data/chroma/vietcap_reports.
# Built with os.path.join so the separators match the host OS; the previous
# hard-coded backslashes only produced a usable path on Windows.
persist_directory = os.path.join(
    os.path.dirname(os.getcwd()), 'data', 'chroma', 'vietcap_reports'
)
# Embedding function handed to Chroma for this collection.
embedding = OpenAIEmbeddings()
# Open the Chroma vector store backed by persist_directory.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

# Desired structure of a structured reply: one answer plus three follow-up
# questions. qachatbot() passes this model to JsonOutputParser to generate the
# format instructions injected into the prompt.
class Answer(BaseModel):
    # Answer text for the user's question.
    answer_: str = Field(description="answer of the user's query")
    # Three related questions suggested alongside the answer.
    related_q1: str = Field(description="related question no.1")
    related_q2: str = Field(description="related question no.2")
    related_q3: str = Field(description="related question no.3")
        
def qachatbot(question: str, include_prompt: bool = False, structured_result: bool = False):
    """Answer a question with a RetrievalQA chain over the Chroma vector store.

    Args:
        question: The user's question; passed to the chain under the
            ``"query"`` key.
        include_prompt: When True, use the custom prompt built from
            ``sysmessage`` instead of the chain's default prompt.
        structured_result: When True, also inject the JSON format
            instructions derived from ``Answer`` into the prompt.
            Requires ``include_prompt=True``.

    Returns:
        A ``(result, cb)`` tuple: the chain's output (callers read the answer
        text from its ``"result"`` key) and the callback object yielded by
        ``get_openai_callback()`` for this call.

    Raises:
        ValueError: If ``structured_result`` is requested without
            ``include_prompt``.
    """
    # Format instructions can only be injected through the custom prompt.
    if structured_result and not include_prompt:
        raise ValueError("include prompt if want to have structured answer")

    # Hand the API key from the environment to the openai module.
    openai.api_key = os.environ.get('OPENAI_API_KEY')

    # Chat model with temperature set to 0.
    llm = ChatOpenAI(temperature=0)

    # Arguments shared by every variant of the chain.
    chain_kwargs = {"retriever": vectordb.as_retriever(search_kwargs={"k": 3})}

    if include_prompt:
        template = sysmessage + """
        Context: {context}
        Question: {question}
        """
        # The declared input variables must match the template placeholders
        # ({context} and {question}); the chain itself is still invoked with
        # a "query" key below.
        prompt_kwargs = {"input_variables": ["context", "question"]}

        if structured_result:
            # Same text as the plain template plus one more line carrying the
            # parser's format instructions (indentation kept identical).
            template += "{format_instructions}\n        "
            parser = JsonOutputParser(pydantic_object=Answer)
            prompt_kwargs["partial_variables"] = {
                "format_instructions": parser.get_format_instructions()
            }

        prompt = PromptTemplate(template=template, **prompt_kwargs)
        chain_kwargs["chain_type_kwargs"] = {"prompt": prompt}

    # Build the RetrievalQA chain once, with or without the custom prompt.
    qa_chain = RetrievalQA.from_chain_type(llm, **chain_kwargs)

    # Run the chain inside the OpenAI callback context so cb is populated
    # for this call.
    with get_openai_callback() as cb:
        result = qa_chain({"query": question})

    return result, cb

In [108]:
# Ask a sample question using the custom prompt with structured (JSON) output.
result, cb = qachatbot(
    question="Tell me about water sector in vietnam",
    include_prompt=True,
    structured_result=True,
)

In [109]:
# result["result"] is expected to hold the model's reply as a JSON string
# with the fields declared on Answer.
result_json_string = result["result"]
parsed_result = json.loads(result_json_string)

# Pull the four fields out of the decoded reply in one step.
answer, related_q1, related_q2, related_q3 = (
    parsed_result[field_name]
    for field_name in ("answer_", "related_q1", "related_q2", "related_q3")
)

# Show the answer followed by the numbered related questions.
print("Answer:", answer)
for number, related in enumerate((related_q1, related_q2, related_q3), start=1):
    print(f"Related Question {number}:", related)

Answer: The water sector in Vietnam is experiencing growth, particularly in the residential segment. Volume growth is being supported by strong residential water demand, with projected 2023 volume growth for BWE at 7% YoY driven by 9% YoY residential water volume growth and 5% YoY industrial water volume growth. TDM is also expected to see volume growth in 2023 following its 2022 results that exceeded expectations. The water industry in Long An Province shows positive prospects for attracting FDI registrations and has a projected CAGR of 5% from 2020 to 2030, with industrial water demand estimated to double over the next 10 years.
Related Question 1: What are the key drivers of growth in the water sector in Vietnam?
Related Question 2: How does the water industry in Long An Province compare to other regions in Vietnam?
Related Question 3: What impact does rapid urbanization and population growth have on the water sector in Vietnam?
