# Chat bot for CADTH recommendations report

### Load Environment variables

In [2]:
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the dotenv file located by find_dotenv().
# The return value of load_dotenv is not needed here, so it is bound to `_`
# (this also keeps the notebook cell from echoing it).
_ = load_dotenv(find_dotenv())

In [None]:
# Define a global prompt template to be used for Q/A against the PDF.
# NOTE(review): task 3 in the prompt text below stops mid-sentence
# ("which contains all the"), and the text contains no `{...}` placeholders
# for the context or the question -- confirm the intended wording before use.
from langchain.prompts import ChatPromptTemplate
PDF_PROMPT = """
You are the world's best data-analyst. You are provided with a `context` delimited by ```.
Following information about the `context` is provided:
1. `context` is a combination of extracts from a report.
2. this report contains details about a medical drug and a recommendation on it. Note: you are not provided with entire report.
3. this recommendation is based on certain evidences within the `context`
4. the report is published by an independent company on behalf of a government.

You are assigned with following tasks:
1. Read and analyse the `context` carefully. Keep in mind all the information provided above about the context.
2. You will be provided with a question and need to answer it based solely on `context`
3. along with the answer , you need to return a key called as "evidences" which contains all the 

Note: Make sure to not make up answers on your own. If you cannot answer the question ,respond with "Not enough information available in provided context"

"""

### import all dependencies

In [1]:
import langchain # to debug if required
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import SpacyTextSplitter # specific for preserving context and meaning
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory


## Define a method to load PDF and init conversation chain

In [None]:
def load_pdf_into_db(file, chain_type="stuff", k=4):
    """
    Load a PDF, split it into chunks, index the chunks in a Chroma vector
    store and set up the memory, chat model and retriever for a
    conversational retrieval chain.

    params:
    ---------
    :file - `str` path of PDF file to be loaded
    :chain_type - `str` technique used to fill up context in a LLM prompt , values can be `stuff` , `map_reduce` , `refine`
                  (not consumed by the code visible in this function yet)
    :k - number of chunks the retriever is asked for per query
         (forwarded as `search_kwargs["k"]`)

    NOTE(review): as far as this block shows, the function stops after
    creating the retriever -- no chain is assembled and nothing is returned.
    """

    # load the pdf
    loader = PyPDFLoader(file)
    docs = loader.load()

    # split the documents
    # The sizing options must be passed as real keyword arguments. Wrapping
    # them in `kwargs={...}` hands the splitter a single keyword literally
    # named `kwargs`, so chunk_size / chunk_overlap were never applied.
    text_splitter = SpacyTextSplitter(chunk_size=2000, chunk_overlap=200)
    doc_chunks = text_splitter.split_documents(documents=docs)

    # define embedding
    embedding_model = OpenAIEmbeddings()
    persist_directory = "db/chroma"
    # create vector db :: chroma (stored under `persist_directory`)
    vectorstore = Chroma.from_documents(
        documents=doc_chunks,
        embedding=embedding_model,
        persist_directory=persist_directory,
    )

    # create memory
    chat_memory = ConversationBufferMemory(
        memory_key="chat_history",  # name used inside retrieval chain
        return_messages=True,
    )  # will contain pairs of HumanMessage and AIMessage

    # temperature 0.0 is requested for the chat model
    chat_model = ChatOpenAI(temperature=0.0)

    # define retrieval: "mmr" search returning `k` chunks
    # TODO: the "mmr" search type still needs to be checked
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": k})

     
    


    


