In [None]:
# Install required dependencies
! pip install openai num2words matplotlib plotly scipy scikit-learn pandas tiktoken langchain pypdf faiss-cpu

In [1]:
# Azure OpenAI connection settings.
# The API keys are read from the local `keys` module; only the endpoint URLs
# are written out in the notebook.

import keys

# Endpoints of the two Azure OpenAI resources (embeddings and chat).
embed_endpoint = "https://raid-openai-e27bcf212.openai.azure.com/"
gpt_endpoint = "https://raid-ses-openai.openai.azure.com/"

# Matching API keys.
embed_key = keys.embed_key
gpt_key = keys.gpt_key

In [None]:
# TODO: add logic here to split the documents into better chunks instead of just one chunk per page.

In [2]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain

def load_docs(filepath):
    """Load every PDF found under ``filepath``, searching sub-folders too.

    Args:
        filepath: Directory handed to ``DirectoryLoader`` as the search root.

    Returns:
        The result of ``DirectoryLoader.load()`` for all ``**/*.pdf`` matches,
        each file being read with ``PyPDFLoader``.
    """
    return DirectoryLoader(
        filepath,
        glob='**/*.pdf',
        loader_cls=PyPDFLoader,
    ).load()

def vector_load(docs, key, endpoint, db_path=None,
                api_version="2023-05-15", deployment="text-embedding-ada-002"):
    """Embed ``docs`` with Azure OpenAI and return them in a FAISS vector store.

    Args:
        docs: Documents to index (e.g. the output of ``load_docs``).
        key: API key of the Azure OpenAI embedding resource.
        endpoint: Base URL of the Azure OpenAI embedding resource.
        db_path: Optional folder of a persisted FAISS store. When given and a
            saved index already exists there, ``docs`` are added to that
            store; otherwise a new store is created from ``docs``. In both
            cases the resulting store is saved back to ``db_path``.
            ``None`` (the default) builds an in-memory store only, exactly as
            before.
        api_version: Azure OpenAI API version passed to the embedding client.
        deployment: Name of the Azure embedding deployment to call.

    Returns:
        The FAISS vector store containing ``docs``.
    """
    import os

    embedding_model = OpenAIEmbeddings(
        openai_api_type="azure",
        openai_api_key=key,
        openai_api_base=endpoint,
        openai_api_version=api_version,
        deployment=deployment,
    )

    # No persistence requested: original behaviour, build a fresh store.
    if db_path is None:
        return FAISS.from_documents(docs, embedding_model)

    # ``save_local`` writes ``index.faiss`` (default index name) into the
    # folder, so its presence tells us whether a store was saved there before.
    if os.path.isfile(os.path.join(db_path, "index.faiss")):
        # NOTE: ``load_local`` unpickles the stored docstore -- only point
        # ``db_path`` at a store created by this project.
        db = FAISS.load_local(db_path, embedding_model)
        if docs:
            db.add_documents(docs)
    else:
        db = FAISS.from_documents(docs, embedding_model)

    db.save_local(db_path)
    return db
    

In [135]:
# Build the vector store from the PDFs under ./data/ and persist it to
# "dbstore", so the FAISS.load_local("dbstore", ...) cell that follows has
# something to read after a kernel restart (Restart & Run All).
docs = load_docs('./data/')
db = vector_load(docs, embed_key, embed_endpoint)
db.save_local("dbstore")

In [4]:
# Re-open the FAISS store saved under "dbstore". The embedding client is
# configured with the same settings that vector_load uses.
dbstore_embeddings = OpenAIEmbeddings(
    openai_api_type="azure",
    openai_api_key=embed_key,
    openai_api_base=embed_endpoint,
    openai_api_version="2023-05-15",
    deployment="text-embedding-ada-002",
)
db = FAISS.load_local("dbstore", dbstore_embeddings)

In [5]:
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory

# Chat model: the "raidGPT" Azure deployment, with temperature 0.0.
llm = AzureChatOpenAI(
    openai_api_type="azure",
    openai_api_version="2023-05-15",
    openai_api_base=gpt_endpoint,
    openai_api_key=gpt_key,
    deployment_name="raidGPT",
    temperature=0.0,
)

# Retriever over the FAISS store, asking for k=10 results per query.
retriever = db.as_retriever(search_kwargs={"k": 10})

# Conversation memory backed by the same LLM. It reads the "question" input
# and the "answer" output, and exposes its content as "chat_history".
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    input_key="question",
    output_key="answer",
    return_messages=True,
)

# Retrieval chain that also returns the source documents it used.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    memory=memory,
)

In [5]:
# Ask a first question through the retrieval chain.
result = qa({"question": "What is a FOEL check?"})

In [9]:
# Show which file the first returned source document came from.
first_source_doc = result["source_documents"][0]
print(first_source_doc.metadata["source"])

data\PC-21 Employment Manual - Chapt 10 Straight _ Level AL16 Mar 2023.pdf


## Testing custom prompt

In [6]:
from langchain.prompts.prompt import PromptTemplate

In [42]:
# Prompt used to answer from the retrieved documents. It has three placeholders:
#   {context}      - filled with the retrieved document text
#   {chat_history} - filled with the conversation so far
#   {question}     - the user's question
custom_template = """
You are a bot designed to answer military pilot trainees' questions from various flying handbooks and rulebooks. Use the context provided below to answer their questions. If you don't know the answer, just say that you don't know, don't try to make up an answer. 

{context}

Additionally, this was the chat history of your conversation with the user.
{chat_history}

Question: {question}

"""

# from_template derives the input variables from the placeholders above.
PROMPT = PromptTemplate.from_template(custom_template)

In [43]:
# Rebuild the chain with the same llm, retriever and memory, but with the
# custom PROMPT used by the document-combining step.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": PROMPT},
)

In [44]:
# Try the custom-prompt chain on a new question.
result = qa({"question": "What is the recommended height for a normal circuit?"})

In [45]:
# Display the generated answer text.
result['answer']

'The recommended altitude for a normal flight circuit is 1000 feet Above Ground Level (AGL), as mentioned in section 4.1.1 of Chapter 15.'