In [8]:
from langchain.utilities import SerpAPIWrapper
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate

from key_storage import key_dict
# Show which credentials were loaded -- key names only, never the secret values.
print(list(key_dict.keys()))

# Enable autoreload so edits to imported local modules are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

['OPENAI_API_KEY', 'PINECONE_API_KEY', 'PINECONE_API_ENV', 'PROXYCURL_API_KEY', 'SERPAPI_API_KEY']


In [6]:
def google_lookup(query: str) -> str:
    """Run `query` through SerpAPI and return the search result.

    Args:
        query: Free-text search query.

    Returns:
        Whatever `SerpAPIWrapper.run` returns for the query, or the literal
        string "answer not found" when a ValueError is raised while building
        the wrapper or running the search.
    """
    try:
        serp_client = SerpAPIWrapper(serpapi_api_key=key_dict["SERPAPI_API_KEY"])
        return serp_client.run(query)
    except ValueError as err:
        # Best-effort lookup: report the problem and fall through to the
        # fallback answer instead of propagating the error to the caller.
        print("SerpApi problem")
        print('error type: ', err)

    return "answer not found"

In [7]:
# Smoke test: run one sample query through the SerpAPI-backed helper.
google_lookup("financial risk in banking")

'These risks are: Credit, Interest Rate, Liquidity, Price, Foreign Exchange, Transaction, Compliance, Strategic and Reputation. These categories are not mutually exclusive; any product or service may expose the bank to multiple risks.'

In [11]:
from utils.utils import pdf_loader,pdf_2_chunks,pinecone_init,load_chunks_2_pinecone,delete_pinecone_index,pinecone_status,query_pinecone
# Initialise the handle to the Pinecone index queried by the document lookup.
# embed_dimension=1536 matches the dimension reported in the index status output.
index = pinecone_init(
    key_dict["PINECONE_API_KEY"],
    key_dict["PINECONE_API_ENV"],
    index_name="langchain-pdf-qna",
    distance_metric="cosine",
    embed_dimension=1536,
)

def document_pinecone_lookup_hardcoded(query: str) -> dict:
    """Answer `query` against the "langchain-pdf-qna" Pinecone index.

    Every retrieval setting is fixed inside this function (index name, top-3
    retrieval, 'text-embedding-ada-002' embeddings, "gpt-3.5-turbo" at
    temperature 0.0, "stuff" chain type), so it can be handed to an agent as a
    single-argument tool.

    Args:
        query: Natural-language question to ask about the indexed documents.

    Returns:
        The value returned by `query_pinecone`, unchanged. In this notebook's
        recorded runs that is a dict with 'question', 'answer' and 'sources'
        keys rather than a bare string -- confirm against `utils.utils` if the
        helper changes.
    """
    return query_pinecone(
        index_name="langchain-pdf-qna",
        embed_model_api_key=key_dict["OPENAI_API_KEY"],
        query=query,
        topk=3,
        embed_model_name='text-embedding-ada-002',
        qna_model="gpt-3.5-turbo",
        temperature=0.0,
        chain_type="stuff",
    )


Index status: 

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 39}},
 'total_vector_count': 39}


In [12]:
# Sanity check: ask the Pinecone-backed lookup for the 2021 total net revenue.
document_pinecone_lookup_hardcoded("""what is the total net revenue of jpmorgan chase bank in year 2021""")

{'question': 'what is the total net revenue of jpmorgan chase bank in year 2021',
 'answer': 'The total net revenue of JPMorgan Chase Bank in 2021 was $104,205 million.\n',
 'sources': 'files/JPMorgan_Chase_Bank_Annual_Consolidated_Financial_Statements_onlybalancesheet.pdf'}

In [13]:
# Same question for 2022, to check that the lookup distinguishes between years.
document_pinecone_lookup_hardcoded("""what is the total net revenue of jpmorgan chase bank in year 2022""")

{'question': 'what is the total net revenue of jpmorgan chase bank in year 2022',
 'answer': 'The total net revenue of JPMorgan Chase Bank in year 2022 was $118,977 million.\n',
 'sources': 'files/JPMorgan_Chase_Bank_Annual_Consolidated_Financial_Statements_onlybalancesheet.pdf'}

In [None]:
def qna_agent(question: str, context: str=None) -> str:
    """Answer `question` with a zero-shot ReAct agent that has two tools.

    The agent may call the Pinecone-backed document lookup ("database lookup")
    and the SerpAPI web search ("web search") while reasoning.

    Args:
        question: The question to answer.
        context: Optional extra context to embed in the prompt. Any falsy value
            (None or an empty string) selects the question-only prompt.

    Returns:
        The output of `agent.run` for the rendered prompt. The prompt instructs
        the model to reply "not found" when it cannot find an answer.
    """
    llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=key_dict["OPENAI_API_KEY"])

    # Choose the template and its variables together so the prompt is built
    # in exactly one place further down.
    if context:
        # TODO: perhaps explicitly mention that the agent should use the embedding database.
        # TODO: consider adding a persona, e.g. "you are an analyst working in a bank".
        # TODO: supply the context ourselves, either by sending documents, a summary of
        #       them, or the answer obtained from Pinecone.
        template = """Given the question {question} and the context {context} I want you to answer the question.
                      Your answer should be a string. If you cannot find the answer return "not found". """
        prompt_inputs = {"question": question, "context": context}
    else:
        template = """Given the question {question}, I want you to answer the question.
                      Your answer should be a string. If you cannot find the answer return "not found". """
        prompt_inputs = {"question": question}

    tools_for_agent = [
        Tool(
            name="database lookup",
            func=document_pinecone_lookup_hardcoded,
            # TODO: maybe have the tool return an answer / no-answer instead of raw documents.
            description="queries our embedding database in order to find the documents most related to our question",
        ),
        Tool(
            name="web search",
            func=google_lookup,
            description="allows you to search the web, it can be used in order to answer a question or gain context about a question or terminology",
        ),
    ]

    # verbose=True prints the agent's reasoning and the sub-tasks it performs.
    agent = initialize_agent(tools_for_agent, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

    prompt_template = PromptTemplate(input_variables=list(prompt_inputs), template=template)

    # Use .format(), which returns a plain string. The previous .format_prompt()
    # returns a PromptValue object, so the agent received that object as its
    # input rather than the rendered prompt text.
    return agent.run(prompt_template.format(**prompt_inputs))