In [8]:
from langchain.utilities import SerpAPIWrapper
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate

from key_storage import key_dict
print(list(key_dict.keys()))

%reload_ext autoreload
%autoreload 2

['OPENAI_API_KEY', 'PINECONE_API_KEY', 'PINECONE_API_ENV', 'PROXYCURL_API_KEY', 'SERPAPI_API_KEY']


In [9]:
def google_lookup(query: str) -> str:
    """Look up query on Google."""
    try:
        search = SerpAPIWrapper(serpapi_api_key=key_dict["SERPAPI_API_KEY"])
        res = search.run(query)
        return res
    
    except ValueError as e:  
        print("SerpApi problem")
        print ('error type: ', e)
        return "answer not found"

In [10]:
google_lookup("financial risk in banking")

'Credit risk, liquidity risk, asset-backed risk, foreign investment risk, equity risk, and currency risk are all common forms of financial risk.'

In [11]:
from utils.utils import pdf_loader,pdf_2_chunks,pinecone_init,load_chunks_2_pinecone,delete_pinecone_index,pinecone_status,query_pinecone
index = pinecone_init(key_dict["PINECONE_API_KEY"],key_dict["PINECONE_API_ENV"],index_name="langchain-pdf-qna",distance_metric="cosine",embed_dimension=1536)

# def document_qna_pinecone_lookup_hardcoded(query: str) -> str: 
#     res = query_pinecone(index_name="langchain-pdf-qna",embed_model_api_key=key_dict["OPENAI_API_KEY"],query=query,topk=5,embed_model_name='text-embedding-ada-002',qna_model="gpt-3.5-turbo",temperature=0.0,chain_type="stuff")

#     return res

def document_qna_pinecone_lookup_hardcoded(query: str) -> str: 
    res = query_pinecone(index_name="langchain-pdf-qna",embed_model_api_key=key_dict["OPENAI_API_KEY"],query=query,topk=5,embed_model_name='text-embedding-ada-002',qna_model="gpt-3.5-turbo",temperature=0.0,chain_type="map_rerank")

    return res



Index status: 

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 39}},
 'total_vector_count': 39}


In [5]:
document_qna_pinecone_lookup_hardcoded("""what is the total net revenue of jpmorgan chase bank in year 2021""")

{'question': 'what is the total net revenue of jpmorgan chase bank in year 2021',
 'answer': '104,205 million',
 'sources': ''}

In [6]:
document_qna_pinecone_lookup_hardcoded("""what is the total net revenue of jpmorgan chase bank in year 2022""")

{'question': 'what is the total net revenue of jpmorgan chase bank in year 2022',
 'answer': '118,977 million',
 'sources': ''}

In [12]:
def qna_agent(question: str, context: str=None) -> str:
    """For the provided question and context, it will try to answer the question"""


    llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_key = key_dict["OPENAI_API_KEY"],)

    if context:
        template = """Given the question {question} and the context {context} I want you to answer the question.
                      Your answer should be a string. If you cannot find the answer return "not found". """ #perhaps explicitely mention  that it should use the embed database
        #you are an analyst working in a bank?
        #provide the context ourselves either via sending documents or via sending a summary of those or the answer to the question via pinecone
    else:
        template = """Given the question {question}, I want you to answer the question.
                      Your answer should be a string. If you cannot find the answer return "not found". """
    
    tools_for_agent = [
        Tool(
            name="database lookup",
            func=document_qna_pinecone_lookup_hardcoded,
            description="queries our embedding database in order to find the documents most related to our question", #maybe have it return an answer or not answer instead of raw documents?
        ),
        Tool(
            name="web search",
            func=google_lookup,
            description="allows you to search the web, it can be used in order to answer a question or gain context about a question or terminology",
        ),
        
        ]

    agent = initialize_agent(tools_for_agent, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True ) #verbose helps us see the reasoning and the subtasks the llm have made      

    if context:

        prompt_template = PromptTemplate(input_variables=["question", "context"], template=template)
        answer = agent.run(prompt_template.format_prompt(question=question, context=context))

    else:
        prompt_template = PromptTemplate(input_variables=["question"], template=template)
        answer = agent.run(prompt_template.format_prompt(question=question))

    return answer

In [None]:
#to do add in the description that it should start with the database and not the web search

In [13]:
qna_agent("""what is the "total net revenue" of jpmorgan chase bank in year 2021""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find information about JPMorgan Chase Bank's net revenue in 2021.
Action: web search
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [33;1m[1;3mJPMorgan Chase revenue for the twelve months ending March 31, 2023 was $177.093B, a 40.67% increase year-over-year. JPMorgan Chase annual revenue for 2022 was $154.792B, a 21.69% increase from 2021. JPMorgan Chase annual revenue for 2021 was $127.202B, a 2.09% decline from 2020.[0m
Thought:[32;1m[1;3mI found the information I needed, but I need to extract the specific net revenue for JPMorgan Chase Bank in 2021.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021', 'answer': '104,205 million', 'sources': ''}[0m
Thought:[32;1m[1;3mI have the specific net revenue for JPMorgan Chase Bank in 2021.
Final Answer: "127.202B"[0m

[1m> Finished cha

'"127.202B"'

In [14]:
#Analysis of Financial results

qna_agent("""You are a financial analyst working in a bank. You are asked to analyze the financial results of jpmorgan chase bank. What is the total net revenue of jpmorgan chase bank in year 2021""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find the total net revenue of JPMorgan Chase Bank in 2021.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021', 'answer': '104,205 million', 'sources': ''}[0m
Thought:[32;1m[1;3mI need to confirm this information with a web search.
Action: web search
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [33;1m[1;3mJPMorgan Chase revenue for the twelve months ending March 31, 2023 was $177.093B, a 40.67% increase year-over-year. JPMorgan Chase annual revenue for 2022 was $154.792B, a 21.69% increase from 2021. JPMorgan Chase annual revenue for 2021 was $127.202B, a 2.09% decline from 2020.[0m
Thought:[32;1m[1;3mI have found multiple sources confirming the net revenue of JPMorgan Chase Bank in 2021.
Final Answer: The total net revenue of JPMorgan Chase Bank in year 2021 was $127.202 billio

'The total net revenue of JPMorgan Chase Bank in year 2021 was $127.202 billion.'

In [15]:
qna_agent("""You are a financial analyst working in a bank. You are asked to analyze the financial results of jpmorgan chase bank. Write an analysis of the financial results of jpmorgan chase bank""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to understand what financial results are and what information is typically included in an analysis.
Action: web search
Action Input: "what is included in a financial analysis"[0m
Observation: [33;1m[1;3mThese factors include a company's overall financial health, analysis of financial statements, the products and services offered, supply and demand, and other …[0m
Thought:[32;1m[1;3mI need to find information specifically about analyzing the financial results of a bank.
Action: web search
Action Input: "how to analyze financial results of a bank"[0m
Observation: [33;1m[1;3mOverall, a careful review of a bank's financial statements can highlight the key factors that should be considered before making an investment ...[0m
Thought:[32;1m[1;3mI should search for information specifically about JPMorgan Chase's financial results.
Action: web search
Action Input: "JPMorgan Chase financial results analysis"[0m
Obse

'JPMorgan Chase Bank, National Association had a total net revenue of $118,977 million in 2022, with investment banking fees of $2,955 million, principal transactions of $17,895 million, and asset management, administration and commissions of $13,935 million. They also had a net interest income of $68,422 million and a provision for credit losses of $6,347 million. Their noninterest expense included compensation expense of $34,000 million, occupancy expense of $4,469 million, and technology, communications and equipment expense of $8,646 million, among others.'

In [16]:
qna_agent("""You are a financial analyst working in a bank. You are asked to answer the following questions: What is the percentage difference between net revenue of 2022 and 2021 of jpmorgan chase bank? What are the reasons that caused this difference? if you don't know the answer, you can search the web for it or say you don't know.""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find information about the net revenue of JPMorgan Chase Bank in 2021 and 2022 and the reasons for any difference.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021 and 2022"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021 and 2022', 'answer': "JPMorgan Chase Bank's net revenue was $104,205 million in 2021 and $118,977 million in 2022.", 'sources': ''}[0m
Thought:[32;1m[1;3mI have the information about the net revenue of JPMorgan Chase Bank in 2021 and 2022. Now I need to find the percentage difference and the reasons for any difference.
Action: calculation
Action Input: (118977-104205)/104205*100[0m
Observation: calculation is not a valid tool, try another one.
Thought:[32;1m[1;3mI need to use a calculator to find the percentage difference.
Action: web search
Action Input: "percentage difference calculator"[0m
Observation: [33;1m[1;3mCalculator Use

'"The percentage difference between JPMorgan Chase Bank\'s net revenue in 2022 and 2021 is approximately 14.1%. The reasons for this difference could not be found."'

In [17]:
qna_agent("""You are a financial analyst working in a bank. You are asked to answer the following questions: What is the percentage difference between net revenue of 2022 and 2021 of jpmorgan chase bank? What are the factors that caused this difference? if you don't know the answer, you can search the web for it or say you don't know.""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find information about the net revenue of JPMorgan Chase Bank in 2021 and 2022 and the factors that caused any difference.
Action: web search
Action Input: "JPMorgan Chase Bank net revenue 2021 2022 difference factors"[0m
Observation: [33;1m[1;3mSelected income statement data. Total net revenue. $ 128,695. $ 121,649. $ 119,951. Total noninterest expense.[0m
Thought:[32;1m[1;3mI need to calculate the percentage difference between the net revenue of JPMorgan Chase Bank in 2022 and 2021.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021 2022"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021 2022', 'answer': "JPMorgan Chase Bank's net revenue was $104,205 million in 2021 and $118,977 million in 2022.", 'sources': ''}[0m
Thought:[32;1m[1;3mI have the necessary information to calculate the percentage difference.
Action: calculation
Action Input: (118977-10

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID ca36250c7677808b2686459e53171a83 in your message.).


[32;1m[1;3mI need to use a calculator to calculate the percentage difference.
Action: web search
Action Input: "percentage difference calculator"[0m
Observation: [33;1m[1;3mCalculator Use. The Percentage Difference Calculator (% difference calculator) will find the percent difference between two positive numbers greater than 0.[0m
Thought:[32;1m[1;3mI can now calculate the percentage difference between the net revenue of JPMorgan Chase Bank in 2022 and 2021 and find the factors that caused any difference.
Final Answer: The percentage difference between the net revenue of JPMorgan Chase Bank in 2022 and 2021 is approximately 14.1%. The factors that caused this difference are not found.[0m

[1m> Finished chain.[0m


'The percentage difference between the net revenue of JPMorgan Chase Bank in 2022 and 2021 is approximately 14.1%. The factors that caused this difference are not found.'

In [19]:
analysis = qna_agent("""You are a financial analyst working in a bank. You are asked to analyze the financial results of jpmorgan chase bank. Write an analysis of the financial results of jpmorgan chase bank""")
analysis



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to understand what financial results are and what information is typically included in an analysis.
Action: web search
Action Input: "what is included in a financial analysis"[0m
Observation: [33;1m[1;3mThese factors include a company's overall financial health, analysis of financial statements, the products and services offered, supply and demand, and other …[0m
Thought:[32;1m[1;3mI need to find information specifically about analyzing the financial results of a bank.
Action: web search
Action Input: "how to analyze financial results of a bank"[0m
Observation: [33;1m[1;3mOverall, a careful review of a bank's financial statements can highlight the key factors that should be considered before making an investment ...[0m
Thought:[32;1m[1;3mI should search for information specifically about JPMorgan Chase's financial results.
Action: web search
Action Input: "JPMorgan Chase financial results analysis"[0m
Obse

'Total net revenue for JPMorgan Chase Bank, National Association in 2022 was $118,977 million, with investment banking fees of $2,955 million, principal transactions of $17,895 million, and asset management, administration and commissions of $13,935 million. Noninterest revenue was $50,555 million and net interest income was $68,422 million. Noninterest expense was $68,736 million, including compensation expense of $34,000 million and occupancy expense of $4,469 million. There was a provision for credit losses of $6,347 million.'

In [21]:
qna_agent(f"""You are a financial analyst working in a bank. You are asked to answer the following questions: What is the percentage difference between net revenue of 2022 and 2021 of jpmorgan chase bank? What are the factors that caused this difference? Those factors could be found in the following analysis {analysis} .If you don't know the answer, you can search the web for it or say you don't know.""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to calculate the percentage difference between net revenue of 2022 and 2021 of JPMorgan Chase Bank and identify the factors that caused this difference.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021 and 2022"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021 and 2022', 'answer': "JPMorgan Chase Bank's net revenue was $104,205 million in 2021 and $118,977 million in 2022.", 'sources': ''}[0m
Thought:[32;1m[1;3mI have the net revenue values for 2021 and 2022, now I need to calculate the percentage difference.
Action: calculation
Action Input: (118977-104205)/104205[0m
Observation: calculation is not a valid tool, try another one.
Thought:[32;1m[1;3mI need to use a calculator to calculate the percentage difference.
Action: web search
Action Input: "percentage difference calculator"[0m
Observation: [33;1m[1;3mCalculator Use. The Percentage Difference Cal

'"not found"'

In [20]:
fctrs = "investment banking fees, principal transactions, and asset management, administration and commissions"
qna_agent(f"""You are a financial analyst working in a bank. You are asked to answer the following questions: What is the percentage difference between net revenue of 2022 and 2021 of jpmorgan chase bank? What are the factors that caused this difference? Those factors could be the following {fctrs} .If you don't know the answer, you can search the web for it or say you don't know.""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find information about the net revenue of JPMorgan Chase Bank for 2021 and 2022, as well as the factors that caused any difference in percentage.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021 and 2022"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021 and 2022', 'answer': "JPMorgan Chase Bank's net revenue was $104,205 million in 2021 and $118,977 million in 2022.", 'sources': ''}[0m
Thought:[32;1m[1;3mI need to calculate the percentage difference between the net revenue of JPMorgan Chase Bank for 2021 and 2022.
Action: web search
Action Input: "how to calculate percentage difference"[0m
Observation: [33;1m[1;3mThe procedure to calculate the percentage difference is given as follows:[0m
Thought:[32;1m[1;3mI need to calculate the percentage difference between the net revenue of JPMorgan Chase Bank for 2021 and 2022 using the formula I found in the 

'The percentage difference between the net revenue of JPMorgan Chase Bank for 2021 and 2022 is approximately 14.1%. The factors that caused this difference include investment banking fees, principal transactions, lending- and deposit-related fees, asset management, administration and commissions, investment securities gains/(losses), mortgage fees and related income, card income, other income, interest income, and interest expense.'

In [None]:
index = pinecone.Index(index_name)
    vectorstore = Pinecone(
        index, embeddings.embed_query,"text" #text correlates to the text key of the metadata, thus it necessary when we send data to the index to have such key value
    )

    #find topk closest documents to our query
    vectorstore.similarity_search(
        query,  # our search query
        k=topk  # return top k most relevant docs
    )


In [25]:
from utils.utils import pdf_loader,pdf_2_chunks,pinecone_init,load_chunks_2_pinecone,delete_pinecone_index,pinecone_status,query_pinecone
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings import OpenAIEmbeddings

index = pinecone_init(key_dict["PINECONE_API_KEY"],key_dict["PINECONE_API_ENV"],index_name="langchain-pdf-qna",distance_metric="cosine",embed_dimension=1536)


def document_pinecone_lookup_hardcoded(query: str) -> str: 
    embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=key_dict["OPENAI_API_KEY"])


    vectorstore = Pinecone(
        index, embeddings.embed_query,"text" #text correlates to the text key of the metadata, thus it necessary when we send data to the index to have such key value
    )
    




Index status: 

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 39}},
 'total_vector_count': 39}


In [26]:
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=key_dict["OPENAI_API_KEY"])


index = pinecone.Index("langchain-pdf-qna")

vectorstore = Pinecone(
        index, embeddings.embed_query,"text" #text correlates to the text key of the metadata, thus it necessary when we send data to the index to have such key value
    )

vectorstore.similarity_search(
        "test test ",  # our search query
        k=5  # return top k most relevant docs
    )

[Document(page_content='JPMorgan Chase Bank, National Association/2022 Consolidated Financial Statements 2', metadata={'chunk': 1.0, 'source': 'files/JPMorgan_Chase_Bank_Annual_Consolidated_Financial_Statements_onlybalancesheet.pdf'}),
 Document(page_content='134 JPMorgan Chase Bank, National Association/2022 Consolidated Financial Statements', metadata={'chunk': 15.0, 'source': 'files/JPMorgan_Chase_Bank_Annual_Consolidated_Financial_Statements_onlybalancesheet.pdf'}),
 Document(page_content='1Q23 Financial Results\nApril 14, 2023', metadata={'chunk': 16.0, 'source': 'files/JPM_FinancialResult.pdf'}),
 Document(page_content='ex. LoansMarket Risk 1Q23\n(4)1,6541,650\n(6)6AOCI Other6 84Q22 RWA 1Q23 Capital\nDistributions7Net \nincome5\n13.8%\n13.2%\nNet share \nrepurchases: (12 bps)74 bps14 bps\n(29 bps)3 bps\n(9 bps)\n3', metadata={'chunk': 21.0, 'source': 'files/JPM_FinancialResult.pdf'}),
 Document(page_content='services 6,983 6,510 5,692 Marketing 3,877 2,936 2,338 Other expense 10,