In [1]:
from typing import Optional

from langchain import PromptTemplate
from langchain.agents import AgentType
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import ChatOpenAI
from langchain.utilities import SerpAPIWrapper

from key_storage import key_dict
print(list(key_dict.keys()))

%reload_ext autoreload
%autoreload 2

['OPENAI_API_KEY', 'PINECONE_API_KEY', 'PINECONE_API_ENV', 'PROXYCURL_API_KEY', 'SERPAPI_API_KEY']


In [2]:
def google_lookup(query: str) -> str:
    """Run a web search for ``query`` through SerpAPI.

    Args:
        query: Free-text search string handed to ``SerpAPIWrapper.run``.

    Returns:
        Whatever ``SerpAPIWrapper.run`` returns for the query, or the literal
        string "answer not found" when a ``ValueError`` is raised while
        building the wrapper or running the search.
    """
    try:
        serp_client = SerpAPIWrapper(serpapi_api_key=key_dict["SERPAPI_API_KEY"])
        return serp_client.run(query)
    except ValueError as err:
        # Report the failure on stdout, then fall through to the sentinel answer.
        print("SerpApi problem")
        print('error type: ', err)

    return "answer not found"

In [3]:
# Smoke-test the web-search helper with a sample query.
google_lookup("financial risk in banking")

'Credit risk, liquidity risk, asset-backed risk, foreign investment risk, equity risk, and currency risk are all common forms of financial risk.'

In [4]:
from utils.utils import pdf_loader,pdf_2_chunks,pinecone_init,load_chunks_2_pinecone,delete_pinecone_index,pinecone_status,query_pinecone
# Set up the "langchain-pdf-qna" Pinecone index (cosine distance, 1536-dimensional vectors)
# and keep whatever pinecone_init returns in `index`.
# NOTE(review): pinecone_init lives in utils.utils, outside this notebook — presumably it
# returns the index handle and prints the status shown below; confirm.
index = pinecone_init(key_dict["PINECONE_API_KEY"],key_dict["PINECONE_API_ENV"],index_name="langchain-pdf-qna",distance_metric="cosine",embed_dimension=1536)

# def document_qna_pinecone_lookup_hardcoded(query: str) -> str: 
#     res = query_pinecone(index_name="langchain-pdf-qna",embed_model_api_key=key_dict["OPENAI_API_KEY"],query=query,topk=5,embed_model_name='text-embedding-ada-002',qna_model="gpt-3.5-turbo",temperature=0.0,chain_type="stuff")

#     return res

def document_qna_pinecone_lookup_hardcoded(query: str, chain_type: str = "map_rerank", topk: int = 5) -> dict:
    """Answer ``query`` from the "langchain-pdf-qna" Pinecone index.

    Thin wrapper around ``query_pinecone`` with the index name, embedding
    model, chat model and temperature fixed for this notebook.

    Args:
        query: Natural-language question to answer from the indexed PDFs.
        chain_type: Passed straight through as ``query_pinecone``'s
            ``chain_type`` (default "map_rerank").
        topk: Passed straight through as ``query_pinecone``'s ``topk``
            (default 5).

    Returns:
        The value returned by ``query_pinecone``. In this notebook's recorded
        outputs that is a dict with 'question', 'answer' and 'sources' keys,
        which is why the annotation is ``dict`` rather than ``str``.
    """
    return query_pinecone(
        index_name="langchain-pdf-qna",
        embed_model_api_key=key_dict["OPENAI_API_KEY"],
        query=query,
        topk=topk,
        embed_model_name='text-embedding-ada-002',
        qna_model="gpt-3.5-turbo",
        temperature=0.0,
        chain_type=chain_type,
    )



  from tqdm.autonotebook import tqdm


Index status: 

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 39}},
 'total_vector_count': 39}


In [5]:
# Ask the document Q&A helper for the 2021 total net revenue.
document_qna_pinecone_lookup_hardcoded("""what is the total net revenue of jpmorgan chase bank in year 2021""")

{'question': 'what is the total net revenue of jpmorgan chase bank in year 2021',
 'answer': '104,205 million',
 'sources': ''}

In [6]:
# Same question for 2022, to check the helper distinguishes the two years.
document_qna_pinecone_lookup_hardcoded("""what is the total net revenue of jpmorgan chase bank in year 2022""")

{'question': 'what is the total net revenue of jpmorgan chase bank in year 2022',
 'answer': '118,977 million',
 'sources': ''}

In [13]:
def qna_agent(question: str, context: Optional[str] = None) -> str:
    """Answer ``question`` with a zero-shot ReAct agent that has two tools.

    The agent is backed by ``gpt-3.5-turbo`` (temperature 0) and may call
    ``document_qna_pinecone_lookup_hardcoded`` ("database lookup") and
    ``google_lookup`` ("web search").

    Args:
        question: The question to answer.
        context: Optional extra text worked into the prompt. ``None`` and the
            empty string both select the question-only prompt.

    Returns:
        Whatever ``agent.run`` returns for the formatted prompt.
    """
    llm = ChatOpenAI(
        temperature=0,
        model_name="gpt-3.5-turbo",
        openai_api_key=key_dict["OPENAI_API_KEY"],
    )

    tools_for_agent = [
        Tool(
            name="database lookup",
            func=document_qna_pinecone_lookup_hardcoded,
            # TODO: maybe have it return an answer / no answer instead of raw documents?
            description="queries our embedding database in order to find the documents most related to our question",
        ),
        Tool(
            name="web search",
            func=google_lookup,
            description="allows you to search the web, it can be used in order to answer a question or gain context about a question or terminology",
        ),
    ]

    # verbose=True prints the agent's reasoning and the sub-tasks it performs.
    agent = initialize_agent(
        tools_for_agent,
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )

    # Built from plain string pieces so the prompt does not pick up the
    # source file's indentation.
    answer_rules = 'Your answer should be a string. If you cannot find the answer return "not found".'

    if context:
        # TODO: perhaps explicitly mention that it should use the embedding database.
        # TODO: add a persona ("you are an analyst working in a bank")?
        # TODO: supply the context ourselves, e.g. documents, a summary of them,
        #       or the Pinecone answer to the question.
        template = (
            "Given the question {question} and the context {context} "
            "I want you to answer the question.\n" + answer_rules
        )
        prompt_values = {"question": question, "context": context}
    else:
        template = (
            "Given the question {question}, I want you to answer the question.\n"
            + answer_rules
        )
        prompt_values = {"question": question}

    prompt_template = PromptTemplate(
        input_variables=list(prompt_values), template=template
    )

    # .format() yields a plain string; .format_prompt() would hand the agent a
    # PromptValue object instead of the prompt text itself.
    return agent.run(prompt_template.format(**prompt_values))

In [15]:
# Run the agent without extra context; the quotes around "total net revenue" are part of the question text.
qna_agent("""what is the "total net revenue" of jpmorgan chase bank in year 2021""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find information about JPMorgan Chase Bank's net revenue in 2021.
Action: web search
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [33;1m[1;3mJPMorgan Chase revenue for the twelve months ending March 31, 2023 was $177.093B, a 40.67% increase year-over-year. JPMorgan Chase annual revenue for 2022 was $154.792B, a 21.69% increase from 2021. JPMorgan Chase annual revenue for 2021 was $127.202B, a 2.09% decline from 2020.[0m
Thought:[32;1m[1;3mI found the information I needed, but I need to extract the specific net revenue for JPMorgan Chase Bank in 2021.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021', 'answer': '104,205 million', 'sources': ''}[0m
Thought:[32;1m[1;3mI have the specific net revenue for JPMorgan Chase Bank in 2021.
Final Answer: "127.202B"[0m

[1m> Finished cha

'"127.202B"'

In [16]:
# Analysis of financial results: the 2021 total-net-revenue question again,
# this time prefixed with a financial-analyst persona.

qna_agent("""You are a financial analyst working in a bank. You are asked to analyze the financial results of jpmorgan chase bank. What is the total net revenue of jpmorgan chase bank in year 2021""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find the total net revenue of JPMorgan Chase Bank in 2021.
Action: database lookup
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [36;1m[1;3m{'question': 'JPMorgan Chase Bank net revenue 2021', 'answer': '104,205 million', 'sources': ''}[0m
Thought:[32;1m[1;3mI need to verify this information with a web search.
Action: web search
Action Input: "JPMorgan Chase Bank net revenue 2021"[0m
Observation: [33;1m[1;3mJPMorgan Chase revenue for the twelve months ending March 31, 2023 was $177.093B, a 40.67% increase year-over-year. JPMorgan Chase annual revenue for 2022 was $154.792B, a 21.69% increase from 2021. JPMorgan Chase annual revenue for 2021 was $127.202B, a 2.09% decline from 2020.[0m
Thought:[32;1m[1;3mI have found the answer to the question.
Final Answer: The total net revenue of JPMorgan Chase Bank in year 2021 was $127.202B.[0m

[1m> Finished chain.[0m


'The total net revenue of JPMorgan Chase Bank in year 2021 was $127.202B.'

In [17]:
# Open-ended request: ask the agent for a written analysis rather than a single figure.
qna_agent("""You are a financial analyst working in a bank. You are asked to analyze the financial results of jpmorgan chase bank. Write an analysis of the financial results of jpmorgan chase bank""")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to gather information about JPMorgan Chase's financial results.
Action: web search
Action Input: "JPMorgan Chase financial results"[0m
Observation: [33;1m[1;3mJPMorgan Chase & Co. is an American multinational financial services firm headquartered in New York City and incorporated in Delaware. It is the largest bank in the United States and the world's largest bank by market capitalization.[0m
Thought:[32;1m[1;3mI need to find more specific information about JPMorgan Chase's financial results.
Action: web search
Action Input: "JPMorgan Chase financial results 2020"[0m
Observation: [33;1m[1;3mJPMorgan Chase annual revenue for 2022 was $154.792B, a 21.69% increase from 2021. JPMorgan Chase annual revenue for 2021 was $127.202B, a 2.09% decline from 2020. JPMorgan Chase annual revenue for 2020 was $129.911B, a 8.84% decline from 2019.[0m
Thought:[32;1m[1;3mI need to analyze JPMorgan Chase's financial results an

'Total net revenue for JPMorgan Chase Bank, National Association in 2022 was $118,977 million, with investment banking fees being the largest revenue source at $2,955 million. The bank also had a net interest income of $68,422 million. Noninterest expense for the year included compensation expense of $34,000 million and occupancy expense of $4,469 million. There was a provision for credit losses of $6,347 million.'

In [None]:
# Scratch copy of the similarity-search steps (this cell has no execution count).
# NOTE(review): `index_name`, `embeddings`, `query` and `topk` are not defined in
# this cell, and `pinecone` / `Pinecone` are only imported in a later cell — all
# of them must already exist in the kernel before this cell can run.
index = pinecone.Index(index_name)
vectorstore = Pinecone(
    index, embeddings.embed_query, "text"  # "text" is the metadata key holding each chunk's text, so data sent to the index must carry that key
)

# find the topk closest documents to our query
vectorstore.similarity_search(
    query,  # our search query
    k=topk  # return top k most relevant docs
)


In [25]:
from utils.utils import pdf_loader,pdf_2_chunks,pinecone_init,load_chunks_2_pinecone,delete_pinecone_index,pinecone_status,query_pinecone
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings import OpenAIEmbeddings

# Same "langchain-pdf-qna" index setup as earlier in the notebook (cosine distance, 1536-dimensional vectors).
# NOTE(review): presumably pinecone_init returns the index handle that the function below reads as `index` — confirm in utils.utils.
index = pinecone_init(key_dict["PINECONE_API_KEY"],key_dict["PINECONE_API_ENV"],index_name="langchain-pdf-qna",distance_metric="cosine",embed_dimension=1536)


def document_pinecone_lookup_hardcoded(query: str, topk: int = 5) -> str:
    """Return the text of the stored chunks most similar to ``query``.

    Embeds the query with OpenAI's "text-embedding-ada-002" model, runs a
    similarity search against the module-level ``index`` and joins the
    ``page_content`` of the hits.

    Args:
        query: Search text to embed and match against the index.
        topk: Number of nearest chunks to request (default 5).

    Returns:
        The ``page_content`` of each hit, in the order returned by the search,
        separated by blank lines. Empty string when there are no hits.
    """
    embeddings = OpenAIEmbeddings(
        model="text-embedding-ada-002",
        openai_api_key=key_dict["OPENAI_API_KEY"],
    )

    # "text" is the metadata key that holds each chunk's text, so vectors sent
    # to the index must carry that key.
    # NOTE(review): relies on the notebook-level `index` being a Pinecone index
    # handle — confirm what pinecone_init returns.
    vectorstore = Pinecone(index, embeddings.embed_query, "text")

    matches = vectorstore.similarity_search(query, k=topk)
    return "\n\n".join(doc.page_content for doc in matches)
    




Index status: 

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 39}},
 'total_vector_count': 39}


In [26]:
# Handle to the Pinecone index named "langchain-pdf-qna".
index = pinecone.Index("langchain-pdf-qna")

# Embedding client used to turn the search text into a vector.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=key_dict["OPENAI_API_KEY"])

# The third argument names the metadata key that holds each chunk's text, so
# data sent to the index must carry a "text" key.
vectorstore = Pinecone(index, embeddings.embed_query, "text")

# Display the 5 stored chunks closest to a throwaway query.
vectorstore.similarity_search("test test ", k=5)

[Document(page_content='JPMorgan Chase Bank, National Association/2022 Consolidated Financial Statements 2', metadata={'chunk': 1.0, 'source': 'files/JPMorgan_Chase_Bank_Annual_Consolidated_Financial_Statements_onlybalancesheet.pdf'}),
 Document(page_content='134 JPMorgan Chase Bank, National Association/2022 Consolidated Financial Statements', metadata={'chunk': 15.0, 'source': 'files/JPMorgan_Chase_Bank_Annual_Consolidated_Financial_Statements_onlybalancesheet.pdf'}),
 Document(page_content='1Q23 Financial Results\nApril 14, 2023', metadata={'chunk': 16.0, 'source': 'files/JPM_FinancialResult.pdf'}),
 Document(page_content='ex. LoansMarket Risk 1Q23\n(4)1,6541,650\n(6)6AOCI Other6 84Q22 RWA 1Q23 Capital\nDistributions7Net \nincome5\n13.8%\n13.2%\nNet share \nrepurchases: (12 bps)74 bps14 bps\n(29 bps)3 bps\n(9 bps)\n3', metadata={'chunk': 21.0, 'source': 'files/JPM_FinancialResult.pdf'}),
 Document(page_content='services 6,983 6,510 5,692 Marketing 3,877 2,936 2,338 Other expense 10,