In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate 

from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_community.tools.tavily_search import TavilySearchResults

from langchain.agents import AgentExecutor, create_tool_calling_agent


from pydantic import BaseModel, Field
from typing import Literal
from langchain.tools import tool


In [4]:
l_llm = {v: ChatOpenAI(model=v) for v in ["gpt-4o","gpt-4o-mini"]} 

In [5]:
llm = l_llm['gpt-4o']
create_prompt = lambda system_prompt : ChatPromptTemplate.from_messages([('system',system_prompt),('human','{input}')])


In [4]:
class primaryClf(BaseModel):
    """ 
    Classify the input into one or more of the categories
    """ 
    company          : str = Field(description = "Company name in the query")
    ticker           : str = Field(description = "Ticker name of the Query")
    information_type : Literal["General", 'Financial','Market'] = Field(description = 'Type of Information Requested.')
    required_data    : Literal["longName", "longBusinessSummary", "sector", "industry", "website", "fax", "phone", "country", "zip", "city", "address2", "address1",
                               'income_stmt', 'balance_sheet', 'cash_flow', 'quarterly_income_stmt', 'quarterly_balance_sheet', 'quarterly_cash_flow',
                               "history"
                               ] \
        = Field(description = 'List of data Required for fulfilling the query. Allowed values given in the prompt')
    start_date       : str = Field(description = 'Start date of the time requested in user query. YYYY-MM-DD format')
    end_date         : str = Field(description = 'End date of the time requested in user query. YYYY-MM-DD format')


In [None]:
class primaryClf(BaseModel):
    """ 
    Classify the input into one or more of the categories
    """ 
    company          : str = Field(description = "Company name in the query")
    ticker           : str = Field(description = "Ticker name of the Query")
    information_type : str = Field(description = 'Type of Information Requested. One of "company", "information_type", and "time_duration"')
    required_data    : list[str] = Field(description = 'List of data Required for fulfilling the query. Allowed values given in the prompt')
    time_duration    : str = Field(description = 'Time requested in user query. Start and end date')


def get_primary_chain():
    

    primary_prompt = """
You are a equity research analyst. The user query will be about some information regarding a company, and you have to help identify keywords from the user query. 
You have to identify the following items
- 1) "company" : Identify the company the user is talking about. Eg "Tata Consultancy Services", "HDFC Bank", "Aditya Birla Fashion" etc
- 2) "ticker" : The NSE ticker of the identified company. Use Tavily Search to confirm. eg. "TCS.NS", "HDFCBANK.NS", "ABFRL.NS"   
- 3) "information_type" : What type of information the user has requested. The value of this key can only be one of the following "General", "Financial", or "Market". The meaning of these terms is as follows 
        "General" information refers to non-numeric information like its adress, contact information, website, industry, or company description
        "Financial" implies information regarding its balance sheet, cashflows, or PnL statement, either annual or quarterly
        "Market" refers to information regarding its share price
- 4) "required_data": Choose the set of data you need access to. This would be a list of relevant data point , conditional on "information_type" extracted above
    - If "information_type"="General", choose all relevant fields from  "longName", "longBusinessSummary", "sector", "industry", "website", "fax", "phone", "country", "zip", "city", "address2", "address1"
    - If "information_type"="Financial", if the query is requesting quarterly data, choose from ['quarterly_income_stmt', 'quarterly_balance_sheet', 'quarterly_cash_flow'] Else, for annual data choose from ['income_stmt', 'balance_sheet', 'cash_flow']
    - If "information_type"="Market", choose "history" for the stock history

- 5) "time_duration" : Is the user talking about particular time-duration. Respond keeping in mind that the date today is 16-Feb-2025 .
     This should expressed be in the form <start date> - <end date>, eg a query talking about this year should be "1-Jan-2025 to 31-12-2025"
     In case of no time mentioned it should be "NA"

Your response has to be a json with these 5 keys - "company", "ticker" "information_type", "required_data" and "time_duration"
Do not have any other information in the response
""".strip()

    tool_search = TavilySearchResults(max_results=2)
    l_tools = [tool_search]
    llm = l_llm['gpt-4o-mini']

    llm_tools = llm.bind_tools(l_tools)
    llm_st = llm_tools.with_structured_output(primaryClf)
    chain  = create_prompt(primary_prompt) | llm_st |  
    return chain



In [45]:

p_chain = get_primary_chain()

query = "How has the earnings of Varun Beverages changed over the list 5 years"
res = p_chain.invoke(query)
print(res)

company='Varun Beverages' ticker='VBL.NS' information_type='Financial' required_data=['income_stmt'] time_duration='1-Jan-2020 to 31-12-2024'


In [None]:
# tem_chain = RunnablePassthrough() | dict
# tem_chain.invoke(res)

{'company': 'Varun Beverages',
 'ticker': 'VBL.NS',
 'information_type': 'General',
 'time_duration': 'NA'}

In [20]:
res.json()

'{"company": "Varun Beverages", "ticker": "VBL.NS", "information_type": "General", "time_duration": "NA"}'

### Trying Lang Graph graphs 

In [62]:
from langgraph.graph import START, END, StateGraph
from langgraph.checkpoint.memory import MemorySaver
from langgraph.checkpoint.sqlite import SqliteSaver

from langchain_core.runnables import RunnablePassthrough, RunnableLambda


In [None]:
from typing import TypedDict, List , Annotated, Dict, Any
from langgraph.graph.message import add_messages
from langchain_core.messages import AIMessage, HumanMessage




In [54]:

# Defining state of Graph

class GraphState(TypedDict):
    """
    Represents the state of the  graph.

    Attributes:
        query: User Query
        company: Name of company in user query
        ticker: Ticker of comapny in user query
        information_type: Type of user query
        required_data: Information required from yfinance API
        time_duration: time_duration extracted from the user query
        generation: LLM generated Response
        messages: List of All Messages

    """

    query: str
    company: str
    ticker:str
    information_type:str
    required_data: List[str]
    time_duration: str
    generation: str
    messages:  Annotated[list, add_messages]


In [61]:
#### Chains
primary_chain = get_primary_chain()

##### Langgraph NODES

In [None]:
def human_node(state: GraphState)-> Dict[str, Any]:
    return {'messages':HumanMessage(state['query'])} 



###  Testing YF Tool Agent

In [None]:
import yfinance as yf

stock = yf.Ticker('ASIANPAINT.NS')




In [None]:
# from datetime import datetime
# datetime.today().strftime('%Y-%m-%d')

'2025-02-19'

In [22]:
str_keys = ['address1', 'address2', 'city', 'zip', 'country', 'phone', 'fax', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'currency', 'lastSplitFactor', 'exchange', 'quoteType', 'symbol', 'underlyingSymbol', 'shortName', 'longName', 'timeZoneFullName', 'timeZoneShortName', 'uuid', 'messageBoardId', 'recommendationKey', 'financialCurrency']

"""
    Fetches information about the ticker mentioned. 
    argument l_fields is the set of relevant fields to chosen from the following list - {%s}
    """%(str(str_keys))


"\n    Fetches information about the ticker mentioned. \n    argument l_fields is the set of relevant fields to chosen from the following list - {['address1', 'address2', 'city', 'zip', 'country', 'phone', 'fax', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'currency', 'lastSplitFactor', 'exchange', 'quoteType', 'symbol', 'underlyingSymbol', 'shortName', 'longName', 'timeZoneFullName', 'timeZoneShortName', 'uuid', 'messageBoardId', 'recommendationKey', 'financialCurrency']}\n    "

In [28]:
str_keys = ['address1', 'address2', 'city', 'zip', 'country', 'phone', 'fax', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'currency', 'lastSplitFactor', 'exchange', 'quoteType', 'symbol', 'underlyingSymbol', 'shortName', 'longName', 'timeZoneFullName', 'timeZoneShortName', 'uuid', 'messageBoardId', 'recommendationKey', 'financialCurrency']


@tool
def gen_info_tool(ticker, l_fields) -> str:
    """
    Fetches information about the ticker mentioned. 
    argument l_fields is the set of relevant fields to chosen from the following list - 
    ['address1', 'address2', 'city', 'zip', 'country', 'phone', 'fax', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'currency', 'lastSplitFactor', 'exchange', 'quoteType', 'symbol', 'underlyingSymbol', 'shortName', 'longName', 'timeZoneFullName', 'timeZoneShortName', 'uuid', 'messageBoardId', 'recommendationKey', 'financialCurrency']    "
    """
    stock = yf.Ticker(ticker)
    d_ret = {}
    for field in l_fields:
        if field in stock.info.keys():
            d_ret[field] = stock.info[field]
    return str(d_ret)

    

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You have to give response to the users query, given the following information"),
        ("ai", "{context}"),
        ("human", "Query : {input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

tools = [gen_info_tool]
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools)

query = "What is the full address of Varun Beverages?"

agent_executor.invoke({"input": query, "context": res.json()})

{'input': 'What is the full address of Varun Beverages?',
 'context': '{"company": "Varun Beverages", "ticker": "VBL.NS", "information_type": "General", "time_duration": "NA"}',
 'output': 'The full address of Varun Beverages is:\n\nRJ Corp House,  \nPlot No. 31 Institutional Area Sector-44,  \nGurugram,  \n122002,  \nIndia.'}

## Testing Pandas DF Agent
 

In [16]:
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType



In [29]:
# df_stock = stock.history()
# df_stock = 
stock.income_stmt

Unnamed: 0,2024-03-31,2023-03-31,2022-03-31,2021-03-31,2020-03-31
Tax Effect Of Unusual Items,560574715.866174,285503565.759567,297421093.817161,308292401.872524,
Tax Rate For Calcs,0.243622,0.262532,0.263368,0.254998,
Normalized EBITDA,81758400000.0,65578900000.0,49730900000.0,50663500000.0,
Total Unusual Items,2301000000.0,1334100000.0,1264000000.0,1209000000.0,
Total Unusual Items Excluding Goodwill,2301000000.0,1087500000.0,1129300000.0,1209000000.0,
Net Income From Continuing Operation Net Minority Interest,54602300000.0,41064500000.0,30305700000.0,31392900000.0,
Reconciled Depreciation,8530000000.0,8580200000.0,8163600000.0,7912700000.0,
Reconciled Cost Of Revenue,201743000000.0,212462900000.0,183775400000.0,121567400000.0,
EBITDA,84059400000.0,66913000000.0,50994900000.0,51872500000.0,
EBIT,75529400000.0,58332800000.0,42831300000.0,43959800000.0,


In [19]:
agent = create_pandas_dataframe_agent(
    llm,
    df_stock,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    allow_dangerous_code=True
)

In [26]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
         You are a equity research analyst given information about the company's stock data. 
         Answer the user's query with the most relevant information a stock trader may be concerned about.
         Try keeping your answer as concise as possible.         
         """),
        ("human", "Query : {input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)
res = agent.invoke("How has the share performed over the last 5 days?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "# We will calculate the percentage change from the first day to the last of the 'Close' prices over the last 5 days to assess the performance.\n\n# Get the first and last close prices in the last 5 days\nfirst_close_price = df['Close'].iloc[0]\nlast_close_price = df['Close'].iloc[4]\n\n# Calculate the change\npercentage_change = ((last_close_price - first_close_price) / first_close_price) * 100\npercentage_change"}`


[0m[36;1m[1;3m-0.795867053814564[0m[32;1m[1;3mOver the last 5 days, the share's closing price has decreased by approximately 0.80%.[0m

[1m> Finished chain.[0m


In [27]:
res

{'input': 'How has the share performed over the last 5 days?',
 'output': "Over the last 5 days, the share's closing price has decreased by approximately 0.80%."}

### Now testing for financial data


In [40]:


agent_fin = create_pandas_dataframe_agent(
    llm,
    stock.income_stmt,
    verbose=False,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    allow_dangerous_code=True

)
prompt_fin = ChatPromptTemplate.from_messages(
    [
        ("system", """
         You are a equity research analyst given information about the company's Financial Statements. 
         Answer the user's query with the most relevant information an investor may be concerned about.
         Quote relevant ratios where required.
         Try keeping your answer concise.
         """),
        ("human", "Query : {input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)


In [41]:
query = "How have earnings performed over the past 3 years?"

chain = prompt_fin | agent_fin 
res = chain.invoke(query)

print(res['output'])

Error in LangChainTracer.on_tool_end callback: TypeError('keys must be str, int, float, bool or None, not Timestamp')
Error in LangChainTracer.on_chain_end callback: TypeError('keys must be str, int, float, bool or None, not Timestamp')
Error in LangChainTracer.on_chain_start callback: TypeError('keys must be str, int, float, bool or None, not Timestamp')


Over the past three years, the company's earnings, as measured by Normalized EBITDA, have shown an upward trend:

- From 2022 to 2023, Normalized EBITDA increased from approximately $49.73 billion to $65.58 billion, representing a growth of around 31.8%.
- From 2023 to 2024, further growth occurred as Normalized EBITDA rose from approximately $65.58 billion to $81.76 billion, resulting in an increase of about 24.6%.

Overall, there has been consistent and substantial growth in earnings over the past three years, indicating strong performance.


In [35]:
print(res["output"])

Over the past three years, the company's earnings, measured by Normalized EBITDA, have shown a strong upward trend:

- FY 2022: $49.73 billion
- FY 2023: $65.58 billion
- FY 2024: $81.76 billion

This indicates robust growth in earnings, increasing by approximately 32% from FY 2022 to FY 2023 and about 25% from FY 2023 to FY 2024. This performance reflects positively on the company's operational profitability.
