In [1]:
from dotenv import load_dotenv
import os
import cohere
from qdrant_client import QdrantClient
from langchain_qdrant import QdrantVectorStore
from langchain_cohere import CohereEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
from json import loads
from langchain_core.output_parsers import JsonOutputParser
from typing import Dict, List, Optional, Any, Tuple
from pydantic import BaseModel, Field

In [2]:
load_dotenv()
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
cohere_client = cohere.ClientV2(COHERE_API_KEY)
cohere_model = "embed-english-v3.0"

openai_api_key = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(
    temperature=0, model="gpt-4o", api_key=openai_api_key
)

qdrant_client = QdrantClient(url="http://localhost:6333")
collection_names = [
            "financial_news",
            "earnings_calls",
            "aapl_10k_10q_forms",
        ]

financial_entities: List[Dict] = Field(
        default_factory=list, description="Extracted financial entities"
    )
output_parser = JsonOutputParser(pydantic_object=financial_entities)

In [3]:
query ="""
Provide a summary of the 10K report for Apple in 2024
"""

In [4]:
prompt = ChatPromptTemplate.from_template(
                """You are a financial data assistant that helps determine which data source to query.
                
                Available collections:
                - financial_news: Recent financial news articles
                - aapl_10k_10q_forms: SEC filings including 10-K and 10-Q forms
                - earnings_calls: Transcripts from company earnings calls
                
                User query: {query}
                
                Based on this query, which ONE collection should I search to provide the most relevant information?
                Reply with ONLY ONE of: "financial_news", "aapl_10k_10q_forms", or "earnings_calls".
                """
            )

chain = prompt | llm | StrOutputParser()
collection = chain.invoke({"query": query})

print(collection)

# Validate the collection name
if collection not in collection_names:
    collection = collection_names[0]  # Default to first collection

# Update state
collection_choice = collection

aapl_10k_10q_forms


In [12]:
print(qdrant_client)

<qdrant_client.qdrant_client.QdrantClient object at 0x10eb80d60>


In [13]:
query_embeddings = cohere_client.embed(
                texts=[query],
                model=cohere_model,
                input_type="search_query",
                embedding_types=["float"],
            )

print(query_embeddings)

response = qdrant_client.query_points(
                collection_name=collection_choice,
                query=query_embeddings.embeddings.float_[0],
                limit=10,
                with_payload = True,
                with_vectors = False,
            ).points

print(response)

docs = []
for point in response:
    print(point.payload.keys())
    content = point.payload.get("document", "")
    metadata = {k: v for k, v in point.payload.items() if k not in ["document"]}
    docs.append(Document(page_content=content, metadata=metadata))

context = docs
source_documents = [
            {"content": doc.page_content or "No content available", 
             "metadata": doc.metadata}
            for doc in docs
        ]

for i in source_documents:
    print(i['metadata']['file_name'])

[ScoredPoint(id=232703908057093973, version=1, score=0.65427256, payload={'document': 'basis, for the Company, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index. The graph assumes $100 was invested in each of the Company’s common stock, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index as of the market close on September\xa027, 2019. Past stock price performance is not necessarily indicative of future stock price performance.\nSeptember 2019\nSeptember 2020\nSeptember 2021\nSeptember 2022\nSeptember 2023\nSeptember 2024\nApple Inc.\n$\n100\n$\n207\n$\n273\n$\n281\n$\n322\n$\n430\nS&P 500 Index\n$\n100\n$\n113\n$\n156\n$\n131\n$\n155\n$\n210\nDow Jones U.S. Technology Supersector Index\n$\n100\n$\n146\n$\n216\n$\n156\n$\n215\n$\n322\nItem 6.\xa0\xa0\xa0\xa0[Reserved]\nApple Inc. | 2024 Form 10-K | 20\nItem 7.\xa0\xa0\xa0\xa0Management’s Discussion and Analysis of Financial Condition and Results of Operations\nThe following discussion should be rea

In [6]:
context_text = "\n\n".join([doc.page_content for doc in context])
print(context_text)

basis, for the Company, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index. The graph assumes $100 was invested in each of the Company’s common stock, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index as of the market close on September 27, 2019. Past stock price performance is not necessarily indicative of future stock price performance.
September 2019
September 2020
September 2021
September 2022
September 2023
September 2024
Apple Inc.
$
100
$
207
$
273
$
281
$
322
$
430
S&P 500 Index
$
100
$
113
$
156
$
131
$
155
$
210
Dow Jones U.S. Technology Supersector Index
$
100
$
146
$
216
$
156
$
215
$
322
Item 6.    [Reserved]
Apple Inc. | 2024 Form 10-K | 20
Item 7.    Management’s Discussion and Analysis of Financial Condition and Results of Operations
The following discussion should be read in conjunction with the consolidated financial statements and accompanying notes included in Part II, Item 8 of this Form 10-K. This Item generally discusses 202

In [7]:
context_text = "\n\n".join([doc.page_content for doc in context])

prompt = ChatPromptTemplate.from_template(
    """You are a financial entity extraction specialist.
    
    Extract key financial entities from the following financial text:
    
    {context}
    
    Extract and return a JSON array of objects with the following properties:
    - entity_type: The type of entity (e.g., company, metric, stock_symbol, financial_term, person, date)
    - entity_name: The name of the entity
    - value: Any associated value or metric (if applicable)
    If null, return as "".
    
    Format your response as a valid JSON array, nothing else.
    """
)

chain = prompt | llm | StrOutputParser()
result = chain.invoke({"context": context_text})
entities = output_parser.parse(result)

# Update state
financial_entities = entities

In [8]:
print(financial_entities)

[{'entity_type': 'company', 'entity_name': 'Apple Inc.', 'value': ''}, {'entity_type': 'index', 'entity_name': 'S&P 500 Index', 'value': ''}, {'entity_type': 'index', 'entity_name': 'Dow Jones U.S. Technology Supersector Index', 'value': ''}, {'entity_type': 'date', 'entity_name': 'September 27, 2019', 'value': ''}, {'entity_type': 'date', 'entity_name': 'September 2019', 'value': ''}, {'entity_type': 'date', 'entity_name': 'September 2020', 'value': ''}, {'entity_type': 'date', 'entity_name': 'September 2021', 'value': ''}, {'entity_type': 'date', 'entity_name': 'September 2022', 'value': ''}, {'entity_type': 'date', 'entity_name': 'September 2023', 'value': ''}, {'entity_type': 'date', 'entity_name': 'September 2024', 'value': ''}, {'entity_type': 'metric', 'entity_name': 'Apple Inc. stock price', 'value': '$100, $207, $273, $281, $322, $430'}, {'entity_type': 'metric', 'entity_name': 'S&P 500 Index value', 'value': '$100, $113, $156, $131, $155, $210'}, {'entity_type': 'metric', 'en

In [9]:
context_snippets = [f"Filename {doc.metadata['file_name']}:\n{doc.page_content}\n" 
                           for i, doc in enumerate(context)]

context_snippets

['Filename 000032019324000123-aapl-20240928.txt:\nbasis, for the Company, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index. The graph assumes $100 was invested in each of the Company’s common stock, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index as of the market close on September\xa027, 2019. Past stock price performance is not necessarily indicative of future stock price performance.\nSeptember 2019\nSeptember 2020\nSeptember 2021\nSeptember 2022\nSeptember 2023\nSeptember 2024\nApple Inc.\n$\n100\n$\n207\n$\n273\n$\n281\n$\n322\n$\n430\nS&P 500 Index\n$\n100\n$\n113\n$\n156\n$\n131\n$\n155\n$\n210\nDow Jones U.S. Technology Supersector Index\n$\n100\n$\n146\n$\n216\n$\n156\n$\n215\n$\n322\nItem 6.\xa0\xa0\xa0\xa0[Reserved]\nApple Inc. | 2024 Form 10-K | 20\nItem 7.\xa0\xa0\xa0\xa0Management’s Discussion and Analysis of Financial Condition and Results of Operations\nThe following discussion should be read in conjunction with the consolidate

In [10]:
print(source_documents)

[{'content': 'basis, for the Company, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index. The graph assumes $100 was invested in each of the Company’s common stock, the S&P 500 Index and the Dow Jones U.S. Technology Supersector Index as of the market close on September\xa027, 2019. Past stock price performance is not necessarily indicative of future stock price performance.\nSeptember 2019\nSeptember 2020\nSeptember 2021\nSeptember 2022\nSeptember 2023\nSeptember 2024\nApple Inc.\n$\n100\n$\n207\n$\n273\n$\n281\n$\n322\n$\n430\nS&P 500 Index\n$\n100\n$\n113\n$\n156\n$\n131\n$\n155\n$\n210\nDow Jones U.S. Technology Supersector Index\n$\n100\n$\n146\n$\n216\n$\n156\n$\n215\n$\n322\nItem 6.\xa0\xa0\xa0\xa0[Reserved]\nApple Inc. | 2024 Form 10-K | 20\nItem 7.\xa0\xa0\xa0\xa0Management’s Discussion and Analysis of Financial Condition and Results of Operations\nThe following discussion should be read in conjunction with the consolidated financial statements and accompany

In [11]:
context_snippets = [f"File {doc.metadata['file_name']}:\n{doc.page_content}\n" 
                           for i, doc in enumerate(context)]
        
context_text = "\n".join(context_snippets)

# Format entity information
entity_info = ""
if financial_entities:
    entity_info = "Key entities identified:\n"
    for entity in financial_entities:
        entity_info += f"- {entity.get('entity_name', 'Unknown')} ({entity.get('entity_type', 'Unknown')})"
        if entity.get('value'):
            entity_info += f": {entity.get('value')}"
        if entity.get('sentiment'):
            entity_info += f" [{entity.get('sentiment')}]"
        entity_info += "\n"

prompt = ChatPromptTemplate.from_template(
    """You are a financial analysis assistant that provides accurate information based on the retrieved documents.
    
    User query: {query}

    Collection choice: {collection_choice}
    
    Retrieved information:
    {context}
    
    {entity_info}
    
    Based on the retrieved information, provide a comprehensive response to the user's query.
    Be specific and cite information from the documents where appropriate.
    If the information is not sufficient to answer the query completely, acknowledge the limitations.
    
    Response:
    """
)

chain = prompt | llm | StrOutputParser()
response = chain.invoke({
    "query": query,
    "collection_choice": collection_choice,
    "context": context_text,
    "entity_info": entity_info
})

# Update state
rag_response = response

print(rag_response)

The 2024 10-K report for Apple Inc. provides a detailed overview of the company's financial performance, product announcements, and macroeconomic impacts during the fiscal year. Here are the key highlights:

1. **Financial Performance:**
   - **Net Sales:** Apple reported total net sales of $391.035 billion in 2024, a 2% increase from $383.285 billion in 2023. The Americas and Europe segments saw increases in net sales, while Greater China experienced a decline due to lower iPhone and iPad sales and unfavorable currency exchange rates.
   - **Operating Income:** The total operating income for the year was $93.625 billion, up from $87.332 billion in 2023.
   - **Net Income:** The net income for 2024 was $93.736 billion, slightly down from $96.995 billion in 2023.
   - **Earnings Per Share:** Basic earnings per share were $6.11, and diluted earnings per share were $6.08.
   - **Gross Margin:** The total gross margin was $180.683 billion, with a gross margin percentage of 46.2%. The produ