# 1. Install Necessary Libraries

In [1]:
!pip install langchain
!pip install pypdf
!pip install openai
!pip install faiss-cpu
!pip install tiktoken
!pip install rouge-score

Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.1.0-py3-none-any.whl (297 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.1.0
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1
Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K 

In [3]:
!pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.8 (from langchain-community)
  Downloading langchain-0.3.9-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.21 (from langchain-community)
  Downloading langchain_core-0.3.21-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.6.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.23.1-py3-none-any.whl.metadata (7.5 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-jso

# 2: Load necessary libraries

In [None]:

from pypdf import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from rouge_score import rouge_scorer


# 3. Load and read PDF

In [None]:
pdf_path = 'Part3.pdf'

# Read PDF content
pdf_reader = PdfReader(pdf_path)
text = ""
for page in pdf_reader.pages:
    text += page.extract_text()

print("PDF content loaded.")


PDF content loaded.


# 4: Split text into chunks

In [None]:
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=300,
    chunk_overlap=50,
    length_function=len
)
chunks = text_splitter.split_text(text)

print(f"Split PDF content into {len(chunks)} chunks.")
chunks

Split PDF content into 105 chunks.


['NEWS RELEASE\nMeta Reports Fourth Quarter and Full Year 2023Results; Initiates Quarterly Dividend\nMENLO PARK, Calif., Feb. 1, 2024 /PRNewswire/ -- Meta Platforms, Inc. (Nasdaq: META) today reported \x00nancial\nresults for the quarter and full year ended December 31, 2023.',
 '"We had a good quarter as our community and business continue to grow," said Mark Zuckerberg, Meta founder\nand CEO. "We\'ve made a lot of progress on our vision for advancing AI and the metaverse."\xa0\n\xa0\nFourth Quarter and Full Year 2023 Financial Highlights\n Three Months Ended December 31, \n\xa0% Change',
 'Three Months Ended December 31, \n\xa0% Change\n Twelve Months Ended December 31, \n% ChangeIn millions, except percentages and per\xa0share amounts \xa0 \xa0\xa02023 2022  2023 2022  \nRevenue $ 40,111 $ 32,165 25\xa0% $ 134,902 $ 116,609 16\xa0%\nCosts and expenses 23,727  25,766 (8)\xa0%  88,151  87,665 1\xa0%',
 'Income from operations$ 16,384 $ 6,399 156\xa0% $ 46,751 $ 28,944 62\xa0%\nOperati

# 5: Create embeddings and FAISS retriever with API key as a variable


In [None]:

embeddings = OpenAIEmbeddings(openai_api_key='Your Key here')
knowledge_base = FAISS.from_texts(chunks, embeddings)
retriever = knowledge_base.as_retriever(search_type="similarity", search_kwargs={"k": 5})

print("FAISS retriever created.")



FAISS retriever created.


In [55]:
from langchain.chat_models import ChatOpenAI


# 6: Initialize RAG model


In [None]:
llm = ChatOpenAI(model="gpt-4",temperature=0,openai_api_key='Your Key Here')
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use the context below to answer the question concisely and with detailed accuracy.\n\n"
        "Context:\n{context}\n\n"
        "Question:\n{question}\n\n"
        "Answer (be specific and quantitative):"
    )
)


qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_template}
)

print("RAG model initialized.")


RAG model initialized.


# 7: Prompt user for input and generate response


In [None]:
user_question = "How did Meta’s workforce change by the end of 2023?"

if user_question:
    result = qa_chain({"query": user_question})
    response = result["result"]  # Extract the generated response
    source_documents = result["source_documents"]  # Retrieved chunks

    print(f"\n**Response:** {response}")
    print("\n**Source Documents:**")
    for doc in source_documents:
        print(doc.page_content)



**Response:** Meta's workforce decreased by 22% by the end of 2023, with a headcount of 67,317.

**Source Documents:**
NEWS RELEASE
Meta Reports Fourth Quarter and Full Year 2023Results; Initiates Quarterly Dividend
MENLO PARK, Calif., Feb. 1, 2024 /PRNewswire/ -- Meta Platforms, Inc. (Nasdaq: META) today reported  nancial
results for the quarter and full year ended December 31, 2023.
look to build on our progress in each of those areas in 2024 while advancing our ambitious, longer-term e orts in
AI and Reality Labs.
Webcast and Conference Call Information
Meta will host a conference call to discuss the results at 1:30 p.m. PT / 4:30 p.m. ET today. The live webcast of
Headcount – Headcount was 67,317 as of December 31, 2023, a decrease of 22% year-over-year.
Meta Initiates Quarterly Dividend
Today, Meta's board of directors declared a cash dividend of $0.50 per share of our outstanding common stock
View original content to download multimedia:https://www.prnewswire.com/news-releases/m

# 8: Evaluate response with ROUGE


In [None]:
reference_answer = "By the end of 2023, Meta had 67,317 employees. In 2022, Meta had 87,314 employees, so Meta had a 22.9% decrease in 2023 compared to 2022."

if reference_answer:
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference_answer, response)

    print("\n**ROUGE Scores:**")
    print(f"ROUGE-1: {scores['rouge1']}")
    print(f"ROUGE-2: {scores['rouge2']}")
    print(f"ROUGE-L: {scores['rougeL']}")

    # Log metrics for improvement
    with open("rag_metrics_log.txt", "a") as log_file:
        log_file.write(f"User Question: {user_question}\n")
        log_file.write(f"Reference Answer: {reference_answer}\n")
        log_file.write(f"Response: {response}\n")
        log_file.write(f"ROUGE-1: {scores['rouge1']}\n")
        log_file.write(f"ROUGE-2: {scores['rouge2']}\n")
        log_file.write(f"ROUGE-L: {scores['rougeL']}\n")
        log_file.write("\n" + "-"*50 + "\n")

    print("Metrics logged.")



**ROUGE Scores:**
ROUGE-1: Score(precision=0.6470588235294118, recall=0.3793103448275862, fmeasure=0.4782608695652174)
ROUGE-2: Score(precision=0.3125, recall=0.17857142857142858, fmeasure=0.22727272727272727)
ROUGE-L: Score(precision=0.4117647058823529, recall=0.2413793103448276, fmeasure=0.3043478260869565)
Metrics logged.


## Question 2

In [48]:
user_question2 = "What is the report quarter, and when did it end?"

if user_question2:
    result = qa_chain({"query": user_question2})
    response = result["result"]
    source_documents = result["source_documents"]

    print(f"\n**Response:** {response}")
    print("\n**Source Documents:**")


**Response:** 
The report quarter is the fourth quarter of 2023, which ended on December 31, 2023.

**Source Documents:**


In [49]:
# Chunk 8: Evaluate response with ROUGE
reference_answer2 = "The reported quarter is the fourth quarter of 2023, The quarter ended on December 31, 2023."

if reference_answer2:
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference_answer2, response)

    print("\n**ROUGE Scores:**")
    print(f"ROUGE-1: {scores['rouge1']}")
    print(f"ROUGE-2: {scores['rouge2']}")
    print(f"ROUGE-L: {scores['rougeL']}")

    # Log metrics for improvement
    with open("rag_metrics_log.txt", "a") as log_file:
        log_file.write(f"User Question: {user_question}\n")
        log_file.write(f"Reference Answer: {reference_answer2}\n")
        log_file.write(f"Response: {response}\n")
        log_file.write(f"ROUGE-1: {scores['rouge1']}\n")
        log_file.write(f"ROUGE-2: {scores['rouge2']}\n")
        log_file.write(f"ROUGE-L: {scores['rougeL']}\n")
        log_file.write("\n" + "-"*50 + "\n")

    print("Metrics logged.")



**ROUGE Scores:**
ROUGE-1: Score(precision=0.9333333333333333, recall=0.875, fmeasure=0.9032258064516129)
ROUGE-2: Score(precision=0.8571428571428571, recall=0.8, fmeasure=0.8275862068965518)
ROUGE-L: Score(precision=0.9333333333333333, recall=0.875, fmeasure=0.9032258064516129)
Metrics logged.


# Array of questions


In [None]:
questions = [
    "What were the key financial highlights this quarter (revenue, gross margin, operating expenses, operating margin, net income, and EPS)?",
    "How much did Meta spend on restructuring for the whole year and Q4?",
    "What happened with Meta’s ad impressions and average price per ad in Q4 and for the whole year?",
    "What’s the revenue outlook for Q1 2024?",
    "What were Meta’s total costs and expenses for Q4 and the full year 2023?",
    "How much cash and marketable securities did Meta have on hand as of December 31, 2023?",
    "What were the main areas Meta invested in during 2023?",
    "How did the Family of Apps and Reality Labs perform in Q4 2023?",
    "How much free cash flow did Meta generate in Q4 and the full year 2023?",
    "Did Meta make any changes to its stock repurchase program or dividends for 2024?",
    "What risks did Meta highlight for 2024?",
    "What drove Meta’s revenue growth in Q4 2023?",
    "How did Reality Labs perform throughout 2023, and what’s Meta’s plan for 2024?"
]

# Process each question
for i, question in enumerate(questions, 1):
    print(f"Question {i}: {question}")
    result = qa_chain({"query": question})
    response = result["result"]
    print(f"Response: {response}\n")


Question 1: What were the key financial highlights this quarter (revenue, gross margin, operating expenses, operating margin, net income, and EPS)?
Response: 
The key financial highlights for this quarter were:

1. Revenue: $16.38 billion, an increase of 156% compared to the same period last year.

2. Gross margin: 41%, an increase of 21 percentage points compared to the same period last year.

3. Operating expenses: $23.73 billion, an increase of 22% compared to the same period last year.

4. Operating margin: 41%, an increase of 21 percentage points compared to the same period last year.

5. Net income: $14.02 billion, an increase of 201% compared to the same period last year.

6. EPS: $4.65, an increase of 201% compared to the same period last year.

Question 2: How much did Meta spend on restructuring for the whole year and Q4?
Response: 
Meta spent $3.45 billion on restructuring charges for the full year 2023 and $1.15 billion for the fourth quarter of 2023.

Question 3: What happ

# Results

### Question 1: 
1. ROUGE-1: Score(precision=0.6470588235294118, recall=0.3793103448275862, fmeasure=0.4782608695652174)
2. ROUGE-2: Score(precision=0.3125, recall=0.17857142857142858, fmeasure=0.22727272727272727)
3. ROUGE-L: Score(precision=0.4117647058823529, recall=0.2413793103448276, fmeasure=0.3043478260869565)
### Question 2:
1. ROUGE-1: Score(precision=0.9333333333333333, recall=0.875, fmeasure=0.9032258064516129)
2. ROUGE-2: Score(precision=0.8571428571428571, recall=0.8, fmeasure=0.8275862068965518)
3. ROUGE-L: Score(precision=0.9333333333333333, recall=0.875, fmeasure=0.9032258064516129)