In [13]:
import llama_cpp
print(llama_cpp.llama_backend()) 

AttributeError: module 'llama_cpp' has no attribute 'llama_backend'

In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import TokenTextSplitter
from config import DOCUMENT_PATH, EMBEDDING_MODEL_NAME, EMBEDDING_MODEL_PATH, TOP_K
from llm_loader import load_llm
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.memory import ChatMemoryBuffer

from llama_index.core import PromptTemplate
from llama_index.core.chat_engine import CondenseQuestionChatEngine
from llama_index.core.llms import ChatMessage, MessageRole

import chromadb
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore


In [2]:
llm = load_llm()
Settings.llm = llm  # Assign model globally in LlamaIndex

# Load embedding model from local storage
# Settings.embed_model = HuggingFaceEmbedding(
#     model_name=EMBEDDING_MODEL_NAME,
#     cache_folder=EMBEDDING_MODEL_PATH
# )

Settings.embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./embedding_cache"
)


llama_model_load_from_file_impl: using device Metal (Apple M3) - 10922 MiB free
llama_model_loader: loaded meta data with 33 key-value pairs and 292 tensors from ./quantized_model/Llama-3.1-8B-Instruct-Q5KM.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.1 8B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Llama-3.1
llama_model_loader: - kv   5:                         general.size_label str              = 8B
llama_model_loader: - kv   6:                          

In [3]:
print(vars(Settings.embed_model))


{'model_name': 'sentence-transformers/all-MiniLM-L6-v2', 'embed_batch_size': 10, 'callback_manager': <llama_index.core.callbacks.base.CallbackManager object at 0x1150f8400>, 'num_workers': None, 'max_length': 256, 'normalize': True, 'query_instruction': None, 'text_instruction': None, 'cache_folder': None}


## Save into Chroma DB

In [4]:
## Setting up ChromaDB with Persistent Storage
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection(name="financial_reports_2")


In [5]:
# Load and index documents
documents = SimpleDirectoryReader(DOCUMENT_PATH).load_data()


splitter = SentenceSplitter(
    chunk_size=1024,
    chunk_overlap=50,
)
nodes = splitter.get_nodes_from_documents(documents)

for i, node in enumerate(nodes):
    text = node.get_text()
    embedding = Settings.embed_model.get_text_embedding(text)
    
    chroma_collection.add(ids=[f"doc_{i}"], documents=[text], embeddings=[embedding])
print(f"Stored {len(nodes)} document chunks in ChromaDB.")


Stored 12 document chunks in ChromaDB.


## Load From Chroma DB

In [3]:
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection("financial_reports_2")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder="./embedding_cache"
)

index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context
)

In [7]:
# Define a test question
query = "What is Google Cloud Revenue change like?"

# ✅ Retrieve relevant document chunks
retriever = index.as_retriever(similarity_top_k=3)  # Adjust similarity_top_k as needed
retrieved_docs = retriever.retrieve(query)

# ✅ Print retrieved chunks
print("\n🔍 **Retrieved Chunks:**")
for i, doc in enumerate(retrieved_docs):
    print(f"\n--- Chunk {i+1} ---\n{doc.text}\n")



🔍 **Retrieved Chunks:**

--- Chunk 1 ---
Segment Results
The following table presents our segment revenues and operating income (loss) (in millions; unaudited):
Quarter Ended December 31,
2023 2024
Revenues:
Google Services $ 76,311 $ 84,094 
Google Cloud  9,192  11,955 
Other Bets  657  400 
Hedging gains (losses)  150  20 
Total revenues $ 86,310 $ 96,469 
Operating income (loss):
Google Services $ 26,730 $ 32,836 
Google Cloud  864  2,093 
Other Bets  (863)  (1,174) 
Alphabet-level activities  (3,034)  (2,783) 
Total income from operations $ 23,697 $ 30,972 
We report our segment results as Google Services, Google Cloud, and Other Bets:
• Google Services includes products and services such as ads, Android, Chrome, devices, Google Maps, 
Google Play, Search, and YouTube. Google Services generates revenues primarily from advertising; fees 
received for consumer subscription-based products such as YouTube TV, YouTube Music and Premium, 
and NFL Sunday Ticket, as well as Google One; th

In [16]:

custom_prompt = PromptTemplate(
    """\
Rewrite the user's follow-up question as a standalone question.

1. Include all relevant past context.
2. Keep it natural and grammatically correct.
3. If already standalone, return it unchanged.

<Chat History>
{chat_history}

<User's Follow-Up Question>
{question}

<Rewritten Standalone Question>
"""
)


response_prompt = PromptTemplate(
    """\
You are an AI assistant providing structured responses.

### **Instructions:**
- Answer clearly and concisely.
- Summarize retrieved context to avoid duplication.
- Summarize the key facts efficiently.
- If the context lacks enough details, say: "I don’t have enough information."
- Format responses in natural sentences.

<Retrieved Context>
{context}

<User's Query>
{question}

### **AI Response:**
"""
)


query_engine = index.as_query_engine(
    response_mode="compact",
    response_prompt=response_prompt,
    similarity_top_k=1,
    max_tokens = 300,
    streaming=False
)

print("✅ Query engine initialized successfully!")


✅ Query engine initialized successfully!


In [17]:
question = "What is Google Cloud's revenue change like?"

# ✅ Directly get the AI-generated response
response = query_engine.query(question)

# ✅ Print only the final response
print("\n🤖 **AI Response:**\n", response)

Llama.generate: 520 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_context_print:        load time =   13954.12 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =   79804.68 ms /  1024 runs   (   77.93 ms per token,    12.83 tokens per second)
llama_perf_context_print:       total time =   80489.36 ms /  1025 tokens



🤖 **AI Response:**
  Google Cloud's revenue increased from $9,192 million in 2023 to $11,955 million in 2024, showing a growth of $2,763 million or 30% year-over-year. 
This indicates that Google Cloud's revenue is expanding, and the segment is contributing more to Alphabet's overall revenue. The growth rate is significant, suggesting that Google Cloud is gaining traction and increasing its market share. This is a positive trend for Alphabet and its investors. 
The growth in Google Cloud's revenue can be attributed to the increasing demand for cloud computing services, the expansion of Alphabet's enterprise customer base, and the company's efforts to enhance its cloud offerings. As a result, Google Cloud is becoming a more significant contributor to Alphabet's revenue and profitability. 
This development has implications for Alphabet's business strategy, as the company may need to invest more in Google Cloud's infrastructure and talent to sustain its growth. Additionally, Alphabet may

In [19]:
import IPython.display as display
formatted_response = f"""
## 🤖 AI Response:
{response}
"""

# ✅ Display the formatted response in Jupyter Notebook
display.Markdown(formatted_response)


## 🤖 AI Response:
 Google Cloud's revenue increased from $9,192 million in 2023 to $11,955 million in 2024, showing a growth of $2,763 million or 30% year-over-year. 
This indicates that Google Cloud's revenue is expanding, and the segment is contributing more to Alphabet's overall revenue. The growth rate is significant, suggesting that Google Cloud is gaining traction and increasing its market share. This is a positive trend for Alphabet and its investors. 
The growth in Google Cloud's revenue can be attributed to the increasing demand for cloud computing services, the expansion of Alphabet's enterprise customer base, and the company's efforts to enhance its cloud offerings. As a result, Google Cloud is becoming a more significant contributor to Alphabet's revenue and profitability. 
This development has implications for Alphabet's business strategy, as the company may need to invest more in Google Cloud's infrastructure and talent to sustain its growth. Additionally, Alphabet may need to consider how to leverage Google Cloud's strengths to drive growth in other areas of its business. 
Overall, the growth in Google Cloud's revenue is a positive development for Alphabet and its investors, and it highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The growth in Google Cloud's revenue is a positive trend for Alphabet and its investors.  It highlights the company's increasing focus on cloud computing and enterprise services.  The growth rate of 30% is a significant increase and indicates that Google Cloud is gaining traction in the market.  This growth is a result of the increasing demand for cloud computing services and Alphabet's efforts to enhance its cloud offerings.  The


In [5]:
memory = ChatMemoryBuffer.from_defaults(token_limit=1024)  

In [6]:


chat_engine = CondenseQuestionChatEngine.from_defaults(
    query_engine=query_engine,
    memory=memory,
    #condense_question_prompt=custom_prompt,
    verbose=False,
)


print("✅ Chat engine initialized successfully!")


✅ Chat engine initialized successfully!


In [7]:
question = "What is Google Cloud Revenue change like?"


import time
start_time = time.time()
response = chat_engine.chat(question)
end_time = time.time()

print(response)
print(f"\n⏳ Response Time: {end_time - start_time:.2f} seconds")

llama_perf_context_print:        load time =   13954.12 ms
llama_perf_context_print: prompt eval time =   13953.47 ms /  1781 tokens (    7.83 ms per token,   127.64 tokens per second)
llama_perf_context_print:        eval time =   83156.27 ms /  1023 runs   (   81.29 ms per token,    12.30 tokens per second)
llama_perf_context_print:       total time =   97794.40 ms /  2804 tokens


30% increase in Google Cloud Revenue to $12.0 billion in Q4 2024 compared to the same period in 2023. This growth is led by expansion in Google Cloud Platform (GCP) across core GCP products, AI Infrastructure, and Generative AI Solutions. The revenue run rate for Cloud and YouTube at the end of 2024 is $110 billion. 
Note: The original query was not provided, but based on the context, it seems like the query was related to Google Cloud Revenue. The answer has been provided based on the information available in the context. 

Please let me know if you want me to rephrase or provide more information. 

Also, if you want me to answer any other question based on the context, feel free to ask. 

I will be happy to help. 

Best regards,
[Your Name] 

I hope this helps! Please let me know if you have any further questions. 

Best regards,
[Your Name] 

Please let me know if you need any further assistance. 

Best regards,
[Your Name] 

I hope this helps! Please let me know if you have any fur

In [13]:
question = "How about other product lines?"


import time
start_time = time.time()
response = chat_engine.chat(question)
end_time = time.time()
print("\n🧠 AI Response:\n")
print(response)
print(f"\n⏳ Response Time: {end_time - start_time:.2f} seconds")

Llama.generate: 1 prefix-match hit, remaining 580 prompt tokens to eval
llama_perf_context_print:        load time =   22100.88 ms
llama_perf_context_print: prompt eval time =    7188.48 ms /   580 tokens (   12.39 ms per token,    80.68 tokens per second)
llama_perf_context_print:        eval time =   37449.29 ms /   511 runs   (   73.29 ms per token,    13.65 tokens per second)
llama_perf_context_print:       total time =   44900.50 ms /  1091 tokens
Llama.generate: 1 prefix-match hit, remaining 2285 prompt tokens to eval
llama_perf_context_print:        load time =   22100.88 ms
llama_perf_context_print: prompt eval time =   15713.03 ms /  2285 tokens (    6.88 ms per token,   145.42 tokens per second)
llama_perf_context_print:        eval time =   41892.66 ms /   511 runs   (   81.98 ms per token,    12.20 tokens per second)
llama_perf_context_print:       total time =   57916.07 ms /  2796 tokens



🧠 AI Response:

 The performance of other product lines in Google Cloud besides Google Cloud Platform (GCP) and AI Infrastructure is not explicitly mentioned in the provided context. However, it can be inferred that the growth in Google Cloud is led by GCP across core GCP products, AI Infrastructure, and Generative AI Solutions, and that the YouTube exited 2024 at an annual revenue run rate of $110 billion, which is a significant contributor to the growth of Google Cloud. The Gemini app team that is developing the direct consumer interface to our Gemini models joined Google DeepMind in the quarter ended December 31, 2024, but the costs associated with the Gemini app team continue to be reported within our Google Services segment. Therefore, the performance of other product lines in Google Cloud besides GCP and AI Infrastructure is not explicitly mentioned in the provided context. 

However, the model can generate a question that captures all the relevant context from the conversation,

In [15]:
import json
memory_out = memory.to_string()
formatted_json = json.loads(memory_out)

In [16]:
print(json.dumps(formatted_json, indent=4)) 

{
    "chat_store": {
        "store": {
            "chat_history": [
                {
                    "role": "user",
                    "additional_kwargs": {},
                    "blocks": [
                        {
                            "block_type": "text",
                            "text": "How is the performance of Google Cloud?"
                        }
                    ]
                },
                {
                    "role": "assistant",
                    "additional_kwargs": {},
                    "blocks": [
                        {
                            "block_type": "text",
                            "text": " Google Cloud revenues increased 30% to $12.0 billion led by growth in Google Cloud Platform (GCP) across core GCP products, AI Infrastructure, and Generative AI Solutions. Total operating income increased 31% and operating margin expanded by 5% percentage points to 32%. Net income increased 28% and EPS increased 31% to $2.15.