In [1]:
import os
from dotenv import load_dotenv

import pandas as pd
from langchain.chat_models import init_chat_model
from langchain.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_huggingface import HuggingFaceEmbeddings

load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# GROQ API Key
# Read the key from the process environment (the first cell calls load_dotenv()
# before this runs) and fail fast with an actionable message when it is missing.
# Without this check, assigning None into os.environ raises an opaque TypeError.

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [3]:
# Smoke test: create the chat model from the "groq:qwen/qwen3-32b" identifier
# and send one plain prompt, with no tools and no retrieval involved.
model = init_chat_model("groq:qwen/qwen3-32b")
response = model.invoke("Who acted in The Matrix?")
# Display the raw message object; in the captured output its content begins
# with the model's <think> reasoning text.
response

AIMessage(content="<think>\nOkay, so the user is asking who acted in The Matrix. Let me start by recalling the main cast. Keanu Reeves plays Neo, right? And Laurence Fishburne is Morpheus. Carrot Top is Mr. Smith, I think. Wait, no, Carrot Top is actually the character Mr. Anderson, the leader of the machines, but his name is Smith? Wait, maybe I'm mixing up the names. Let me get that straight. Mr. Anderson is the human character who becomes the leader of the machines. Oh, right, the character is named Smith, but his real name is Thomas Anderson, which is the name Neo starts with. So the actor playing Mr. Smith is Hugo Weaving. Wait, no, that's not right. Wait, Mr. Anderson is the human who becomes the leader of the machines. So the actor is Hugo Weaving? Hmm, maybe I'm getting confused here.\n\nWait, let me check. Neo's real name is Thomas Anderson, and he's played by Keanu Reeves. Then there's the character Mr. Smith, who is the leader of the machines. But the actor who plays Mr. Smi

# Data Ingestion 

In [4]:
# Data Loader
class MovieDataLoader:
    """Turn a raw movie CSV into a one-column CSV of searchable text.

    Each output row holds a single ``combined_info`` string built from the
    movie's title, overview, genres and actors.

    Args:
        original_csv: Path of the raw movie CSV to read.
        processed_csv: Path where the one-column processed CSV is written.
    """

    # Columns concatenated into ``combined_info``; any other columns in the
    # raw file are ignored.
    REQUIRED_COLUMNS = ("title", "overview", "genres", "actors")

    def __init__(self, original_csv: str, processed_csv: str):
        self.original_csv = original_csv
        self.processed_csv = processed_csv

    def load_and_process(self):
        """Read the raw CSV, build ``combined_info`` and write it out.

        Returns:
            The path of the processed CSV (``self.processed_csv``).

        Raises:
            ValueError: If any of the required columns is absent.
        """
        df = pd.read_csv(
            self.original_csv,
            encoding="utf-8",
            on_bad_lines="skip"
        )

        # Validate the schema before touching the data so the error names
        # exactly which columns are missing.
        missing = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise ValueError(
                "Missing required columns in CSV: " + ", ".join(missing)
            )

        # Drop only rows lacking a field we actually use; a blank value in an
        # unrelated column must not discard an otherwise usable movie.
        required = list(self.REQUIRED_COLUMNS)
        text = df.dropna(subset=required)[required].astype(str)

        combined = (
            "Title: " + text["title"]
            + " Overview: " + text["overview"]
            + " Genres: " + text["genres"]
            + " Actors: " + text["actors"]
        )

        pd.DataFrame({"combined_info": combined}).to_csv(
            self.processed_csv,
            index=False,
            encoding="utf-8"
        )

        return self.processed_csv


In [5]:
# Splitting and Storing the data
class VectorStoreBuilder:
    """Build and reopen a persisted Chroma vector store for the movie CSV.

    Args:
        csv_path: Path of the processed CSV to index. Only read by
            ``build_and_save_vectorstore``; ``load_vector_store`` ignores it.
        persist_dir: Directory in which Chroma keeps the collection on disk.
    """

    def __init__(self, csv_path: str, persist_dir: str = "chroma_db"):
        self.csv_path = csv_path
        self.persist_dir = persist_dir
        # The same embedding model is used for indexing and for querying.
        self.embedding = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2"
        )

    def build_and_save_vectorstore(self):
        """Load the CSV, split it into chunks and index them in Chroma.

        Returns:
            The Chroma store that was populated. Earlier versions returned
            nothing, so callers that ignore the result are unaffected.
        """
        loader = CSVLoader(
            file_path=self.csv_path,
            encoding="utf-8",
            metadata_columns=[]
        )

        documents = loader.load()

        # No overlap between chunks; text longer than chunk_size is split.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=0
        )
        chunks = splitter.split_documents(documents)

        # No explicit persist() call: that method is deprecated and emits a
        # warning, because Chroma 0.4+ writes to persist_directory on its own.
        # NOTE(review): re-running this against an existing persist_dir
        # presumably adds the documents again - confirm and clear the
        # directory first if duplicates matter.
        db = Chroma.from_documents(
            chunks,
            self.embedding,
            persist_directory=self.persist_dir
        )
        return db

    def load_vector_store(self):
        """Open the collection stored in ``persist_dir`` without rebuilding it."""
        return Chroma(
            persist_directory=self.persist_dir,
            embedding_function=self.embedding
        )


In [6]:
print("Building vector database...")

# Locations used by the ingestion pipeline.
original_csv = "data/movies.csv"
processed_csv = "data/movies_updated.csv"
persist_dir = "chroma_db"

# Stage 1: flatten the raw movie CSV into the one-column text CSV.
loader = MovieDataLoader(original_csv=original_csv, processed_csv=processed_csv)
processed_csv_path = loader.load_and_process()

# Stage 2: embed the processed rows and store them in the Chroma directory.
vector_builder = VectorStoreBuilder(csv_path=processed_csv_path, persist_dir=persist_dir)
vector_builder.build_and_save_vectorstore()

print("Vector database ready.")

Building vector database...


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Vector database ready.


  db.persist()


In [7]:
# Reopen the persisted store for querying. csv_path is left empty because
# load_vector_store() only uses persist_dir and the embedding model.
# Constructing the builder instantiates the HuggingFace embedding model again.
vector_builder = VectorStoreBuilder(
    csv_path="",
    persist_dir="chroma_db"
)

# Module-level retriever used by movie_retriever_tool below.
retriever = vector_builder.load_vector_store().as_retriever()

  return Chroma(


In [8]:
@tool
def movie_retriever_tool(query: str) -> str:
    """
    Use this tool to search the movie knowledge base.

    Input: User's movie preference or question.

    Output: Relevant movie information retrieved from the vector database.
    """
    # The docstring above is left untouched because @tool uses it as the
    # tool description given to the model.
    matches = retriever.invoke(query)
    # Return the text of each retrieved document, separated by blank lines.
    passages = [match.page_content for match in matches]
    return "\n\n".join(passages)

Initialize Model

In [9]:
# Recreate the chat model (same identifier as the earlier smoke-test cell)
# and bind the retriever tool so the model can request knowledge-base lookups.
model = init_chat_model("groq:qwen/qwen3-32b")
model_with_tools = model.bind_tools([movie_retriever_tool])

In [10]:
messages = [
    {"role": "user", "content": "Can you suggest me some movies like Interstellar?"}
]

# Upper bound on tool-calling rounds so a model that keeps requesting tools
# cannot loop forever.
max_tool_rounds = 3

# Step 1: Model decides tool usage
ai_msg = model_with_tools.invoke(messages)
messages.append(ai_msg)

# Step 2: Execute the requested tools and ask the model again, repeating while
# it still wants tools. If the first reply has no tool calls it is already the
# answer, so no second model call is made.
for _ in range(max_tool_rounds):
    if not ai_msg.tool_calls:
        break
    for tool_call in ai_msg.tool_calls:
        # Invoking the tool with the tool-call payload yields a message that
        # can be appended straight to the conversation.
        tool_result = movie_retriever_tool.invoke(tool_call)
        messages.append(tool_result)
    ai_msg = model_with_tools.invoke(messages)
    messages.append(ai_msg)

# Step 3: Final response
# If the round limit was reached while the model was still requesting tools,
# the printed text may be empty.
final_response = ai_msg
print(final_response.text)

Here are some movies related to **Interstellar** and its themes of space exploration, time, and humanity's place in the cosmos:

---

### 1. **Afterlight**  
   - **Why watch?** Explores fractured timelines, AI, and humanity's struggle for survival across space and time. Perfect if you enjoyed *Interstellar*'s interstellar voyage and existential themes.  
   - **Cast:** Anna Fraser, Ellise Chappell, Linda-Jean Barry.  
   - **Genre:** Sci-Fi, Drama.  

---

### 2. **Don't Look Up**  
   - **Why watch?** A dark comedy about a planet-killing comet (similar to *Interstellar*'s cosmic stakes) and society's apathy. Mixes sci-fi with biting social commentary.  
   - **Cast:** Leonardo DiCaprio, Jennifer Lawrence.  
   - **Genre:** Sci-Fi, Comedy, Drama.  

---

### 3. **In Time**  
   - **Why watch?** Time is currency in this dystopian future, blending *Interstellar*'s time manipulation with a gritty action thriller.  
   - **Cast:** Justin Timberlake, Amanda Seyfried.  
   - **Genre:** Sci-