### RAG Casestudy (CSV)

In [1]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import Qdrant
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA

In [3]:
loader = CSVLoader(file_path='./data/movies.csv',
                   source_column='original_title',
                   encoding='utf-8',
                   csv_args={'delimiter': ',', 'fieldnames': ['id', 'original_language', 'original_title', 'popularity', 'release_date', 'vote_average', 'vote_count', 'genre', 'overview', 'revenue', 'runtime', 'tagline']})

data = loader.load()

In [4]:
print('Loaded %s movies' % len(data))

Loaded 501 movies


In [5]:
openai_api_key = os.environ["OPENAI_API_KEY"]
model_name = "gpt-3.5-turbo-0125"
embedding_model_name = "text-embedding-3-large"

In [6]:
embeddings = OpenAIEmbeddings(
    model=embedding_model_name,
    openai_api_key=openai_api_key
)

In [7]:
llm = ChatOpenAI(
    model=model_name,
    openai_api_key=openai_api_key,
    temperature=0.1,
    max_tokens=1000
)

In [8]:
url = "http://localhost:6333"

In [10]:
qdrant = Qdrant.from_documents(
    data,
    embeddings,
    url=url,
    prefer_grpc=False,
    collection_name="my_movies",
)

In [15]:
vectorstore = qdrant

In [12]:
query = "Can you suggest similar movies to The Matrix?"

In [13]:
query_results = qdrant.similarity_search(query)

In [14]:
for doc in query_results:
    print(doc.metadata['source'])

Source Code
Paycheck
Waking Life
Replicas


In [16]:
index_creator = VectorstoreIndexCreator(embedding=embeddings, vectorstore_cls=Qdrant)
docsearch = index_creator.from_loaders([loader])

In [17]:
chain = RetrievalQA.from_chain_type(llm=llm,
                                    chain_type="stuff",
                                    retriever=docsearch.vectorstore.as_retriever(),
                                    input_key="question",
                                    return_source_documents=True)

In [18]:
query = "Do you have a column called popularity?"
response = chain.invoke({"question": query})

In [19]:
print(response['result'])

Yes, there is a column called "popularity" in the dataset.


In [20]:
print(response['source_documents'])

[Document(metadata={'source': 'original_title', 'row': 0, '_id': '25b47ef1-3c68-42a5-8b20-9c397c4e36d3', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'}, page_content='id: id\noriginal_language: original_language\noriginal_title: original_title\npopularity: popularity\nrelease_date: release_date\nvote_average: vote_average\nvote_count: vote_count\ngenre: genre\noverview: overview\nrevenue: revenue\nruntime: runtime\ntagline: tagline'), Document(metadata={'source': 'Fame', 'row': 321, '_id': 'bbe8712d-c990-44bb-affa-cfd1d4522cc6', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'}, page_content="id: 3537.0\noriginal_language: en\noriginal_title: Fame\npopularity: 7.651\nrelease_date: 1980-05-16\nvote_average: 6.5\nvote_count: 320.0\ngenre: ['Drama', 'Music']\noverview: A chronicle of the lives of several teenagers who attend a New York high school for students gifted in the performing arts.\nrevenue: 42000000.0\nruntime: 134.0\ntagline: If they've really got what it takes, 

In [21]:
query = """If the popularity score is defined as a higher value being a more popular movie,
what is the name of the most popular movie in the data provided?"""

In [22]:
response = chain.invoke({"question": query})

In [23]:
print(response['result'])

The most popular movie in the data provided is "Ben-Hur" with a popularity score of 35.076.


In [24]:
print(response['source_documents'])

[Document(metadata={'source': 'Casablanca', 'row': 483, '_id': 'f5cb5068-147e-434d-8ce4-0c721213c3e4', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'}, page_content="id: 289.0\noriginal_language: en\noriginal_title: Casablanca\npopularity: 22.587\nrelease_date: 1942-11-26\nvote_average: 8.2\nvote_count: 3961.0\ngenre: ['Drama', 'Romance']\noverview: In Casablanca, Morocco in December 1941, a cynical American expatriate meets a former lover, with unforeseen complications.\nrevenue: 10462500.0\nruntime: 102.0\ntagline: They had a date with fate in Casablanca!"), Document(metadata={'source': 'Ben-Hur', 'row': 226, '_id': 'e80bc7f9-9cc0-46c0-ba9d-b584b16f9e38', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'}, page_content="id: 665.0\noriginal_language: en\noriginal_title: Ben-Hur\npopularity: 35.076\nrelease_date: 1959-11-18\nvote_average: 7.8\nvote_count: 1938.0\ngenre: ['Action', 'Drama', 'History']\noverview: In 25 AD,Judah Ben-Hur, a Jew in ancient Judea, opposes the oc

In [32]:
query = """find the spider man movie released in 2002?"""
response = chain.invoke({"question": query})
print(response['result'])
print(response['source_documents'])

The Spider-Man movie released in 2002 is simply titled "Spider-Man."
[Document(metadata={'source': 'Spider-Man: Far From Home', 'row': 353, '_id': '1a9eda05-0384-488d-af90-054baab037ec', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'}, page_content="id: 429617.0\noriginal_language: en\noriginal_title: Spider-Man: Far From Home\npopularity: 273.535\nrelease_date: 2019-06-28\nvote_average: 7.5\nvote_count: 10893.0\ngenre: ['Action', 'Adventure', 'Science Fiction']\noverview: Peter Parker and his friends go on a summer trip to Europe. However, they will hardly be able to rest - Peter will have to agree to help Nick Fury uncover the mystery of creatures that cause natural disasters and destruction throughout the continent.\nrevenue: 1131927996.0\nruntime: 129.0\ntagline: It’s time to step up."), Document(metadata={'source': 'Men in Black II', 'row': 358, '_id': 'a36c125c-24be-4eb3-9e7c-e985aaa43ad1', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'}, page_content="id: 608.0\n

In [34]:
query = """do you know Bangalore?"""
response = chain.invoke({"question": query})
print(response['result'])
print(response['source_documents'])

Yes, Bangalore is a city in southern India and is the capital of the state of Karnataka. It is known for its pleasant climate, vibrant nightlife, and being a hub for technology companies and startups.
[Document(metadata={'source': 'कुछ कुछ होता है', 'row': 409, '_id': '83545ee3-4844-45e2-b03a-346e723ed3f6', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'}, page_content="id: 11854.0\noriginal_language: hi\noriginal_title: कुछ कुछ होता है\npopularity: 9.783\nrelease_date: 1998-10-16\nvote_average: 7.7\nvote_count: 290.0\ngenre: ['Drama', 'Romance']\noverview: Anjali is left heartbroken when her best friend and secret crush, Rahul, falls in love with Tina. Years later, Tina's young daughter tries to fulfil her mother's last wish of uniting Rahul and Anjali.\nrevenue: 15306000.0\nruntime: 185.0\ntagline: Love is Friendship."), Document(metadata={'source': 'Blood Money', 'row': 79, '_id': 'd410638b-a239-4441-816f-368662d63577', '_collection_name': 'd320ed54af5a47d69bc5c57ee63de01b'},