In [None]:
# Quietly install the base dependencies; google-generativeai and pandas are
# pinned, while python-dotenv and chromadb take whatever version pip resolves.
!pip install -q python-dotenv google-generativeai==0.7.2 pandas==2.1.4 chromadb

In [None]:
# Install LangChain plus its community, experimental and Google GenAI
# integration packages. NOTE(review): no versions are pinned here, so the
# resolved releases can differ between runs.
!pip install langchain langchain-community langchain-experimental langchain-google-genai

In [None]:
import google.generativeai as genai
from google.colab import userdata

# Read the Gemini API key from the Colab secret named 'GeminiProKey' and
# configure the google.generativeai client with it.
GOOGLE_API_KEY = userdata.get('GeminiProKey')
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
!echo -e 'GOOGLE_API_KEY={YOUR_KEY}' > .env

In [None]:
# Load the key/value pairs from the local .env file into the process
# environment so later clients can read GOOGLE_API_KEY from it.
from dotenv import load_dotenv
load_dotenv()

In [None]:
import pandas as pd

# Load the albums dataset; the bare `df` on the last line renders the whole
# table as the cell output.
df = pd.read_csv('top_albums.csv')
df

Unnamed: 0,Album Name,Artist,Year of Release,Genre,Label,Number of Tracks,Sales (millions),Commentary
0,Abbey Road,The Beatles,1969,Rock,Apple Records,17,31,A timeless classic that showcases The Beatles ...
1,The Dark Side of the Moon,Pink Floyd,1973,Progressive Rock,Harvest Records,10,45,One of the most influential albums of all time...
2,Thriller,Michael Jackson,1982,Pop,Epic Records,9,70,"The best-selling album of all time, blending p..."
3,Rumours,Fleetwood Mac,1977,Rock,Warner Bros. Records,11,40,An emotionally charged record capturing the ba...
4,The Wall,Pink Floyd,1979,Rock,Harvest Records,26,30,A rock opera that explores themes of isolation...
5,Back in Black,AC/DC,1980,Hard Rock,Atlantic Records,10,50,"A tribute to their former lead singer, it beca..."
6,Hotel California,Eagles,1976,Rock,Asylum Records,9,32,"Known for its title track, the album is a defi..."
7,Born to Run,Bruce Springsteen,1975,Rock,Columbia Records,8,6,Springsteen's breakout album with epic tales o...
8,Nevermind,Nirvana,1991,Grunge,DGC Records,12,30,A defining album of the 1990s that brought gru...
9,The Joshua Tree,U2,1987,Rock,Island Records,11,25,A politically charged and spiritually influenc...


In [None]:
import pprint

from langchain_community.document_loaders import CSVLoader

# Load the CSV as LangChain documents (one "column: value" text block per
# album row), then pretty-print the first document to show that layout.
loader = CSVLoader('top_albums.csv')
data = loader.load_and_split()

pprint.pp(data[0].page_content)

('Album Name: Abbey Road\n'
 'Artist: The Beatles\n'
 'Year of Release: 1969\n'
 'Genre: Rock\n'
 'Label: Apple Records\n'
 'Number of Tracks: 17\n'
 'Sales (millions): 31\n'
 'Commentary: A timeless classic that showcases The Beatles at their creative '
 'peak.')


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for 500-character chunks with a 100-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)

# Concatenate the text of every loaded document, separated by a blank line,
# and print the combined context.
context = "\n\n".join([str(doc.page_content) for doc in data])
print(context)

Album Name: Abbey Road
Artist: The Beatles
Year of Release: 1969
Genre: Rock
Label: Apple Records
Number of Tracks: 17
Sales (millions): 31
Commentary: A timeless classic that showcases The Beatles at their creative peak.

Album Name: The Dark Side of the Moon
Artist: Pink Floyd
Year of Release: 1973
Genre: Progressive Rock
Label: Harvest Records
Number of Tracks: 10
Sales (millions): 45
Commentary: One of the most influential albums of all time with innovative sound engineering.

Album Name: Thriller
Artist: Michael Jackson
Year of Release: 1982
Genre: Pop
Label: Epic Records
Number of Tracks: 9
Sales (millions): 70
Commentary: The best-selling album of all time, blending pop, rock, and R&B seamlessly.

Album Name: Rumours
Artist: Fleetwood Mac
Year of Release: 1977
Genre: Rock
Label: Warner Bros. Records
Number of Tracks: 11
Sales (millions): 40
Commentary: An emotionally charged record capturing the band's internal turmoil.

Album Name: The Wall
Artist: Pink Floyd
Year of Release: 1

In [None]:
# Split the joined context string into chunks using the splitter configured
# earlier, then show how many chunks were produced.
texts = text_splitter.split_text(context)
len(texts)

5

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Google embedding client ('models/embedding-001') that is passed to the
# vector store to embed the text chunks.
embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')

In [None]:
# Import Chroma from langchain_community (the package this notebook already
# uses for CSVLoader) rather than the deprecated `langchain.vectorstores` path.
from langchain_community.vectorstores import Chroma

# Embed the text chunks into a Chroma collection and expose it as a retriever.
vector_index = Chroma.from_texts(texts, embeddings).as_retriever()

In [None]:
# Ask the retriever for the chunks it considers relevant to the question and
# show how many documents came back.
question = "Make a correlation between the number of sales and genres"
docs = vector_index.invoke(question)
len(docs)

4

In [None]:
# PromptTemplate lives in langchain_core; the root-level
# `from langchain import PromptTemplate` import is deprecated.
from langchain_core.prompts import PromptTemplate

# Prompt for the QA chain. `{context}` receives the retrieved documents and
# `{question}` the user's question. The third instruction previously read
# "Do not answer with correct information", which inverted its intent.
prompt_template = """
    Answer the question as much detail as possible from the given context, be sure to provide all details.
    If there is not enough detail in the given context, you can be creative on the answer.
    Do not answer with incorrect information.

    Context:\n{context}\n

    Question:\n{question}\n

    Answer:
"""
# The keyword is `input_variables`; the earlier `input=` was not a
# PromptTemplate field, so the variables were only being inferred.
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    Answer the question as much detail as possible from the given context, be sure to provide all details.\n    If there is not enough detail in the given context, you can be creative on the answer.\n    Do not answer with correct information.\n\n    Context:\n{context}\n\n\n    Question:\n{question}\n\n\n    Answer:\n')

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Gemini chat model ('gemini-pro') used to generate the answer, created with
# temperature=0.8.
model = ChatGoogleGenerativeAI(model='gemini-pro', temperature=0.8)

In [None]:
from langchain.chains.question_answering import load_qa_chain

# Build a question-answering chain of type "stuff" that uses the chat model
# together with the custom prompt defined earlier.
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [None]:
# Run the QA chain over the retrieved documents for the question, asking it
# to return only its output keys.
response = chain.invoke(
    dict(input_documents=docs, question=question),
    return_only_outputs=True,
)


In [None]:
# Display the chain's result dict (the answer is under 'output_text').
response

{'output_text': 'The provided context does not contain information about the correlation between the number of sales and genres of the albums. Therefore, I cannot generate an answer from the provided context.'}