# Load the CSV File

In [10]:
from langchain_community.document_loaders.csv_loader import CSVLoader

In [11]:
# Location of the CSV export (note: the file name contains two consecutive spaces).
file_path = "TSX Data  - Sheet1.csv"

# Build the loader for that path, then read the file into `data`.
loader = CSVLoader(file_path)
data = loader.load()

# Create a Text Splitter / Chunker

In [12]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [13]:
# Splitter configured with a chunk size of 1000 and an overlap of 200
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

# Apply the text splitter to your documents

In [14]:
# Run the loaded documents through the splitter; `split` holds the resulting chunks.
split = text_splitter.split_documents(documents=data)

# Vector DB and Embedding Model

In [15]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [16]:
# Embedding model handed to the vector store below.
# NOTE(review): no API key is passed here - presumably it is read from the
# environment; confirm before running on a fresh machine.
embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Store the vectors in the DB

In [17]:
import os

# "TSX" is used both as the on-disk persist directory and as the collection name.
if not os.path.exists("TSX"):
    # First run: build the collection from the split chunks and persist it
    # under the "TSX" directory.
    store = Chroma.from_documents(
        documents=split,
        embedding=embedder,
        collection_name="TSX",
        persist_directory="TSX",
    )
    print("New Chroma DB created.")
else:
    # Later runs: reopen the persisted collection instead of rebuilding it.
    # Without this assignment `store` would be undefined on every re-run and
    # the retriever cell below would fail with a NameError.
    store = Chroma(
        collection_name="TSX",
        embedding_function=embedder,
        persist_directory="TSX",
    )
    print("Directory already exists. Skipping creation.")

New Chroma DB created.


# Create a retriever from the DB

In [18]:
# Similarity-search retriever over the vector store, with k=5 passed as a
# search kwarg.
# NOTE(review): with k=5 the chain presumably only ever sees five chunks, so a
# "list all users" style question cannot be answered completely - confirm
# whether that is intended.
retriver = store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Create a RAG chain

In [19]:
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_google_genai import ChatGoogleGenerativeAI


# Chat Model

In [20]:
# Chat model that generates the final answer.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.5,
    max_tokens=500,
)

In [21]:
# System message template. It ends with the {context} placeholder that is
# filled in when the prompt is formatted.
system_prompt = (
    "You are a helpful assistant that answers questions "
    "about the TSX data Of CSV file. {context}"
)

In [22]:
# Two-message chat template: the system instructions first, then the
# human turn carrying the {input} placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Creating a chain

In [23]:
# Combine the chat model and the prompt template into a single chain.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [24]:
# Wire the retriever (backed by the vector DB) to the question-answering chain.
retrieve_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=question_answer_chain,
)

In [25]:
# Question sent to the chain under the "input" key in the next cell.
query = "Provide me a full list of all user"

In [26]:
# Invoke the chain with the question under "input", then print the text
# stored under the "answer" key of the result.
chain_input = {"input": query}
response = retrieve_chain.invoke(chain_input)
print(response["answer"])

Here's a list of all users from the provided data:

1. **Sarah Johnson:**
   - Contact Number: (212) 555-4567
   - Gender: Female
   - Area: North
   - Sub Area: Downtown
   - Guptin (No): 1458
   - Repeat Guption (No): 2
   - Sales ROI: 3.2
   - Margin: 11.50%

2. **Michael Chen:**
   - Contact Number: (212) 555-4567
   - Gender: Male
   - Area: South
   - Sub Area: Riverside
   - Guptin (No): 2367
   - Repeat Guption (No): 0
   - Sales ROI: 2.8
   - Margin: 11.50%

3. **Emma Wilson:**
   - Contact Number: (212) 555-4567
   - Gender: Male
   - Area: West
   - Sub Area: Hillside
   - Guptin (No): 2104
   - Repeat Guption (No): 3
   - Sales ROI: 2.5
   - Margin: 11.50%

4. **David Garcia:**
   - Contact Number: (212) 555-4567
   - Gender: Female
   - Area: North
   - Sub Area: Uptown
   - Guptin (No): 1743
   - Repeat Guption (No): 1
   - Sales ROI: 3.7
   - Margin: 16.20%

5. **James Patel:**
   - Contact Number: (212) 555-4567
   - Gender: Female
   - Area: East
   - Sub Area: Harbor
