In [6]:
from dotenv import load_dotenv
import os
import langchain
from langchain_google_genai import ChatGoogleGenerativeAI


## API Key Setup and Verification

In [7]:
load_dotenv()

True

In [10]:
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001")
llm_response = llm.invoke("Tell me a joke")
print(llm_response)


content="Why don't scientists trust atoms?\n\nBecause they make up everything!" additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run--48896eb8-321d-44d9-821f-8483efd9c294-0' usage_metadata={'input_tokens': 4, 'output_tokens': 16, 'total_tokens': 20, 'input_token_details': {'cache_read': 0}}


## Output Parser

In [11]:
from langchain_core.output_parsers import StrOutputParser

In [12]:
output_parser = StrOutputParser()
output_parser.invoke(llm_response)

"Why don't scientists trust atoms?\n\nBecause they make up everything!"

## Simple Chain


In [13]:
chain = llm | output_parser

In [14]:
chain.invoke("Tell me something about sun")

"Okay, here's a fact about the Sun:\n\nThe Sun is actually a pretty average star! While it's obviously crucial to us, in the grand scheme of the universe, it's a relatively common type of star called a G-type main-sequence star (also known as a yellow dwarf). It's not particularly massive, hot, or bright compared to many other stars out there. This is actually a good thing for us, as more massive and energetic stars have much shorter lifespans and wouldn't be suitable for supporting life for billions of years."

## Structured Output


In [15]:
from typing import List
from pydantic import BaseModel, Field

class MobileReview(BaseModel):
    phone_model: str = Field(description="Name and model of the phone  '\n'")
    rating: float = Field(description="Overall rating out of 5")
    pros: List[str] = Field(description="List of positive aspects")
    cons: List[str] = Field(description="List of negative aspects")
    summary: str = Field(description="Brief summary of the review")

review_text = """
Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous,
colors pop like crazy. Camera's insane too, especially at night - my Insta game's never been
stronger. Battery life's solid, lasts me all day no problem.
Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung.
Also, still getting used to the new button layout, keep hitting Bixby by mistake.
Overall, I'd say it's a solid 4 out of 5. Great phone, but a few annoying quirks keep it from
being perfect. If you're due for an upgrade, definitely worth checking out!
"""

structured_llm = llm.with_structured_output(MobileReview)
output = structured_llm.invoke(review_text)
output


MobileReview(phone_model='Galaxy S21', rating=4.0, pros=['Gorgeous screen', 'Insane camera', 'Solid battery life'], cons=['Pricey', 'No charger', 'Accidental Bixby presses'], summary="Great phone, but a few annoying quirks keep it from being perfect. If you're due for an upgrade, definitely worth checking out!")

In [16]:
output.pros

['Gorgeous screen', 'Insane camera', 'Solid battery life']

## Prompt Template

In [17]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
chain = prompt | llm | output_parser
result = chain.invoke({"topic": "programming"})
print(result)


Why do programmers prefer dark mode?

Because light attracts bugs!


## LLM Messages

In [18]:
from langchain_core.messages import HumanMessage, SystemMessage

messages = [
    SystemMessage(content="You are a helpful assistant that tells jokes."),
    HumanMessage(content="Tell me about programming")
]
response = llm.invoke(messages)
print(response.content)


Alright, here's a programming joke for you:

> Why do programmers prefer dark mode?
>
> Because light attracts bugs!

Want to hear another one? Or maybe you'd like some actual information about programming? I can do both! Just let me know.


In [19]:

template = ChatPromptTemplate([
    ("system", "You are a helpful assistant that tells jokes."),
    ("human", "Tell me about {topic}")
])
chain = template | llm
response = chain.invoke({"topic": "cars"})
print(response.content)


Alright, buckle up, buttercup, because here comes some car-related humor!

*   Why did the bicycle fall over? Because it was two tired! (Okay, not strictly cars, but close enough, right?)

*   What do you call a lazy kangaroo? Pouch potato! (Again, not cars, but imagine a kangaroo driving a car! Hilarious!)

*   I just got a new car! It's a limited edition... it only comes in one color and it doesn't run.

*   Why did the car get a flat tire? Because it was feeling deflated!

*   I tried to explain to my friend how electricity works, but he wasn't getting it. I said, "Watt's the problem?"

I hope those gave you a chuckle! Do you want to hear another one? Maybe something about a specific type of car?


## Document Processing for RAG Systems

### 1. Loading Documents

In [20]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List
from langchain_core.documents import Document
import os

def load_documents(folder_path: str) -> List[Document]:
    documents = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if filename.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif filename.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents

folder_path = "contents/docs/"
documents = load_documents(folder_path)
print(f"Loaded {len(documents)} documents from the folder.")


Unsupported file type: .DS_Store
Loaded 5 documents from the folder.


### 2. Splitting Documents

In [21]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)

splits = text_splitter.split_documents(documents)
print(f"Split the documents into {len(splits)} chunks.")


Split the documents into 49 chunks.


### 3. Creating google genAi Embeddings for RAG Systems

In [22]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_function = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
document_embeddings = embedding_function.embed_documents([split.page_content for split in splits])
print(f"Created embeddings for {len(document_embeddings)} document chunks.")

Created embeddings for 49 document chunks.


## Vector Store for RAG system

In [23]:
from langchain_chroma import Chroma

collection_name = "my_collection"
vectorstore = Chroma.from_documents(
    collection_name=collection_name,
    documents=splits,
    embedding=embedding_function,
    persist_directory="./chroma_db"
)
print("Vector store created and persisted to './chroma_db'")


Vector store created and persisted to './chroma_db'


## Performing similarity Search

In [24]:
query = "What are the types of cars"
search_results = vectorstore.similarity_search(query, k=2)
print(f"\nTop 2 most relevant chunks for the query: '{query}'\n")
for i, result in enumerate(search_results, 1):
    print(f"Result {i}:")
    print(f"Source: {result.metadata.get('source', 'Unknown')}")
    print(f"Content: {result.page_content}")
    print()



Top 2 most relevant chunks for the query: 'What are the types of cars'

Result 1:
Source: contents/docs/Types_of_Cars.docx
Content: Types of Cars

Cars can be categorized by body style. Sedans are four-door vehicles offering comfort and practicality. Coupes are sportier, while hatchbacks provide flexible cargo space. SUVs dominate global markets for their size and versatility. Convertibles emphasize style and leisure, and trucks are valued for utility and towing capabilities.

Fuel type also defines vehicles. Gasoline cars remain common due to infrastructure. Diesel engines provide torque and efficiency, especially for trucks. Hybrids combine combustion engines with electric motors to balance efficiency. Electric vehicles are gaining adoption, offering zero tailpipe emissions. Hydrogen fuel cell cars, though rare, represent another pathway.

Result 2:
Source: contents/docs/Types_of_Cars.docx
Content: Types of Cars

Cars can be categorized by body style. Sedans are four-door vehicles o

### Creating a Retriever

In [25]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
retriever_results = retriever.invoke("What are the types of cars")
print(retriever_results)


[Document(id='75593228-0a8e-4284-bf5c-dd6334807874', metadata={'source': 'contents/docs/Types_of_Cars.docx'}, page_content='Types of Cars\n\nCars can be categorized by body style. Sedans are four-door vehicles offering comfort and practicality. Coupes are sportier, while hatchbacks provide flexible cargo space. SUVs dominate global markets for their size and versatility. Convertibles emphasize style and leisure, and trucks are valued for utility and towing capabilities.\n\nFuel type also defines vehicles. Gasoline cars remain common due to infrastructure. Diesel engines provide torque and efficiency, especially for trucks. Hybrids combine combustion engines with electric motors to balance efficiency. Electric vehicles are gaining adoption, offering zero tailpipe emissions. Hydrogen fuel cell cars, though rare, represent another pathway.'), Document(id='09acf294-e3cf-45bb-a191-3cca7c5c4ffe', metadata={'source': 'contents/docs/Types_of_Cars.docx'}, page_content='Types of Cars\n\nCars can

## Building a RAG Chain

In [26]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

template = """Answer the question based only on the following context:
{context}
Question: {question}
Answer: """

prompt = ChatPromptTemplate.from_template(template)

def docs2str(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | docs2str, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


## Using RAG Chain

In [27]:
question = "What are the types of cars?"
response = rag_chain.invoke(question)
print(f"Question: {question}")
print(f"Answer: {response}")


Question: What are the types of cars?
Answer: Cars can be categorized by body style (Sedans, Coupes, Hatchbacks, SUVs, Convertibles, and Trucks) and fuel type (Gasoline, Diesel, Hybrids, Electric, and Hydrogen fuel cell).


# Conversational RAG 

## Handling Follow-Up Questions

In [28]:
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain


contextualize_q_system_prompt = """
Given a chat history and the latest user question
which might reference context in the chat history,
formulate a standalone question which can be understood
without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is.
"""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

contextualize_chain = contextualize_q_prompt | llm | StrOutputParser()
print(contextualize_chain.invoke({"input": "Tell its strengths and weaknesses", "chat_history": []}))


What are the strengths and weaknesses of the thing we are discussing?


In [29]:
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the following context to answer the user's question."),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}")
])

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


In [30]:
from langchain_core.messages import HumanMessage, AIMessage

chat_history = []
question1 = "What are the types of cars?"
answer1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})['answer']
chat_history.extend([
    HumanMessage(content=question1),
    AIMessage(content=answer1)
])

print(f"Human: {question1}")
print(f"AI: {answer1}\n")


Human: What are the types of cars?
AI: Cars can be categorized by body style, such as sedans, coupes, hatchbacks, SUVs, convertibles, and trucks. They can also be defined by fuel type, including gasoline, diesel, hybrid, electric, and hydrogen fuel cell cars.



In [31]:

question2 = "Tell its strengths and weaknesses"
answer2 = rag_chain.invoke({"input": question2, "chat_history": chat_history})['answer']
chat_history.extend([
    HumanMessage(content=question2),
    AIMessage(content=answer2)
])

print(f"Human: {question2}")
print(f"AI: {answer2.remove("**")}")


Human: Tell its strengths and weaknesses
AI: Okay, here's a breakdown of the strengths and weaknesses of the car types mentioned in the context:

**Body Styles:**

*   **Sedans:**
    *   **Strengths:** Comfortable, practical, good for families, generally fuel-efficient.
    *   **Weaknesses:** Can lack cargo space compared to other body styles, not as sporty as coupes.
*   **Coupes:**
    *   **Strengths:** Sporty styling, often better handling than sedans.
    *   **Weaknesses:** Less practical than sedans, limited rear passenger space, smaller cargo area.
*   **Hatchbacks:**
    *   **Strengths:** Flexible cargo space, practical, often fuel-efficient, can be sporty.
    *   **Weaknesses:** Styling can be polarizing.
*   **SUVs:**
    *   **Strengths:** Spacious, versatile, good for families, often have all-wheel drive, high driving position.
    *   **Weaknesses:** Can be less fuel-efficient than smaller cars, can be more expensive, can be harder to park.
*   **Convertibles:**
    *

In [33]:
print(f"AI: {answer2.remove("**")})

SyntaxError: unterminated string literal (detected at line 1) (1559491492.py, line 1)