In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import os
import langchain


## API Key Setup and Verification

In [2]:
# Load settings from a local .env file before any client is created.
# NOTE(review): presumably this supplies the Google API key used by the model
# cells below — confirm the variable name expected in .env.
load_dotenv()

True

In [3]:
# Create the Gemini chat-model client; `llm` is reused by the cells that follow.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
# One-off prompt that doubles as a smoke test of the setup.
llm_response = llm.invoke("Tell me a joke")
# Printing the whole response object shows the content together with its
# metadata, not just the reply text.
print(llm_response)

content="Why don't scientists trust atoms? \n\nBecause they make up everything!" additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-1.5-flash', 'safety_ratings': []} id='run--c8c61a39-ceb0-4885-8ef7-b67a22243314-0' usage_metadata={'input_tokens': 4, 'output_tokens': 17, 'total_tokens': 21, 'input_token_details': {'cache_read': 0}}


## Output Parser

In [4]:
from langchain_core.output_parsers import StrOutputParser

In [5]:
# StrOutputParser reduces a model response to its plain reply text.
output_parser = StrOutputParser()
# Applied to the response from the previous cell; as the cell's last
# expression, the resulting string is displayed by the notebook.
output_parser.invoke(llm_response)

"Why don't scientists trust atoms? \n\nBecause they make up everything!"

## Simple Chain


In [6]:
# Pipe the model into the string parser so a single invoke() returns plain text.
chain = llm | output_parser

In [7]:
# The prompt string goes through the model and then the string parser, so the
# value displayed below is a plain string.
chain.invoke("Tell me something about sun")

"The Sun is a nearly perfect sphere of hot plasma, held together by its own gravity.  It's a main-sequence star, meaning it generates energy through nuclear fusion in its core, converting hydrogen into helium. This process releases enormous amounts of energy in the form of light, heat, and other forms of radiation, which are essential for life on Earth.  The Sun accounts for about 99.86% of the total mass of the Solar System."

## Structured Output


In [8]:
from typing import List
from pydantic import BaseModel, Field

class MobileReview(BaseModel):
    """Structured summary extracted from one free-text mobile phone review."""

    # The Field descriptions are part of the schema handed to
    # `llm.with_structured_output`, so they should contain only guidance text.
    phone_model: str = Field(description="Name and model of the phone")
    rating: float = Field(description="Overall rating out of 5")
    pros: List[str] = Field(description="List of positive aspects")
    cons: List[str] = Field(description="List of negative aspects")
    summary: str = Field(description="Brief summary of the review")

# Free-form sample review used as the input for the extraction below.
review_text = """
Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous,
colors pop like crazy. Camera's insane too, especially at night - my Insta game's never been
stronger. Battery life's solid, lasts me all day no problem.
Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung.
Also, still getting used to the new button layout, keep hitting Bixby by mistake.
Overall, I'd say it's a solid 4 out of 5. Great phone, but a few annoying quirks keep it from
being perfect. If you're due for an upgrade, definitely worth checking out!
"""

# Wrap the model so that its reply comes back as a MobileReview instance
# instead of a raw message.
structured_llm = llm.with_structured_output(MobileReview)
output = structured_llm.invoke(review_text)
# Bare last expression: the notebook displays the parsed object.
output


MobileReview(phone_model='Galaxy S21', rating=4.0, pros=['Gorgeous screen', 'Amazing camera, especially at night', 'Solid battery life'], cons=['Pricey', 'No charger included', 'New button layout takes some getting used to'], summary="Great phone, but a few annoying quirks keep it from being perfect. If you're due for an upgrade, definitely worth checking out!")

In [9]:
# Fields of the parsed review are ordinary attributes; `pros` is a list of strings.
output.pros

['Gorgeous screen',
 'Amazing camera, especially at night',
 'Solid battery life']

## Prompt Template

In [10]:
from langchain_core.prompts import ChatPromptTemplate

# `{topic}` is a placeholder filled in from the dict passed to invoke().
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
# Rebinds `chain` (previously llm | output_parser) to a three-stage pipeline:
# prompt -> model -> plain text.
chain = prompt | llm | output_parser
result = chain.invoke({"topic": "programming"})
print(result)


Why do programmers prefer dark mode?  Because light attracts bugs!


## LLM Messages

In [14]:
from langchain_core.messages import HumanMessage, SystemMessage

# Explicit message list: the system message carries the standing instruction
# (tell jokes) and the human message is the user's turn.
messages = [
    SystemMessage(content="You are a helpful assistant that tells jokes."),
    HumanMessage(content="Tell me about programming")
]
# The list is passed to the model directly, without a prompt template.
response = llm.invoke(messages)
# `.content` is the reply text only, without the surrounding metadata.
print(response.content)


Why do programmers prefer dark mode?

Because light attracts bugs!


In [15]:

# Same system/human pair as the previous cell, written as (role, text) tuples
# with a `{topic}` placeholder in the human turn.
template = ChatPromptTemplate([
    ("system", "You are a helpful assistant that tells jokes."),
    ("human", "Tell me about {topic}")
])
# No output parser in this chain, so invoke() returns a message object and the
# text is read from `.content`. Note that `chain` and `response` are rebound here.
chain = template | llm
response = chain.invoke({"topic": "cars"})
print(response.content)


Why did the car get a flat tire?  Because it ran over a nail!


What do you call a car that can't stop?  A runaway!


Why did the bicycle fall over? Because it was two tired! (This one's a bit of a cheat, but it's related to vehicles!)


## Document Processing for RAG Systems

### 1. Loading Documents

In [23]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List
from langchain_core.documents import Document
import os

def load_documents(folder_path: str) -> List[Document]:
    """Load every supported document found directly inside ``folder_path``.

    Entries are visited in sorted name order so the returned list (and anything
    derived from it, such as chunk order) is the same on every run;
    ``os.listdir`` on its own makes no ordering promise. Extensions are matched
    case-insensitively, so ``Report.PDF`` is loaded just like ``report.pdf``.

    Args:
        folder_path: Directory to scan. Sub-directories are not descended into.

    Returns:
        The documents returned by each loader, concatenated in file-name order.
        Empty when the folder holds no supported files.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder_path`` is missing
            or is not a directory.
    """
    documents = []
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            # A directory (even one named like "archive.pdf") is not a document.
            continue
        lowered_name = filename.lower()
        if lowered_name.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif lowered_name.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {filename}")
            continue
        # A loader may return more than one document, hence extend().
        documents.extend(loader.load())
    return documents

# Relative path: resolved against the kernel's current working directory.
folder_path = "contents/docs/"
documents = load_documents(folder_path)
# NOTE(review): this counts the Document objects returned by the loaders, which
# may differ from the number of files if a loader yields several per file — confirm.
print(f"Loaded {len(documents)} documents from the folder.")


Loaded 5 documents from the folder.


### 2. Splitting Documents

In [24]:
# chunk_size and chunk_overlap are measured with `len`, i.e. in characters of
# the chunk text; consecutive chunks may share up to 200 of them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)

# Split every loaded document; `splits` is what gets embedded in the next step.
splits = text_splitter.split_documents(documents)
print(f"Split the documents into {len(splits)} chunks.")


Split the documents into 6 chunks.


### 3. Creating Google GenAI Embeddings for RAG Systems

In [26]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client for the Gemini embedding model.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
# Only the text of each chunk is embedded (metadata is not passed); the printed
# count below is the number of embeddings returned.
document_embeddings = embeddings.embed_documents([split.page_content for split in splits])
print(f"Created embeddings for {len(document_embeddings)} document chunks.")

Created embeddings for 6 document chunks.
