In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
from  langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama.llms import OllamaLLM

google_llm = ChatGoogleGenerativeAI(model = "gemini-2.5-flash")
gemma_llm = OllamaLLM(model="gemma3:4b")


In [4]:
from langchain_core.prompts import ChatPromptTemplate


template = """Question: {question}

Answer: Let's think step by step."""

prompt = ChatPromptTemplate.from_template(template)
chain = prompt | gemma_llm
chain.invoke({"question": "What is LangChain?"})

'Okay, that\'s a perfectly reasonable and helpful starting point! "Let\'s think step by step" indicates you\'re going to break down the explanation of LangChain into manageable pieces. \n\nNow, you need to actually *explain* what LangChain is.  It\'s a fantastic starting point, but the next step is to deliver the core information.\n\nHere\'s a possible expanded answer building on your "Let\'s think step by step" response:\n\n"Let\'s think step by step. LangChain is a framework designed to simplify the process of building applications that use large language models (LLMs) like GPT-3, GPT-4, and others.  It\'s not a single model itself, but rather a toolkit – a collection of components and abstractions – that make it easier to chain together LLMs with other tools and data sources.\n\nHere\'s a breakdown:\n\n1. **Chaining LLMs:** The core idea is to link multiple LLM calls together.  Instead of just asking one LLM a question and getting a single response, you can create a sequence of step

In [None]:
from langchain_core.prompts import ChatPromptTemplate

prompt  = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant that translates {input_language} to {output_language}"),
    ("user", "Translate the following text: {text}"), ])

a = prompt.invoke({"input_language": "English", "output_language": "French", "text": "I love programming."})
google_chain = prompt | google_llm
gemma_chain =prompt | gemma_llm 
output_google =google_chain.invoke({"input_language": "English", "output_language": "French", "text": "he eats apples. "})
output_gemma =  gemma_chain.invoke({"input_language": "English", "output_language": "French", "text": "he eats apples. "})

In [None]:
print(output_google.content)
print(output_gemma)

Il mange des pommes.

Il mange des pommes. 

Would you like me to translate anything else?


In [12]:
## string output parser

from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()
output = parser.parse("Hello, world!")
print(output)

Hello, world!


In [15]:
chain = prompt | gemma_llm | parser
output = chain.invoke({"input_language": "English", "output_language": "French", "text": "MY name is Yashas"})


In [37]:
chain

ChatPromptTemplate(input_variables=['input_language', 'output_language', 'text'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input_language', 'output_language'], input_types={}, partial_variables={}, template='You are a helpful assistant that translates {input_language} to {output_language}.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template='Translate the following text: {text}'), additional_kwargs={})])
| OllamaLLM(model='gemma3:4b')
| StrOutputParser()

## creating a simple llm with data injection


In [47]:
target_url = "https://www.headphonezone.in/pages/our-team"

import requests
from bs4 import BeautifulSoup

def scrape(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup

def scrape_clean_text(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Remove script and style elements
    for script in soup(["script", "style"]):
        script.extract()

    # Get raw text
    text = soup.get_text(separator="\n")

    # Clean up whitespace
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    clean_text = "\n".join(lines)

    return clean_text

In [48]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(target_url)


In [49]:
doc = loader.load()
len(doc)

1

In [50]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Clean up whitespace
cleaned_doc = []
for d in doc:
    d.page_content = d.page_content.replace("\n", " ").replace("\t", " ")
    d.page_content = " ".join(d.page_content.split())  # remove extra spaces
    cleaned_doc.append(d)

# Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)

texts = text_splitter.split_documents(cleaned_doc)
len(texts)


15

In [51]:
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = OllamaEmbeddings(
    model="mxbai-embed-large:latest",
)


vectorstore = FAISS.from_documents(texts, embedding=embeddings)

In [53]:
query  = "FOUNDER & CEO"
result= vectorstore.similarity_search(query, k=2)
result

[Document(id='cf896cb0-b388-48fb-9c7c-7bc80ea1cb45', metadata={'source': 'https://www.headphonezone.in/pages/our-team', 'title': 'Meet The Team Of Headphone Zone', 'description': 'Headphone Zone is India’s First Exclusive Headphone Retail Brand. Know more about our team and what everyone here at Headphone Zone does.', 'language': 'en'}, page_content="Engaging in Something New Listen to Taniya's Playlist Nandana Nair WEBSITE Just a Self-Proclaimed Singer, Dancer and Sunset Chaser Listen to Nandana's Playlist Simran Soni OPERATIONS Ice cream, Books And Music Keeps Me Going Listen to Simran's Playlist Pushkar Tambe CONTENT Goals With a Side Of Groove Listen to Pushkar's Playlist Fiona Buthello PEOPLE & CULTURE Kindness in Action, Purpose at Heart Listen to Fiona's Playlist Fiona Dewey CUSTOMER HAPPINESS You Only Have You Listen to Fiona's Playlist Sanskruti Khune OPERATIONS Apna Har Din Aise Jiyo, Jaise Ki Akhiri Ho. Jiyo Toh Iss Pal Aise Jiyo, Jaise Ki Akhiri Ho Listen to Sanskruti's Pla

In [66]:
## retervial chain, document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import  ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    Answer the question based on the context below, if you don't know the answer, just say that you don't know, don't try to make up an answer.
    <content>
    {context}
    </content>

    """, 
)

document_chain = create_stuff_documents_chain(google_llm, prompt)


In [67]:
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\n    Answer the question based on the context below, if you don't know the answer, just say that you don't know, don't try to make up an answer.\n    <content>\n    {context}\n    </content>\n\n    "), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x0000012B9430F3A0>, default_metadata=(), model_kwargs={})
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [68]:
from langchain.chains import create_retrieval_chain

retriver = vectorstore.as_retriever()

retrival_chain = create_retrieval_chain(retriver, document_chain)

In [73]:
retrival_chain.invoke({"input": "is there any sale?"})

{'input': 'is there any sale?',
 'context': [Document(id='fcd89a55-fbfc-429b-9d1a-8118e8df1250', metadata={'source': 'https://www.headphonezone.in/pages/our-team', 'title': 'Meet The Team Of Headphone Zone', 'description': 'Headphone Zone is India’s First Exclusive Headphone Retail Brand. Know more about our team and what everyone here at Headphone Zone does.', 'language': 'en'}, page_content="to UsPrefer to drop us a line? Our team will get back to you via email within a working day. Mail us Write to Us Request a Callback Sorry, we are unable to receive callback requests at the moment. You may Write to us instead. Join the Discussion We're not the experts on everything, but our community is. Join The Indian Audiophile Forum. Join community Go to item 1 Go to item 2 Go to item 3 Let Us Help Help Center Contact Us Track My Order Return My Order Report a Bug Visit Our Experience Studio Our Policies Shipping & Delivery Returns, Refunds & Cancellations Terms & Conditions Privacy Policy Com

In [70]:
retrival_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000012B911FD240>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\n    Answer the question based on the context below, if you don't know the answer, just say that you don't know, don't try to make up an answer.\n  