In [55]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate


In [None]:
load_dotenv()
groq_api_key=os.getenv("GROQ_API_KEY")

os.environ["GROQ_API_KEY"]=groq_api_key

In [None]:
# Loading Data

directoryLoader = DirectoryLoader(
    path="../data/practical_rag_data/",
    loader_cls=TextLoader,
    show_progress=True,
    glob="**/*.txt"
)

data=directoryLoader.load()
print(data[0])

100%|██████████| 4/4 [00:00<00:00, 3127.74it/s]

page_content='Climate Change
Climate change refers to long-term shifts in temperatures and weather patterns, largely driven by human activities such as burning fossil fuels. These activities increase greenhouse gas concentrations in the atmosphere, trapping heat and altering global climates. Consequences include rising sea levels, extreme weather events, and biodiversity loss.

Space Exploration
Humanity’s interest in space has evolved from simple stargazing to landing rovers on Mars. Organizations like NASA and private companies like SpaceX are pushing boundaries by exploring planets, planning moon bases, and even contemplating missions to Jupiter's moons. Space exploration drives technological innovation and inspires global collaboration.

Artificial Intelligence Ethics
As AI systems become more capable, ethical concerns arise about their usage, decision-making, and transparency. Issues like algorithmic bias, lack of accountability, and potential misuse in surveillance or warfare rai




In [13]:
text_splitter=RecursiveCharacterTextSplitter(
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200b",  # Zero-width space
        "\uff0c",  # Fullwidth comma
        "\u3001",  # Ideographic comma
        "\uff0e",  # Fullwidth full stop
        "\u3002",  # Ideographic full stop
        "",
    ],
    chunk_size=500,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,   
)

chunks=text_splitter.split_documents(data)
print(chunks[0],chunks[1])



page_content='Climate Change
Climate change refers to long-term shifts in temperatures and weather patterns, largely driven by human activities such as burning fossil fuels. These activities increase greenhouse gas concentrations in the atmosphere, trapping heat and altering global climates. Consequences include rising sea levels, extreme weather events, and biodiversity loss.' metadata={'source': '../data/practical_rag_data/data1.txt'} page_content='Space Exploration
Humanity’s interest in space has evolved from simple stargazing to landing rovers on Mars. Organizations like NASA and private companies like SpaceX are pushing boundaries by exploring planets, planning moon bases, and even contemplating missions to Jupiter's moons. Space exploration drives technological innovation and inspires global collaboration.' metadata={'source': '../data/practical_rag_data/data1.txt'}


In [62]:
model_name = "sentence-transformers/all-MiniLM-L6-v2"
hf = HuggingFaceEmbeddings(
    model_name=model_name,
)
# Creating Chroma DB

db=Chroma.from_documents(
    documents=chunks,
    embedding=hf,
    persist_directory="../rag_db",
    collection_name="rag_collection"
)

retriever=db.as_retriever(
    search_kwargs={"k":3}
)


In [66]:
from langchain_core.prompts import ChatPromptTemplate
system_prompt="""You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.

Context: {context}"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

In [None]:
# Creating Chain
from os import read


llm=ChatGroq(
    model_name="llama-3.3-70b-versatile",
    api_key=groq_api_key,
)

combined_chain=create_stuff_documents_chain(
    llm=llm,
    prompt=prompt
)

retrieval_chain=create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combined_chain,
)

response=retrieval_chain.invoke({"input": "What does climate change refer to?"})

response

{'input': 'What does climate change refer to?',
 'context': [Document(id='21536dd7-b392-4608-b4ad-9bdf9afada57', metadata={'source': '../data/practical_rag_data/data1.txt'}, page_content='Climate Change\nClimate change refers to long-term shifts in temperatures and weather patterns, largely driven by human activities such as burning fossil fuels. These activities increase greenhouse gas concentrations in the atmosphere, trapping heat and altering global climates. Consequences include rising sea levels, extreme weather events, and biodiversity loss.'),
  Document(id='eedf34f4-a99d-404f-8166-33a7f0337edf', metadata={'source': '../data/practical_rag_data/data1.txt'}, page_content='Climate Change\nClimate change refers to long-term shifts in temperatures and weather patterns, largely driven by human activities such as burning fossil fuels. These activities increase greenhouse gas concentrations in the atmosphere, trapping heat and altering global climates. Consequences include rising sea l