<a href="https://colab.research.google.com/github/vikasdatta/ML-CICD-Projects/blob/main/RAG_LLM_WC_Code_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

import boto3
import sagemaker

# Initialize SageMaker Session
session = sagemaker.Session()

# Resolve the execution role at runtime instead of hard-coding an ARN.
# A valid role ARN has the form
#   arn:aws:iam::<12-digit-account-id>:role/service-role/<role-name>
# The previously hard-coded value did not match that form (the account field
# was not a 12-digit account ID and the resource began with ":/role"), and
# account/credential identifiers should never be committed to a notebook.
# Set SAGEMAKER_ROLE_ARN when running outside SageMaker (e.g. on Colab);
# inside SageMaker the role attached to the notebook is looked up instead.
role = os.environ.get("SAGEMAKER_ROLE_ARN") or sagemaker.get_execution_role(
    sagemaker_session=session
)

# TODO: Upload data to S3 (AWS storage) -- no upload is performed in this cell yet.
# Test 1


In [None]:
!pip install -q langchain langchain-community chromadb sentence-transformers transformers accelerate bitsandbytes langchain_text_splitters



In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import pipeline
import torch

In [None]:
# 1. Load Data: Create a dummy text file to act as your 'knowledge base'
# The encoding is pinned to UTF-8 for both the write and the read so the
# round trip does not depend on the platform's default locale encoding.
with open("knowledge.txt", "w", encoding="utf-8") as f:
    f.write("In 2026, the global tech industry shifted heavily towards decentralized RAG architectures.")

# TextLoader reads the file back and wraps its contents in Document objects.
loader = TextLoader("knowledge.txt", encoding="utf-8")
documents = loader.load()

In [None]:
# 2. Chunking: break the loaded documents into overlapping pieces
# Splitter settings are gathered in one place: pieces of up to 500 characters,
# with 50 characters shared between neighbouring pieces.
splitter_options = {"chunk_size": 500, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = text_splitter.split_documents(documents)

In [None]:
# 3. Embedding & Vector Store: embed each chunk and index it in Chroma
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Build the vector store directly from the chunks using the embedder above.
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)

In [None]:
# 4. Retrieval: Find relevant chunks based on a query
query = "What happened to the tech industry in 2026?"

# Ask the vector store for the single closest chunk (k=1).
docs = vectorstore.similarity_search(query, k=1)

# The search may come back empty (e.g. if nothing was indexed); fail with an
# actionable message rather than a bare IndexError from docs[0].
if not docs:
    raise ValueError(
        "No documents were retrieved for the query; "
        "check that the vector store was populated before searching."
    )

# The text of the best match becomes the context handed to the LLM.
context = docs[0].page_content

In [None]:
# 5. Generation: build the LLM pipeline that answers from retrieved context
# flan-t5-large is used as a lightweight local model for Colab.
# Device 0 selects the first CUDA GPU when one is available; -1 selects the CPU.
device_index = 0 if torch.cuda.is_available() else -1

qa_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-large",
    device=device_index,
)

In [None]:
# Assemble the prompt as three sections separated by blank lines:
# the retrieved context, the user's question, and the answer cue.
prompt_sections = [
    f"Context: {context}",
    f"Question: {query}",
    "Answer:",
]
prompt = "\n\n".join(prompt_sections)

# Generate the context-grounded answer, capped at max_length=50.
result = qa_pipeline(prompt, max_length=50)

In [None]:
# Show what was retrieved next to what the model generated from it.
rag_answer = result[0]["generated_text"]
print(f"Retrieved Context: {context}")
print(f"LLM Response: {rag_answer}")

In [None]:
# Baseline: pose the same question with no retrieved context, so the answer
# can be compared against the RAG response produced earlier.
base_query = "What happened to the tech industry in 2026?"

# Reuse the already-loaded pipeline; the prompt is only the bare question.
base_result = qa_pipeline(base_query, max_length=50)

# Print the context-free answer for side-by-side comparison.
base_answer = base_result[0]["generated_text"]
print(f"Base Model Response: {base_answer}")