In [7]:
# Load the data and split it into chunks

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Named settings so the input file and chunking parameters are easy to find and tune.
DATA_PATH = "data.txt"
CHUNK_SIZE = 300     # chunk_size value handed to the splitter
CHUNK_OVERLAP = 50   # chunk_overlap value handed to the splitter

# Read the file with an explicit encoding: without one, the loader falls back to
# the platform default, which can fail or garble UTF-8 text on some systems
# (e.g. Windows).
loader = TextLoader(DATA_PATH, encoding="utf-8")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded documents into chunks; these are what gets embedded later.
chunks = text_splitter.split_documents(documents)


In [8]:
# Embed the chunks and build a vector store: each chunk is represented as a single vector in a high-dimensional space.
# A similarity search then returns the chunks (stored as embedded vectors) that are most relevant to the query.

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model used to turn each chunk into a vector.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build the FAISS index from the chunks, using the embedding model above.
vectorstore = FAISS.from_documents(chunks, embeddings)

# Retriever configured with k=2 in its search arguments.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=2))

query = "platform i use to store files"
docs = retriever.invoke(query)

# Show each retrieved chunk's text followed by its metadata, one per line.
for doc in docs:
    print(doc.page_content, doc.metadata, sep="\n")


* File upload and download functionality
  * Uses **Azure Blob Storage** as the storage backend
  * Authentication layer that allows users to connect **directly to Blob Storage** without routing files through the Next.js server
{'source': 'data.txt'}
### Systems & Platforms

* Interested in **Linux-based development**, particularly:

  * Building and contributing to **AI-related applications**
  * Prefers application-layer development over kernel-level work
* Exploring alternatives to Windows due to perceived performance slowdowns
{'source': 'data.txt'}


In [9]:
# the result of the similarity search (the relevant chunks) will be used as context to answer the query

# Call load_dotenv() before importing and constructing the chat model,
# preserving the original order of these steps.
from dotenv import load_dotenv
load_dotenv()

from langchain_google_genai import ChatGoogleGenerativeAI

# Concatenate the text of the retrieved chunks, separated by blank lines.
context = "\n\n".join(doc.page_content for doc in docs)

# Prompt that restricts the model to the retrieved context.
prompt = f"""
You are a helpful assistant.
Answer the question using ONLY the context below.
If the answer is not present, say "I don't know".

Context:
{context}

Question:
{query}
"""

llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)
response = llm.invoke(prompt)

# Header line followed by the model's reply.
print("Answer:", response.content, sep="\n")


Answer:
Azure Blob Storage
