# 🦙 Fully Open-Source RAG pipeline using LlamaIndex + FAISS + local LLM


This notebook cannot be run in Colab, because the Ollama server cannot be installed there.

In [1]:
pip install llama-index-core llama-index-llms-ollama llama-index-embeddings-huggingface llama-index-vector-stores-faiss


Collecting llama-index-core
  Downloading llama_index_core-0.14.4-py3-none-any.whl.metadata (2.5 kB)
Collecting llama-index-llms-ollama
  Downloading llama_index_llms_ollama-0.8.0-py3-none-any.whl.metadata (3.6 kB)
Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.6.1-py3-none-any.whl.metadata (458 bytes)
Collecting llama-index-vector-stores-faiss
  Downloading llama_index_vector_stores_faiss-0.5.1-py3-none-any.whl.metadata (377 bytes)
Collecting aiosqlite (from llama-index-core)
  Downloading aiosqlite-0.21.0-py3-none-any.whl.metadata (4.3 kB)
Collecting banks<3,>=2.2.0 (from llama-index-core)
  Downloading banks-2.2.0-py3-none-any.whl.metadata (12 kB)
Collecting dataclasses-json (from llama-index-core)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting deprecated>=1.2.9.3 (from llama-index-core)
  Downloading Deprecated-1.2.18-py2.py3-none-any.whl.metadata (5.7 kB)
Collecting dirtyjson<2,>=1.0.8 (from llama

In [2]:
# Imports (the dependencies must already be installed — run the pip install cell above once)

import faiss

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.faiss import FaissVectorStore

In [None]:
# Shell escape: ask the Ollama CLI to pull the `llama3` model
# (the same model name is passed to Ollama(model=...) further down).
! ollama pull llama3

In [None]:
# 🦙 Fully Open-Source RAG pipeline using LlamaIndex + FAISS + local LLM


# 1️⃣ Load your custom data (directory of .txt, .pdf, .md, etc.)
# NOTE: "path_to_your_documents" is a placeholder — point it at a real directory.
data_path = "path_to_your_documents"
documents = SimpleDirectoryReader(data_path).load_data()


# 2️⃣ Create a local embedding model (Hugging Face)
# Choose any small embedding model that works offline.
# If you change the model, update EMBED_DIM to that model's output size.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
EMBED_DIM = 384  # vector size produced by EMBED_MODEL_NAME
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)


# 3️⃣ Setup FAISS vector store (fully local)
# FaissVectorStore wraps an already-built faiss index, so create the index
# explicitly and pass it in. The index dimension must match the embedding size.
faiss_index = faiss.IndexFlatL2(EMBED_DIM)
faiss_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=faiss_store)


# 4️⃣ Create an index (store embeddings locally)
# Documents are embedded with `embed_model` and written into the FAISS store
# through `storage_context`.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=embed_model
)


# 5️⃣ Configure your local LLM (via Ollama)
# Make sure Ollama is installed and running (https://ollama.ai)
# You can use: `ollama pull llama3`  or  `ollama pull mistral`
# A generous timeout avoids spurious failures while a local model is still
# loading on the first request.
OLLAMA_REQUEST_TIMEOUT = 120.0  # seconds
llm = Ollama(model="llama3", request_timeout=OLLAMA_REQUEST_TIMEOUT)  # or "mistral", "phi3", etc.


# 6️⃣ Create the query engine (retrieve + reason locally)
query_engine = index.as_query_engine(
    llm=llm,
    similarity_top_k=3  # number of most relevant chunks to retrieve
)


# 7️⃣ Function to query your data locally
def generate_response(query: str):
    """Answer ``query`` with the module-level ``query_engine``.

    Args:
        query: The question to put to the query engine.

    Returns:
        The engine's response converted to a plain string.
    """
    return str(query_engine.query(query))


# Demo run: executes only when this code runs as the main module.
if __name__ == "__main__":
    user_query = "How do I reset my device?"
    print(generate_response(query=user_query))
