# Build RAG System


## Step 1: Imports and Setup

In [3]:
import os
import json
import pickle
from typing import List, Dict, Tuple
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import ollama
from tqdm import tqdm

In [4]:
# Load the knowledge base; the code below indexes it by position, so it is
# expected to be a list of document dicts.
with open('data/raw/knowledge_base.json', mode='r', encoding='utf-8') as f:
    documents = json.load(f)

# Show the fields of the first record as a quick sanity check.
print(
    "\nSample document:",
    f"ID: {documents[0]['id']}",
    f"Title: {documents[0]['title']}",
    f"Product: {documents[0]['product']}",
    f"Category: {documents[0]['category']}",
    f"Content length: {len(documents[0]['content'])} characters",
    sep="\n",
)


Sample document:
ID: install_cloudsync_pro_windows
Title: How to Install CloudSync Pro on Windows
Product: CloudSync Pro
Category: installation
Content length: 1094 characters


In [5]:
def simple_chunk_by_paragraphs(text: str, max_chunk_size: int = 500) -> List[str]:
    """
    Divide text into chunks along paragraph boundaries.

    Paragraphs (separated by a blank line) are stripped, empty ones are
    dropped, and consecutive paragraphs are greedily merged into one chunk
    as long as the merged text, including the blank-line separator, stays
    within ``max_chunk_size`` characters.

    A single paragraph that is longer than ``max_chunk_size`` is never split;
    it is emitted as its own (oversized) chunk.

    Args:
        text: text for chunking
        max_chunk_size: max chunk size in characters

    Returns:
        List of chunks, in the order they appear in ``text``
    """
    separator = "\n\n"

    chunks: List[str] = []
    current_chunk = ""

    for para in text.split(separator):
        para = para.strip()
        if not para:
            continue

        if not current_chunk:
            # First paragraph of a new chunk: always accepted, even if oversized.
            current_chunk = para
        elif len(current_chunk) + len(separator) + len(para) > max_chunk_size:
            # The separator is counted too; otherwise a merged chunk could
            # exceed max_chunk_size by the length of the joiner.
            chunks.append(current_chunk)
            current_chunk = para
        else:
            current_chunk += separator + para

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

# Smoke-test the chunker on the first document's content.
test_text = documents[0]['content']
test_chunks = simple_chunk_by_paragraphs(test_text, max_chunk_size=500)

# Report input size, chunk count, and the first chunk in full.
print(
    f"Original text: {len(test_text)} characters",
    f"Number of chunks: {len(test_chunks)}",
    f"\nFirst chunk ({len(test_chunks[0])} chars):",
    test_chunks[0],
    "\n" + "="*80,
    sep="\n",
)

Original text: 1094 characters
Number of chunks: 3

First chunk (379 chars):
# Installing CloudSync Pro on Windows

## System Requirements
- Windows operating system (latest version recommended)
- At least 2GB of free disk space
- Internet connection for download and activation

## Installation Steps

1. **Download the installer**
   - Visit our official website at www.example.com
   - Navigate to Downloads section
   - Select CloudSync Pro for Windows



In [6]:
# Parallel, index-aligned stores: all_metadatas[k] describes all_chunks[k].
all_chunks = []
all_metadatas = []

for doc in tqdm(documents, desc="Processing documents"):
    doc_chunks = simple_chunk_by_paragraphs(doc['content'], max_chunk_size=500)
    total_chunks = len(doc_chunks)

    for i, chunk_text in enumerate(doc_chunks):
        all_chunks.append(chunk_text)
        # The chunk text is duplicated into the metadata record so a record
        # alone is enough to reconstruct a search hit.
        all_metadatas.append({
            'chunk_id': f"{doc['id']}_chunk_{i}",
            'doc_id': doc['id'],
            'title': doc['title'],
            'product': doc['product'],
            'category': doc['category'],
            'chunk_index': i,
            'total_chunks': total_chunks,
            'text': chunk_text,
        })

# Exactly one entry is appended per chunk, so the list length is the chunk count.
chunk_counter = len(all_chunks)

print(f"Average chunks per document: {chunk_counter / len(documents):.1f}")

# Character-length distribution of the produced chunks.
chunk_lengths = list(map(len, all_chunks))
print(
    f"\nChunk size statistics:",
    f"  Min: {min(chunk_lengths)} characters",
    f"  Max: {max(chunk_lengths)} characters",
    f"  Average: {sum(chunk_lengths) / len(chunk_lengths):.0f} characters",
    sep="\n",
)

Processing documents: 100%|██████████| 51/51 [00:00<00:00, 54708.31it/s]

Average chunks per document: 1.6

Chunk size statistics:
  Min: 165 characters
  Max: 491 characters
  Average: 359 characters





In [7]:
# Load the sentence-embedding model used for both chunks and queries.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode one throwaway string to inspect the output vector's size and type.
test_embedding = embedding_model.encode("Hello world")
print(
    f"Embedding dimension: {len(test_embedding)}",
    f"Embedding type: {type(test_embedding)}",
    sep="\n",
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Loading weights: 100%|██████████| 103/103 [00:00<00:00, 489.23it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


Embedding dimension: 384
Embedding type: <class 'numpy.ndarray'>


In [8]:
# Embed every chunk in batches and store the result as a float32 matrix,
# the dtype handed to the FAISS index later on.
embeddings = np.array(
    embedding_model.encode(all_chunks, show_progress_bar=True, batch_size=32),
    dtype='float32',
)

# Rows are chunks, columns are embedding components.
print(
    f"  Shape: {embeddings.shape}",
    f"  Dimension: {embeddings.shape[1]}",
    f"  Total vectors: {embeddings.shape[0]}",
    sep="\n",
)

Batches: 100%|██████████| 3/3 [00:01<00:00,  2.01it/s]

  Shape: (82, 384)
  Dimension: 384
  Total vectors: 82





In [9]:
# Output directory for the persisted index and its metadata.
vector_db_dir = "vector_db"
os.makedirs(vector_db_dir, exist_ok=True)

# IndexFlatL2 does exact search ranked by L2 (Euclidean) distance.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Persist the index next to the metadata list; row k of the index corresponds
# to all_metadatas[k].
index_path = os.path.join(vector_db_dir, 'faiss_index.bin')
faiss.write_index(index, index_path)

metadata_path = os.path.join(vector_db_dir, 'metadata.pkl')
with open(metadata_path, 'wb') as f:
    pickle.dump(all_metadatas, f)
print(f"Metadata saved to {vector_db_dir}/metadata.pkl")

Metadata saved to vector_db/metadata.pkl


In [10]:
def search_knowledge_base(query: str, n_results: int = 3) -> Dict:
    """
    Search the knowledge base with FAISS.

    The query is embedded with the module-level ``embedding_model`` and matched
    against the module-level ``index``; hits are resolved to chunk records via
    the module-level ``all_metadatas`` list.

    Args:
        query: User's question
        n_results: Maximum number of results to return

    Returns:
        Dict with four index-aligned lists, best match first:
            'distances': distance reported by the index for each hit
            'indices':   row position of each hit in the index
            'documents': chunk text of each hit
            'metadatas': full metadata record of each hit
        Fewer than ``n_results`` entries are returned when the index holds
        fewer vectors than requested.
    """
    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding).astype('float32')

    distances, indices = index.search(query_embedding, n_results)

    results = {
        'distances': [],
        'indices': [],
        'documents': [],
        'metadatas': []
    }

    for distance, idx in zip(distances[0].tolist(), indices[0].tolist()):
        # FAISS pads missing neighbours with label -1. Without this guard,
        # all_metadatas[-1] would silently return the last chunk as a hit.
        if idx < 0:
            continue

        metadata = all_metadatas[idx]
        results['distances'].append(distance)
        results['indices'].append(idx)
        results['documents'].append(metadata['text'])
        results['metadatas'].append(metadata)

    return results

# Sample questions used to eyeball retrieval quality.
test_queries = [
    "How do I install CloudSync Pro on Windows?",
    "My files are not syncing",
    "What is selective sync?",
    "TeamChat video call problems"
]

print("Testing retrieval with sample queries:\n")
print("="*80)

for query in test_queries:
    results = search_knowledge_base(query, n_results=3)

    # Label each result group with its query; otherwise the groups in the
    # output cannot be attributed to a question.
    print(f"Query: {query}\n")

    # The three result lists are index-aligned, so iterate them together.
    for i, (doc, metadata, distance) in enumerate(
        zip(results['documents'], results['metadatas'], results['distances'])
    ):
        print(f"Result {i+1}:")
        print(f"  Product: {metadata['product']}")
        print(f"  Title: {metadata['title']}")
        print(f"  Category: {metadata['category']}")
        print(f"  Distance: {distance:.4f} (lower = better match)")
        print(f"  Content preview: {doc[:150]}...")
        print()

    print("-"*80)

Testing retrieval with sample queries:

Result 1:
  Product: CloudSync Pro
  Title: How to Install CloudSync Pro on Windows
  Category: installation
  Distance: 0.2742 (lower = better match)
  Content preview: # Installing CloudSync Pro on Windows

## System Requirements
- Windows operating system (latest version recommended)
- At least 2GB of free disk spac...

Result 2:
  Product: CloudSync Pro
  Title: How to Install CloudSync Pro on Windows
  Category: installation
  Distance: 0.2804 (lower = better match)
  Content preview: 2. **Run the installer**
   - Locate the downloaded file in your Downloads folder
   - Double-click to run the installer
   - Follow the on-screen ins...

Result 3:
  Product: CloudSync Pro
  Title: How to Install CloudSync Pro on Mac
  Category: installation
  Distance: 0.2804 (lower = better match)
  Content preview: 2. **Run the installer**
   - Locate the downloaded file in your Downloads folder
   - Double-click to run the installer
   - Follow the on-scre

In [11]:
# Models to prefer, in priority order, when they are installed locally.
PREFERRED_MODELS = ('llama3.2:3b', 'llama3.2:1b')

# Only the server call is guarded: a failure here means Ollama is unreachable.
# The error is shown and re-raised so the notebook stops at this cell instead
# of failing later with a NameError on MODEL_NAME.
try:
    models = ollama.list()
except Exception as e:
    print(f"Make sure Ollama is running! ({e})")
    raise

available_models = [m.model for m in models.models]

# Raised outside the try block so this hint is not replaced by the
# "Ollama is running" message.
if not available_models:
    raise RuntimeError("No models found. Run: ollama pull llama3.2:3b")

# First preferred model that is installed, otherwise whatever model is listed first.
MODEL_NAME = next(
    (name for name in PREFERRED_MODELS if name in available_models),
    available_models[0],
)
print(f"Using model: {MODEL_NAME}")

In [12]:
def rag_query(question: str, n_results: int = 3, verbose: bool = True) -> Dict:
    """
    Run the full RAG pipeline for one question: retrieve, then generate.

    Retrieval goes through ``search_knowledge_base``; the retrieved chunks are
    formatted into a documentation context that is embedded in the prompt sent
    to the Ollama model named by the module-level ``MODEL_NAME``.

    Args:
        question: User's question
        n_results: Number of chunks to retrieve
        verbose: When True, print progress and the retrieved titles

    Returns:
        Dict with keys 'question', 'answer', 'retrieved_docs',
        'retrieved_metadata', 'distances' and 'context'
    """
    if verbose:
        print(f"Searching for: '{question}'\n")

    # --- Retrieve ---
    hits = search_knowledge_base(question, n_results=n_results)
    retrieved_docs = hits['documents']
    retrieved_metadata = hits['metadatas']
    distances = hits['distances']

    if verbose:
        print(f"Retrieved {len(retrieved_docs)} documents\n")
        i = 0
        for meta, dist in zip(retrieved_metadata, distances):
            print(f"  {i+1}. {meta['title']} (distance: {dist:.4f})")
            i += 1
        print()

    # --- Build context: one labelled section per retrieved chunk ---
    sections = []
    for i, doc in enumerate(retrieved_docs):
        # Stop at the shorter list, as pairing the two lists with zip would.
        if i >= len(retrieved_metadata):
            break
        meta = retrieved_metadata[i]
        sections.append(
            f"Document {i+1} (from {meta['product']} - {meta['category']}):\n{doc}"
        )
    context = "\n\n---\n\n".join(sections)

    prompt = f"""You are a helpful technical support assistant.

Use the following documentation to answer the user's question. 
If the answer is not in the documentation, say so.
Be concise and helpful.

Documentation:
{context}

User Question: {question}

Answer:"""

    # --- Generate ---
    if verbose:
        print("Generating answer with LLM...\n")

    response = ollama.generate(model=MODEL_NAME, prompt=prompt)

    return {
        'question': question,
        'answer': response['response'],
        'retrieved_docs': retrieved_docs,
        'retrieved_metadata': retrieved_metadata,
        'distances': distances,
        'context': context
    }

# Confirmation message so the cell produces visible output once rag_query is defined.
print("RAG pipeline function created!")

RAG pipeline function created!


In [13]:
# Test query 1: end-to-end run of the RAG pipeline on a single question.
question = "How do I install CloudSync Pro on Windows?"

# Header banner followed by one blank line.
print("="*80, "RAG SYSTEM TEST", "="*80, "", sep="\n")

result = rag_query(question, n_results=3, verbose=True)

# Footer: the generated answer framed by separator rules.
print(
    "="*80,
    "FINAL ANSWER:",
    "="*80,
    result['answer'],
    "\n" + "="*80,
    sep="\n",
)

RAG SYSTEM TEST

Searching for: 'How do I install CloudSync Pro on Windows?'

Retrieved 3 documents

  1. How to Install CloudSync Pro on Windows (distance: 0.2742)
  2. How to Install CloudSync Pro on Windows (distance: 0.2804)
  3. How to Install CloudSync Pro on Mac (distance: 0.2804)

Generating answer with LLM...

FINAL ANSWER:
To install CloudSync Pro on Windows, follow these steps:

1. Go to [this page](https://www.example.com) for the official installation.
2. Click "Download" to start downloading the installer file.
3. Double-click the downloaded installer file to run it.
4. Follow the on-screen instructions carefully to complete the installation.

Once the installation is done, you can launch CloudSync Pro from your applications menu or sign in with your account credentials to get started.

