# Watershed Navigator: A RAG-Based AI Assistant for Environmental Analysis

This notebook demonstrates a Retrieval-Augmented Generation (RAG) system for answering environmental questions using document-based context.

**Technologies:** Streamlit, SentenceTransformers, a local TinyLlama model (queried over HTTP at `localhost:11434`), and cosine-similarity search.

Use this notebook to test embedding, retrieval, and generation behavior of the backend system independently.

In [1]:
# Setup and imports
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np, pickle, os, requests

  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [None]:
# Load the sentence-embedding model and the pickled chunk store.
# Each entry of `data` is later read via its "embedding" and "text" keys.
model = SentenceTransformer("all-MiniLM-L6-v2")
with open("store/embeddings.pkl", mode="rb") as store_file:
    data = pickle.load(store_file)
print(f"Loaded {len(data)} document chunks.")

In [None]:
# Define search and prompt logic
def embed_text(text):
    """Encode a single string with the module-level ``model``.

    The text is wrapped in a one-element batch for ``model.encode`` and the
    first (only) row of the result is returned.
    """
    batch = [text]
    encoded = model.encode(batch)
    return encoded[0]

def search(query, k=3, threshold=0.5):
    """Return up to ``k`` stored chunks ranked by cosine similarity to ``query``.

    Args:
        query: Question text; embedded with ``embed_text``.
        k: Maximum number of chunks to return. Values ``<= 0`` yield ``[]``.
        threshold: Minimum cosine similarity the *best* match must reach.
            This is a gate on the top hit only: when the best match passes,
            lower-ranked chunks are returned even if their own score is
            below ``threshold``.

    Returns:
        A list of entries from the module-level ``data`` store, best match
        first, or ``[]`` when nothing can be returned (non-positive ``k``,
        empty store, or best score below ``threshold``).
    """
    # Guard first: k == 0 would make `top_indices[0]` raise IndexError, a
    # negative k would slice "all but the last |k|" hits, and an empty store
    # cannot be turned into a 2-D matrix for the similarity computation.
    if k <= 0 or len(data) == 0:
        return []

    query_vec = embed_text(query).reshape(1, -1)
    doc_vecs = np.array([d["embedding"] for d in data])
    scores = cosine_similarity(query_vec, doc_vecs)[0]

    # argsort is ascending; reverse it so the highest score comes first.
    top_indices = scores.argsort()[::-1][:k]
    if scores[top_indices[0]] < threshold:
        return []
    return [data[i] for i in top_indices]

def format_prompt(context, question):
    """Build the generation prompt from retrieved context and the question.

    The result is an instruction line, a ``Context:`` section and a
    ``Question:`` section, each separated by a blank line.
    """
    prompt_lines = (
        "Answer the question below using the information provided.",
        "",
        "Context:",
        f"{context}",
        "",
        "Question:",
        f"{question}",
    )
    return "\n".join(prompt_lines)

def ask_llama(prompt, timeout=120):
    """Send ``prompt`` to the local generation endpoint and return its reply.

    Posts a non-streaming request for the ``tinyllama`` model to
    ``http://localhost:11434/api/generate``.

    Args:
        prompt: Full prompt text to send.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout exception. Pass ``None`` to wait indefinitely.

    Returns:
        The ``"response"`` field of the JSON reply, or the placeholder
        ``"[No response returned]"`` when that field is absent.
    """
    payload = {
        "model": "tinyllama",
        "prompt": prompt,
        "stream": False,
    }
    # requests applies no timeout by default, so a stalled or half-started
    # server would otherwise block the calling cell forever.
    response = requests.post(
        "http://localhost:11434/api/generate",
        json=payload,
        timeout=timeout,
    )
    return response.json().get("response", "[No response returned]")

In [None]:
# Test a relevant query
question = "What are the effects of unrefined hydrocarbons?"
docs = search(question)
if not docs:
    # search() returns [] when the best match is below its threshold; sending
    # an empty context to the model would produce an ungrounded answer.
    print("❌ No relevant documents found. Query deemed out of scope.")
else:
    # Retrieved chunks are separated by blank lines inside the prompt context.
    context = "\n\n".join(d["text"] for d in docs)
    prompt = format_prompt(context, question)
    print(ask_llama(prompt))

In [None]:
# Test an unrelated query: an empty result from search() means it was rejected.
unrelated_question = "Who won the 2024 NBA championship?"
docs = search(unrelated_question)
if not docs:
    print("❌ No relevant documents found. Query deemed out of scope.")

## Evaluation Notes

- Relevant questions returned grounded answers.
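- Unrelated questions were rejected as expected: their best cosine-similarity score fell below the 0.5 threshold, so `search` returned no documents.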
- Use this notebook to validate new document ingestion or test query behavior offline.