In [None]:
import pickle
import re
import faiss
import torch
import numpy as np
from datasets import Dataset
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
import os

# ===== 1. Load test Q/A texts =====
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# test_texts.pkl must come from a trusted source.
with open("test_texts.pkl", "rb") as f:
    test_texts = pickle.load(f)

# ===== 2. Load retrieval data =====
# Sentence embedder used to encode queries for FAISS and, in the main entry
# point, to score generated answers in evaluate_ragas_locally.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Prebuilt FAISS index. retrieve_contexts maps the ids it returns into
# `texts`, so the index and qa_chunks_cleaned.pkl are presumably built from
# the same chunk list in the same order -- TODO confirm against the indexer.
index = faiss.read_index("qa_index_cleaned.faiss")
with open("qa_chunks_cleaned.pkl", "rb") as f:
    texts = pickle.load(f)

# ===== 3. Load local Qwen model =====
model_name = "unsloth/qwen2-1.5b-bnb-4bit"
# Loads the 4-bit checkpoint together with its tokenizer.
# NOTE(review): dtype=None presumably lets unsloth pick the dtype -- confirm.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True
)
# Switch to inference mode (disables dropout and similar training behaviour).
model.eval()
# Device that tokenized prompts are moved to before generation.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ===== 4. Helper function: extract question and answer =====
def extract_qa(text):
    """Split a ``Q: ... A: ...`` sample into ``(question, answer)``.

    The question is the rest of the line following the first ``Q:``; the
    answer is everything following the first ``A:`` up to the end of the
    text. A missing marker yields an empty string for that part.
    """

    def _capture(pattern, flags=0):
        # First capture group of the first match, stripped; "" if no match.
        found = re.search(pattern, text, flags)
        if found is None:
            return ""
        return found.group(1).strip()

    # DOTALL only for the answer so that it may span several lines.
    return _capture(r"Q:\s*(.*)"), _capture(r"A:\s*(.*)", re.DOTALL)

# ===== 5. Retrieve contexts =====
def retrieve_contexts(query, top_k=3):
    """Return up to ``top_k`` stored chunks nearest to ``query``.

    Args:
        query: Question text to embed with the module-level ``embedder``.
        top_k: Maximum number of neighbours requested from the FAISS index.

    Returns:
        List of entries from the module-level ``texts``, in the order FAISS
        ranked them. It is shorter than ``top_k`` when the index holds fewer
        vectors than requested.
    """
    query_vec = embedder.encode([query], convert_to_numpy=True)
    # FAISS searches float32 row-major arrays; make that explicit rather
    # than relying on the embedder's output dtype.
    query_vec = np.ascontiguousarray(query_vec, dtype=np.float32)
    _, neighbor_ids = index.search(query_vec, top_k)
    # FAISS fills missing neighbours with -1; indexing texts[-1] would
    # silently return the last chunk, so drop those slots.
    return [texts[i] for i in neighbor_ids[0] if i >= 0]

# ===== 6. Generate answer with Qwen =====
def answer_question_with_context(query, context):
    """Generate an answer to ``query`` grounded in ``context``.

    Uses the module-level ``model``, ``tokenizer`` and ``device``.

    Args:
        query: The user's question.
        context: Background text inserted into the prompt.

    Returns:
        The sampled completion (temperature 0.7, top-p 0.9, at most 256 new
        tokens) with the prompt removed and surrounding whitespace stripped.
    """
    prompt = f"""You are an intelligent QA assistant. Please answer the user's question based on the following background knowledge:

Background documents:
{context}

User question:
{query}

Answer:"""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_token_count = inputs['input_ids'].shape[1]
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit pad id, as the other generate() calls in this file do.
            pad_token_id=tokenizer.eos_token_id,
        )
    # The output sequence starts with the prompt tokens. Drop them by token
    # count: slicing the decoded string by len(prompt) is only correct when
    # decoding reproduces the prompt character for character.
    completion_ids = outputs[0][prompt_token_count:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# ===== 7. Build dataset for local evaluation =====
def build_ragas_dataset(test_texts, max_samples=100):
    """Run retrieval and generation over the first ``max_samples`` samples.

    Each sample is split with ``extract_qa``; samples missing a question or
    an answer are reported and skipped. For the rest, contexts are retrieved,
    joined with ``"\\n---\\n"`` and passed to the generator.

    Returns:
        A ``Dataset`` with columns ``question``, ``answer`` (model output),
        ``contexts`` (list of retrieved chunks) and ``ground_truth``.
    """
    columns = {
        "question": [],
        "answer": [],
        "contexts": [],
        "ground_truth": [],
    }
    for sample_no, raw_text in enumerate(test_texts[:max_samples], start=1):
        question, reference = extract_qa(raw_text)
        if not (question and reference):
            print(f"Skipping sample {sample_no}: missing question or answer")
            continue

        retrieved = retrieve_contexts(question)
        prediction = answer_question_with_context(question, "\n---\n".join(retrieved))

        columns["question"].append(question)
        columns["answer"].append(prediction)
        columns["contexts"].append(retrieved)
        columns["ground_truth"].append(reference)
        print(f" Sample {sample_no}/{max_samples} completed")
    return Dataset.from_dict(columns)

# ===== 8. Local ragas-lite evaluation functions =====
def average_cos_sim(a, b):
    """Return the mean of all pairwise cosine similarities between ``a`` and ``b``."""
    pairwise = cosine_similarity(a, b)
    return pairwise.mean()

def evaluate_ragas_locally(dataset, embedder):
    """Score a QA dataset with embedding-similarity stand-ins for RAGAS metrics.

    All four scores are cosine similarities between sentence embeddings, not
    the LLM-judged RAGAS metrics they are named after:

    * ``faithfulness``      -- mean similarity of the answer to the contexts
    * ``answer_relevancy``  -- similarity of the answer to the question
    * ``context_precision`` -- best similarity of the ground truth to a context
    * ``context_recall``    -- mean similarity of the ground truth to the contexts

    Args:
        dataset: Mapping-style object with ``question``, ``answer``,
            ``ground_truth`` and ``contexts`` columns of equal length.
        embedder: Object whose ``encode(list_of_str)`` returns a 2-D array.

    Returns:
        Dict mapping each metric name to its list of per-sample scores.
        Samples that fail to score are reported and left out of every list.
    """
    print("\n Running local embedding-based ragas-lite evaluation...\n")
    scores = {
        "faithfulness": [],
        "answer_relevancy": [],
        "context_precision": [],
        "context_recall": [],
    }

    # Read each column once instead of on every loop iteration.
    rows = zip(
        dataset["question"],
        dataset["answer"],
        dataset["ground_truth"],
        dataset["contexts"],
    )

    for i, (q, a, gt, ctxs) in enumerate(rows):
        try:
            q_vec = embedder.encode([q])
            a_vec = embedder.encode([a])
            gt_vec = embedder.encode([gt])
            ctx_vecs = embedder.encode(ctxs)

            faith = average_cos_sim(a_vec, ctx_vecs)
            rel = cosine_similarity(a_vec, q_vec)[0][0]
            # One similarity row serves both ground-truth metrics.
            gt_ctx_sims = cosine_similarity(gt_vec, ctx_vecs)
            precision = gt_ctx_sims.max()
            recall = gt_ctx_sims.mean()
        except Exception as e:
            # Best effort: report the sample and keep evaluating the rest.
            print(f" Evaluation failed at sample {i+1}: {e}")
            continue

        # Append only after every metric succeeded so the four lists stay
        # aligned sample-for-sample.
        scores["faithfulness"].append(faith)
        scores["answer_relevancy"].append(rel)
        scores["context_precision"].append(precision)
        scores["context_recall"].append(recall)

    # Print average scores
    print("\n Local ragas-lite evaluation completed:\n")
    for k, v in scores.items():
        if v:
            print(f"{k}: {np.mean(v):.3f}")
        else:
            # np.mean([]) would emit a RuntimeWarning and print nan.
            print(f"{k}: n/a (no samples scored)")
    return scores

# ===== 9. Main entry point =====
if __name__ == "__main__":
    # Cache of the generated evaluation dataset. When the file exists the
    # generation step is skipped entirely, so delete it to regenerate answers.
    dataset_path = "ragas_data.pkl"
    if os.path.exists(dataset_path):
        print(f" Found saved dataset, loading {dataset_path} ...")
        # NOTE(review): pickle.load can execute arbitrary code; only load a
        # cache file this script wrote itself.
        with open(dataset_path, "rb") as f:
            ragas_data = pickle.load(f)
    else:
        print(" Building dataset...")
        ragas_data = build_ragas_dataset(test_texts, max_samples=100)
        print(f" Saving dataset to {dataset_path} ...")
        with open(dataset_path, "wb") as f:
            pickle.dump(ragas_data, f)

    print("\n Dataset ready, starting evaluation...\n")
    # Scoring reuses the module-level retrieval embedder.
    evaluate_ragas_locally(ragas_data, embedder)


In [None]:
import pickle
import torch
import jieba
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from rank_bm25 import BM25Okapi

# ===== 1. Document Preprocessing Layer =====
class DocumentPreprocessor:
    """Split raw documents into chunks with a recursive character splitter."""

    def __init__(self, chunk_size=512, chunk_overlap=50):
        """Create the splitter.

        ``chunk_size`` and ``chunk_overlap`` are measured with ``len`` (i.e.
        in characters). The separator list includes Chinese punctuation as
        well as newlines and spaces.
        """
        splitter_options = dict(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", "。", "！", "？", "；", "，", " ", ""],
            keep_separator=True,
            length_function=len,
        )
        self.text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

    def dynamic_chunking(self, documents):
        """Return one flat list with the chunks of every document, in order."""
        print("Starting dynamic chunking...")
        split = self.text_splitter.split_text
        pieces = [piece for document in documents for piece in split(document)]
        print(f"Dynamic chunking completed. Generated {len(pieces)} chunks.")
        return pieces

# ===== 2. Hybrid Retrieval Layer =====
class HybridRetriever:
    """Retrieve text chunks with dense (FAISS) and sparse (BM25) search.

    ``build_vector_index`` and ``build_bm25_index`` must both be called with
    the same list of texts before ``hybrid_search`` is used: BM25 hits are
    mapped back to text through ``self.texts``, which only
    ``build_vector_index`` sets.
    """

    def __init__(self, embedder_model="sentence-transformers/all-MiniLM-L6-v2"):
        """Load the sentence embedder; indexes start out unbuilt (``None``)."""
        self.embedder = SentenceTransformer(embedder_model)
        self.faiss_index = None
        self.texts = None
        self.bm25 = None
        self.tokenized_docs = None

    def build_vector_index(self, texts):
        """Embed ``texts`` and store them in an inner-product FAISS index."""
        print("Building vector index...")
        self.texts = texts
        embeddings = self.embedder.encode(texts, convert_to_numpy=True, show_progress_bar=True)
        dimension = embeddings.shape[1]
        self.faiss_index = faiss.IndexFlatIP(dimension)
        # With L2-normalised vectors the inner product equals cosine similarity.
        faiss.normalize_L2(embeddings)
        self.faiss_index.add(embeddings.astype('float32'))
        print(f"Vector index built. Dimension: {dimension}, Documents: {len(texts)}")

    def build_bm25_index(self, texts):
        """Tokenise ``texts`` with jieba and build the BM25 index over them."""
        print("Building BM25 keyword index...")
        self.tokenized_docs = [list(jieba.cut(text)) for text in texts]
        self.bm25 = BM25Okapi(self.tokenized_docs)
        print("BM25 index built.")

    def vector_search(self, query, top_k=5):
        """Return up to ``top_k`` dense hits as ``{'text', 'score', 'method'}`` dicts.

        Raises:
            ValueError: If ``build_vector_index`` has not been called.
        """
        if self.faiss_index is None:
            raise ValueError("Vector index not built. Call build_vector_index first.")
        query_vec = self.embedder.encode([query], convert_to_numpy=True)
        faiss.normalize_L2(query_vec)
        scores, indices = self.faiss_index.search(query_vec.astype('float32'), top_k)
        # FAISS pads with -1 when fewer than top_k vectors exist.
        return [
            {'text': self.texts[idx], 'score': float(score), 'method': 'vector'}
            for score, idx in zip(scores[0], indices[0])
            if idx != -1
        ]

    def bm25_search(self, query, top_k=5):
        """Return the ``top_k`` highest-scoring BM25 hits (highest first).

        Raises:
            ValueError: If ``build_bm25_index`` has not been called.
        """
        if self.bm25 is None:
            raise ValueError("BM25 index not built. Call build_bm25_index first.")
        query_tokens = list(jieba.cut(query))
        scores = self.bm25.get_scores(query_tokens)
        top_indices = np.argsort(scores)[::-1][:top_k]
        return [
            {'text': self.texts[idx], 'score': float(scores[idx]), 'method': 'bm25'}
            for idx in top_indices
        ]

    @staticmethod
    def _min_max(results):
        """Map each distinct text in ``results`` to its min-max scaled score."""
        raw = [result['score'] for result in results]
        if not raw:
            return {}
        low, high = min(raw), max(raw)
        span = high - low
        scaled = {}
        for result, score in zip(results, raw):
            if span:
                value = (score - low) / span
            else:
                # All candidates tie: count them as full matches only when
                # the shared score is actually positive.
                value = 1.0 if high > 0 else 0.0
            text = result['text']
            # Duplicate chunk texts keep their best score.
            scaled[text] = max(value, scaled.get(text, 0.0))
        return scaled

    def hybrid_search(self, query, top_k=6, vector_weight=0.6, bm25_weight=0.4):
        """Fuse dense and BM25 candidates into one ranked list.

        Cosine scores are at most 1 while BM25 scores are unbounded, so the
        raw values cannot be mixed directly: the BM25 term would dominate
        whatever the weights are. Each candidate list is therefore min-max
        scaled to [0, 1] before the weighted sum.

        Args:
            query: Search text.
            top_k: Candidates fetched per method and results returned.
            vector_weight: Weight of the scaled dense score.
            bm25_weight: Weight of the scaled BM25 score.

        Returns:
            Up to ``top_k`` dicts ``{'text', 'score', 'method': 'hybrid'}``
            sorted by descending fused score. A text found by only one
            method contributes 0 for the other.
        """
        vector_scores = self._min_max(self.vector_search(query, top_k))
        bm25_scores = self._min_max(self.bm25_search(query, top_k))

        final_results = [
            {
                'text': text,
                'score': (vector_weight * vector_scores.get(text, 0.0)
                          + bm25_weight * bm25_scores.get(text, 0.0)),
                'method': 'hybrid',
            }
            # Dense hits first, then BM25-only hits (dict insertion order).
            for text in {**vector_scores, **bm25_scores}
        ]
        final_results.sort(key=lambda item: item['score'], reverse=True)
        return final_results[:top_k]

# ===== 3. Query Enhancement Layer (HyDE) =====
class QueryEnhancer:
    """Expand a query with a model-written hypothetical document (HyDE)."""

    def __init__(self, model, tokenizer, device):
        """Store the generation model, its tokenizer and the target device."""
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def generate_hypothetical_document(self, query):
        """Sample a hypothetical document answering ``query``.

        Returns:
            The generated continuation only (at most 200 new tokens),
            stripped of surrounding whitespace.
        """
        hyde_prompt = f"""Please generate a detailed hypothetical document based on the following question. The document should include the answer and related background information:

Question: {query}

Hypothetical Document:"""
        inputs = self.tokenizer(hyde_prompt, return_tensors="pt", truncation=True, max_length=512).to(self.device)
        prompt_token_count = inputs['input_ids'].shape[1]
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_new_tokens=200,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Drop the prompt by token count. Slicing the decoded text by
        # len(hyde_prompt) breaks when the prompt was truncated to 512 tokens
        # or when decoding does not reproduce it character for character.
        completion_ids = outputs[0][prompt_token_count:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

    def enhanced_query(self, original_query):
        """Return ``(query + " " + hypothetical_doc, hypothetical_doc)``."""
        hypothetical_doc = self.generate_hypothetical_document(original_query)
        enhanced_query = f"{original_query} {hypothetical_doc}"
        return enhanced_query, hypothetical_doc

# ===== 4. Answer Generation Layer =====
class QAChain:
    """Format retrieved documents and generate the final answer."""

    def __init__(self, model, tokenizer, device):
        """Store the generation model, its tokenizer and the target device."""
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def format_context(self, retrieved_docs):
        """Render retrieved documents as numbered ``Document i: ...`` paragraphs.

        Accepts result dicts carrying a ``'text'`` key as well as plain
        strings. Returns a fixed placeholder sentence when nothing was
        retrieved.
        """
        if not retrieved_docs:
            return "No relevant background information available."
        context_parts = []
        for i, doc in enumerate(retrieved_docs, 1):
            body = doc['text'] if isinstance(doc, dict) else doc
            context_parts.append(f"Document {i}: {body}")
        return "\n\n".join(context_parts)

    def generate_answer(self, query, context):
        """Sample an answer to ``query`` given ``context``.

        Returns:
            The generated continuation only (at most 256 new tokens),
            stripped of surrounding whitespace.
        """
        qa_prompt = f"""You are an intelligent QA assistant. Please answer the user's question based on the provided background knowledge.

Background knowledge:
{context}

User question: {query}

Please provide an accurate and detailed answer:"""
        inputs = self.tokenizer(qa_prompt, return_tensors="pt", truncation=True, max_length=1800).to(self.device)
        prompt_token_count = inputs['input_ids'].shape[1]
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Drop the prompt by token count. With truncation=True a long
        # context shortens the tokenized prompt, so slicing the decoded text
        # by len(qa_prompt) would cut into, or entirely remove, the answer.
        completion_ids = outputs[0][prompt_token_count:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# ===== 5. Complete RAG System =====
class CompleteRAGSystem:
    """Wire chunking, hybrid retrieval, HyDE and answer generation together."""

    def __init__(self, model_name="unsloth/qwen2-1.5b-bnb-4bit"):
        """Create the pipeline stages and load the 4-bit generation model."""
        self.preprocessor = DocumentPreprocessor()
        self.retriever = HybridRetriever()
        print("Loading generation model...")
        loaded = FastLanguageModel.from_pretrained(
            model_name=model_name,
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=True
        )
        self.model, self.tokenizer = loaded
        self.model.eval()
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"
        # Both generation stages share one model, tokenizer and device.
        shared = (self.model, self.tokenizer, self.device)
        self.query_enhancer = QueryEnhancer(*shared)
        self.qa_chain = QAChain(*shared)

    def build_knowledge_base(self, documents):
        """Chunk ``documents``, index the chunks both ways, return the chunks."""
        chunks = self.preprocessor.dynamic_chunking(documents)
        self.retriever.build_vector_index(chunks)
        self.retriever.build_bm25_index(chunks)
        return chunks

    def answer_question(self, query, use_hyde=True, use_hybrid=True):
        """Answer ``query`` from the three best retrieved chunks.

        With ``use_hyde`` the search text is the query extended by a
        hypothetical document; the answer prompt always uses the original
        query. ``use_hybrid`` selects hybrid over vector-only retrieval.

        Returns:
            Dict with keys ``question``, ``answer``, ``retrieved_docs`` and
            ``context``.
        """
        search_query = query
        if use_hyde:
            search_query, _ = self.query_enhancer.enhanced_query(query)

        search = self.retriever.hybrid_search if use_hybrid else self.retriever.vector_search
        retrieved_docs = search(search_query, top_k=3)

        context = self.qa_chain.format_context(retrieved_docs)
        return {
            'question': query,
            'answer': self.qa_chain.generate_answer(query, context),
            'retrieved_docs': retrieved_docs,
            'context': context,
        }

# ===== 6. Main Interactive QA Entry =====
def main():
    """Run an interactive question-answering loop over ``test_texts.pkl``.

    Builds the knowledge base from the pickled texts, then answers questions
    read from stdin until the user enters ``q``, sends EOF or presses Ctrl-C.
    Blank input is ignored.
    """
    # Load document library
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only use a test_texts.pkl from a trusted source.
    with open("test_texts.pkl", "rb") as f:
        test_texts = pickle.load(f)
    documents = list(test_texts)

    # Initialize system and build knowledge base
    rag_system = CompleteRAGSystem()
    rag_system.build_knowledge_base(documents)

    # Command-line interaction
    print("\nRAG QA System Started! (Enter q to exit)")
    while True:
        try:
            user_q = input("\nEnter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session instead of a traceback.
            print("\nGoodbye!")
            break
        if not user_q:
            # Nothing to ask: skip the expensive HyDE + generation pass.
            continue
        if user_q.lower() == "q":
            print("Goodbye!")
            break
        result = rag_system.answer_question(user_q, use_hyde=True, use_hybrid=True)
        print("\nAnswer:", result['answer'])
        print("\nRelevant Documents:")
        for i, doc in enumerate(result['retrieved_docs'], 1):
            print(f"--- Document {i} (score={doc['score']:.4f}, method={doc['method']}) ---")
            print(doc['text'][:200], "...\n")

if __name__ == "__main__":
    main()


In [None]:
import pickle
import re
import faiss
import torch
import numpy as np
from datasets import Dataset
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from rank_bm25 import BM25Okapi
import jieba
import os

# ===== 1. Document Preprocessing Layer =====
class DocumentPreprocessor:
    """Document preprocessing layer: splits documents into chunks."""

    def __init__(self, chunk_size=512, chunk_overlap=50):
        """Create the recursive character splitter.

        ``chunk_size`` and ``chunk_overlap`` are measured with ``len`` (i.e.
        in characters). The separator list includes Chinese punctuation as
        well as newlines and spaces.
        """
        splitter_options = dict(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", "。", "！", "？", "；", "，", " ", ""],
            keep_separator=True,
            length_function=len,
        )
        self.text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

    def dynamic_chunking(self, documents):
        """Return one flat list with the chunks of every document, in order.

        Progress is printed after every 100 documents.
        """
        print("Starting dynamic chunking...")
        pieces = []

        for done, document in enumerate(documents, start=1):
            pieces += self.text_splitter.split_text(document)

            if done % 100 == 0:
                print(f"Processed {done} documents")

        print(f"Dynamic chunking completed. Generated {len(pieces)} chunks")
        return pieces

# ===== 2. Embedding and Indexing Layer =====
class HybridRetriever:
    """Hybrid retriever layer: combines vector and keyword search.

    ``build_vector_index`` and ``build_bm25_index`` must both be called with
    the same list of texts before ``hybrid_search`` is used: BM25 hits are
    mapped back to text through ``self.texts``, which only
    ``build_vector_index`` sets.
    """

    def __init__(self, embedder_model="sentence-transformers/all-MiniLM-L6-v2"):
        """Load the sentence embedder; indexes start out unbuilt (``None``)."""
        self.embedder = SentenceTransformer(embedder_model)
        self.faiss_index = None
        self.texts = None
        self.bm25 = None
        self.tokenized_docs = None

    def build_vector_index(self, texts):
        """Build FAISS vector index"""
        print("Building vector index...")
        self.texts = texts

        embeddings = self.embedder.encode(texts, convert_to_numpy=True, show_progress_bar=True)
        dimension = embeddings.shape[1]
        self.faiss_index = faiss.IndexFlatIP(dimension)
        # With L2-normalised vectors the inner product equals cosine similarity.
        faiss.normalize_L2(embeddings)
        self.faiss_index.add(embeddings.astype('float32'))

        print(f"Vector index built. Dimension: {dimension}, Documents: {len(texts)}")

    def build_bm25_index(self, texts):
        """Build BM25 keyword index"""
        print("Building BM25 keyword index...")
        self.tokenized_docs = [list(jieba.cut(text)) for text in texts]
        self.bm25 = BM25Okapi(self.tokenized_docs)
        print("BM25 index built")

    def vector_search(self, query, top_k=5):
        """Return up to ``top_k`` dense hits as ``{'text', 'score', 'method'}`` dicts.

        Raises:
            ValueError: If ``build_vector_index`` has not been called.
        """
        if self.faiss_index is None:
            raise ValueError("Vector index not built. Call build_vector_index first.")

        query_vec = self.embedder.encode([query], convert_to_numpy=True)
        faiss.normalize_L2(query_vec)
        scores, indices = self.faiss_index.search(query_vec.astype('float32'), top_k)

        # FAISS pads with -1 when fewer than top_k vectors exist.
        return [
            {'text': self.texts[idx], 'score': float(score), 'method': 'vector'}
            for score, idx in zip(scores[0], indices[0])
            if idx != -1
        ]

    def bm25_search(self, query, top_k=5):
        """Return the ``top_k`` highest-scoring BM25 hits (highest first).

        Raises:
            ValueError: If ``build_bm25_index`` has not been called.
        """
        if self.bm25 is None:
            raise ValueError("BM25 index not built. Call build_bm25_index first.")

        query_tokens = list(jieba.cut(query))
        scores = self.bm25.get_scores(query_tokens)
        top_indices = np.argsort(scores)[::-1][:top_k]

        return [
            {'text': self.texts[idx], 'score': float(scores[idx]), 'method': 'bm25'}
            for idx in top_indices
        ]

    @staticmethod
    def _min_max(results):
        """Map each distinct text in ``results`` to its min-max scaled score."""
        raw = [result['score'] for result in results]
        if not raw:
            return {}
        low, high = min(raw), max(raw)
        span = high - low
        scaled = {}
        for result, score in zip(results, raw):
            if span:
                value = (score - low) / span
            else:
                # All candidates tie: count them as full matches only when
                # the shared score is actually positive.
                value = 1.0 if high > 0 else 0.0
            text = result['text']
            # Duplicate chunk texts keep their best score.
            scaled[text] = max(value, scaled.get(text, 0.0))
        return scaled

    def hybrid_search(self, query, top_k=6, vector_weight=0.6, bm25_weight=0.4):
        """Hybrid search: combine vector and BM25 search.

        Cosine scores are at most 1 while BM25 scores are unbounded, so the
        raw values cannot be mixed directly: the BM25 term would dominate
        whatever the weights are. Each candidate list is therefore min-max
        scaled to [0, 1] before the weighted sum.

        Args:
            query: Search text.
            top_k: Candidates fetched per method and results returned.
            vector_weight: Weight of the scaled dense score.
            bm25_weight: Weight of the scaled BM25 score.

        Returns:
            Up to ``top_k`` dicts ``{'text', 'score', 'method': 'hybrid'}``
            sorted by descending fused score. A text found by only one
            method contributes 0 for the other.
        """
        vector_scores = self._min_max(self.vector_search(query, top_k))
        bm25_scores = self._min_max(self.bm25_search(query, top_k))

        final_results = [
            {
                'text': text,
                'score': (vector_weight * vector_scores.get(text, 0.0)
                          + bm25_weight * bm25_scores.get(text, 0.0)),
                'method': 'hybrid',
            }
            # Dense hits first, then BM25-only hits (dict insertion order).
            for text in {**vector_scores, **bm25_scores}
        ]

        final_results.sort(key=lambda item: item['score'], reverse=True)
        return final_results[:top_k]

# ===== 3. Query Enhancement Layer =====
class QueryEnhancer:
    """Query enhancement using HyDE (hypothetical document generation)"""

    def __init__(self, model, tokenizer, device):
        """Store the generation model, its tokenizer and the target device."""
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def generate_hypothetical_document(self, query):
        """Sample a hypothetical document answering ``query``.

        Returns:
            The generated continuation only (at most 200 new tokens),
            stripped of surrounding whitespace.
        """
        prompt = f"""Generate a detailed hypothetical document for the following question. Include the answer and related background information:

Question: {query}

Hypothetical Document:"""
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(self.device)
        prompt_token_count = inputs['input_ids'].shape[1]
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_new_tokens=200,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Drop the prompt by token count. Slicing the decoded text by
        # len(prompt) breaks when the prompt was truncated to 512 tokens or
        # when decoding does not reproduce it character for character.
        completion_ids = outputs[0][prompt_token_count:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

    def enhanced_query(self, original_query):
        """Return ``(query + " " + hypothetical_doc, hypothetical_doc)``."""
        hypothetical_doc = self.generate_hypothetical_document(original_query)
        enhanced_query = f"{original_query} {hypothetical_doc}"
        return enhanced_query, hypothetical_doc

# ===== 4. QA Generation Layer =====
class QAChain:
    """QA Chain for structured question answering"""

    def __init__(self, model, tokenizer, device):
        """Store the generation model, its tokenizer and the target device."""
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def format_context(self, retrieved_docs):
        """Render retrieved documents as numbered ``Document i: ...`` paragraphs.

        Accepts result dicts carrying a ``'text'`` key as well as plain
        strings. Returns a fixed placeholder sentence when nothing was
        retrieved.
        """
        if not retrieved_docs:
            return "No relevant background information available."

        context_parts = []
        for i, doc in enumerate(retrieved_docs, 1):
            body = doc['text'] if isinstance(doc, dict) else doc
            context_parts.append(f"Document {i}: {body}")
        return "\n\n".join(context_parts)

    def generate_answer(self, query, context):
        """Sample an answer to ``query`` given ``context``.

        Returns:
            The generated continuation only (at most 256 new tokens),
            stripped of surrounding whitespace.
        """
        prompt = f"""You are an intelligent QA assistant. Answer the user's question based on the following background knowledge.

Background:
{context}

User question: {query}

Provide an accurate and detailed answer:"""
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1800).to(self.device)
        prompt_token_count = inputs['input_ids'].shape[1]
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id
            )
        # Drop the prompt by token count. With truncation=True a long
        # context shortens the tokenized prompt, so slicing the decoded text
        # by len(prompt) would cut into, or entirely remove, the answer.
        completion_ids = outputs[0][prompt_token_count:]
        return self.tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# ===== 5. Complete RAG System =====
class CompleteRAGSystem:
    """Full RAG system integrating all layers"""

    def __init__(self, model_name="unsloth/qwen2-1.5b-bnb-4bit"):
        """Create the pipeline stages and load the 4-bit generation model."""
        self.preprocessor = DocumentPreprocessor()
        self.retriever = HybridRetriever()
        print("Loading generation model...")
        loaded = FastLanguageModel.from_pretrained(
            model_name=model_name,
            max_seq_length=2048,
            dtype=None,
            load_in_4bit=True
        )
        self.model, self.tokenizer = loaded
        self.model.eval()
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"
        # Both generation stages share one model, tokenizer and device.
        shared = (self.model, self.tokenizer, self.device)
        self.query_enhancer = QueryEnhancer(*shared)
        self.qa_chain = QAChain(*shared)

    def build_knowledge_base(self, documents):
        """Chunk ``documents``, index the chunks both ways, return the chunks."""
        chunks = self.preprocessor.dynamic_chunking(documents)
        self.retriever.build_vector_index(chunks)
        self.retriever.build_bm25_index(chunks)
        return chunks

    def retrieve_contexts(self, query, top_k=3, use_hyde=True, use_hybrid=True):
        """Return the texts of the ``top_k`` chunks retrieved for ``query``.

        With ``use_hyde`` the search text is the query extended by a
        hypothetical document; ``use_hybrid`` selects hybrid over
        vector-only retrieval.
        """
        if use_hyde:
            search_query = self.query_enhancer.enhanced_query(query)[0]
        else:
            search_query = query

        search = self.retriever.hybrid_search if use_hybrid else self.retriever.vector_search
        return [hit['text'] for hit in search(search_query, top_k=top_k)]

    def answer_question_with_context(self, query, context):
        """Generate an answer; a list ``context`` is joined with ``"\\n---\\n"``."""
        joined = "\n---\n".join(context) if isinstance(context, list) else context
        return self.qa_chain.generate_answer(query, joined)

    def answer_question(self, query, use_hyde=True, use_hybrid=True):
        """Retrieve three contexts for ``query`` and answer from them.

        Returns:
            Dict with keys ``question``, ``answer``, ``contexts`` (list of
            chunk texts) and ``context`` (the joined string).
        """
        print(f"\nUser question: {query}")
        retrieved_contexts = self.retrieve_contexts(
            query, top_k=3, use_hyde=use_hyde, use_hybrid=use_hybrid
        )
        context = "\n---\n".join(retrieved_contexts)
        return {
            'question': query,
            'answer': self.answer_question_with_context(query, context),
            'contexts': retrieved_contexts,
            'context': context,
        }

# ===== 6. Helper function: extract QA =====
def extract_qa(text):
    """Split a ``Q: ... A: ...`` sample into ``(question, answer)``.

    The question is the rest of the line following the first ``Q:``; the
    answer is everything following the first ``A:`` up to the end of the
    text. A missing marker yields an empty string for that part.
    """
    # (pattern, flags) per part; DOTALL lets only the answer span lines.
    specs = ((r"Q:\s*(.*)", 0), (r"A:\s*(.*)", re.DOTALL))
    parts = []
    for pattern, flags in specs:
        found = re.search(pattern, text, flags)
        parts.append("" if found is None else found.group(1).strip())
    question, answer = parts
    return question, answer

# ===== 7. Build dataset for local evaluation =====
def build_ragas_dataset(test_texts, rag_system, max_samples=100, use_hyde=True, use_hybrid=True):
    """Run ``rag_system`` over the first ``max_samples`` samples.

    Each sample is split with ``extract_qa``; samples missing a question or
    an answer are reported and skipped. For the rest, three contexts are
    retrieved, joined with ``"\\n---\\n"`` and passed to the generator.

    Returns:
        A ``Dataset`` with columns ``question``, ``answer`` (model output),
        ``contexts`` (list of retrieved chunks) and ``ground_truth``.
    """
    columns = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
    for sample_no, raw_text in enumerate(test_texts[:max_samples], start=1):
        question, reference = extract_qa(raw_text)
        if not (question and reference):
            print(f"Skipping sample {sample_no}: missing question or answer")
            continue

        retrieved = rag_system.retrieve_contexts(
            question, top_k=3, use_hyde=use_hyde, use_hybrid=use_hybrid
        )
        prediction = rag_system.answer_question_with_context(
            question, "\n---\n".join(retrieved)
        )

        columns["question"].append(question)
        columns["answer"].append(prediction)
        columns["contexts"].append(retrieved)
        columns["ground_truth"].append(reference)
        print(f"Sample {sample_no}/{max_samples} processed")
    return Dataset.from_dict(columns)

# ===== 8. Local ragas-lite evaluation =====
def average_cos_sim(a, b):
    """Return the mean of all pairwise cosine similarities between ``a`` and ``b``."""
    pairwise = cosine_similarity(a, b)
    return pairwise.mean()

def evaluate_ragas_locally(dataset, embedder):
    """Score a QA dataset with embedding-similarity stand-ins for RAGAS metrics.

    All four scores are cosine similarities between sentence embeddings, not
    the LLM-judged RAGAS metrics they are named after:

    * ``faithfulness``      -- mean similarity of the answer to the contexts
    * ``answer_relevancy``  -- similarity of the answer to the question
    * ``context_precision`` -- best similarity of the ground truth to a context
    * ``context_recall``    -- mean similarity of the ground truth to the contexts

    Args:
        dataset: Mapping-style object with ``question``, ``answer``,
            ``ground_truth`` and ``contexts`` columns of equal length.
        embedder: Object whose ``encode(list_of_str)`` returns a 2-D array.

    Returns:
        Dict mapping each metric name to its list of per-sample scores.
        Samples that fail to score are reported and left out of every list.
    """
    print("Evaluating ragas-lite metrics using local embeddings...\n")
    scores = {"faithfulness": [], "answer_relevancy": [], "context_precision": [], "context_recall": []}

    # Read each column once instead of on every loop iteration.
    rows = zip(
        dataset["question"],
        dataset["answer"],
        dataset["ground_truth"],
        dataset["contexts"],
    )

    for i, (q, a, gt, ctxs) in enumerate(rows):
        try:
            q_vec = embedder.encode([q])
            a_vec = embedder.encode([a])
            gt_vec = embedder.encode([gt])
            ctx_vecs = embedder.encode(ctxs)

            faith = average_cos_sim(a_vec, ctx_vecs)
            rel = cosine_similarity(a_vec, q_vec)[0][0]
            # One similarity row serves both ground-truth metrics.
            gt_ctx_sims = cosine_similarity(gt_vec, ctx_vecs)
            precision = gt_ctx_sims.max()
            recall = gt_ctx_sims.mean()
        except Exception as e:
            # Best effort: report the sample and keep evaluating the rest.
            print(f"Evaluation failed for sample {i+1}: {e}")
            continue

        # Append only after every metric succeeded so the four lists stay
        # aligned sample-for-sample.
        scores["faithfulness"].append(faith)
        scores["answer_relevancy"].append(rel)
        scores["context_precision"].append(precision)
        scores["context_recall"].append(recall)

    print("Local ragas-lite evaluation completed:\n")
    for k, v in scores.items():
        if v:
            print(f"{k}: {np.mean(v):.3f}")
        else:
            # np.mean([]) would emit a RuntimeWarning and print nan.
            print(f"{k}: n/a (no samples scored)")
    return scores

# ===== 9. Main entry point =====
if __name__ == "__main__":
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only use pickle files from a trusted source.
    with open("test_texts.pkl", "rb") as f:
        test_texts = pickle.load(f)
    
    # The system (including the generation model) is created and the knowledge
    # base is built even when a cached dataset is loaded below; the embedder
    # used for scoring is taken from this system's retriever.
    print("Initializing RAG system...")
    rag_system = CompleteRAGSystem()
    
    # NOTE(review): the knowledge base is built from the same test_texts that
    # the evaluation questions and reference answers are extracted from, so a
    # retrieved chunk may contain the ground-truth answer itself -- confirm
    # this is intended before comparing scores across setups.
    print("Building knowledge base...")
    chunks = rag_system.build_knowledge_base(test_texts)
    
    # Cache of the generated evaluation dataset; delete the file to force the
    # answers to be regenerated.
    dataset_path = "enhanced_ragas_data.pkl"
    if os.path.exists(dataset_path):
        print(f"Found saved dataset, loading {dataset_path}...")
        with open(dataset_path, "rb") as f:
            ragas_data = pickle.load(f)
    else:
        print("Building enhanced RAG evaluation dataset...")
        ragas_data = build_ragas_dataset(test_texts, rag_system, max_samples=50, use_hyde=True, use_hybrid=True)
        print(f"Saving dataset to {dataset_path}...")
        with open(dataset_path, "wb") as f:
            pickle.dump(ragas_data, f)
    
    print("Dataset ready, starting evaluation...\n")
    # Score with the same sentence embedder the retriever uses.
    embedder = rag_system.retriever.embedder
    scores = evaluate_ragas_locally(ragas_data, embedder)
