#### 1. Environment Setup and Dependencies

In [2]:
# Core dependencies
import os
import getpass
import json
from typing import List, Dict, Any
from pathlib import Path

# Updated LangChain imports
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, OpenAI  # Updated imports
from langchain_community.vectorstores import FAISS 
# from langchain_community.vectorstores import Qdrant  # Updated import
from langchain.chains import RetrievalQA
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.tools import Tool
from langchain.prompts import PromptTemplate
from langchain.schema import Document

# Qdrant client
#from qdrant_client import QdrantClient
#from qdrant_client.models import VectorParams, Distance

# RAGAS evaluation
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall

# Tavily for web search
from tavily import TavilyClient

os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API Key:")
os.environ["TAVILY_API_KEY"] = getpass.getpass("Enter your Tavily API Key:") 


#### 2.Document Ingestion and Chunking

In [3]:
from pathlib import Path
from typing import List
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

class DocumentProcessor:
    def __init__(self, data_path: str = "/Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/data"):
        self.data_path = Path(data_path)
        self.chunk_size = 800  # tokens as per project spec
        self.chunk_overlap = 100  # tokens as per project spec
        
    def load_documents(self) -> List[Document]:
        """Load all markdown documents from the data directory"""
        loader = DirectoryLoader(
            str(self.data_path),
            glob="**/*.md",
            loader_cls=TextLoader,
            loader_kwargs={'encoding': 'utf-8'}
        )
        documents = loader.load()
        print(f"Loaded {len(documents)} documents")
        return documents
    
    def chunk_documents(self, documents: List[Document]) -> List[Document]:
        """Split documents into chunks using the specified strategy"""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
            separators=["\n\n", "\n", " ", ""]
        )
        chunks = text_splitter.split_documents(documents)
        print(f"Created {len(chunks)} chunks from {len(documents)} documents")
        return chunks

#### 3. Vector Store Setup with Qdrant

In [20]:
from typing import List
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance

class VectorStoreManager:
    def __init__(self, qdrant_url: str = ":memory:"):
        self.embeddings = OpenAIEmbeddings()
        self.client = QdrantClient(qdrant_url)
        self.embedding_dim = len(self.embeddings.embed_query("test"))

    def create_vectorstore(self, chunks: List[Document], collection_name: str = "rfp_docs", recreate: bool = True):
        """
        Create (or recreate) a Qdrant vector store from document chunks.
        If recreate=True, it will delete and recreate the collection if it already exists.
        """
        existing_collections = [c.name for c in self.client.get_collections().collections]

        # ✅ Handle recreation cleanly
        if collection_name in existing_collections:
            if recreate:
                self.client.delete_collection(collection_name)
                print(f"🧹 Deleted existing collection: {collection_name}")
            else:
                print(f"ℹ️  Using existing collection: {collection_name}")

        # ✅ Create fresh collection if needed
        if recreate or collection_name not in existing_collections:
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(
                    size=self.embedding_dim,
                    distance=Distance.COSINE
                )
            )
            print(f"✅ Created collection: {collection_name}")

        # ✅ Use explicit client-based Qdrant integration
        try:
            vectorstore = Qdrant(
                client=self.client,
                collection_name=collection_name,
                embeddings=self.embeddings
            )
            vectorstore.add_documents(chunks)
            print(f"✅ Added {len(chunks)} documents to vector store '{collection_name}'")
            return vectorstore
        except Exception as e:
            print(f"❌ Failed to add documents to vector store: {e}")
            raise

    def delete_collection(self, collection_name: str):
        """Manually delete a Qdrant collection"""
        existing_collections = [c.name for c in self.client.get_collections().collections]
        if collection_name in existing_collections:
            self.client.delete_collection(collection_name)
            print(f"🧹 Deleted collection: {collection_name}")
        else:
            print(f"ℹ️ Collection '{collection_name}' not found.")


#### 4. RFP Agent with Advanced Retrieval

In [32]:
from typing import List, Dict, Any
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.agents import Tool, initialize_agent, AgentType
from tavily import TavilyClient

class RFPAgent:
    def __init__(self, vectorstore, tavily_client: TavilyClient = None):
        self.vectorstore = vectorstore
        self.llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)
        self.tavily_client = tavily_client or TavilyClient()

        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever(search_kwargs={"k": 5}),
            return_source_documents=True
        )

        self.tools = self._create_tools()
        self.agent = self._create_agent()

    def _create_tools(self) -> List[Tool]:
        def search_documentation(query: str) -> str:
            try:
                results = self.qa_chain({"query": query})
                answer = results["result"]
                sources = [doc.metadata.get("source", "Unknown") for doc in results["source_documents"]]
                unique_sources = ", ".join(sorted(set(sources)))
                return f"Answer: {answer}\n\nSources: {unique_sources}"
            except Exception as e:
                return f"Error searching documentation: {str(e)}"

        def search_web(query: str) -> str:
            try:
                response = self.tavily_client.search(query=query, max_results=3)
                results = []
                for result in response.get("results", []):
                    results.append(
                        f"Title: {result.get('title','N/A')}\n"
                        f"Content: {result.get('content','')}\n"
                        f"URL: {result.get('url','')}"
                    )
                return "\n\n".join(results) if results else "No results found."
            except Exception as e:
                return f"Error searching web: {str(e)}"

        return [
            Tool(
                name="search_documentation",
                description="Search internal company documentation for RFP responses and technical specifications",
                func=search_documentation
            ),
            Tool(
                name="search_web",
                description="Search the web for supplementary information, competitor analysis, or current market trends",
                func=search_web
            )
        ]

    def _create_agent(self):
        return initialize_agent(
            tools=self.tools,
            llm=self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True
        )

    def respond_to_rfp(self, question: str) -> Dict[str, Any]:
        try:
            response = self.agent.invoke({"input": question})
            return {
                "question": question,
                "response": response["output"],
                "status": "success"
            }
        except Exception as e:
            return {
                "question": question,
                "response": f"Error generating response: {str(e)}",
                "status": "error"
            }


In [33]:
# 1. Import your document processor and vector store manager
processor = DocumentProcessor("/Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/data")

# 2. Load the markdown documents
documents = processor.load_documents()

# 3. Chunk/split the documents into smaller pieces
chunks = processor.chunk_documents(documents)

# 4. Create the vector store (Qdrant)
vector_manager = VectorStoreManager()
vectorstore = vector_manager.create_vectorstore(chunks, collection_name="rfp_docs")

# 5. Initialize your RFP agent
agent = RFPAgent(vectorstore=vectorstore)

# 6. Ask a question
response = agent.respond_to_rfp("What are the compliance standards for our product?")
print(response["response"])


Loaded 3 documents
Created 27 chunks from 3 documents
✅ Created collection: rfp_docs
✅ Added 27 documents to vector store 'rfp_docs'


[1m> Entering new AgentExecutor chain...[0m


  self.llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)


[32;1m[1;3mI need to check our internal documentation for information on our product's compliance standards.
Action: search_documentation
Action Input: "product compliance standards"[0m

  results = self.qa_chain({"query": query})



Observation: [36;1m[1;3mAnswer: The platform meets the following compliance standards:
- SOC 2 Type II: This pertains to security, availability, and confidentiality controls.
- ISO 27001: This is related to the information security management system.
- GDPR: This is for data privacy and protection compliance.
- HIPAA: This is for healthcare data protection and is an optional add-on.
- PCI DSS: This is for payment card industry compliance and is also an optional add-on.

Sources: /Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/data/sample_faq.md, /Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/data/sample_product_specs.md, /Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/data/sample_rfp_responses.md[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: Our product meets the following compliance standards: SOC 2 Type II, ISO 27001, GDPR. Additionally, we offer HIPAA and PCI DSS as optional add-ons.[0m

[1m> Finishe

#### 4. Ignore

In [None]:
class RFPAgent:
    def __init__(self, vectorstore, tavily_client=None):
        self.vectorstore = vectorstore
        self.llm = OpenAI(model_name="gpt-4", temperature=0.1)
        self.tavily_client = tavily_client or TavilyClient()
        
        # Create retrieval QA chain
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
            return_source_documents=True
        )
        
        # Create tools for the agent
        self.tools = self._create_tools()
        
        # Create agent
        self.agent = self._create_agent()
    
    def _create_tools(self) -> List[Tool]:
        """Create tools for the RFP agent"""
        
        def search_documentation(query: str) -> str:
            """Search internal documentation for RFP responses"""
            try:
                results = self.qa_chain({"query": query})
                sources = [doc.metadata.get('source', 'Unknown') for doc in results['source_documents']]
                return f"Answer: {results['result']}\n\nSources: {', '.join(set(sources))}"
            except Exception as e:
                return f"Error searching documentation: {str(e)}"
        
        def search_web(query: str) -> str:
            """Search web for supplementary information"""
            try:
                response = self.tavily_client.search(query=query, max_results=3)
                results = []
                for result in response['results']:
                    results.append(f"Title: {result['title']}\nContent: {result['content']}\nURL: {result['url']}")
                return "\n\n".join(results)
            except Exception as e:
                return f"Error searching web: {str(e)}"
        
        return [
            Tool(
                name="search_documentation",
                description="Search internal company documentation for RFP responses and technical specifications",
                func=search_documentation
            ),
            Tool(
                name="search_web",
                description="Search the web for supplementary information, competitor analysis, or current market trends",
                func=search_web
            )
        ]
    
    def _create_agent(self) -> AgentExecutor:
        """Create the RFP agent with tools"""
        prompt = PromptTemplate(
            template="""You are an expert Solutions Engineer helping respond to customer RFPs and technical questions.

You have access to:
1. Internal company documentation (product specs, RFP responses, FAQs)
2. Web search for supplementary information

Guidelines:
- Always prioritize internal documentation for product-specific questions
- Use web search for market trends, competitor analysis, or general industry information
- Provide comprehensive, accurate responses with proper citations
- Structure responses clearly with bullet points when appropriate
- Include relevant technical specifications and performance metrics
- Reference specific compliance standards and certifications when applicable

Question: {input}

Use the available tools to gather information and provide a comprehensive response.""",
            input_variables=["input"]
        )
        
        agent = create_openai_tools_agent(
            llm=self.llm,
            tools=self.tools,
            prompt=prompt
        )
        
        return AgentExecutor(agent=agent, tools=self.tools, verbose=True)
    
    def respond_to_rfp(self, question: str) -> Dict[str, Any]:
        """Generate RFP response using the agent"""
        try:
            response = self.agent.invoke({"input": question})
            return {
                "question": question,
                "response": response["output"],
                "status": "success"
            }
        except Exception as e:
            return {
                "question": question,
                "response": f"Error generating response: {str(e)}",
                "status": "error"
            }

In [31]:
from typing import List, Dict, Any
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.agents import Tool, initialize_agent, AgentType
from tavily import TavilyClient

class RFPAgent:
    def __init__(self, vectorstore, tavily_client: TavilyClient = None):
        self.vectorstore = vectorstore
        self.llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)
        self.tavily_client = tavily_client or TavilyClient()

        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever(search_kwargs={"k": 5}),
            return_source_documents=True
        )

        self.tools = self._create_tools()
        self.agent = self._create_agent()

    def _create_tools(self) -> List[Tool]:
        def search_documentation(query: str) -> str:
            try:
                results = self.qa_chain({"query": query})
                answer = results["result"]
                sources = [doc.metadata.get("source", "Unknown") for doc in results["source_documents"]]
                unique_sources = ", ".join(sorted(set(sources)))
                return f"Answer: {answer}\n\nSources: {unique_sources}"
            except Exception as e:
                return f"Error searching documentation: {str(e)}"

        def search_web(query: str) -> str:
            try:
                response = self.tavily_client.search(query=query, max_results=3)
                results = []
                for result in response.get("results", []):
                    results.append(
                        f"Title: {result.get('title','N/A')}\n"
                        f"Content: {result.get('content','')}\n"
                        f"URL: {result.get('url','')}"
                    )
                return "\n\n".join(results) if results else "No results found."
            except Exception as e:
                return f"Error searching web: {str(e)}"

        return [
            Tool(
                name="search_documentation",
                description="Search internal company documentation for RFP responses and technical specifications",
                func=search_documentation
            ),
            Tool(
                name="search_web",
                description="Search the web for supplementary information, competitor analysis, or current market trends",
                func=search_web
            )
        ]

    def _create_agent(self):
        return initialize_agent(
            tools=self.tools,
            llm=self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True
        )

    def respond_to_rfp(self, question: str) -> Dict[str, Any]:
        try:
            response = self.agent.invoke({"input": question})
            return {
                "question": question,
                "response": response["output"],
                "status": "success"
            }
        except Exception as e:
            return {
                "question": question,
                "response": f"Error generating response: {str(e)}",
                "status": "error"
            }


In [56]:
## New v1
class RFPAgent:
    def __init__(self, vectorstore, tavily_client=None):
        self.vectorstore = vectorstore
        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)  # More reliable model
        self.tavily_client = tavily_client or TavilyClient()
        
        # Create retrieval QA chain
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
            return_source_documents=True
        )
    
    def respond_to_rfp(self, question: str) -> Dict[str, Any]:
        """Generate RFP response using simple QA chain"""
        try:
            # Use invoke for chat models
            results = self.qa_chain.invoke({"query": question})
            
            response_text = results['result']
            
            return {
                "question": question,
                "response": response_text,
                "status": "success"
            }
        except Exception as e:
            return {
                "question": question,
                "response": f"Error generating response: {str(e)}",
                "status": "error"
            }

#### 5. RAGAS Evaluation System

In [52]:
from typing import List, Dict
from langchain.chat_models import ChatOpenAI
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from ragas import evaluate
from datasets import Dataset

class RAGEvaluator:
    def __init__(self, qa_chain):
        """
        RAG Evaluator for QA systems using RAGAS metrics.
        Uses GPT-4 chat model to evaluate:
        - Faithfulness
        - Answer relevancy
        - Context precision
        - Context recall
        """
        self.qa_chain = qa_chain
        self.metrics = [
            faithfulness,
            answer_relevancy,
            context_precision,
            context_recall
        ]
        # ✅ Use ChatOpenAI for chat completions
        self.llm = ChatOpenAI(model_name="gpt-4", temperature=0)

    def create_evaluation_dataset(self) -> List[Dict]:
        """Create evaluation dataset with sample RFP questions"""
        evaluation_questions = [
            {
                "question": "What security features does the Enterprise Data Platform provide?",
                "ground_truth": "The platform provides AES-256 encryption for data at rest, TLS 1.3 for data in transit, LDAP/AD authentication, RBAC authorization, and comprehensive audit logging."
            },
            {
                "question": "What is the maximum data volume the platform can handle?",
                "ground_truth": "The platform handles petabytes of data with linear scaling capabilities and supports clusters up to 10,000 nodes."
            },
            {
                "question": "What cloud platforms does the platform integrate with?",
                "ground_truth": "The platform provides native integration with AWS (S3, EC2, RDS, Redshift), Azure (Blob Storage, Data Factory, Synapse Analytics), and GCP (BigQuery, Cloud Storage, Dataflow)."
            },
            {
                "question": "What are the minimum system requirements for on-premises deployment?",
                "ground_truth": "Minimum requirements include Intel Xeon or AMD EPYC processors (16 cores), 64GB RAM minimum (256GB recommended), SSD storage with 1TB minimum capacity, and 10 Gigabit Ethernet."
            },
            {
                "question": "What compliance standards does the platform meet?",
                "ground_truth": "The platform meets SOC 2 Type II, ISO 27001, GDPR compliance standards, with optional HIPAA and PCI DSS add-ons."
            }
        ]
        return evaluation_questions

    def evaluate_system(self) -> Dict[str, float]:
        """Evaluate the RAG system using RAGAS metrics"""
        try:
            eval_dataset = self.create_evaluation_dataset()
            records = []

            for item in eval_dataset:
                result = self.qa_chain({"query": item["question"]})
                records.append({
                    "question": item["question"],
                    "answer": result["result"],
                    "contexts": [doc.page_content for doc in result["source_documents"]],
                    "ground_truth": item["ground_truth"]
                })

            # ✅ Convert Python list to Hugging Face Dataset
            hf_dataset = Dataset.from_list(records)

            # ✅ Run RAGAS evaluation
            results = evaluate(
                dataset=hf_dataset,
                metrics=self.metrics,
                llm=self.llm
            )

            return results

        except Exception as e:
            print(f"Evaluation error: {str(e)}")
            return {"error": str(e)}

In [48]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI  # ✅ Use ChatOpenAI instead of OpenAI

# ✅ Initialize ChatOpenAI properly
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

# ✅ Build QA chain with chat model
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
    return_source_documents=True
)


In [53]:
evaluator = RAGEvaluator(qa_chain)
scores = evaluator.evaluate_system()
print("RAGAS evaluation results:", scores)


Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

Exception in thread Thread-5:
Traceback (most recent call last):
  File [35m"/Users/powertothefuture/.local/share/uv/python/cpython-3.13.0-macos-aarch64-none/lib/python3.13/threading.py"[0m, line [35m1041[0m, in [35m_bootstrap_inner[0m
    [31mself.run[0m[1;31m()[0m
    [31m~~~~~~~~[0m[1;31m^^[0m
  File [35m"/Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/.venv/lib/python3.13/site-packages/tqdm/_monitor.py"[0m, line [35m84[0m, in [35mrun[0m
    [31minstance.refresh[0m[1;31m(nolock=True)[0m
    [31m~~~~~~~~~~~~~~~~[0m[1;31m^^^^^^^^^^^^^[0m
  File [35m"/Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/.venv/lib/python3.13/site-packages/tqdm/std.py"[0m, line [35m1347[0m, in [35mrefresh[0m
    [31mself.display[0m[1;31m()[0m
    [31m~~~~~~~~~~~~[0m[1;31m^^[0m
  File [35m"/Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/.venv/lib/python3.13/site-packages/tqdm/notebook.py"[0m, line [35m171[0m, 

RAGAS evaluation results: {'faithfulness': nan, 'answer_relevancy': 0.9708, 'context_precision': 0.7917, 'context_recall': 1.0000}


In [None]:
import csv
import os
from typing import List, Dict, Optional
from datetime import datetime
from datasets import Dataset
from langchain.chat_models import ChatOpenAI
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, answer_similarity

from ragas import evaluate

class RAGEvaluator:
    def __init__(
        self,
        qa_chain,
        model_name: str = "gpt-4",
        retriever_type: str = "qdrant",
        chunk_size: int = 500
    ):
        """
        RAG Evaluator for QA systems using RAGAS metrics with experiment tracking.

        Args:
            qa_chain: The RetrievalQA chain or similar
            model_name: The LLM used for generation and evaluation
            retriever_type: Type of retriever or vector store (e.g., qdrant, faiss)
            chunk_size: Size of text chunks used in preprocessing
        """
        self.qa_chain = qa_chain
        self.model_name = model_name
        self.retriever_type = retriever_type
        self.chunk_size = chunk_size

        self.metrics = [
            faithfulness,
            answer_relevancy,
            context_precision,
            context_recall
        ]

        self.llm = ChatOpenAI(model_name=model_name, temperature=0)

    def create_evaluation_dataset(self) -> List[Dict]:
        """Static evaluation dataset with sample RFP questions."""
        return [
            {
                "question": "What security features does the Enterprise Data Platform provide?",
                "ground_truth": "The platform provides AES-256 encryption for data at rest, TLS 1.3 for data in transit, LDAP/AD authentication, RBAC authorization, and comprehensive audit logging."
            },
            {
                "question": "What is the maximum data volume the platform can handle?",
                "ground_truth": "The platform handles petabytes of data with linear scaling capabilities and supports clusters up to 10,000 nodes."
            },
            {
                "question": "What cloud platforms does the platform integrate with?",
                "ground_truth": "The platform provides native integration with AWS (S3, EC2, RDS, Redshift), Azure (Blob Storage, Data Factory, Synapse Analytics), and GCP (BigQuery, Cloud Storage, Dataflow)."
            },
            {
                "question": "What are the minimum system requirements for on-premises deployment?",
                "ground_truth": "Minimum requirements include Intel Xeon or AMD EPYC processors (16 cores), 64GB RAM minimum (256GB recommended), SSD storage with 1TB minimum capacity, and 10 Gigabit Ethernet."
            },
            {
                "question": "What compliance standards does the platform meet?",
                "ground_truth": "The platform meets SOC 2 Type II, ISO 27001, GDPR compliance standards, with optional HIPAA and PCI DSS add-ons."
            }
        ]

    def evaluate_system(self) -> Dict:
        """Run RAGAS evaluation and return metrics and detailed results."""
        try:
            eval_dataset = self.create_evaluation_dataset()
            records = []

            for item in eval_dataset:
                result = self.qa_chain({"query": item["question"]})
                records.append({
                    "question": item["question"],
                    "answer": result["result"],
                    "contexts": [doc.page_content for doc in result["source_documents"]],
                    "ground_truth": item["ground_truth"]
                })

            # Convert to HF dataset
            hf_dataset = Dataset.from_list(records)

            # Run RAGAS
            scores = evaluate(
                dataset=hf_dataset,
                metrics=self.metrics,
                llm=self.llm
            )

            return {"scores": scores, "records": records}

        except Exception as e:
            print(f"Evaluation error: {str(e)}")
            return {"error": str(e)}

    def save_results_to_csv(self, evaluation, scores_file="ragas_scores.csv", details_file="ragas_details.csv"):
    import os
    import csv
    from datetime import datetime

    if "scores" in evaluation:
        # Convert scores to dict if needed
        if not isinstance(evaluation["scores"], dict):
            scores = evaluation["scores"].to_dict()
        else:
            scores = evaluation["scores"]

        scores_fieldnames = [
            "timestamp", "model_name", "retriever_type", "chunk_size"
        ] + list(scores.keys())

        file_exists = os.path.isfile(scores_file)

        with open(scores_file, mode='a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=scores_fieldnames)
            if not file_exists:
                writer.writeheader()
            row = {
                "timestamp": datetime.now().isoformat(),
                "model_name": self.model_name,
                "retriever_type": self.retriever_type,
                "chunk_size": self.chunk_size,
                **scores
            }
            writer.writerow(row)

    # You can similarly handle 'details' if you save detailed results
def save_results_to_csv(
        self,
        evaluation: Dict,
        scores_file: str = "ragas_scores.csv",
        details_file: str = "ragas_details.csv"
    ):
        """
        Save RAGAS evaluation results to CSV with metadata (model, retriever, chunk size).
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # 1️⃣ Save summary metric scores
        if "scores" in evaluation:
            scores = evaluation["scores"]
            scores_fieldnames = [
                "timestamp", "model_name", "retriever_type", "chunk_size"
            ] + list(scores.keys())
            file_exists = os.path.isfile(scores_file)

            with open(scores_file, mode='a', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=scores_fieldnames)
                if not file_exists:
                    writer.writeheader()

                row = {
                    "timestamp": timestamp,
                    "model_name": self.model_name,
                    "retriever_type": self.retriever_type,
                    "chunk_size": self.chunk_size
                }
                row.update(scores)
                writer.writerow(row)

            print(f"✅ RAGAS metric scores saved to: {scores_file}")

        # 2️⃣ Save detailed Q&A logs
        if "records" in evaluation:
            records = evaluation["records"]
            details_fieldnames = [
                "timestamp", "model_name", "retriever_type", "chunk_size",
                "question", "answer", "ground_truth", "contexts"
            ]
            file_exists = os.path.isfile(details_file)

            with open(details_file, mode='a', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=details_fieldnames)
                if not file_exists:
                    writer.writeheader()

                for record in records:
                    writer.writerow({
                        "timestamp": timestamp,
                        "model_name": self.model_name,
                        "retriever_type": self.retriever_type,
                        "chunk_size": self.chunk_size,
                        "question": record["question"],
                        "answer": record["answer"],
                        "ground_truth": record["ground_truth"],
                        "contexts": " | ".join(record["contexts"])
                    })

            print(f"✅ Detailed evaluation logs saved to: {details_file}")

In [55]:
# Pass the same qa_chain you used in RFPAgent
evaluator = RAGEvaluator(
    qa_chain,
    model_name="gpt-4",
    retriever_type="qdrant",
    chunk_size=500
)

# Run RAGAS evaluation
evaluation = evaluator.evaluate_system()
print("📊 RAGAS Metrics:", evaluation["scores"])

# Save experiment results with metadata
evaluator.save_results_to_csv(evaluation)


Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

Exception raised in Job[0]: TimeoutError()
Exception raised in Job[4]: TimeoutError()
Exception raised in Job[6]: TimeoutError()
Exception raised in Job[8]: TimeoutError()
Exception raised in Job[12]: TimeoutError()
Exception raised in Job[16]: TimeoutError()
Exception raised in Job[17]: TimeoutError()
Exception raised in Job[19]: TimeoutError()


📊 RAGAS Metrics: {'faithfulness': nan, 'answer_relevancy': 0.9619, 'context_precision': 0.8889, 'context_recall': 0.9375}


AttributeError: 'EvaluationResult' object has no attribute 'keys'

In [9]:
class RAGEvaluator:
    def __init__(self, qa_chain):
        self.qa_chain = qa_chain
        self.metrics = [faithfulness, answer_relevancy, context_precision, context_recall]
        self.llm = OpenAI(model_name="gpt-4")
    
    def create_evaluation_dataset(self) -> List[Dict]:
        """Create evaluation dataset with sample RFP questions"""
        evaluation_questions = [
            {
                "question": "What security features does the Enterprise Data Platform provide?",
                "ground_truth": "The platform provides AES-256 encryption for data at rest, TLS 1.3 for data in transit, LDAP/AD authentication, RBAC authorization, and comprehensive audit logging."
            },
            {
                "question": "What is the maximum data volume the platform can handle?",
                "ground_truth": "The platform handles petabytes of data with linear scaling capabilities and supports clusters up to 10,000 nodes."
            },
            {
                "question": "What cloud platforms does the platform integrate with?",
                "ground_truth": "The platform provides native integration with AWS (S3, EC2, RDS, Redshift), Azure (Blob Storage, Data Factory, Synapse Analytics), and GCP (BigQuery, Cloud Storage, Dataflow)."
            },
            {
                "question": "What are the minimum system requirements for on-premises deployment?",
                "ground_truth": "Minimum requirements include Intel Xeon or AMD EPYC processors (16 cores), 64GB RAM minimum (256GB recommended), SSD storage with 1TB minimum capacity, and 10 Gigabit Ethernet."
            },
            {
                "question": "What compliance standards does the platform meet?",
                "ground_truth": "The platform meets SOC 2 Type II, ISO 27001, GDPR compliance standards, with optional HIPAA and PCI DSS add-ons."
            }
        ]
        return evaluation_questions
    
    def evaluate_system(self) -> Dict[str, float]:
        """Evaluate the RAG system using RAGAS metrics"""
        try:
            # Create evaluation dataset
            eval_dataset = self.create_evaluation_dataset()
            
            # Convert to RAGAS format
            dataset = []
            for item in eval_dataset:
                # Get response from QA chain
                result = self.qa_chain({"query": item["question"]})
                
                dataset.append({
                    "question": item["question"],
                    "answer": result["result"],
                    "contexts": [doc.page_content for doc in result["source_documents"]],
                    "ground_truth": item["ground_truth"]
                })
            
            # Evaluate using RAGAS
            results = evaluate(
                dataset=dataset,
                metrics=self.metrics,
                llm=self.llm
            )
            
            return results
            
        except Exception as e:
            print(f"Evaluation error: {str(e)}")
            return {"error": str(e)}

In [57]:
# new v1
class RAGEvaluator:
    def __init__(self, qa_chain):
        self.qa_chain = qa_chain
        self.metrics = [faithfulness, answer_relevancy, context_precision, context_recall]
        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo")  # Use ChatOpenAI for evaluator too
    
    def create_evaluation_dataset(self) -> List[Dict]:
        """Create evaluation dataset with sample RFP questions"""
        evaluation_questions = [
            {
                "question": "What security features does the Enterprise Data Platform provide?",
                "ground_truth": "The platform provides AES-256 encryption for data at rest, TLS 1.3 for data in transit, LDAP/AD authentication, RBAC authorization, and comprehensive audit logging."
            },
            {
                "question": "What is the maximum data volume the platform can handle?",
                "ground_truth": "The platform handles petabytes of data with linear scaling capabilities and supports clusters up to 10,000 nodes."
            },
            {
                "question": "What cloud platforms does the platform integrate with?",
                "ground_truth": "The platform provides native integration with AWS (S3, EC2, RDS, Redshift), Azure (Blob Storage, Data Factory, Synapse Analytics), and GCP (BigQuery, Cloud Storage, Dataflow)."
            }
        ]
        return evaluation_questions
    
    def evaluate_system(self) -> Dict[str, float]:
        """Evaluate the RAG system using RAGAS metrics"""
        try:
            # Create evaluation dataset
            eval_dataset = self.create_evaluation_dataset()
            
            # Convert to RAGAS format
            dataset = []
            for item in eval_dataset:
                # Get response from QA chain
                result = self.qa_chain.invoke({"query": item["question"]})
                
                dataset.append({
                    "question": item["question"],
                    "answer": result["result"],
                    "contexts": [doc.page_content for doc in result["source_documents"]],
                    "ground_truth": item["ground_truth"]
                })
            
            # Evaluate using RAGAS
            results = evaluate(
                dataset=dataset,
                metrics=self.metrics,
                llm=self.llm
            )
            
            return results
            
        except Exception as e:
            print(f"Evaluation error: {str(e)}")
            return {"error": str(e)}

In [12]:
## new v2
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from datasets import Dataset  # Add this import

class RAGEvaluator:
    def __init__(self, qa_chain):
        self.qa_chain = qa_chain
        self.metrics = [faithfulness, answer_relevancy, context_precision, context_recall]
        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    
    def create_evaluation_dataset(self) -> List[Dict]:
        """Create evaluation dataset with sample RFP questions"""
        evaluation_questions = [
            {
                "question": "What security features does the Enterprise Data Platform provide?",
                "ground_truth": "The platform provides AES-256 encryption for data at rest, TLS 1.3 for data in transit, LDAP/AD authentication, RBAC authorization, and comprehensive audit logging."
            },
            {
                "question": "What is the maximum data volume the platform can handle?",
                "ground_truth": "The platform handles petabytes of data with linear scaling capabilities and supports clusters up to 10,000 nodes."
            },
            {
                "question": "What cloud platforms does the platform integrate with?",
                "ground_truth": "The platform provides native integration with AWS (S3, EC2, RDS, Redshift), Azure (Blob Storage, Data Factory, Synapse Analytics), and GCP (BigQuery, Cloud Storage, Dataflow)."
            }
        ]
        return evaluation_questions
    
    def evaluate_system(self) -> Dict[str, float]:
        """Evaluate the RAG system using RAGAS metrics"""
        try:
            # Create evaluation dataset
            eval_dataset = self.create_evaluation_dataset()
            
            # Convert to RAGAS format
            dataset_list = []
            for item in eval_dataset:
                # Get response from QA chain
                result = self.qa_chain.invoke({"query": item["question"]})
                
                dataset_list.append({
                    "question": item["question"],
                    "answer": result["result"],
                    "contexts": [doc.page_content for doc in result["source_documents"]],
                    "ground_truth": item["ground_truth"]
                })
            
            # Convert to HuggingFace Dataset format
            dataset = Dataset.from_list(dataset_list)
            
            # Evaluate using RAGAS
            results = evaluate(
                dataset=dataset,
                metrics=self.metrics,
                llm=self.llm
            )
            
            return results
            
        except Exception as e:
            print(f"Evaluation error: {str(e)}")
            return {"error": str(e)}

#### 6. Main Application Class

In [13]:
class RFPAssistantApp:
    def __init__(self, data_path: str = "/Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/data"):
        self.data_path = data_path
        self.processor = DocumentProcessor(data_path)
        self.vector_manager = VectorStoreManager()
        self.agent = None
        self.evaluator = None
        
    def initialize(self):
        """Initialize the RFP Assistant system"""
        print("🚀 Initializing RFP Assistant...")
        
        # Load and chunk documents
        documents = self.processor.load_documents()
        chunks = self.processor.chunk_documents(documents)
        
        # Create vector store
        vectorstore = self.vector_manager.create_vectorstore(chunks)
        
        # Initialize agent (using RFPAgent)
        self.agent = RFPAgent(vectorstore)
        
        # Initialize evaluator
        self.evaluator = RAGEvaluator(self.agent.qa_chain)
        
        print("✅ RFP Assistant initialized successfully!")
    
    def ask_question(self, question: str) -> Dict[str, Any]:
        """Ask a question to the RFP Assistant"""
        if not self.agent:
            return {"error": "System not initialized. Call initialize() first."}
        
        return self.agent.respond_to_rfp(question)
    
    def evaluate_performance(self) -> Dict[str, float]:
        """Evaluate the system performance"""
        if not self.evaluator:
            return {"error": "Evaluator not initialized. Call initialize() first."}
        
        return self.evaluator.evaluate_system()
    
    def demo_questions(self):
        """Run demo questions to showcase the system"""
        demo_questions = [
            "What security features does the Enterprise Data Platform provide?",
            "What is the platform's availability and disaster recovery capabilities?",
            "What cloud platforms does the platform integrate with?",
            "What are the pricing and licensing options?",
            "What support options are available for enterprise customers?"
        ]
        
        print("\n🎯 Running Demo Questions...")
        for i, question in enumerate(demo_questions, 1):
            print(f"\n--- Question {i} ---")
            print(f"Q: {question}")
            
            response = self.ask_question(question)
            print(f"A: {response['response']}")
            print(f"Status: {response['status']}")

In [14]:
# Quick test to verify everything works
def quick_test():
    """Quick test of the RFP Assistant"""
    try:
        # Create the application
        app = RFPAssistantApp()
        
        # Initialize the system
        app.initialize()
        
        # Test a simple question
        test_question = "What security features does the Enterprise Data Platform provide?"
        print(f"\n🔍 Testing question: {test_question}")
        
        response = app.ask_question(test_question)
        print(f"Response: {response['response']}")
        print(f"Status: {response['status']}")
        
        return True
        
    except Exception as e:
        print(f"❌ Error during quick test: {str(e)}")
        return False

# Run the quick test
if __name__ == "__main__":
    success = quick_test()
    if success:
        print("\n✅ Quick test completed successfully!")
    else:
        print("\n❌ Quick test failed. Check the error messages above.")

🚀 Initializing RFP Assistant...
Loaded 3 documents
Created 27 chunks from 3 documents
✅ Created collection: rfp_docs
❌ Failed to add documents: Client.__init__() got an unexpected keyword argument 'client'
❌ Error during quick test: Client.__init__() got an unexpected keyword argument 'client'

❌ Quick test failed. Check the error messages above.


#### 7. Usage Example

In [72]:
# Initialize and run the RFP Assistant
if __name__ == "__main__":
    # Create the application
    app = RFPAssistantApp()
    
    # Initialize the system
    app.initialize()
    
    # Run demo questions
    app.demo_questions()
    
    # Evaluate system performance
    print("\n📊 Evaluating System Performance...")
    evaluation_results = app.evaluate_performance()
    print(f"Evaluation Results: {evaluation_results}")
    
    # Interactive mode
    print("\n💬 Interactive Mode - Ask your RFP questions:")
    while True:
        question = input("\nEnter your RFP question (or 'quit' to exit): ")
        if question.lower() == 'quit':
            break
        
        response = app.ask_question(question)
        print(f"\nResponse: {response['response']}")

🚀 Initializing RFP Assistant...
Loaded 3 documents
Created 27 chunks from 3 documents
Created Chroma vector store with 27 documents
✅ RFP Assistant initialized successfully!

🎯 Running Demo Questions...

--- Question 1 ---
Q: What security features does the Enterprise Data Platform provide?
A: The Enterprise Data Platform provides several security features:

- Data at Rest: All data is encrypted using AES-256 encryption with automatic key rotation.
- Data in Transit: It uses TLS 1.3 encryption for all network communications.
- Key Management: The platform integrates with enterprise key management systems such as AWS KMS, Azure Key Vault, and HashiCorp Vault.
- Compliance: The platform meets SOC 2 Type II, ISO 27001, and GDPR requirements.

Additional Web Information:
- Essential Features of an Enterprise Data Platform for Optimized ...: Security and privacy controls are essential, encompassing fine-grained access controls, data encryption, anonymization and pseudonymization...
- Enterp

Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[11]: NotFoundError(Error code: 404 - {'error': {'message': 'This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}})
ERROR:ragas.executor:Exception raised in Job[9]: NotFoundError(Error code: 404 - {'error': {'message': 'This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}})
ERROR:ragas.executor:Exception raised in Job[7]: NotFoundError(Error code: 404 - {'error': {'message': 'This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}})
ERROR:ragas.executor:Exception raised in Job[2]: NotFoundError(Error code: 404 - {'error': {'message': 'This is a chat model and not supported i

Evaluation Results: {'faithfulness': nan, 'answer_relevancy': nan, 'context_precision': nan, 'context_recall': nan}

💬 Interactive Mode - Ask your RFP questions:


In [71]:
# new 
class RFPAssistantApp:
    def __init__(self, data_path: str = "/Users/powertothefuture/Documents/aimakerspace/AIE8-cert-challenge/data"):
        self.data_path = data_path
        self.processor = DocumentProcessor(data_path)
        self.vector_manager = VectorStoreManager()
        self.agent = None
        self.evaluator = None
        
    def initialize(self):
        """Initialize the RFP Assistant system"""
        print("🚀 Initializing RFP Assistant...")
        
        # Load and chunk documents
        documents = self.processor.load_documents()
        chunks = self.processor.chunk_documents(documents)
        
        # Create vector store
        vectorstore = self.vector_manager.create_vectorstore(chunks)
        
        # Initialize agent
        self.agent = RFPAgent(vectorstore)
        
        # Initialize evaluator (using simple evaluator)
        self.evaluator = RAGEvaluator(self.agent.qa_chain)
        
        print("✅ RFP Assistant initialized successfully!")
    
    def ask_question(self, question: str) -> Dict[str, Any]:
        """Ask a question to the RFP Assistant"""
        if not self.agent:
            return {"error": "System not initialized. Call initialize() first."}
        
        return self.agent.respond_to_rfp(question)
    
    def evaluate_performance(self) -> Dict[str, Any]:
        """Evaluate the system performance"""
        if not self.evaluator:
            return {"error": "Evaluator not initialized. Call initialize() first."}
        
        return self.evaluator.evaluate_system()
    
    def demo_questions(self):
        """Run demo questions to showcase the system"""
        demo_questions = [
            "What security features does the Enterprise Data Platform provide?",
            "What is the platform's availability and disaster recovery capabilities?",
            "What cloud platforms does the platform integrate with?",
            "What are the pricing and licensing options?",
            "What support options are available for enterprise customers?"
        ]
        
        print("\n🎯 Running Demo Questions...")
        for i, question in enumerate(demo_questions, 1):
            print(f"\n--- Question {i} ---")
            print(f"Q: {question}")
            
            response = self.ask_question(question)
            print(f"A: {response['response']}")
            print(f"Status: {response['status']}")