# Imports and Basic Setup


In [1]:
import pandas as pd
import numpy as np
from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType, utility
from sentence_transformers import SentenceTransformer
import logging
import pandas as pd
import numpy as np
from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType, utility
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import io
from typing import List, Dict, Tuple, Optional
import logging
import os
#import warnings
#warnings.filterwarnings('ignore')



In [44]:
#!pip install numpy

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




# Configuration Variables

In [2]:
# --- Milvus connection and data source ---
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"
COLLECTION_NAME = "faq_bootcamp_collection"
CSV_FILE_PATH = "codebasics_faqs.csv"

# --- Embedding model ---
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
EMBEDDING_DIM = 384  # used as the vector dimension when the collection schema is created

# Echo the active settings so the run is self-describing
print(
    "⚙️ Configuration:",
    f"   Milvus: {MILVUS_HOST}:{MILVUS_PORT}",
    f"   Collection: {COLLECTION_NAME}",
    f"   CSV File: {CSV_FILE_PATH}",
    f"   Embedding Model: {EMBEDDING_MODEL_NAME}",
    sep="\n",
)


⚙️ Configuration:
   Milvus: localhost:19530
   Collection: faq_bootcamp_collection
   CSV File: codebasics_faqs.csv
   Embedding Model: all-MiniLM-L6-v2


# Milvus Connection Function


In [3]:
def connect_to_milvus(host=MILVUS_HOST, port=MILVUS_PORT):
    """Open the "default" Milvus connection.

    Args:
        host: Milvus server host.
        port: Milvus server port.

    Returns:
        True when the connect call completes, False when it raises
        (the error is printed, not propagated).
    """
    try:
        connections.connect("default", host=host, port=port)
        print(f"✅ Connected to Milvus at {host}:{port}")
    except Exception as err:
        print(f"❌ Failed to connect to Milvus: {err}")
        return False
    return True




# Connect to Milvus

In [4]:

# Connect with the configured defaults; the flag is True on success, False on failure.
connection_success = connect_to_milvus()

✅ Connected to Milvus at localhost:19530


# Load Embedding Model Function


In [5]:
def load_embedding_model(model_name=EMBEDDING_MODEL_NAME):
    """Instantiate the SentenceTransformer used to embed FAQ text.

    Args:
        model_name: model identifier passed to SentenceTransformer.

    Returns:
        The loaded model, or None if loading raises (the error is printed).
    """
    try:
        encoder = SentenceTransformer(model_name)
        print(f"✅ Loaded embedding model: {model_name}")
    except Exception as err:
        print(f"❌ Failed to load embedding model: {err}")
        return None
    return encoder

# Load embedding model

In [6]:
# Load the configured embedding model; this is None if loading failed.
embedding_model = load_embedding_model()


✅ Loaded embedding model: all-MiniLM-L6-v2


# Milvus utils

In [7]:
def drop_collection_if_exists(collection_name):
    """Remove `collection_name` from Milvus when it is present.

    Returns:
        True when the check (and the drop, if needed) completed, including
        the case where the collection did not exist. False when either
        call raises; the error is printed.
    """
    try:
        already_there = utility.has_collection(collection_name)
        if not already_there:
            print(f"ℹ️ Collection {collection_name} doesn't exist")
        else:
            utility.drop_collection(collection_name)
            print(f"🗑️ Dropped existing collection: {collection_name}")
    except Exception as err:
        print(f"❌ Error checking/dropping collection: {err}")
        return False
    return True

# Ingestion

In [10]:
def create_faq_collection(collection_name, dim=EMBEDDING_DIM):
    """Build the FAQ collection and its vector index.

    The schema has an auto-generated INT64 primary key ("id"), the
    "question" and "answer" text fields (VARCHAR, max length 2000 / 4000)
    and an "embedding" FLOAT_VECTOR of size `dim`. An IVF_FLAT index with
    the COSINE metric is created on the vector field.

    Args:
        collection_name: name of the collection to create.
        dim: dimensionality of the embedding vectors.

    Returns:
        The Collection object, or None if any step raises (error is printed).
    """
    try:
        # Field definitions, in schema order
        primary_key = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True)
        question_text = FieldSchema(name="question", dtype=DataType.VARCHAR, max_length=2000)
        answer_text = FieldSchema(name="answer", dtype=DataType.VARCHAR, max_length=4000)
        vector = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=dim)

        faq_schema = CollectionSchema(
            [primary_key, question_text, answer_text, vector],
            f"FAQ Collection: {collection_name}",
        )
        faq_collection = Collection(collection_name, faq_schema)

        # Index the vector field so similarity search can run against it
        faq_collection.create_index(
            "embedding",
            {
                "metric_type": "COSINE",
                "index_type": "IVF_FLAT",
                "params": {"nlist": 128},
            },
        )

        print(f"✅ Created collection: {collection_name}")
    except Exception as err:
        print(f"❌ Failed to create collection: {err}")
        return None
    return faq_collection



In [9]:
# Drop any existing collection with this name so it can be recreated from scratch
drop_collection_if_exists(COLLECTION_NAME)


🗑️ Dropped existing collection: faq_bootcamp_collection


True

In [11]:
# Create the collection (schema + vector index) used by the rest of the notebook.
collection = create_faq_collection(COLLECTION_NAME)
# Alternative: attach to an already existing collection instead of creating it:
# collection = Collection(COLLECTION_NAME)

✅ Created collection: faq_bootcamp_collection


In [12]:
def _read_faq_csv(csv_path, encoding):
    """Read the CSV with a single encoding, skipping malformed lines."""
    try:
        # pandas 1.3+ spelling
        return pd.read_csv(
            csv_path,
            encoding=encoding,
            on_bad_lines='skip',
            engine='python'
        )
    except TypeError:
        # Older pandas does not accept `on_bad_lines`
        return pd.read_csv(
            csv_path,
            encoding=encoding,
            error_bad_lines=False,
            warn_bad_lines=True,
            engine='python'
        )


def _preview_faq_rows(df, max_rows=3, question_col='prompt', answer_col='response'):
    """Print the first rows; tolerates missing columns and non-string cells."""
    if question_col not in df.columns or answer_col not in df.columns:
        print(f"ℹ️ Skipping preview: columns '{question_col}'/'{answer_col}' not found")
        return

    print(f"\n📋 First {max_rows} rows:")
    for i in range(min(max_rows, len(df))):
        row = df.iloc[i]
        print(f"Row {i+1}:")
        print(f"  Prompt: {row[question_col]}")
        # str() keeps the preview from failing on NaN / numeric cells
        print(f"  Response: {str(row[answer_col])[:100]}...")
        print()


def load_faq_csv(csv_path):
    """Load the FAQ CSV, trying several text encodings in turn.

    Malformed lines are skipped. The row preview runs outside the
    per-encoding error handling, so a preview problem (NaN cell, different
    column names) is never reported as an encoding failure.

    Args:
        csv_path: path of the CSV file to read.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot
        be read with any of the candidate encodings.
    """
    # 'iso-8859-1' is an alias of 'latin-1', so it is not tried separately.
    encodings = ['utf-8', 'cp1252', 'latin-1']

    for encoding in encodings:
        print(f"🔍 Trying encoding: {encoding}")

        try:
            df = _read_faq_csv(csv_path, encoding)
        except FileNotFoundError as e:
            # Another encoding cannot fix a missing file; stop immediately.
            print(f"❌ CSV file not found: {e}")
            return None
        except Exception as e:
            print(f"❌ Failed with {encoding}: {e}")
            continue

        print(f"✅ Successfully loaded with {encoding}!")
        print(f"📊 CSV Data Info:")
        print(f"   Shape: {df.shape}")
        print(f"   Columns: {list(df.columns)}")

        _preview_faq_rows(df)
        return df

    print("❌ Failed to load CSV with any encoding")
    return None

In [13]:
# Read the raw FAQ CSV; the result is None if the file could not be loaded.
df = load_faq_csv(CSV_FILE_PATH)


🔍 Trying encoding: utf-8
❌ Failed with utf-8: 'utf-8' codec can't decode byte 0x92 in position 1197: invalid start byte
🔍 Trying encoding: cp1252
✅ Successfully loaded with cp1252!
📊 CSV Data Info:
   Shape: (75, 2)
   Columns: ['prompt', 'response']

📋 First 3 rows:
Row 1:
  Prompt: I have never done programming in my life. Can I take this bootcamp?
  Response: Yes, this is the perfect bootcamp for anyone who has never done coding and wants to build a career i...

Row 2:
  Prompt: Why should I trust Codebasics?
  Response: Till now 9000 + learners have benefitted from the quality of our courses. You can check the review s...

Row 3:
  Prompt: Is there any prerequisite for taking this bootcamp ?
  Response: Our bootcamp is specifically designed for beginners with no prior experience in this field. The only...



In [16]:
# Rename the CSV columns to the 'question' / 'answer' names the cleaning
# and ingestion steps read.
df = df.rename(columns={'prompt': 'question', 'response': 'answer'})
df

Unnamed: 0,question,answer
0,I have never done programming in my life. Can ...,"Yes, this is the perfect bootcamp for anyone w..."
1,Why should I trust Codebasics?,Till now 9000 + learners have benefitted from ...
2,Is there any prerequisite for taking this boot...,Our bootcamp is specifically designed for begi...
3,What datasets are used in this bootcamp? Is it...,The datasets used in this bootcamp are crafted...
4,I’m not sure if this bootcamp is good enough f...,We got you covered. Go ahead and watch our you...
...,...,...
70,It appears that the X-axis of the chart is not...,Check this reference:\nhttps://discordapp.com/...
71,Why we use Net error in place of absolute net ...,Directional Insight: The net error metric offe...
72,I am encountering an issue where the NIS IND c...,Have you taken the 'market' column from dim_ma...
73,How do I update source in power query ?,Follow the discord link : \n\n https://discord...


In [19]:
def clean_faq_data(df):
    """Clean and validate FAQ data.

    Drops rows whose 'question' or 'answer' is null or blank (empty or
    whitespace-only) and resets the index. The input frame is not modified.

    Args:
        df: DataFrame with at least 'question' and 'answer' columns, or None.

    Returns:
        A new cleaned DataFrame, or None when `df` is None or a required
        column is missing.
    """
    if df is None:
        return None

    print("🧹 Cleaning data...")

    # Check required columns
    required_cols = ['question', 'answer']
    if not all(col in df.columns for col in required_cols):
        print(f"❌ Missing required columns. Found: {list(df.columns)}")
        return None

    original_shape = df.shape

    # Remove null values
    cleaned = df.dropna(subset=required_cols)

    # Remove blank strings. The cast to str is only for this check, so a
    # non-string column (e.g. numeric answers) does not break the `.str`
    # accessor; the stored values keep their original type.
    question_ok = cleaned['question'].astype(str).str.strip() != ''
    answer_ok = cleaned['answer'].astype(str).str.strip() != ''
    cleaned = cleaned[question_ok & answer_ok].reset_index(drop=True)

    print(f"   Original shape: {original_shape}")
    print(f"   Clean shape: {cleaned.shape}")
    print(f"   Removed: {original_shape[0] - cleaned.shape[0]} rows")

    return cleaned

In [20]:
# Validate and clean the renamed frame; the result is None if validation fails.
df_clean = clean_faq_data(df)

🧹 Cleaning data...
   Original shape: (75, 2)
   Clean shape: (75, 2)
   Removed: 0 rows


In [21]:
def generate_embeddings(texts, model, batch_size=32):
    """Encode `texts` with `model` and return the resulting embeddings.

    Args:
        texts: sequence of strings to embed.
        model: object exposing `encode(texts, convert_to_tensor, batch_size,
            show_progress_bar)`.
        batch_size: batch size forwarded to `encode`.

    Returns:
        Whatever `model.encode` returns (its `.shape` is printed), or None
        if anything raises along the way.
    """
    try:
        print(f"🔄 Generating embeddings for {len(texts)} texts...")

        encode_options = dict(
            convert_to_tensor=False,
            batch_size=batch_size,
            show_progress_bar=True,
        )
        vectors = model.encode(texts, **encode_options)

        print(f"✅ Generated embeddings shape: {vectors.shape}")
    except Exception as err:
        print(f"❌ Failed to generate embeddings: {err}")
        return None
    return vectors

In [22]:
# Embed the cleaned questions; stays None when an upstream step failed.
embeddings = None
if not (df_clean is None or embedding_model is None):
    prompts = df_clean['question'].tolist()
    embeddings = generate_embeddings(prompts, embedding_model)

🔄 Generating embeddings for 75 texts...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Generated embeddings shape: (75, 384)


In [23]:
def ingest_documents(collection, df, embeddings, batch_size=1000):
    """Ingest FAQ documents into a Milvus collection in batches.

    Args:
        collection: target collection; must expose `insert`, `flush`, `load`.
        df: DataFrame with 'question' and 'answer' columns.
        embeddings: one vector per row of `df`; a numpy array or any
            sequence of vectors.
        batch_size: maximum number of rows per insert call (must be > 0).

    Returns:
        True when at least one document was inserted, False otherwise
        (missing inputs, empty input, row/embedding count mismatch, invalid
        batch size, or every batch failing). A failed batch is reported and
        skipped; it does not abort the remaining batches.
    """
    try:
        if collection is None or df is None or embeddings is None:
            print("❌ Missing required components for ingestion")
            return False

        if batch_size is None or batch_size <= 0:
            print(f"❌ Invalid batch size: {batch_size}")
            return False

        print("📥 Preparing data for batched ingestion...")

        # Prepare data
        prompts = df['question'].astype(str).tolist()
        responses = df['answer'].astype(str).tolist()
        if hasattr(embeddings, 'tolist'):
            embeddings_list = embeddings.tolist()
        else:
            # Plain sequence of vectors: convert each one if it is array-like
            embeddings_list = [v.tolist() if hasattr(v, 'tolist') else v for v in embeddings]

        total_docs = len(prompts)
        if total_docs == 0:
            # Checked up front so the success-rate division below is safe
            print("❌ No documents to ingest")
            return False
        if len(embeddings_list) != total_docs:
            print(f"❌ Row/embedding count mismatch: {total_docs} documents vs {len(embeddings_list)} embeddings")
            return False

        print(f"   Total documents: {total_docs}")
        print(f"   Batch size: {batch_size}")
        print(f"   Number of batches: {(total_docs + batch_size - 1) // batch_size}")

        # Track successful insertions
        total_inserted = 0
        successful_batches = 0
        failed_batches = 0

        # Process in batches
        for batch_num, start in enumerate(range(0, total_docs, batch_size), start=1):
            end_idx = min(start + batch_size, total_docs)

            print(f"🔄 Processing batch {batch_num} (documents {start+1}-{end_idx})...")

            try:
                # Column-wise data, in order: questions, answers, vectors
                batch_entities = [
                    prompts[start:end_idx],
                    responses[start:end_idx],
                    embeddings_list[start:end_idx],
                ]

                insert_result = collection.insert(batch_entities)

                inserted_count = len(insert_result.primary_keys)
                total_inserted += inserted_count
                successful_batches += 1

                print(f"   ✅ Batch {batch_num}: {inserted_count} documents inserted")

            except Exception as batch_error:
                print(f"   ❌ Batch {batch_num} failed: {batch_error}")
                failed_batches += 1
                continue

        if total_inserted > 0:
            # Persist the inserted rows so entity counts reflect them
            collection.flush()

        # Load collection to memory after all batches
        print("🔄 Loading collection to memory...")
        collection.load()

        # Print summary
        print(f"\n📊 Ingestion Summary:")
        print(f"   Total documents processed: {total_docs}")
        print(f"   Successfully inserted: {total_inserted}")
        print(f"   Failed documents: {total_docs - total_inserted}")
        print(f"   Successful batches: {successful_batches}")
        print(f"   Failed batches: {failed_batches}")
        print(f"   Success rate: {(total_inserted/total_docs)*100:.1f}%")

        if total_inserted > 0:
            print(f"✅ Ingestion completed with {total_inserted} documents")
            return True
        else:
            print(f"❌ No documents were successfully inserted")
            return False

    except Exception as e:
        print(f"❌ Failed to ingest documents: {e}")
        return False

In [24]:
# Ingest only when the collection, the cleaned data and the embeddings all exist.
if not collection or df_clean is None or embeddings is None:
    ingestion_success = False
    print("❌ Cannot proceed with ingestion - missing components")
else:
    ingestion_success = ingest_documents(collection, df_clean, embeddings)


📥 Preparing data for batched ingestion...
   Total documents: 75
   Batch size: 1000
   Number of batches: 1
🔄 Processing batch 1 (documents 1-75)...
   ✅ Batch 1: 75 documents inserted
🔄 Loading collection to memory...

📊 Ingestion Summary:
   Total documents processed: 75
   Successfully inserted: 75
   Failed documents: 0
   Successful batches: 1
   Failed batches: 0
   Success rate: 100.0%
✅ Ingestion completed with 75 documents


In [30]:
def get_collection_stats(collection, collection_name):
    """Get and display collection statistics.

    Args:
        collection: collection object exposing `num_entities` (and
            optionally `is_loaded`), or None.
        collection_name: name shown in the printed report.

    Returns:
        The entity count, or 0 when `collection` is None or reading the
        count raises.
    """
    try:
        if collection is None:
            print("❌ No collection available")
            return 0

        # Get collection info
        num_entities = collection.num_entities

        # NOTE(review): not sure every pymilvus Collection exposes
        # `is_loaded` - confirm. A missing attribute is reported as
        # "Unknown" instead of discarding the entity count read above.
        loaded = getattr(collection, 'is_loaded', None)
        if loaded is None:
            status = 'Unknown'
        else:
            status = 'Loaded' if loaded else 'Not Loaded'

        print("📊 Collection Statistics:")
        print(f"   Collection Name: {collection_name}")
        print(f"   Total Documents: {num_entities}")
        print(f"   Status: {status}")

        return num_entities

    except Exception as e:
        print(f"❌ Failed to get collection stats: {e}")
        return 0

In [12]:
"""
Refactored FAQ System using Phi-3 Mini for CPU inference
Replace your existing LLM code with this optimized version
"""

def load_phi3_model():
    """Load the Phi-3 Mini instruct tokenizer and model for CPU inference.

    Returns:
        (tokenizer, model) on success, (None, None) if anything raises
        (the error is printed).
    """
    try:
        from transformers import AutoTokenizer, AutoModelForCausalLM
        import torch

        print("🧠 Loading Microsoft Phi-3 Mini (3.8B params - CPU optimized)...")

        checkpoint = "microsoft/Phi-3-mini-4k-instruct"

        # NOTE(review): trust_remote_code=True lets the checkpoint run its
        # own Python code - confirm this is still required for this model.
        tok = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

        # CPU-oriented loading options
        cpu_load_options = {
            "torch_dtype": torch.float32,
            "device_map": "cpu",
            "trust_remote_code": True,
            "low_cpu_mem_usage": True,
        }
        lm = AutoModelForCausalLM.from_pretrained(checkpoint, **cpu_load_options)

        # Fall back to the EOS token when the tokenizer has no pad token
        if tok.pad_token is None:
            tok.pad_token = tok.eos_token

        print("✅ Phi-3 Mini loaded successfully on CPU!")
    except Exception as err:
        print(f"❌ Error loading Phi-3: {err}")
        return None, None
    return tok, lm

def generate_answer_phi3(query, relevant_faqs, tokenizer, model):
    """Generate an answer to `query` from retrieved FAQ entries using Phi-3.

    Args:
        query: the user's question.
        relevant_faqs: list of dicts with a 'response' key, best match
            first. Only the first two entries are used as context.
        tokenizer: tokenizer exposing `apply_chat_template`, `__call__`,
            `decode` and `eos_token_id`.
        model: model exposing `generate`.

    Returns:
        The generated text. If generation yields an empty string or raises,
        the top FAQ response prefixed with "Based on our FAQ: ". A fixed
        message when `relevant_faqs` is empty.
    """
    # Checked before any import so this path never depends on torch
    if not relevant_faqs:
        return "I don't have information to answer that question."

    fallback_answer = f"Based on our FAQ: {relevant_faqs[0]['response']}"

    try:
        import torch

        # Create focused prompt for Phi-3
        context = f"Relevant FAQ: {relevant_faqs[0]['response']}"
        if len(relevant_faqs) > 1:
            context += f"\nAdditional info: {relevant_faqs[1]['response']}"

        # Phi-3 chat format
        messages = [
            {"role": "system", "content": "You are a helpful customer service assistant for a data analytics bootcamp. Use the provided FAQ information to answer user questions concisely and accurately."},
            {"role": "user", "content": f"FAQ Information:\n{context}\n\nUser Question: {query}\n\nProvide a helpful answer based on the FAQ information:"}
        ]

        # Apply chat template
        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize.
        # NOTE(review): truncation is applied to the templated prompt, so a
        # very long context could cut off the trailing generation marker -
        # confirm the tokenizer's truncation side.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=1024,  # Keep context manageable for CPU
            truncation=True
        )

        # Generate with CPU-optimized settings
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.3,
                do_sample=True,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                use_cache=True,
                num_return_sequences=1
            )

        # Decode only the newly generated tokens (skip the echoed prompt)
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        ).strip()

        return response if response else fallback_answer

    except Exception as e:
        print(f"⚠️ Phi-3 generation failed: {e}")
        return fallback_answer

def generate_simple_answer(query, relevant_faqs):
    """Build a templated answer from the retrieved FAQs (no LLM involved).

    The wording depends on the top hit's similarity score:
      * > 0.8: the FAQ response is presented as a direct answer;
      * > 0.6: presented as related information, plus the second hit when
        it exists and scores above 0.5;
      * otherwise: presented as loosely related, with a pointer to support.

    `query` is accepted for interface symmetry but not used.
    """
    if not relevant_faqs:
        return "I don't have information to answer that question."

    top_hit = relevant_faqs[0]
    top_score = top_hit['score']

    # Strong match: answer directly
    if top_score > 0.8:
        return f"Based on our FAQ: {top_hit['response']}"

    # Moderate match: related info, optionally extended with the runner-up
    if top_score > 0.6:
        parts = [f"Here's related information from our FAQ: {top_hit['response']}"]
        if len(relevant_faqs) > 1 and relevant_faqs[1]['score'] > 0.5:
            parts.append(f"Additionally: {relevant_faqs[1]['response']}")
        return "\n\n".join(parts)

    # Weak match: hedge and point to support
    return (
        f"I found some related information: {top_hit['response']}"
        "\n\nIf this doesn't fully answer your question, please contact our support team for more specific information."
    )


def search_faqs(collection, query, embedding_model, top_k=3,
                question_field="question", answer_field="answer"):
    """Search the collection for the FAQs most similar to `query`.

    Args:
        collection: collection object exposing `search`.
        query: the user's question text.
        embedding_model: object exposing `encode(list_of_texts)`.
        top_k: maximum number of hits to return.
        question_field: name of the stored question field. Defaults to
            "question", the field name used when this notebook creates the
            collection.
        answer_field: name of the stored answer field (default "answer").

    Returns:
        A list of dicts with keys 'rank', 'prompt', 'response', 'score' and
        'id', best match first. The 'prompt' / 'response' keys are kept for
        downstream callers whatever the stored field names are. An empty
        list when inputs are missing or the search raises.
    """
    try:
        if collection is None or embedding_model is None:
            print("❌ Missing collection or embedding model")
            return []

        # Generate embedding for query
        query_embedding = embedding_model.encode([query])[0]
        if hasattr(query_embedding, 'tolist'):
            # Hand the client a plain list of floats rather than an array
            query_embedding = query_embedding.tolist()

        # Search parameters
        search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}

        # Request the fields that exist in the schema; asking for an
        # unknown field makes the whole search fail.
        results = collection.search(
            data=[query_embedding],
            anns_field="embedding",
            param=search_params,
            limit=top_k,
            output_fields=[question_field, answer_field]
        )

        # Format results
        search_results = []
        for rank, hit in enumerate(results[0], start=1):
            search_results.append({
                'rank': rank,
                'prompt': hit.entity.get(question_field),
                'response': hit.entity.get(answer_field),
                'score': float(hit.score),
                'id': hit.id
            })

        return search_results

    except Exception as e:
        print(f"❌ Search failed: {e}")
        return []

def display_search_results(query, results):
    """Pretty-print the hits returned for `query`.

    Each hit shows its rank, its similarity to three decimals, the matched
    question, and the first 150 characters of the answer (followed by
    '...' when the answer is longer than that).
    """
    preview_len = 150

    print(f"🔍 Search Query: '{query}'")
    print("-" * 60)

    if not results:
        print("❌ No results found")
        return

    for hit in results:
        print(f"📍 Rank {hit['rank']} (Similarity: {hit['score']:.3f})")
        print(f"Q: {hit['prompt']}")
        answer_text = hit['response']
        suffix = '...' if len(answer_text) > preview_len else ''
        print(f"A: {answer_text[:preview_len]}{suffix}")
        print()


def answer_question_with_phi3(collection, query, embedding_model, tokenizer=None, model=None, top_k=3):
    """Run the full FAQ pipeline: retrieve, then answer.

    Args:
        collection: collection searched for similar FAQs.
        query: the user's question.
        embedding_model: model used to embed the query.
        tokenizer: Phi-3 tokenizer, or None to use the rule-based fallback.
        model: Phi-3 model, or None to use the rule-based fallback.
        top_k: number of FAQs to retrieve.

    Returns:
        A dict with keys 'query', 'answer', 'relevant_faqs', 'method' and
        'status'. 'method' is one of 'no_results', 'direct_match',
        'phi3_cpu' or 'rule_based'. 'status' is 'no_results' when nothing
        was retrieved and 'success' otherwise.
    """
    print(f"🧠 Phi-3 processing query: '{query}'")

    # Step 1: Retrieve relevant FAQs
    relevant_faqs = search_faqs(collection, query, embedding_model, top_k=top_k)

    if not relevant_faqs:
        return {
            'query': query,
            'answer': "I couldn't find relevant information for your question.",
            'relevant_faqs': [],
            'method': 'no_results',
            # Present on every return path so callers can read it safely
            'status': 'no_results'
        }

    # Step 2: Very close match - return the stored FAQ answer directly
    if relevant_faqs[0]['score'] > 0.85:
        return {
            'query': query,
            'answer': f"Based on our FAQ: {relevant_faqs[0]['response']}",
            'relevant_faqs': relevant_faqs,
            'method': 'direct_match',
            'status': 'success'
        }

    # Step 3: Generate answer using Phi-3 or fallback.
    # Compare against None explicitly: these objects may define __len__,
    # so plain truthiness is not a reliable "was it provided" test.
    if tokenizer is not None and model is not None:
        answer = generate_answer_phi3(query, relevant_faqs, tokenizer, model)
        method = 'phi3_cpu'
    else:
        answer = generate_simple_answer(query, relevant_faqs)
        method = 'rule_based'

    return {
        'query': query,
        'answer': answer,
        'relevant_faqs': relevant_faqs,
        'method': method,
        'status': 'success'
    }






In [13]:
# Load the Phi-3 tokenizer and model; both are None if loading fails, and the
# pipeline then falls back to rule-based answers.
print("🚀 Setting up Phi-3 Mini for CPU inference...")
phi3_tokenizer, phi3_model = load_phi3_model()



🚀 Setting up Phi-3 Mini for CPU inference...
🧠 Loading Microsoft Phi-3 Mini (3.8B params - CPU optimized)...


`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Phi-3 Mini loaded successfully on CPU!


In [14]:
# Sample user questions used to exercise the end-to-end FAQ pipeline
test_queries_llm = [
    "Can I take this course without any programming background?",
    "What happens if I'm not satisfied with the bootcamp?",
    "How much time do I need to dedicate daily?",
    "Will you help me get a job after completion?",
    "csds"  # nonsense input - presumably probes the low-similarity path; TODO confirm
]

In [15]:
# Run every test query through the retrieval + generation pipeline, timing each one
if collection is not None and embedding_model is not None:
    print("\n🧠 Testing Complete FAQ System with Phi-3 Mini:")
    print("=" * 70)

    import time
    total_time = 0

    for i, query in enumerate(test_queries_llm, 1):
        # perf_counter is monotonic, so durations are immune to clock adjustments
        start_time = time.perf_counter()

        # Use Phi-3 when it is loaded; the pipeline falls back to rule-based answers otherwise
        result = answer_question_with_phi3(
            collection,
            query,
            embedding_model,
            phi3_tokenizer,
            phi3_model,
            top_k=3
        )

        duration = time.perf_counter() - start_time
        total_time += duration

        print(f"\n{i}. ❓ Query: {query}")
        print(f"   ⏱️  Time: {duration:.2f} seconds")
        print(f"   🎯 Method: {result['method']}")
        print(f"   💬 Answer: {result['answer']}")
        print(f"   📊 Retrieved {len(result['relevant_faqs'])} relevant FAQs")
        print("-" * 50)

    # Performance summary (guarded so an empty query list cannot divide by zero)
    avg_time = total_time / len(test_queries_llm) if test_queries_llm else 0.0
    print(f"\n📈 Performance Summary:")
    print(f"   Model: Phi-3 Mini (3.8B params)")
    print(f"   Average response time: {avg_time:.2f} seconds")
    print(f"   Total time: {total_time:.2f} seconds")
    print(f"   Device: CPU")


else:
    print("❌ Collection or embedding_model not available")



🧠 Testing Complete FAQ System with Phi-3 Mini:
🧠 Phi-3 processing query: 'Can I take this course without any programming background?'

1. ❓ Query: Can I take this course without any programming background?
   ⏱️  Time: 2.46 seconds
   🎯 Method: direct_match
   💬 Answer: Based on our FAQ: Yes, this is the perfect course for anyone who has never done coding and wants to build a career in the IT/Data Analytics industry or just wants to perform better in their current job or business using data.
   📊 Retrieved 3 relevant FAQs
--------------------------------------------------
🧠 Phi-3 processing query: 'What happens if I'm not satisfied with the bootcamp?'

2. ❓ Query: What happens if I'm not satisfied with the bootcamp?
   ⏱️  Time: 0.24 seconds
   🎯 Method: direct_match
   💬 Answer: Based on our FAQ: As promised we will give you a 100% refund based on the guidelines (Please refer to our course refund policy before enrolling).
   📊 Retrieved 3 relevant FAQs
-------------------------------

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48
You are not running the flash-attention implementation, expect numerical differences.
