In [None]:
pip install scikit-learn langgraph openai

In [44]:
import re
import os
import json
import pickle
import numpy as np
from typing import Dict, Any, List, Literal
from dataclasses import dataclass
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
#from sentence_transformers import SentenceTransformer
from openai import OpenAI
from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict

## Define the state and the result object


In [24]:
@dataclass
class RoutingResult:
    route: Literal["order_status", "product_info", "technical_support","billing","general"]
    confidence: float
    method: str

# LLM Routing


In [25]:
def llm_based_routing(query: str) -> RoutingResult:
    """Route using LLM analysis"""
    prompt = f"""
    Analyze the following customer service query and classify it into exactly one category.
    
    Query: "{query}"
    
    Categories:
    - order_status: Questions about order tracking, delivery, shipping status
    - product_info: Questions about product specifications, availability, features
    - technical_support: Technical issues, troubleshooting, bugs, problems
    - billing: Payment, refund, billing, invoice questions
    - general: General questions or anything that doesn't fit other categories
    
    Respond JSON format: {{"route": "", "confidence": 1, "method": "llm"}}
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that classifies customer service queries."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=50,
        temperature=0
    )
    
    # Parse the response content as JSON
    response_content = response.choices[0].message.content
    response_data = json.loads(response_content)
    
    # Validate and return the Pydantic model
    return RoutingResult(**response_data)

In [27]:
print(llm_based_routing("What are the technical specifications of your latest smartphone?"))

RoutingResult(route='product_info', confidence=1, method='llm')


# Semantic Routing

In [30]:
## convert the route descriptions to embedding using OpenAI embeddings
def get_openai_embedding(text: str) -> np.ndarray:
    client = OpenAI()
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )
    return np.array(response.data[0].embedding)



def embedding_based_routing(query: str) -> RoutingResult:
    """Route using embedding similarity"""
    query_embedding = get_openai_embedding(query)
    
    best_route = None
    best_similarity = -1

    route_descriptions = {
    "order_status": "track order delivery shipping status when will my order arrive",
    "product_info": "product specifications features availability price catalog information",
    "technical_support": "troubleshooting technical issues bugs problems not working",
    "billing": "payment refund billing invoice charge credit card money",
    "general": "general questions help information other inquiries"
    }
    
    route_embeddings = {route: get_openai_embedding(desc) for route, desc in route_descriptions.items()}
    
    for route, embedding in route_embeddings.items():
        similarity = np.dot(query_embedding, embedding) / (np.linalg.norm(query_embedding) * np.linalg.norm(embedding))
        if similarity > best_similarity:
            best_similarity = similarity
            best_route = route
    
    return RoutingResult(route=best_route, confidence=float(best_similarity), method="embedding")

In [31]:
print(embedding_based_routing("What are the technical specifications of your latest smartphone?"))

RoutingResult(route='product_info', confidence=0.31293595309519334, method='embedding')


# Rule-based Routing

In [33]:
def rule_based_routing(query: str) -> RoutingResult:
        """Route using predefined rules and keyword matching"""
        query_lower = query.lower()
        
        # Define routing rules
        rules = {
            "order_status": [
                r'\border\b',
                r'\btrack\b',
                r'\bdelivery\b',
                r'\bshipping\b',
                r'\bwhen.*arrive\b',
                r'\bstatus\b',
                r'\bpackage\b'
            ],
            "product_info": [
                r'\bproduct\b',
                r'\bspecs?\b',
                r'\bspecifications?\b',
                r'\bfeatures?\b',
                r'\bavailable\b',
                r'\bprice\b',
                r'\bcost\b',
                r'\bitem\b'
            ],
            "technical_support": [
                r'\btechnical\b',
                r'\bbug\b',
                r'\berror\b',
                r'\bfix\b',
                r'\btroubleshoot\b',
                r'\bnot working\b',
                r'\bissue\b',
                r'\bproblem\b'
            ],
            "billing": [
                r'\bbilling\b',
                r'\bpayment\b',
                r'\brefund\b',
                r'\bcharge\b',
                r'\binvoice\b',
                r'\bmoney\b',
                r'\bcredit card\b',
                r'\bcard\b'
            ]
        }
        
        # Check rules for each category
        route_scores = {}
        for route, patterns in rules.items():
            score = sum(1 for pattern in patterns if re.search(pattern, query_lower))
            if score > 0:
                route_scores[route] = score
        
        if route_scores:
            best_route = max(route_scores, key=route_scores.get)
            # Normalize confidence based on matches
            confidence = min(route_scores[best_route] / 3, 1.0)
        else:
            best_route = "general"
            confidence = 0.5
        
        return RoutingResult(route=best_route, confidence=confidence, method="rule")

In [34]:
print(rule_based_routing("What are the technical specifications of your latest smartphone?"))

RoutingResult(route='product_info', confidence=0.3333333333333333, method='rule')


# Machine Learning based Routing

In [48]:
def load_ml_classifier(model_path: str = "customer_service_classifier.pkl") -> Pipeline:
        """Load ML classifier from pickle file or create and save new one"""
        model_path = "customer_service_classifier.pkl"
        
        if os.path.exists(model_path):
            print(f"Loading pre-trained classifier from {model_path}")
            with open(model_path, 'rb') as f:
                return pickle.load(f)
        else:
            raise FileNotFoundError(f"{model_path} not found. Please train the model first.")

def ml_based_routing(query: str) -> RoutingResult:
        """Route using trained ML classifier loaded from pickle file"""

        ml_classifier = load_ml_classifier()

        # Predict using the loaded classifier
        prediction = ml_classifier.predict([query])[0]
        
        # Get prediction probabilities for confidence
        probabilities = ml_classifier.predict_proba([query])[0]
        confidence = float(max(probabilities))
        
        return RoutingResult(route=prediction, confidence=confidence, method="ml")

In [49]:
print(ml_based_routing("What is the status of order 124?"))

Loading pre-trained classifier from customer_service_classifier.pkl
RoutingResult(route=np.str_('order_status'), confidence=0.4443909051519013, method='ml')
