# TAXPREP_DEMO — Scoring & Evaluation (Updated)
This notebook:
- Uses your improved `derive_true_label`
- Balances dataset properly for both classes
- Calls `scoring_service_azure` safely with fallback
- Displays full evaluation + confusion matrix

In [1]:
# 1️⃣ Setup & Imports
import os, sys, json
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Ensure project root is in path. Guarded so that re-running this cell does
# not keep appending the same entry to sys.path.
PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Optional: load environment vars from a .env file.
# This stays best-effort (the notebook keeps running without python-dotenv),
# but the failure is reported instead of being swallowed silently, so a
# missing dependency is visible here rather than only in a later cell.
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception as dotenv_error:
    print("Skipping .env loading:", type(dotenv_error).__name__, dotenv_error)

# Widen text columns so long feedback strings are not truncated when displayed.
pd.options.display.max_colwidth = 200
print("‚úÖ Environment setup complete.")

‚úÖ Environment setup complete.


In [2]:
# 2️⃣ Import scoring module
# The later cells need three helpers from the project module. If the import
# fails for any reason, print the exception type and message, then re-raise
# so the notebook stops at this cell.
try:
    from scoring_service_azure import (
        score_batch,
        balance_dataframe,
        derive_true_label,
    )
except Exception as import_error:
    print("‚ö†Ô∏è Could not import scoring_service_azure:", type(import_error).__name__, import_error)
    raise
else:
    print("‚úÖ scoring_service_azure module loaded successfully.")

‚ö†Ô∏è Could not import scoring_service_azure: ModuleNotFoundError No module named 'dotenv'


ModuleNotFoundError: No module named 'dotenv'

In [None]:
# 3️⃣ Example data
# Six hand-written client records: numeric service metrics plus the text of
# the client's last feedback. Field names are listed once and zipped onto
# each row of values.
record_fields = (
    "client_id",
    "turnaround_time_days",
    "error_rate_pct",
    "communication_count",
    "last_feedback_text",
)
record_values = (
    (1, 8, 12, 1, "Late delivery"),
    (2, 5, 2, 4, "Great service"),
    (3, 9, 10, 0, "No response"),
    (4, 4, 1, 5, "Helpful advisor"),
    (5, 2, 0.5, 6, "Quick and accurate filing"),
    (6, 10, 15, 0, "Terrible support"),
)
data = [dict(zip(record_fields, values)) for values in record_values]

df = pd.DataFrame(data)
print("‚úÖ Loaded data:")
display(df)

In [None]:
# 4️⃣ Derive true labels using improved derive_true_label
# The ground-truth label of each client is whatever derive_true_label returns
# for that client's feedback text; it is stored in a new "true_label" column.
feedback_text = df["last_feedback_text"]
df["true_label"] = feedback_text.apply(derive_true_label)

print("‚úÖ Derived true labels:")
label_preview_columns = ["client_id", "last_feedback_text", "true_label"]
display(df[label_preview_columns])

In [None]:
# 5️⃣ Balance dataset (ensures both classes)
# The two labels the evaluation is reported on; both must be present.
EXPECTED_LABELS = ["Dissatisfied", "Satisfied"]

# Placeholder client_id and feedback text used when a class has to be
# synthesized. NOTE: the ids are assumed not to collide with real client ids.
SYNTHETIC_EXAMPLES = {
    "Satisfied": {"client_id": 999, "last_feedback_text": "Excellent and helpful service"},
    "Dissatisfied": {"client_id": 998, "last_feedback_text": "Terrible support and late delivery"},
}

balanced = balance_dataframe(df.copy())
if balanced["true_label"].nunique() < 2:
    print("‚ö†Ô∏è Only one class found ‚Äî synthesizing balanced dataset.")
    present_labels = set(balanced["true_label"].dropna())
    synthetic_rows = []
    for missing_label in EXPECTED_LABELS:
        if missing_label in present_labels:
            continue
        # Use a real row of the missing class as the template when df has one,
        # otherwise any row, so the synthetic record carries every column.
        same_class_rows = df[df["true_label"] == missing_label]
        template_source = same_class_rows if not same_class_rows.empty else df
        if len(template_source):
            synthetic_row = template_source.iloc[0].copy()
        else:
            synthetic_row = pd.Series(dtype="object")
        synthetic_row["client_id"] = SYNTHETIC_EXAMPLES[missing_label]["client_id"]
        synthetic_row["last_feedback_text"] = SYNTHETIC_EXAMPLES[missing_label]["last_feedback_text"]
        synthetic_row["true_label"] = missing_label
        synthetic_rows.append(synthetic_row)
    # Append to the balanced frame (not the raw df) so the output of
    # balance_dataframe is kept. This guarantees that both classes are
    # present; it does not equalize their counts.
    balanced = pd.concat([balanced, pd.DataFrame(synthetic_rows)], ignore_index=True)

print("‚úÖ Balanced dataset class counts:")
print(balanced["true_label"].value_counts())
display(balanced)

In [None]:
# 6️⃣ Score dataset via Azure or fallback
print("üöÄ Scoring dataset via Azure OpenAI (or fallback)...")

# The whole balanced frame goes to score_batch; whatever it returns is wrapped
# in a DataFrame so it can be displayed and joined back by client_id.
results = score_batch(balanced)
results_df = pd.DataFrame(data=results)

print("‚úÖ Scoring complete:")
display(results_df)

In [None]:
# 7️⃣ Merge & Evaluate
labels = ["Dissatisfied", "Satisfied"]
# Label assigned to rows for which the scorer returned no prediction.
FALLBACK_LABEL = "Dissatisfied"

# Keep one scored row per client. If the balanced frame repeats a client_id
# (and the results therefore do too), a plain merge would be many-to-many and
# multiply those rows, skewing every metric below.
# Assumes repeated client_ids carry the same prediction; the first is kept.
scored = results_df.drop_duplicates(subset="client_id", keep="first")

# Drop result columns that only echo input columns. Otherwise merge suffixes
# both copies (e.g. true_label_x / true_label_y) and the lookups below fail.
echoed_columns = [
    column
    for column in scored.columns
    if column not in ("client_id", "label", "confidence") and column in balanced.columns
]
scored = scored.drop(columns=echoed_columns)

merged = balanced.merge(scored, on="client_id", how="left").rename(columns={"label": "pred_label"})
assert len(merged) == len(balanced), "merge changed the number of evaluated rows"
if "pred_label" not in merged.columns:
    raise KeyError("score_batch results contain no 'label' column")

print("üîç Merged results:")
# "confidence" is shown only when the scorer provided it.
preview_columns = [
    column
    for column in ["client_id", "true_label", "pred_label", "confidence"]
    if column in merged.columns
]
display(merged[preview_columns])

# Rows without a prediction are still counted, using the fallback label,
# but the count is reported so the metrics are not silently distorted.
unscored = merged["pred_label"].isna()
if unscored.any():
    print(f"{int(unscored.sum())} of {len(merged)} rows have no prediction; counting them as '{FALLBACK_LABEL}'.")

y_true = merged["true_label"]
y_pred = merged["pred_label"].fillna(FALLBACK_LABEL)

accuracy = (y_true == y_pred).mean()

print(f"\nüìä Evaluation Metrics\n‚úÖ Accuracy: {accuracy:.3f}\n")
print("Classification Report:")
# zero_division=0 reports 0.0 for a class that was never predicted instead of
# emitting an undefined-metric warning on this small dataset.
print(classification_report(y_true, y_pred, labels=labels, zero_division=0))

cm = confusion_matrix(y_true, y_pred, labels=labels)
fig, ax = plt.subplots(figsize=(5,4))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(ax=ax, cmap="Blues", values_format="d")
ax.set_title("Confusion Matrix (Both Classes)")
plt.show()