In [None]:
# ================================================================
# 📘 TaxPrep Satisfaction Scorer Demo (Azure OpenAI Integration)
# ================================================================

import os, sys, pandas as pd, matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Put the local ``taxprep_demo1`` directory (resolved against the current
# working directory) on the import path so modules inside it can be imported
# by bare name. The membership test keeps re-running this cell from adding
# the same entry twice.
module_path = os.path.abspath('taxprep_demo1')
already_importable = module_path in sys.path
if not already_importable:
    sys.path += [module_path]

from scoring_service_azure import score_batch, derive_true_label, balance_dataframe

# Six hand-written client records used as the demo dataset. Each row tuple is
# ordered to match ``fields``; zipping them yields the list of per-client
# dicts that seeds the DataFrame (one dict per row, keys become columns).
fields = (
    "client_id",
    "turnaround_time_days",
    "error_rate_pct",
    "communication_count",
    "last_feedback_text",
)
rows = [
    (1, 8, 12, 1, "Late delivery"),
    (2, 5, 2, 4, "Great service"),
    (3, 9, 10, 0, "No response"),
    (4, 4, 1, 5, "Helpful advisor"),
    (5, 2, 0.5, 6, "Quick and accurate filing"),
    (6, 10, 15, 0, "Terrible support"),
]
data = [dict(zip(fields, row)) for row in rows]

df = pd.DataFrame(data)

# Reference label for evaluation, computed per row from the feedback text by
# the imported ``derive_true_label`` helper (its labelling rule lives in
# scoring_service_azure and is not visible here).
feedback_text = df['last_feedback_text']
df['true_label'] = feedback_text.apply(derive_true_label)

# Show the labelled input records before any balancing is applied.
print('✅ Original Dataset:')
display(df)

# Balance the records on the ``true_label`` column using the imported
# ``balance_dataframe`` helper, then print how many rows each label has
# afterwards. NOTE(review): whether the helper over- or under-samples is not
# visible from this cell -- confirm in scoring_service_azure.
print('⚖️ Balancing dataset ...')
df_balanced = balance_dataframe(df, 'true_label')
label_counts = df_balanced['true_label'].value_counts()
print(label_counts)

# Score the balanced records with the imported ``score_batch`` helper and
# attach the results to the balanced frame, producing ``scored_df`` with a
# ``pred_label`` column (copied from the scorer's ``label`` column).
print('🚀 Scoring dataset via Azure OpenAI (or fallback)...')
raw_scores = score_batch(df_balanced)

# Keep only the join key plus columns the scorer actually added. If the scorer
# echoes input columns back (e.g. ``true_label``), a plain merge would rename
# them ``*_x`` / ``*_y`` and the later ``scored_df['true_label']`` lookup
# would raise KeyError.
score_columns = ['client_id'] + [
    col for col in raw_scores.columns if col not in df_balanced.columns
]

# If balancing repeated any client (oversampling), ``client_id`` is duplicated
# on BOTH sides of the join; merging as-is would be many-to-many and emit k*k
# rows for a client that appears k times, inflating every metric below.
# Reducing the scores to one row per client keeps the result at exactly one
# row per balanced record.
unique_scores = raw_scores[score_columns].drop_duplicates(subset='client_id')

scored_df = df_balanced.merge(
    unique_scores,
    on='client_id',
    how='left',
    validate='many_to_one',  # documents/enforces the one-score-per-client join
)
scored_df['pred_label'] = scored_df['label']

print('✅ Scoring complete:')
display(scored_df)

# Compare the reference labels with the scorer's predictions.
y_true, y_pred = scored_df['true_label'], scored_df['pred_label']

print('\n📊 Evaluation Metrics:')
# zero_division=0 reports 0 for a metric whose denominator is zero (e.g. a
# class that was never predicted) rather than emitting a warning.
report = classification_report(y_true, y_pred, zero_division=0)
print(report)

# Fix the row/column order of the confusion matrix explicitly so the plot's
# axis tick labels line up with the matrix entries.
labels = ['Satisfied', 'Dissatisfied']
cm = confusion_matrix(y_true, y_pred, labels=labels)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
cm_display.plot(cmap='Blues', values_format='d')  # 'd' -> integer cell counts
plt.title('Customer Satisfaction Confusion Matrix')
plt.show()

# Print one aligned line per scored record: client id, reference label,
# predicted label and the feedback text the record carries.
print('\n🔍 Sample Predictions:')
for _, record in scored_df.iterrows():
    client = record['client_id']
    expected = record['true_label']
    predicted = record['pred_label']
    feedback = record['last_feedback_text']
    print(f"{client:>2} | True: {expected:<13} | Pred: {predicted:<13} | Feedback: {feedback}")
