In [None]:
import pandas as pd
from datetime import datetime
import warnings
from pandas.errors import SettingWithCopyWarning

# Silence pandas' SettingWithCopyWarning for the rest of the kernel session.
# NOTE(review): this also hides genuine chained-assignment mistakes; prefer explicit
# `.loc[...]` assignments or `.copy()` in the wrangling cells over relying on this filter.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

----
# 1. Data wrangling (data engineer)
----
## 1.1 Lire fichier(s) source(s)

## 1.2 Analyser dataframe

## 1.3 Filtrer lignes inutiles

## 1.4 Convertir dates en valeurs numériques

## 1.5 Encodage des catégories en valeurs numériques

## 1.6 Suppression des colonnes non-numériques inutiles


----
# 2. Développement IA (data scientist)
----
## 2.1 Définition des *features* et des *targets*

In [None]:
# Names of the columns intended as model inputs (X itself is still to be defined).
# Keep this order stable: it determines the column order of any frame built from it.
feature_cols = [
    'gender', 'annual_income', 'spending_score',
    'subscribed_to_newsletter', 'days_since_last_purchase', 'age',
]

# TODO: define X (features) and y (target)

## 2.2 Séparation des jeux de données d'entrainement vs test

In [None]:
from sklearn.model_selection import train_test_split

# TODO: split train/test


## 2.3 Entraînement du classificateur

In [None]:
from sklearn.tree import DecisionTreeClassifier, plot_tree

# TODO: build decision tree classifier

# TODO: fit classifier (train)


## 2.4 Visualisation du classificateur généré (data analyst?)

In [None]:
import matplotlib.pyplot as plt

# Draw the fitted decision tree on an explicit, oversized axes so the node text
# stays legible even for deep trees.
tree_fig, tree_ax = plt.subplots(figsize=(35, 15))
plot_tree(
    clf,
    filled=True,
    rounded=True,
    # Pass plain Python lists: some scikit-learn releases validate these two
    # parameters as lists and reject a NumPy array / pandas Index.
    class_names=clf.classes_.astype(str).tolist(),
    feature_names=list(X_train.columns),
    fontsize=10,
    ax=tree_ax,
)
plt.show()

## 2.5 Évaluation du modèle de classification

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Predict once per split and reuse the arrays for every metric below.
# `y_pred` is also read by the later plotting cell, so its name must not change.
y_pred = clf.predict(X_test)
y_train_pred = clf.predict(X_train)
y_test_pred = y_pred  # kept as an alias for any cell that still refers to it

print("1. RAPPORT DE CLASSIFICATION")
print(classification_report(y_test, y_pred))

print("2. MATRICE DE CONFUSION")
print(confusion_matrix(y_test, y_pred))

print("\n3. PRÉCISION TRAIN VS TEST")
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Use a format spec rather than slicing str(score): slicing truncates instead of
# rounding and yields misleading text for values rendered in scientific notation.
print(f"Train: {train_accuracy:.3f}")
print(f"Test:  {test_accuracy:.3f}")

## 2.6 Visualiser les erreurs de prédictions avec Plotly

In [None]:
import plotly.express as px

# Work on a copy of the test features so the plotting columns never leak into X_test.
results = X_test.copy()
results['Actual'] = y_test
results['Predicted'] = y_pred

# Turn the numeric class codes into readable names, one label column per code column.
label_map = {0: 'Budget', 1: 'Standard', 2: 'Premium'}
for code_col, name_col in (('Predicted', 'Predicted Label'), ('Actual', 'Actual Label')):
    results[name_col] = results[code_col].map(label_map)

# Boolean hit/miss flag, plus the text version used to colour the points.
outcome_text = {True: '✓ Correct', False: '✗ Incorrect'}
results['Correct'] = results['Actual'].eq(results['Predicted'])
results['Prediction Result'] = results['Correct'].map(outcome_text)


def _symbol_for(row):
    """Return 'Correct' for a hit, otherwise '<predicted>←<actual>' naming the confusion."""
    if row['Correct']:
        return 'Correct'
    return f"{row['Predicted Label']}←{row['Actual Label']}"


# One marker shape per kind of confusion (all hits share a single shape).
results['Symbol Label'] = results.apply(_symbol_for, axis=1)

# Scatter of income vs. spending score: colour encodes hit/miss, symbol encodes error type.
outcome_colors = {
    '✓ Correct': 'green',
    '✗ Incorrect': 'red'
}
fig = px.scatter(
    results,
    x='annual_income',
    y='spending_score',
    color='Prediction Result',
    symbol='Symbol Label',
    color_discrete_map=outcome_colors,
    hover_data=['Predicted Label', 'Actual Label', 'age', 'subscribed_to_newsletter'],
    title='Customer Type Prediction Accuracy (Color = Accuracy, Symbol = Error Type)',
)
fig.show()


## 2.7 Faire une prédiction avec des nouveaux clients

In [None]:
# Two hand-written customers to score with the trained classifier.
# NOTE(review): the values are assumed to use the same numeric encodings as the
# training data (e.g. the `gender` codes) — confirm against the wrangling section.
new_customers = pd.DataFrame([
    {
        'age': 42,
        'annual_income': 65000,
        'spending_score': 75,
        'subscribed_to_newsletter': 1,
        'days_since_last_purchase': 180,
        'gender': 0
    },
    {
        'age': 22,
        'annual_income': 31000,
        'spending_score': 30,
        'subscribed_to_newsletter': 0,
        'days_since_last_purchase': 400,
        'gender': 2
    }
])

# The dicts above list the columns in a different order than the training frame.
# Re-select them in the training order so the classifier receives each value in the
# position it was fitted on; a missing column fails loudly here with a KeyError.
new_customers_aligned = new_customers[list(X_train.columns)]

# Map the predicted class codes back to readable segment names.
label_map = {0: 'Budget', 1: 'Standard', 2: 'Premium'}
preds = clf.predict(new_customers_aligned)
predicted_labels = [label_map[p] for p in preds]
print(predicted_labels)

## 2.8 Enregistrer le modèle pour réutilisation future

In [None]:
from cours_13_ai.utils import save_model_to_onnx

# Export the trained classifier to an ONNX file, passing one training row as the sample.
# NOTE(review): presumably the helper uses X_sample to infer the model's input
# signature — confirm in cours_13_ai.utils.
onnx_sample = X_train.iloc[:1]
save_model_to_onnx(clf, X_sample=onnx_sample, filename="customer_model.onnx")