In [11]:
import pandas as pd
import numpy as np
from IPython.display import display


# Relative path of the input CSV; it is resolved against the kernel's working directory.
file_name = 'Camp_Market_cleaned_omar.csv'

# The file uses ';' as its field separator instead of the default ','.
df = pd.read_csv(file_name, sep=';')

# Définition des Bornes de Quantiles (Âge et Revenu)

In [12]:

# Cut 'Age' into four equal-frequency bins. retbins=True additionally returns
# the five bin edges (lowest value, three inner cut points, highest value).
age_qcut_series, age_bins = pd.qcut(df['Age'], q=4, retbins=True)

print("\n Bornes pour Age Groupe (Quartiles d'Age) ")

# One output template per quartile, filled with that quartile's lower and upper edge.
age_line_templates = (
    "  Q1_Age (les 25% plus jeunes): de {} à {} ans",
    "  Q2_Age (25% suivants):      de {} à {} ans",
    "  Q3_Age (25% suivants):      de {} à {} ans",
    "  Q4_Age (les 25% plus âgés):   de {} à {} ans",
)
# Pair each edge with the next one: (edge0, edge1), (edge1, edge2), ...
for template, lower_edge, upper_edge in zip(age_line_templates, age_bins[:-1], age_bins[1:]):
    print(template.format(lower_edge, upper_edge))


# Same procedure for 'Income': quartile bins plus their edges.
income_qcut_series, income_bins = pd.qcut(df['Income'], q=4, retbins=True)

print("\n Bornes pour Income Groupe (Quartiles de Revenu) ")

# The ':.2f' spec renders each edge with two decimal places.
income_line_templates = (
    "  Q1_Income (les 25% revenus les plus bas): de {:.2f} à {:.2f}",
    "  Q2_Income (25% suivants):               de {:.2f} à {:.2f}",
    "  Q3_Income (25% suivants):               de {:.2f} à {:.2f}",
    "  Q4_Income (les 25% revenus les plus hauts): de {:.2f} à {:.2f}",
)
for template, lower_edge, upper_edge in zip(income_line_templates, income_bins[:-1], income_bins[1:]):
    print(template.format(lower_edge, upper_edge))


 Bornes pour Age Groupe (Quartiles d'Age) 
  Q1_Age (les 25% plus jeunes): de 29.0 à 48.0 ans
  Q2_Age (25% suivants):      de 48.0 à 55.0 ans
  Q3_Age (25% suivants):      de 55.0 à 66.0 ans
  Q4_Age (les 25% plus âgés):   de 66.0 à 85.0 ans

 Bornes pour Income Groupe (Quartiles de Revenu) 
  Q1_Income (les 25% revenus les plus bas): de 1730.00 à 35523.00
  Q2_Income (25% suivants):               de 35523.00 à 51381.50
  Q3_Income (25% suivants):               de 51381.50 à 68281.00
  Q4_Income (les 25% revenus les plus hauts): de 68281.00 à 666666.00


# Analyse de la Campagne "Response = 1"

In [13]:

# Columns holding amounts spent, one per product category.
mnt_columns = [
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]

# Columns holding purchase / visit counts (TotalPurchases is deliberately left out).
num_columns = [
    'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases',
    'NumStorePurchases', 'NumWebVisitsMonth',
]

# Sequential colormap used for every heatmap in this cell.
cmap_colors = 'Greens'

# Discretise 'Age' and 'Income' into four labelled groups each.
# Preferred strategy: quantile bins (equal group sizes). If pd.qcut raises a
# ValueError, both columns are re-binned with equal-width intervals instead,
# and the labels switch from 'Q*' to 'Bin*' so the fallback is visible.
try:
    for source_col in ('Age', 'Income'):
        df[f'{source_col}_Group'] = pd.qcut(
            df[source_col],
            q=4,
            labels=[f'Q{rank}_{source_col}' for rank in range(1, 5)],
        )
except ValueError as qcut_error:
    print(f"Erreur de discrétisation (qcut) : {qcut_error}. Passage en 'cut'.")
    for source_col in ('Age', 'Income'):
        df[f'{source_col}_Group'] = pd.cut(
            df[source_col],
            bins=4,
            labels=[f'Bin{rank}_{source_col}' for rank in range(1, 5)],
        )


# Keep only the rows with Response == 1; .copy() detaches the subset from df.
accepted_mask = df['Response'] == 1
df_acceptants = df.loc[accepted_mask].copy()

# 'Response' is constant in the subset, so only age and income groups are crossed.
grouping_keys = ['Age_Group', 'Income_Group']


print("\n--- Analyse Croisée (Acceptants Uniquement) : DÉPENSES (Mnt) ---")

# Summed spending per (Age_Group, Income_Group); observed=True keeps only the
# category combinations that actually occur in the subset.
mnt_analysis_crossed = (
    df_acceptants
    .groupby(grouping_keys, observed=True)[mnt_columns]
    .sum()
)
# axis=None shades every cell relative to the whole table, not per row or column.
mnt_styler = mnt_analysis_crossed.style.background_gradient(cmap=cmap_colors, axis=None)
mnt_styler = mnt_styler.set_caption("Dépenses cumulées (Mnt) par Âge et Revenu (Acceptants Uniquement)")
display(mnt_styler)


print("\n--- Analyse Croisée (Acceptants Uniquement) : FRÉQUENCES (Num) ---")

# Summed purchase / visit counts per (Age_Group, Income_Group).
num_analysis_crossed = (
    df_acceptants
    .groupby(grouping_keys, observed=True)[num_columns]
    .sum()
)
num_styler = num_analysis_crossed.style.background_gradient(cmap=cmap_colors, axis=None)
num_styler = num_styler.set_caption("Fréquences cumulées (Num) par Âge et Revenu (Acceptants Uniquement)")
display(num_styler)


--- Analyse Croisée (Acceptants Uniquement) : DÉPENSES (Mnt) ---


Unnamed: 0_level_0,Unnamed: 1_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
Age_Group,Income_Group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Q1_Age,Q1_Income,404,148,565,159,179,802
Q1_Age,Q2_Income,1816,290,1498,343,253,721
Q1_Age,Q3_Income,5595,437,1884,304,128,590
Q1_Age,Q4_Income,39300,3082,28008,4238,3348,4086
Q2_Age,Q1_Income,792,194,505,229,165,534
Q2_Age,Q2_Income,3168,193,1242,311,261,646
Q2_Age,Q3_Income,4836,215,1792,155,195,470
Q2_Age,Q4_Income,25614,2755,18748,3319,2513,2604
Q3_Age,Q1_Income,345,138,454,162,130,453
Q3_Age,Q2_Income,2774,258,1201,238,190,638



--- Analyse Croisée (Acceptants Uniquement) : FRÉQUENCES (Num) ---


Unnamed: 0_level_0,Unnamed: 1_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
Age_Group,Income_Group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Q1_Age,Q1_Income,55,53,23,60,186
Q1_Age,Q2_Income,47,72,24,75,124
Q1_Age,Q3_Income,24,50,35,60,44
Q1_Age,Q4_Income,47,274,321,407,136
Q2_Age,Q1_Income,56,60,19,60,184
Q2_Age,Q2_Income,74,86,30,89,144
Q2_Age,Q3_Income,48,69,36,74,63
Q2_Age,Q4_Income,31,176,200,240,102
Q3_Age,Q1_Income,22,30,8,28,71
Q3_Age,Q2_Income,66,60,29,72,84


# Analyse de la Fréquence d'Achat par Segment

In [14]:


# Count columns (purchases per channel and monthly web visits) summed per segment.
columns_to_analyze = [
    'NumDealsPurchases',
    'NumWebPurchases',
    'NumCatalogPurchases',
    'NumStorePurchases',
    'NumWebVisitsMonth',
]

# Sequential colormap used for every heatmap in this cell.
cmap_colors = 'Greens'

# Labels for the quantile bins and for the equal-width fallback bins.
age_quantile_labels = ['Q1_Age', 'Q2_Age', 'Q3_Age', 'Q4_Age']
income_quantile_labels = ['Q1_Income', 'Q2_Income', 'Q3_Income', 'Q4_Income']
age_interval_labels = ['Bin1_Age', 'Bin2_Age', 'Bin3_Age', 'Bin4_Age']
income_interval_labels = ['Bin1_Income', 'Bin2_Income', 'Bin3_Income', 'Bin4_Income']

# Discretise 'Age' and 'Income': quantile bins first, equal-width bins if
# pd.qcut raises a ValueError.
try:
    df['Age_Group'] = pd.qcut(df['Age'], q=4, labels=age_quantile_labels)
    df['Income_Group'] = pd.qcut(df['Income'], q=4, labels=income_quantile_labels)
except ValueError as qcut_error:
    print(f"Erreur de discrétisation (qcut) : {qcut_error}. Passage en 'cut'.")
    df['Age_Group'] = pd.cut(df['Age'], bins=4, labels=age_interval_labels)
    df['Income_Group'] = pd.cut(df['Income'], bins=4, labels=income_interval_labels)


def _show_frequency_sums(segment_key, caption, **groupby_options):
    """Sum the frequency columns per segment, display the heatmap, return the table.

    Reads the notebook-level names ``df``, ``columns_to_analyze`` and
    ``cmap_colors`` at call time.

    Parameters
    ----------
    segment_key : str
        Column of ``df`` to group by.
    caption : str
        Caption placed on the styled table.
    **groupby_options
        Extra keyword arguments forwarded to ``DataFrame.groupby``
        (used here to pass ``observed=True`` for categorical keys).

    Returns
    -------
    pandas.DataFrame
        One row per segment value, one column per entry of ``columns_to_analyze``.
    """
    summed = df.groupby(segment_key, **groupby_options)[columns_to_analyze].sum()
    # axis=None shades every cell relative to the whole table.
    styled = summed.style.background_gradient(cmap=cmap_colors, axis=None)
    display(styled.set_caption(caption))
    return summed


# Education level.
education_analysis = _show_frequency_sums(
    'Education', "Fréquences cumulées par niveau d'études")

# Marital status.
marital_analysis = _show_frequency_sums(
    'Marital_Status', "Fréquences cumulées par statut marital")

# Total number of children.
total_kids_analysis = _show_frequency_sums(
    'TotalKids', "Fréquences cumulées par nombre total d'enfants")

# Number of young children at home.
kidhome_analysis = _show_frequency_sums(
    'Kidhome', "Fréquences cumulées par nombre de jeunes enfants")

# Number of teenagers at home.
teenhome_analysis = _show_frequency_sums(
    'Teenhome', "Fréquences cumulées par nombre d'adolescents")

# Age group (categorical, so only observed categories are kept).
age_group_analysis = _show_frequency_sums(
    'Age_Group', "Fréquences cumulées par groupe d'âge", observed=True)

# Total number of accepted marketing campaigns.
acceptedcmp_analysis = _show_frequency_sums(
    'AcceptedCmp_total', "Fréquences cumulées par total de campagnes acceptées")

# Income group (categorical, so only observed categories are kept).
income_group_analysis = _show_frequency_sums(
    'Income_Group', "Fréquences cumulées par groupe de revenu", observed=True)

Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
Education,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2n Cycle,454,754,470,1114,1098
Basic,97,102,26,154,371
Graduation,2603,4649,3072,6570,5959
Master,898,1492,951,2182,1916
PhD,1153,2146,1437,2942,2555


Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Absurd,4,7,15,13,3
Alone,11,15,2,12,19
Divorced,564,998,619,1348,1268
Married,2067,3532,2268,5055,4635
Single,1022,1858,1248,2705,2532
Together,1347,2363,1546,3323,3050
Widow,180,356,256,494,376
YOLO,10,14,2,12,16


Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
TotalKids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,726,2799,3030,4631,2217
1,2781,4831,2406,6306,6687
2,1497,1361,464,1815,2653
3,201,152,56,210,342


Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
Kidhome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2512,6426,5112,9326,5624
1,2557,2583,810,3471,5955
2,136,134,34,165,320


Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
Teenhome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1838,4234,3457,6522,5762
1,3185,4660,2374,6115,5850
2,182,249,125,325,287


Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
Age_Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Q1_Age,1189,2154,1427,3212,3320
Q2_Age,1419,2156,1280,3045,3176
Q3_Age,1384,2438,1536,3374,2934
Q4_Age,1213,2395,1713,3331,2469


Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
AcceptedCmp_total,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,3855,6064,3448,8940,8834
1,909,1798,1294,2344,1992
2,318,729,604,913,719
3,78,295,302,396,184
4,38,203,239,289,132
5,7,54,69,80,38


Unnamed: 0_level_0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth
Income_Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Q1_Income,1155,1131,265,1661,3920
Q2_Income,1569,1961,689,2424,3586
Q3_Income,1686,3081,1813,4204,2755
Q4_Income,795,2970,3189,4673,1638


# Analyse des Dépenses (Mnt) par Segment


In [15]:


# Spending columns (amount per product category) summed per segment.
columns_to_analyze = [
    'MntWines',
    'MntFruits',
    'MntMeatProducts',
    'MntFishProducts',
    'MntSweetProducts',
    'MntGoldProds',
]

# Sequential colormap: low values render light green, high values dark green.
cmap_colors = 'Greens'

# Discretise 'Age' and 'Income' into four groups each. Quantile bins are tried
# first; if pd.qcut raises a ValueError (e.g. duplicate bin edges), both columns
# are re-binned with equal-width intervals and 'Bin*' labels.
try:
    for source_col, group_labels in (
        ('Age', ['Q1_Age', 'Q2_Age', 'Q3_Age', 'Q4_Age']),
        ('Income', ['Q1_Income', 'Q2_Income', 'Q3_Income', 'Q4_Income']),
    ):
        df[source_col + '_Group'] = pd.qcut(df[source_col], q=4, labels=group_labels)
except ValueError as qcut_error:
    print(f"Erreur de discrétisation (qcut) : {qcut_error}. Passage en 'cut'.")
    for source_col, group_labels in (
        ('Age', ['Bin1_Age', 'Bin2_Age', 'Bin3_Age', 'Bin4_Age']),
        ('Income', ['Bin1_Income', 'Bin2_Income', 'Bin3_Income', 'Bin4_Income']),
    ):
        df[source_col + '_Group'] = pd.cut(df[source_col], bins=4, labels=group_labels)


def _show_spending_sums(segment_key, caption, **groupby_options):
    """Sum the spending columns per segment, display the heatmap, return the table.

    Reads the notebook-level names ``df``, ``columns_to_analyze`` and
    ``cmap_colors`` at call time.

    Parameters
    ----------
    segment_key : str
        Column of ``df`` to group by.
    caption : str
        Caption placed on the styled table.
    **groupby_options
        Extra keyword arguments forwarded to ``DataFrame.groupby``
        (used here to pass ``observed=True`` for categorical keys).

    Returns
    -------
    pandas.DataFrame
        One row per segment value, one column per entry of ``columns_to_analyze``.
    """
    totals = df.groupby(segment_key, **groupby_options)[columns_to_analyze].sum()
    # axis=None shades every cell relative to the whole table.
    heatmap = totals.style.background_gradient(cmap=cmap_colors, axis=None)
    display(heatmap.set_caption(caption))
    return totals


# Education level.
education_analysis = _show_spending_sums(
    'Education', "Dépenses cumulées par niveau d'études")

# Marital status.
marital_analysis = _show_spending_sums(
    'Marital_Status', "Dépenses cumulées par statut marital")

# Total number of children.
total_kids_analysis = _show_spending_sums(
    'TotalKids', "Dépenses cumulées par nombre total d'enfants")

# Number of young children at home.
kidhome_analysis = _show_spending_sums(
    'Kidhome', "Dépenses cumulées par nombre de jeunes enfants")

# Number of teenagers at home.
teenhome_analysis = _show_spending_sums(
    'Teenhome', "Dépenses cumulées par nombre d'adolescents")

# Age group (categorical, so only observed categories are kept).
age_group_analysis = _show_spending_sums(
    'Age_Group', "Dépenses cumulées par groupe d'âge", observed=True)

# Total number of accepted marketing campaigns.
acceptedcmp_analysis = _show_spending_sums(
    'AcceptedCmp_total', "Dépenses cumulées par total de campagnes acceptées")

# Income group (categorical, so only observed categories are kept).
income_group_analysis = _show_spending_sums(
    'Income_Group', "Dépenses cumulées par groupe de revenu", observed=True)

Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
Education,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2n Cycle,40208,5872,28662,9625,6949,9392
Basic,391,600,618,921,654,1233
Graduation,320371,34683,202284,48630,35351,57307
Master,123238,8012,60450,11877,7835,14947
PhD,195830,9600,81379,12886,9764,15479


Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
Marital_Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Absurd,711,169,625,411,61,408
Alone,554,12,79,23,21,81
Divorced,75349,6357,34840,8123,6218,10714
Married,258751,22235,138829,30569,23070,36999
Single,138391,12881,87407,18337,13086,20988
Together,177204,14559,96938,22511,15087,24713
Widow,28434,2548,14575,3957,3004,4371
YOLO,644,6,100,8,6,84


Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
TotalKids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,310146,33260,237163,48733,33789,40703
1,301199,21824,111240,30037,22889,45967
2,59610,3317,21597,4794,3524,10702
3,9083,366,3393,375,351,986


Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
Kidhome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,582604,50791,327850,72507,52301,77402
1,93844,7651,43919,11061,8026,20120
2,3590,325,1624,371,226,836


Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
Teenhome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,352215,38423,262435,56235,38877,52158
1,309002,19433,104470,26461,20840,43863
2,18821,911,6488,1243,836,2337


Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
Age_Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Q1_Age,143847,16356,105310,22783,16731,25810
Q2_Age,152155,13069,80060,18599,14054,21063
Q3_Age,187726,15303,87795,19963,14336,25338
Q4_Age,196310,14039,100228,22594,15432,26147


Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
AcceptedCmp_total,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,353420,37081,205094,52295,37369,61830
1,157468,11524,86686,17059,12071,20475
2,85026,5023,40966,7151,5363,7927
3,41665,2640,19676,4508,2932,4412
4,32991,2099,17349,2449,2237,3122
5,9468,400,3622,477,581,592


Unnamed: 0_level_0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds
Income_Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Q1_Income,12284,3310,12725,4835,3302,9424
Q2_Income,73525,4932,30453,8138,5437,16630
Q3_Income,239071,16725,86925,21581,15463,32102
Q4_Income,355158,33800,243290,49385,36351,40202
