# Credit CARD

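La table credit_card_balance contient l’historique mensuel des cartes de crédit. Dans ce notebook, nous agrégeons les soldes, les montants dus, les limites de crédit et les ratios d’utilisation au niveau SK_ID_PREV (la clé utilisée par tous les groupby et présente dans le fichier exporté). Les agrégations incluent les moyennes, minimums, maximums, sommes et écarts-types, ainsi que des fréquences (comptages et ratios de retard et de statut de contrat). Un flag CC_NO_RECORDS vaut 1 pour les lignes dont tous les agrégats sont manquants, c’est-à-dire sans aucun enregistrement de carte de crédit. Ces indicateurs permettent de mesurer la discipline financière du client dans l’usage de ses cartes.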


In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import warnings
# Silence every RuntimeWarning raised from here on.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Display settings: show all columns, and render floats with thousands
# separators and four decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:,.4f}".format)

# The project root is taken to be two levels above the working directory
# (assumes the notebook runs from notebooks/01_data_preparation).
CWD = Path.cwd()
PROJECT_ROOT = CWD.parent.parent

DATA_RAW = PROJECT_ROOT.joinpath("data", "raw")
DATA_CLEAN = PROJECT_ROOT.joinpath("data", "clean")
DATA_PROCESSED = PROJECT_ROOT.joinpath("data", "processed")

# Create the two output folders if they do not exist yet.
for out_dir in (DATA_CLEAN, DATA_PROCESSED):
    out_dir.mkdir(parents=True, exist_ok=True)

print("DATA_RAW       =", DATA_RAW)
print("DATA_PROCESSED =", DATA_PROCESSED)

# ============================
# LOAD RAW DATA
# ============================
# Read the raw credit_card_balance table and report its shape.
CC_PATH = DATA_RAW / "credit_card_balance.csv"
cc = pd.read_csv(CC_PATH)
print("credit_card_balance:", cc.shape)

# Work on a copy so the raw frame `cc` is not modified by the cleaning steps.
cc_clean = cc.copy()


DATA_RAW       = c:\Users\yoann\Documents\open classrooms\projet 6\pret a depenser v2\data\raw
DATA_PROCESSED = c:\Users\yoann\Documents\open classrooms\projet 6\pret a depenser v2\data\processed
credit_card_balance: (3840312, 23)


## Typage

In [2]:
# Cast the two identifier columns and the month offset to integers.
# NOTE: astype(int) fails on missing values, so these columns are expected
# to be complete.
for id_col in ("SK_ID_PREV", "SK_ID_CURR", "MONTHS_BALANCE"):
    cc_clean[id_col] = cc_clean[id_col].astype(int)

# Numeric measure columns, listed by family. The order of `num_cols` is
# significant: it drives the column order of the flag and aggregate steps.
_amount_cols = [
    "AMT_BALANCE",
    "AMT_CREDIT_LIMIT_ACTUAL",
    "AMT_DRAWINGS_ATM_CURRENT",
    "AMT_DRAWINGS_CURRENT",
    "AMT_DRAWINGS_OTHER_CURRENT",
    "AMT_DRAWINGS_POS_CURRENT",
    "AMT_INST_MIN_REGULARITY",
    "AMT_PAYMENT_CURRENT",
    "AMT_PAYMENT_TOTAL_CURRENT",
    "AMT_RECEIVABLE_PRINCIPAL",
    "AMT_RECIVABLE",
    "AMT_TOTAL_RECEIVABLE",
]
_count_cols = [
    "CNT_DRAWINGS_ATM_CURRENT",
    "CNT_DRAWINGS_CURRENT",
    "CNT_DRAWINGS_OTHER_CURRENT",
    "CNT_DRAWINGS_POS_CURRENT",
    "CNT_INSTALMENT_MATURE_CUM",
]
_dpd_cols = ["SK_DPD", "SK_DPD_DEF"]

num_cols = [*_amount_cols, *_count_cols, *_dpd_cols]

# Force every measure to a numeric dtype; values that cannot be parsed
# become NaN instead of raising.
for measure in num_cols:
    cc_clean[measure] = pd.to_numeric(cc_clean[measure], errors="coerce")

## Nettoyage

In [3]:
# Normalise the contract status: convert to the pandas string dtype, then
# drop leading/trailing whitespace.
_status_as_string = cc_clean["NAME_CONTRACT_STATUS"].astype("string")
cc_clean["NAME_CONTRACT_STATUS"] = _status_as_string.str.strip()

## FLAG NA

In [4]:
# One 0/1 indicator per numeric measure, set to 1 on rows where the measure
# is missing. The mask is computed once for all measures.
_na_mask = cc_clean[num_cols].isna()
for measure in num_cols:
    cc_clean[f"CC_{measure}_NA"] = _na_mask[measure].astype(int)

# Same indicator for the contract status.
_status_missing = cc_clean["NAME_CONTRACT_STATUS"].isna()
cc_clean["CC_STATUS_NA"] = _status_missing.astype(int)

## Features comportementales

In [5]:

# Credit utilisation: balance divided by the credit limit. A limit of 0 is
# replaced by NaN first, so the ratio is missing instead of a division by
# zero.
_limit_nonzero = cc_clean["AMT_CREDIT_LIMIT_ACTUAL"].replace(0, np.nan)
cc_clean["CC_UTILIZATION"] = cc_clean["AMT_BALANCE"].div(_limit_nonzero)

# Total drawings: ATM + POS + other, a missing component counting as 0.
_atm, _pos, _other = (
    cc_clean[part].fillna(0)
    for part in (
        "AMT_DRAWINGS_ATM_CURRENT",
        "AMT_DRAWINGS_POS_CURRENT",
        "AMT_DRAWINGS_OTHER_CURRENT",
    )
)
cc_clean["CC_DRAWINGS_TOTAL"] = _atm + _pos + _other

# Lateness flags: 1 when the corresponding DPD column is strictly positive
# (a missing DPD compares as not greater than 0, hence 0).
cc_clean["CC_LATE"] = cc_clean["SK_DPD"].gt(0).astype(int)
cc_clean["CC_SEVERE_LATE"] = cc_clean["SK_DPD_DEF"].gt(0).astype(int)

cc_clean.shape

(3840312, 47)

## Agrégations numériques

In [6]:

# Summary statistics, per SK_ID_PREV, of every numeric measure plus the two
# derived features.
num_agg_cols = [*num_cols, "CC_UTILIZATION", "CC_DRAWINGS_TOTAL"]
agg_funcs = ["mean", "min", "max", "sum", "std"]

_by_prev = cc_clean.groupby("SK_ID_PREV")
cc_num = _by_prev[num_agg_cols].agg(agg_funcs)

# Flatten the (column, statistic) MultiIndex into CC_<COLUMN>_<STAT>.
# NOTE(review): the two derived features already start with "CC_", so their
# aggregates are named "CC_CC_..."; left unchanged because the exported
# column names depend on it.
cc_num.columns = [
    f"CC_{source.upper()}_{stat.upper()}" for source, stat in cc_num.columns
]

## Agrégations count

In [7]:
# Per SK_ID_PREV: number of rows on record and number of rows flagged as
# late / severely late.
_count_spec = {
    "CC_COUNT_MONTHS": pd.NamedAgg(column="MONTHS_BALANCE", aggfunc="count"),
    "CC_COUNT_LATE": pd.NamedAgg(column="CC_LATE", aggfunc="sum"),
    "CC_COUNT_SEVERE_LATE": pd.NamedAgg(column="CC_SEVERE_LATE", aggfunc="sum"),
}
cc_counts = cc_clean.groupby("SK_ID_PREV").agg(**_count_spec)


## Ratios

In [8]:
# Share of recorded rows flagged CC_LATE / CC_SEVERE_LATE, per SK_ID_PREV.
_months_on_record = cc_counts["CC_COUNT_MONTHS"]
cc_ratios = pd.DataFrame(
    {
        "CC_RATIO_LATE": cc_counts["CC_COUNT_LATE"].div(_months_on_record),
        "CC_RATIO_SEVERE_LATE": cc_counts["CC_COUNT_SEVERE_LATE"].div(
            _months_on_record
        ),
    },
    index=cc_counts.index,
)

# Any undefined ratio (NaN) is reported as 0.
cc_ratios = cc_ratios.fillna(0)


## Agrégations catégorielles

In [9]:
# One indicator column per contract status, with SK_ID_PREV attached so the
# indicators can be summed per id.
cc_status_dummies = pd.get_dummies(
    cc_clean["NAME_CONTRACT_STATUS"], prefix="CC_STATUS"
).assign(SK_ID_PREV=cc_clean["SK_ID_PREV"])

# Number of rows in each status, per SK_ID_PREV.
cc_status = cc_status_dummies.groupby("SK_ID_PREV").sum()

# Status share: rows in the status divided by the rows on record.
# The column list is snapshotted up front because the loop adds columns.
_rows_on_record = cc_counts["CC_COUNT_MONTHS"]
for status_col in list(cc_status.columns):
    cc_status[f"{status_col}_RATIO"] = cc_status[status_col].div(_rows_on_record)

## Agrégations temporelles

In [10]:
# Most recent and oldest MONTHS_BALANCE value per SK_ID_PREV.
_months_by_prev = cc_clean.groupby("SK_ID_PREV")["MONTHS_BALANCE"]
cc_time = _months_by_prev.agg(CC_LAST_MONTH="max", CC_FIRST_MONTH="min")

# Length of the observed history: last month minus first month.
cc_time["CC_HISTORY_SPAN"] = cc_time["CC_LAST_MONTH"].sub(
    cc_time["CC_FIRST_MONTH"]
)


## Fusion

In [11]:
# Left-join each aggregate family onto the numeric aggregates, in order.
# Every part is indexed by SK_ID_PREV.
cc_prev_agg = cc_num
for _part in (cc_counts, cc_ratios, cc_status, cc_time):
    cc_prev_agg = cc_prev_agg.join(_part)

## FLAG

In [12]:
# CC_NO_RECORDS is 1 when every aggregate of the row is missing, else 0.
# NOTE(review): each row here comes from a group of cc_clean, so the count
# columns are never missing and the flag is expected to be 0 throughout; it
# presumably only becomes informative after a left-join onto a table holding
# more ids -- confirm downstream.
_has_any_value = cc_prev_agg.notna().any(axis=1)
cc_prev_agg["CC_NO_RECORDS"] = (~_has_any_value).astype(int)

# Turn the SK_ID_PREV index back into a regular column.
cc_prev_agg = cc_prev_agg.reset_index()

print(cc_prev_agg.shape)
cc_prev_agg.head()



(104307, 129)


Unnamed: 0,SK_ID_PREV,CC_AMT_BALANCE_MEAN,CC_AMT_BALANCE_MIN,CC_AMT_BALANCE_MAX,CC_AMT_BALANCE_SUM,CC_AMT_BALANCE_STD,CC_AMT_CREDIT_LIMIT_ACTUAL_MEAN,CC_AMT_CREDIT_LIMIT_ACTUAL_MIN,CC_AMT_CREDIT_LIMIT_ACTUAL_MAX,CC_AMT_CREDIT_LIMIT_ACTUAL_SUM,CC_AMT_CREDIT_LIMIT_ACTUAL_STD,CC_AMT_DRAWINGS_ATM_CURRENT_MEAN,CC_AMT_DRAWINGS_ATM_CURRENT_MIN,CC_AMT_DRAWINGS_ATM_CURRENT_MAX,CC_AMT_DRAWINGS_ATM_CURRENT_SUM,CC_AMT_DRAWINGS_ATM_CURRENT_STD,CC_AMT_DRAWINGS_CURRENT_MEAN,CC_AMT_DRAWINGS_CURRENT_MIN,CC_AMT_DRAWINGS_CURRENT_MAX,CC_AMT_DRAWINGS_CURRENT_SUM,CC_AMT_DRAWINGS_CURRENT_STD,CC_AMT_DRAWINGS_OTHER_CURRENT_MEAN,CC_AMT_DRAWINGS_OTHER_CURRENT_MIN,CC_AMT_DRAWINGS_OTHER_CURRENT_MAX,CC_AMT_DRAWINGS_OTHER_CURRENT_SUM,CC_AMT_DRAWINGS_OTHER_CURRENT_STD,CC_AMT_DRAWINGS_POS_CURRENT_MEAN,CC_AMT_DRAWINGS_POS_CURRENT_MIN,CC_AMT_DRAWINGS_POS_CURRENT_MAX,CC_AMT_DRAWINGS_POS_CURRENT_SUM,CC_AMT_DRAWINGS_POS_CURRENT_STD,CC_AMT_INST_MIN_REGULARITY_MEAN,CC_AMT_INST_MIN_REGULARITY_MIN,CC_AMT_INST_MIN_REGULARITY_MAX,CC_AMT_INST_MIN_REGULARITY_SUM,CC_AMT_INST_MIN_REGULARITY_STD,CC_AMT_PAYMENT_CURRENT_MEAN,CC_AMT_PAYMENT_CURRENT_MIN,CC_AMT_PAYMENT_CURRENT_MAX,CC_AMT_PAYMENT_CURRENT_SUM,CC_AMT_PAYMENT_CURRENT_STD,CC_AMT_PAYMENT_TOTAL_CURRENT_MEAN,CC_AMT_PAYMENT_TOTAL_CURRENT_MIN,CC_AMT_PAYMENT_TOTAL_CURRENT_MAX,CC_AMT_PAYMENT_TOTAL_CURRENT_SUM,CC_AMT_PAYMENT_TOTAL_CURRENT_STD,CC_AMT_RECEIVABLE_PRINCIPAL_MEAN,CC_AMT_RECEIVABLE_PRINCIPAL_MIN,CC_AMT_RECEIVABLE_PRINCIPAL_MAX,CC_AMT_RECEIVABLE_PRINCIPAL_SUM,CC_AMT_RECEIVABLE_PRINCIPAL_STD,CC_AMT_RECIVABLE_MEAN,CC_AMT_RECIVABLE_MIN,CC_AMT_RECIVABLE_MAX,CC_AMT_RECIVABLE_SUM,CC_AMT_RECIVABLE_STD,CC_AMT_TOTAL_RECEIVABLE_MEAN,CC_AMT_TOTAL_RECEIVABLE_MIN,CC_AMT_TOTAL_RECEIVABLE_MAX,CC_AMT_TOTAL_RECEIVABLE_SUM,CC_AMT_TOTAL_RECEIVABLE_STD,CC_CNT_DRAWINGS_ATM_CURRENT_MEAN,CC_CNT_DRAWINGS_ATM_CURRENT_MIN,CC_CNT_DRAWINGS_ATM_CURRENT_MAX,CC_CNT_DRAWINGS_ATM_CURRENT_SUM,CC_CNT_DRAWINGS_ATM_CURRENT_STD,CC_CNT_DRAWINGS_CURRENT_MEAN,CC_CNT_DRAWINGS_CURRENT_MIN,CC_CNT_D
RAWINGS_CURRENT_MAX,CC_CNT_DRAWINGS_CURRENT_SUM,CC_CNT_DRAWINGS_CURRENT_STD,CC_CNT_DRAWINGS_OTHER_CURRENT_MEAN,CC_CNT_DRAWINGS_OTHER_CURRENT_MIN,CC_CNT_DRAWINGS_OTHER_CURRENT_MAX,CC_CNT_DRAWINGS_OTHER_CURRENT_SUM,CC_CNT_DRAWINGS_OTHER_CURRENT_STD,CC_CNT_DRAWINGS_POS_CURRENT_MEAN,CC_CNT_DRAWINGS_POS_CURRENT_MIN,CC_CNT_DRAWINGS_POS_CURRENT_MAX,CC_CNT_DRAWINGS_POS_CURRENT_SUM,CC_CNT_DRAWINGS_POS_CURRENT_STD,CC_CNT_INSTALMENT_MATURE_CUM_MEAN,CC_CNT_INSTALMENT_MATURE_CUM_MIN,CC_CNT_INSTALMENT_MATURE_CUM_MAX,CC_CNT_INSTALMENT_MATURE_CUM_SUM,CC_CNT_INSTALMENT_MATURE_CUM_STD,CC_SK_DPD_MEAN,CC_SK_DPD_MIN,CC_SK_DPD_MAX,CC_SK_DPD_SUM,CC_SK_DPD_STD,CC_SK_DPD_DEF_MEAN,CC_SK_DPD_DEF_MIN,CC_SK_DPD_DEF_MAX,CC_SK_DPD_DEF_SUM,CC_SK_DPD_DEF_STD,CC_CC_UTILIZATION_MEAN,CC_CC_UTILIZATION_MIN,CC_CC_UTILIZATION_MAX,CC_CC_UTILIZATION_SUM,CC_CC_UTILIZATION_STD,CC_CC_DRAWINGS_TOTAL_MEAN,CC_CC_DRAWINGS_TOTAL_MIN,CC_CC_DRAWINGS_TOTAL_MAX,CC_CC_DRAWINGS_TOTAL_SUM,CC_CC_DRAWINGS_TOTAL_STD,CC_COUNT_MONTHS,CC_COUNT_LATE,CC_COUNT_SEVERE_LATE,CC_RATIO_LATE,CC_RATIO_SEVERE_LATE,CC_STATUS_Active,CC_STATUS_Approved,CC_STATUS_Completed,CC_STATUS_Demand,CC_STATUS_Refused,CC_STATUS_Sent proposal,CC_STATUS_Signed,CC_STATUS_Active_RATIO,CC_STATUS_Approved_RATIO,CC_STATUS_Completed_RATIO,CC_STATUS_Demand_RATIO,CC_STATUS_Refused_RATIO,CC_STATUS_Sent proposal_RATIO,CC_STATUS_Signed_RATIO,CC_LAST_MONTH,CC_FIRST_MONTH,CC_HISTORY_SPAN,CC_NO_RECORDS
0,1000018,74946.285,38879.145,136695.42,374731.425,46691.9962,81000.0,45000,135000,405000,49295.0302,5400.0,0.0,13500.0,27000.0,7394.2545,29478.996,2032.56,69156.945,147394.98,29890.0129,0.0,0.0,0.0,0.0,0.0,24078.996,2032.56,55656.945,120394.98,23123.7342,2594.088,0.0,6206.67,12970.44,2243.0974,5541.75,3190.635,9000.0,27708.75,2723.504,5541.75,3190.635,9000.0,27708.75,2723.504,72298.197,37542.645,132903.0,361490.985,44914.3997,73602.585,37542.645,136024.92,368012.925,45779.0149,73602.585,37542.645,136024.92,368012.925,45779.0149,1.2,0.0,3.0,6.0,1.6432,8.8,2,15,44,6.3797,0.0,0.0,0.0,0.0,0.0,7.6,2.0,12.0,38.0,5.1284,2.0,0.0,4.0,10.0,1.5811,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.9231,0.8434,1.0126,4.6154,0.0741,29478.996,2032.56,69156.945,147394.98,29890.0129,5,0,0,0.0,0.0,5,0,0,0,0,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-2,-6,4,0
1,1000030,55991.0644,0.0,103027.275,447928.515,40839.8558,81562.5,45000,135000,652500,39774.7564,642.8571,0.0,4500.0,4500.0,1700.8401,17257.4381,0.0,46660.5,138059.505,16467.9786,0.0,0.0,0.0,0.0,0.0,19079.9293,1849.05,46660.5,133559.505,15667.9391,2078.2238,0.0,5348.52,16625.79,2061.1449,6188.6314,2371.815,16067.25,43320.42,4628.1646,2657.9475,0.0,16067.25,21263.58,5491.1131,55474.4531,0.0,101866.725,443795.625,40486.0463,55935.3769,0.0,103027.275,447483.015,40903.0796,55935.3769,0.0,103027.275,447483.015,40903.0796,0.1429,0.0,1.0,1.0,0.378,5.125,0,14,41,4.4219,0.0,0.0,0.0,0.0,0.0,5.7143,2.0,13.0,40.0,3.9036,1.875,0.0,5.0,15.0,1.9594,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.6305,0.0,0.9158,5.0439,0.3334,17257.4381,0.0,46660.5,138059.505,16467.9786,8,0,0,0.0,0.0,8,0,0,0,0,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1,-8,7,0
2,1000031,52394.4394,0.0,154945.935,838311.03,64734.5552,149625.0,45000,225000,2394000,72140.4879,12115.3846,0.0,90000.0,157500.0,26961.511,28959.615,0.0,155340.0,463353.84,49029.8962,0.0,0.0,0.0,0.0,0.0,23527.2185,0.0,155340.0,305853.84,51109.8649,2675.3006,0.0,7780.815,42804.81,3285.5984,29543.2575,394.065,160606.8,354519.09,55358.7449,22157.4431,0.0,160606.8,354519.09,49213.0804,51402.8784,0.0,154945.935,822446.055,63635.046,52099.9706,0.0,154945.935,833599.53,64519.0757,52099.9706,0.0,154945.935,833599.53,64519.0757,0.3077,0.0,2.0,4.0,0.6304,1.3125,0,4,21,1.3022,0.0,0.0,0.0,0.0,0.0,1.3077,0.0,4.0,17.0,1.3156,3.6875,0.0,10.0,59.0,3.4199,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.3274,0.0,0.9709,5.2378,0.3997,28959.615,0.0,155340.0,463353.84,49029.8962,16,0,0,0.0,0.0,16,0,0,0,0,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1,-16,15,0
3,1000035,0.0,0.0,0.0,0.0,0.0,225000.0,225000,225000,1125000,0.0,,,,0.0,,0.0,0.0,0.0,0.0,0.0,,,,0.0,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0,0,0,0,0.0,,,,0.0,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,0,0,0.0,0.0,5,0,0,0,0,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-2,-6,4,0
4,1000077,0.0,0.0,0.0,0.0,0.0,94090.9091,45000,135000,1035000,47000.9671,,,,0.0,,0.0,0.0,0.0,0.0,0.0,,,,0.0,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0,0,0,0,0.0,,,,0.0,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11,0,0,0.0,0.0,11,0,0,0,0,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-2,-12,10,0


## Export

In [13]:
# Write the aggregated table to the processed-data folder; the index is not
# written.
CC_OUT_PATH = DATA_PROCESSED.joinpath("credit_card_balance_final.csv")
cc_prev_agg.to_csv(path_or_buf=CC_OUT_PATH, index=False)

print("Export terminé :", CC_OUT_PATH)

Export terminé : c:\Users\yoann\Documents\open classrooms\projet 6\pret a depenser v2\data\processed\credit_card_balance_final.csv
