In [64]:
import pandas as pd
import numpy as np
import plotly.express as px
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

from utils import (
    default,
    defaults_table,
    default_and_ctx,
    risk_tables,
    select_and_sort_default_variables,
    risk_tables,
    compute_generation,
    compute_horizon,
    plot_gen_horizon,
    risk_curve,
    plot_horizon_activ,
    plot_evolution_risk_rates,
    get_horizon,
    plot_risk_by_cut,
    cramers_v
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [65]:
# Load the preprocessed dataset. low_memory=False makes pandas infer each column's dtype
# from the whole file rather than chunk by chunk, which avoids mixed-type columns.
df = pd.read_csv("../data/preprocessed_data.csv",low_memory=False)

In [66]:
# Parse the activation and request generation columns into datetimes.
for date_col in ("Gen_Active", "Gen_Demande"):
    df[date_col] = pd.to_datetime(df[date_col])

## Comparaison Demandes / Activation

In [68]:
# Number of requests per (generation, entity) pair, ordered by generation.
gen_demande = (
    df[['Gen_Demande', 'Entite']]
    .value_counts()
    .reset_index()
    .rename(columns={'count': 'Nbr_D'})
    .sort_values(by='Gen_Demande', ascending=True)
    .reset_index(drop=True)
)

# Keep plain calendar dates (no time component) for the x-axis.
gen_demande['Gen_Demande'] = pd.to_datetime(gen_demande['Gen_Demande']).dt.date

# One frame per entity, used by the per-entity charts.
gen_dmd_A = gen_demande.loc[gen_demande['Entite'].eq('A')]
gen_dmd_B = gen_demande.loc[gen_demande['Entite'].eq('B')]

In [69]:
# Number of activations per (generation, entity) pair, ordered by generation.
gen_activ = (
    df[['Gen_Active', 'Entite']]
    .value_counts()
    .reset_index()
    .rename(columns={'count': 'Nbr_A'})
    .sort_values('Gen_Active', ascending=True)
    .reset_index(drop=True)
)

# Keep plain calendar dates (no time component) for the x-axis.
gen_activ['Gen_Active'] = pd.to_datetime(gen_activ['Gen_Active']).dt.date

# One frame per entity, used by the per-entity charts.
gen_activ_A = gen_activ.loc[gen_activ['Entite'].eq('A')]
gen_activ_B = gen_activ.loc[gen_activ['Entite'].eq('B')]

In [70]:
import plotly.graph_objects as go

# Pooled volumes (all entities). NOTE: this rebinds gen_demande / gen_activ; the
# per-entity frames (gen_dmd_*, gen_activ_*) were derived before and are unaffected.
gen_demande = (
    df['Gen_Demande']
    .value_counts()
    .reset_index()
    .sort_values(by='Gen_Demande', ascending=True)
    .reset_index(drop=True)
)
gen_activ = (
    df['Gen_Active']
    .value_counts()
    .reset_index()
    .sort_values(by='Gen_Active', ascending=True)
    .reset_index(drop=True)
)

fig = go.Figure(
    data=[
        go.Scatter(
            x=gen_demande["Gen_Demande"],
            y=gen_demande["count"],
            mode='lines+markers',
            name='Demandes',
        ),
        go.Scatter(
            x=gen_activ["Gen_Active"],
            y=gen_activ["count"],
            mode='lines+markers',
            name='Activations',
        ),
    ]
)
fig.update_layout(
    title="Volumes de la demande et de la production par génération. (Global)",
    bargap=0.15,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
# The update_* calls return the figure, so the last one also renders it.
fig.update_yaxes(title='Volume').update_xaxes(title='Génération')

### Entite A

In [71]:
import plotly.graph_objects as go

# Requests vs. activations per generation, entity A only.
fig = go.Figure(
    data=[
        go.Scatter(
            x=gen_dmd_A["Gen_Demande"],
            y=gen_dmd_A["Nbr_D"],
            mode='lines+markers',
            name='Demandes',
        ),
        go.Scatter(
            x=gen_activ_A["Gen_Active"],
            y=gen_activ_A["Nbr_A"],
            mode='lines+markers',
            name='Activations',
        ),
    ]
)
fig.update_layout(
    title="Volumes de la demande et de la production par génération. (ENTITE A)",
    bargap=0.15,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
# The update_* calls return the figure, so the last one also renders it.
fig.update_yaxes(title='Volume').update_xaxes(title='Génération')

### Entite B

In [72]:
import plotly.graph_objects as go

# Requests vs. activations per generation, entity B only.
fig = go.Figure(
    data=[
        go.Scatter(
            x=gen_dmd_B["Gen_Demande"],
            y=gen_dmd_B["Nbr_D"],
            mode='lines+markers',
            name='Demandes',
        ),
        go.Scatter(
            x=gen_activ_B["Gen_Active"],
            y=gen_activ_B["Nbr_A"],
            mode='lines+markers',
            name='Activations',
        ),
    ]
)
fig.update_layout(
    title="Volumes de la demande et de la production par génération. (ENTITE B)",
    bargap=0.15,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
# The update_* calls return the figure, so the last one also renders it.
fig.update_yaxes(title='Volume').update_xaxes(title='Génération')

## Refusal Rate

In [73]:
import plotly.graph_objects as go

# Share of each final decision (REF / SS / ACP) per request generation, all entities pooled.
DECISIONS = ['REF', 'SS', 'ACP']  # also the trace order of the chart

# Count rows per (generation, decision). Rows with a missing generation, decision or
# entity are dropped first, which is what DataFrame.value_counts() did in the previous
# version of this cell. unstack/reindex with fill_value=0 guarantees the three decision
# columns exist and hold 0 (not NaN) for generations where a decision never occurs;
# otherwise the denominator becomes NaN and all three rates vanish for that generation.
rate = (
    df[['Gen_Demande', 'Decision_Finale', 'Entite']]
    .dropna()
    .groupby(['Gen_Demande', 'Decision_Finale'])
    .size()
    .unstack('Decision_Finale', fill_value=0)
    .reindex(columns=DECISIONS, fill_value=0)
    .reset_index()
)

# Denominator is restricted to the three plotted decisions; generations with no such
# decision get NaN rather than a division by zero.
total = rate[DECISIONS].sum(axis=1)
shares = rate[DECISIONS].div(total.where(total > 0), axis=0)

fig = go.Figure()
for decision in DECISIONS:
    fig.add_trace(go.Scatter(x=rate["Gen_Demande"], y=shares[decision],
                             mode='lines+markers',
                             name=decision))
# NOTE(review): the title and y-axis label say "refusal rate" although the chart shows
# the share of all three decisions; labels kept as-is, consider renaming.
fig.update_layout(
    title="Taux de refus par générations (global).",
    bargap=0.15,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)
fig.update_yaxes(title='Taux de refus')
fig.update_xaxes(title='Génération')

In [74]:
# Taux de refus par entité (série temporelle)
rate_entite = (
  df.groupby(['Gen_Demande', 'Entite', 'Decision_Finale'])
    .size()
    .reset_index(name='Nbr')
  .pivot_table(index=['Gen_Demande', 'Entite'],
         columns='Decision_Finale',
         values='Nbr',
         fill_value=0)
  .reset_index()
)

total = rate_entite[['ACP', 'SS', 'REF']].sum(axis=1)
rate_entite['Taux_Refus'] = np.where(total > 0, rate_entite['REF'] / total, np.nan)
rate_entite['Gen_Demande'] = pd.to_datetime(rate_entite['Gen_Demande'])
rate_entite = rate_entite.sort_values('Gen_Demande')

# Pour éventuel usage ultérieur
rate_A = rate_entite[rate_entite['Entite'] == 'A']
rate_B = rate_entite[rate_entite['Entite'] == 'B']

# Facettes en lignes (dessus/dessous)
fig = px.line(
    rate_entite,
    x='Gen_Demande',
    y='Taux_Refus',
    facet_row='Entite',
    category_orders={'Entite': ['A', 'B']},
    markers=True,
    title="Taux de refus clients par génération et par entité"
)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_yaxes(tickformat='.0%', title='Taux de refus', matches='y')
fig.update_xaxes(title='Génération de la demande', matches='x')
fig.update_layout(height=600)
fig.show()


## Activation Rate

### Par génération

In [87]:
# Whole portfolio, keyed on the request generation and the Horizon_Activ column.
# x_axis="g" presumably puts the generation on the x-axis (section "Par génération") — confirm in utils.
plot_gen_horizon(df, gen="Gen_Demande", horizon="Horizon_Activ", x_axis="g")

#### Entite A

In [89]:
# Same call restricted to the rows of entity A.
plot_gen_horizon(df[df["Entite"]=="A"], gen="Gen_Demande", horizon="Horizon_Activ", x_axis="g")

#### Entite B

In [14]:
# Same call restricted to the rows of entity B.
plot_gen_horizon(df[df["Entite"]=="B"], gen="Gen_Demande", horizon="Horizon_Activ", x_axis="g")

### Par horizon

In [15]:
# Whole portfolio again, this time with x_axis="h" and period="M".
# Presumably the horizon goes on the x-axis with a monthly grouping (section "Par horizon") — confirm in utils.
plot_gen_horizon(df, gen="Gen_Demande", horizon="Horizon_Activ", x_axis="h", period="M")

On remarque que les générations 2013 et 2014 ont un taux d'activation des cartes bien plus faible que celles de 2015 et 2016 ; nous pourrions donc envisager d'exclure ces deux années de la période de modélisation.

#### Entite A

In [16]:
# Horizon view restricted to the rows of entity A.
plot_gen_horizon(df[df["Entite"]=="A"], gen="Gen_Demande", horizon="Horizon_Activ", x_axis="h", period="M")

#### Entite B

In [17]:
# Horizon view restricted to the rows of entity B.
plot_gen_horizon(df[df["Entite"]=="B"], gen="Gen_Demande", horizon="Horizon_Activ", x_axis="h", period="M")

### Cumul du taux d'activation

In [18]:
# Cumulative activation view on Horizon_Activ.
# t=0.75 is presumably a threshold on the cumulative activation rate — confirm in utils.plot_horizon_activ.
plot_horizon_activ(df, 'Horizon_Activ', t=0.75)

## Risk analysis and risk criterion selection

In [19]:
# Build the default-flag table on the full frame (before any exclusion is applied).
# NOTE(review): the positional 24 and 6 are not explained here; the resulting columns
# look like H<horizon>_CTX_R<level>, so they are presumably horizon / arrears bounds — confirm in utils.
data_default  = defaults_table(df, 24, 6, ctx=True, add_to_data=True)
# Risk tables keyed on the request generation; indexed below by criterion name (e.g. "CTX_R3").
data_risk = risk_tables(data_default, "Gen_Demande", rates = True)

In [20]:
# Class counts of the candidate default criteria and of the BP / Defaut_B flags.
for criterion in ("H12_CTX_R3", "H12_R3", "BP", "Defaut_B"):
    print(data_default[criterion].value_counts(normalize=False))

H12_CTX_R3
0.0    39096
1.0     1251
Name: count, dtype: int64
H12_R3
0.0    39128
1.0     1219
Name: count, dtype: int64
BP
1    39096
0     1251
Name: count, dtype: int64
Defaut_B
0    39096
1     1251
Name: count, dtype: int64


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Bad payers per request generation, on the current data_default.
# A bad payer is a row whose modelling criterion H12_CTX_R3 equals 1; rows with a
# missing criterion still count in Total (groupby "size"), as before.
risk_df = (
    data_default
    .groupby("Gen_Demande")
    .agg(
        Volume_Mauvais_Payeurs=("H12_CTX_R3", lambda s: (s == 1).sum()),
        Total=("H12_CTX_R3", "size")
    )
    .reset_index()
    .sort_values("Gen_Demande")
)
risk_df["Taux_Risque"] = risk_df["Volume_Mauvais_Payeurs"] / risk_df["Total"]

# Portfolio-wide average risk and the generation span used to draw it as a flat line.
_avg_risk = (data_default["H12_CTX_R3"] == 1).mean()
_min_gen = data_default["Gen_Demande"].min()
_max_gen = data_default["Gen_Demande"].max()

# The average line is added explicitly here. The previous version shadowed
# plotly's make_subplots with a wrapper that injected this trace into every
# figure created afterwards and failed on figures without a secondary y axis.
fig_bad_payers = make_subplots(specs=[[{"secondary_y": True}]])

# Added first so the trace / legend order is unchanged: average, volume, rate.
fig_bad_payers.add_trace(
    go.Scatter(
        x=[_min_gen, _max_gen],
        y=[_avg_risk, _avg_risk],
        mode="lines",
        line=dict(color="green", width=2, dash="dash"),
        name="Taux risque moyen"
    ),
    secondary_y=True
)
fig_bad_payers.add_trace(
    go.Bar(
        x=risk_df["Gen_Demande"],
        y=risk_df["Volume_Mauvais_Payeurs"],
        name="Volume mauvais payeurs",
        marker_color="#636efa"
    ),
    secondary_y=False
)
fig_bad_payers.add_trace(
    go.Scatter(
        x=risk_df["Gen_Demande"],
        y=risk_df["Taux_Risque"],
        name="Taux de risque",
        mode="lines+markers",
        line=dict(color="#EF553B")
    ),
    secondary_y=True
)

fig_bad_payers.update_layout(
    title="Volume de mauvais payeurs et taux de risque par Gen_Demande",
    bargap=0.15,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)
fig_bad_payers.update_xaxes(title="Gen_Demande")
fig_bad_payers.update_yaxes(title_text="Nombre de mauvais payeurs", secondary_y=False)
fig_bad_payers.update_yaxes(title_text="Taux de risque", secondary_y=True)

fig_bad_payers

In [22]:
# Risk-rate evolution keyed on the activation generation (Gen_Active), with ctx=True
# as in the defaults_table call that built data_default.
plot_evolution_risk_rates(data_default, date_column="Gen_Active", ctx=True)

In [23]:
# Stability curve for the CTX_R1 criterion table.
risk_curve(data_risk["CTX_R1"], curve_type="stability")

In [24]:
# Stability curve for the CTX_R2 criterion table.
risk_curve(data_risk["CTX_R2"], curve_type="stability")

In [45]:
# Stability curve for the CTX_R3 criterion table.
# NOTE(review): this cell's execution count (45) is later than the cell that rebuilds
# data_risk after the exclusions (36), so its saved output was probably produced on the
# filtered population, unlike the neighbouring R1/R2/R4 curves. Re-run top to bottom to confirm.
risk_curve(data_risk["CTX_R3"], curve_type="stability")

In [26]:
# Stability curve for the CTX_R4 criterion table.
risk_curve(data_risk["CTX_R4"], curve_type="stability")

## Evolution du taux de risque par modalité

### Ressources

In [27]:
# Risk by Ressource band against the BP flag; bins=5 presumably cuts Ressource into 5 classes — confirm in utils.
# NOTE(review): BP is read from df here, not from data_default — verify that df carries the same flag.
plot_risk_by_cut(df["Ressource"], df["BP"], bins=5)













**On remarque que les populations avec un salaire inférieur à 1700 sont à haut risque (au-dessus du seuil de 0.03)**

### Age

In [28]:
# Risk by age band of the holder (Age_Tit) against the BP flag; bins=7 presumably gives 7 age classes — confirm in utils.
plot_risk_by_cut(df["Age_Tit"], df["BP"], bins = 7)













**On remarque que les populations jeunes sont plus à risque**

In [29]:
# Risk by CSP_Tit against the BP flag. No bins argument is passed, so the helper's
# default applies — presumably CSP_Tit is categorical and used as-is; confirm in utils.
plot_risk_by_cut(df["CSP_Tit"], df["BP"])

## Correlation Business Criterion

Mesurons le V de Cramér entre le critère de modélisation et le critère business (le critère qui permet d'observer un nombre élevé de crédits en défaut tout en restant proche de la perte économique, c'est-à-dire R6 ou CTX).

Ici, le critère de modélisation est `H12_CTX_R3` et le critère business est, par exemple, `H15_CTX_R6`.

In [30]:
# Class counts of the modelling criterion H12_CTX_R3 (1.0 = default flag raised).
data_default["H12_CTX_R3"].value_counts()

H12_CTX_R3
0.0    39096
1.0     1251
Name: count, dtype: int64

In [None]:
import re
import warnings

import jinja2  # noqa: F401 -- pandas' Styler (final display) requires jinja2; fail early if it is missing

# Compare the modelling criterion with every other H<horizon>_CTX_R<level> flag:
# overlap of the "1" populations, conditional rates, Jaccard index and Cramér's V.
base_col = "H12_CTX_R3"

pattern = re.compile(r"^H\d{1,2}_CTX_R\d{1,2}$")
risk_cols = [c for c in data_default.columns if pattern.match(c) and c != base_col]

base = data_default[base_col].astype(float)

# Fixed column order; also keeps the sort below valid when no candidate column matches.
result_columns = [
    "Criterion", "N_Effective", "N_Base_1", "N_Var_1", "Rate_Var", "Rate_Base",
    "Overlap_1_1", "P(Var=1|Base=1)", "P(Base=1|Var=1)", "Jaccard", "Cramers_V",
]


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else np.nan


rows = []
for col in risk_cols:
    s = data_default[col].astype(float)
    # Every statistic of a row uses the same population: rows where both flags are known.
    # Previously the base counts came from all rows, which was inconsistent with the
    # masked overlap whenever a candidate column had missing values.
    mask = base.notna() & s.notna()
    a = base[mask] == 1
    b = s[mask] == 1
    n = int(mask.sum())
    n_base = int(a.sum())
    n_var = int(b.sum())
    overlap = int((a & b).sum())
    try:
        # No fillna needed: the mask already removed missing values.
        cv = cramers_v(base[mask], s[mask])
    except Exception as exc:
        # Best effort: keep the row, but report the failure instead of hiding it.
        warnings.warn(f"cramers_v failed for {col}: {exc!r}")
        cv = np.nan
    rows.append({
        "Criterion": col,
        "N_Effective": n,
        "N_Base_1": n_base,
        "N_Var_1": n_var,
        "Rate_Var": _safe_ratio(n_var, n),
        "Rate_Base": _safe_ratio(n_base, n),
        "Overlap_1_1": overlap,
        "P(Var=1|Base=1)": _safe_ratio(overlap, n_base),
        "P(Base=1|Var=1)": _safe_ratio(overlap, n_var),
        "Jaccard": _safe_ratio(overlap, n_var + n_base - overlap),
        "Cramers_V": cv
    })

comparison_df = (pd.DataFrame(rows, columns=result_columns)
                 .sort_values(["Cramers_V","Jaccard","P(Base=1|Var=1)"], ascending=False)
                 .reset_index(drop=True))

comparison_df.style.format({
    "Rate_Var":"{:.3%}",
    "Rate_Base":"{:.3%}",
    "P(Var=1|Base=1)":"{:.3%}",
    "P(Base=1|Var=1)":"{:.3%}",
    "Jaccard":"{:.3%}",
    "Cramers_V":"{:.4f}"
})

Unnamed: 0,Criterion,N_Effective,N_Base_1,N_Var_1,Rate_Var,Rate_Base,Overlap_1_1,P(Var=1|Base=1),P(Base=1|Var=1),Jaccard,Cramers_V
0,H11_CTX_R3,40347,1251,1202,2.979%,3.101%,1202,96.083%,100.000%,96.083%,0.9792
1,H13_CTX_R3,40347,1251,1319,3.269%,3.101%,1251,100.000%,94.845%,94.845%,0.9726
2,H10_CTX_R3,40347,1251,1149,2.848%,3.101%,1149,91.847%,100.000%,91.847%,0.9567
3,H14_CTX_R3,40347,1251,1365,3.383%,3.101%,1251,100.000%,91.648%,91.648%,0.9555
4,H15_CTX_R3,40347,1251,1404,3.480%,3.101%,1251,100.000%,89.103%,89.103%,0.9417
5,H16_CTX_R3,40347,1251,1452,3.599%,3.101%,1251,100.000%,86.157%,86.157%,0.9254
6,H9_CTX_R3,40347,1251,1062,2.632%,3.101%,1062,84.892%,100.000%,84.892%,0.9187
7,H17_CTX_R3,40347,1251,1485,3.681%,3.101%,1251,100.000%,84.242%,84.242%,0.9147
8,H18_CTX_R3,40347,1251,1522,3.772%,3.101%,1251,100.000%,82.194%,82.194%,0.9031
9,H19_CTX_R3,40347,1251,1556,3.857%,3.101%,1251,100.000%,80.398%,80.398%,0.8928


## Exclusions 

**Exclusions of the non-financed clients**

In [32]:
# Suppression des lignes non financées (Flag_Finance = 0)
# Keep financed applications only (drop rows where Flag_Finance is not 1).
rows_before = len(df)
df = df.loc[df["Flag_Finance"].eq(1)].copy()
print(f"Lignes avant: {rows_before} | Lignes après suppression: {len(df)}")

Lignes avant: 40347 | Lignes après suppression: 38168


**Exclusion of fraudsters**

In [33]:
# Suppression des lignes frauduleuses (Fraudeur = 1)
# Drop rows flagged as fraud (Fraudeur = 1); rows with a missing flag are kept.
rows_before_fraud = len(df)
df = df.loc[df["Fraudeur"].ne(1)].copy()
print(f"Lignes avant: {rows_before_fraud} | Lignes après suppression fraudeurs: {len(df)} | Lignes supprimées: {rows_before_fraud - len(df)}")

Lignes avant: 38168 | Lignes après suppression fraudeurs: 38158 | Lignes supprimées: 10


**Exclusions of the inactive cards**

In [34]:
# Suppression des carte inactives (Flag_Actif = 0)
# Drop inactive cards (Flag_Actif = 0); rows with a missing flag are kept.
rows_before_actif = len(df)
df = df.loc[df["Flag_Actif"].ne(0)].copy()
print(f"Lignes avant: {rows_before_actif} | Lignes après suppression Flag_Actif=0: {len(df)} | Lignes supprimées: {rows_before_actif - len(df)}")

Lignes avant: 38158 | Lignes après suppression Flag_Actif=0: 33306 | Lignes supprimées: 4852


**Exclusion of cards activated more than 4 months after the request**

In [35]:
# Suppression des lignes où Horizon_Activ > 4 (le nom existant est 'Horizon_Activ', pas 'Horizon_Active')
# Keep cards with Horizon_Activ <= 4; rows with a missing horizon are dropped too.
# The column is named 'Horizon_Activ', not 'Horizon_Active'.
rows_before_horizon = len(df)
df = df.loc[df["Horizon_Activ"].le(4)].copy()
print(f"Lignes avant: {rows_before_horizon} | Lignes après suppression Horizon_Activ>4: {len(df)} | Lignes supprimées: {rows_before_horizon - len(df)}")

Lignes avant: 33306 | Lignes après suppression Horizon_Activ>4: 32645 | Lignes supprimées: 661


## Risk Analysis

In [36]:
# Rebuild the default flags and risk tables from the current df, i.e. after the exclusion
# filters of this notebook. This overwrites the data_default / data_risk computed earlier
# on the full population, so cells that read them depend on execution order.
data_default  = defaults_table(df, 24, 6, ctx=True, add_to_data=True)
data_risk = risk_tables(data_default, "Gen_Demande", rates = True)

In [46]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Volume of bad payers plus risk rate (secondary axis), on the current data_default.
# A bad payer is a row whose modelling criterion H12_CTX_R3 equals 1; rows with a
# missing criterion still count in Total (groupby "size"), as before.
risk_df = (
    data_default
    .groupby("Gen_Demande")
    .agg(
        Volume_Mauvais_Payeurs=("H12_CTX_R3", lambda s: (s == 1).sum()),
        Total=("H12_CTX_R3", "size")
    )
    .reset_index()
    .sort_values("Gen_Demande")
)
risk_df["Taux_Risque"] = risk_df["Volume_Mauvais_Payeurs"] / risk_df["Total"]

# Portfolio-wide average risk and the generation span used to draw it as a flat line.
_avg_risk = (data_default["H12_CTX_R3"] == 1).mean()
_min_gen = data_default["Gen_Demande"].min()
_max_gen = data_default["Gen_Demande"].max()

# The average line is added explicitly here. The previous version shadowed
# plotly's make_subplots with a wrapper that injected this trace into every
# figure created afterwards and failed on figures without a secondary y axis.
fig_bad_payers = make_subplots(specs=[[{"secondary_y": True}]])

# Added first so the trace / legend order is unchanged: average, volume, rate.
fig_bad_payers.add_trace(
    go.Scatter(
        x=[_min_gen, _max_gen],
        y=[_avg_risk, _avg_risk],
        mode="lines",
        line=dict(color="green", width=2, dash="dash"),
        name="Taux risque moyen"
    ),
    secondary_y=True
)
fig_bad_payers.add_trace(
    go.Bar(
        x=risk_df["Gen_Demande"],
        y=risk_df["Volume_Mauvais_Payeurs"],
        name="Volume mauvais payeurs",
        marker_color="#636efa"
    ),
    secondary_y=False
)
fig_bad_payers.add_trace(
    go.Scatter(
        x=risk_df["Gen_Demande"],
        y=risk_df["Taux_Risque"],
        name="Taux de risque",
        mode="lines+markers",
        line=dict(color="#EF553B")
    ),
    secondary_y=True
)

fig_bad_payers.update_layout(
    title="Volume de mauvais payeurs et taux de risque par mois",
    bargap=0.15,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)
fig_bad_payers.update_xaxes(title="Gen_Demande")
fig_bad_payers.update_yaxes(title_text="Nombre de mauvais payeurs", secondary_y=False)
fig_bad_payers.update_yaxes(title_text="Taux de risque", secondary_y=True)

fig_bad_payers

## Modelling Period

In [38]:
# Création des trois sous-dataframes triés par Gen_Demande
DF1 = (df[df["Gen_Demande"].between("2014-05-01", "2015-12-01")]
    .sort_values("Gen_Demande")
    .copy())

DF2 = (df[df ["Gen_Demande"].between("2014-05-01", "2015-04-01")]
    .sort_values("Gen_Demande")
    .copy())

DF3 = (df[df["Gen_Demande"].between("2015-01-01", "2015-12-01")]
    .sort_values("Gen_Demande")
    .copy())

DF4 = (df[df["Gen_Demande"].between("2016-01-01", "2016-12-01")]
    .sort_values("Gen_Demande")
    .copy())

In [39]:
# Sanity check: list the distinct generations covered by each window.
for period_frame in (DF1, DF2, DF3, DF4):
    print(period_frame["Gen_Demande"].unique())

<DatetimeArray>
['2014-05-01 00:00:00', '2014-06-01 00:00:00', '2014-07-01 00:00:00',
 '2014-08-01 00:00:00', '2014-09-01 00:00:00', '2014-10-01 00:00:00',
 '2014-11-01 00:00:00', '2014-12-01 00:00:00', '2015-01-01 00:00:00',
 '2015-02-01 00:00:00', '2015-03-01 00:00:00', '2015-04-01 00:00:00',
 '2015-05-01 00:00:00', '2015-06-01 00:00:00', '2015-07-01 00:00:00',
 '2015-08-01 00:00:00', '2015-09-01 00:00:00', '2015-10-01 00:00:00',
 '2015-11-01 00:00:00', '2015-12-01 00:00:00']
Length: 20, dtype: datetime64[ns]
<DatetimeArray>
['2014-05-01 00:00:00', '2014-06-01 00:00:00', '2014-07-01 00:00:00',
 '2014-08-01 00:00:00', '2014-09-01 00:00:00', '2014-10-01 00:00:00',
 '2014-11-01 00:00:00', '2014-12-01 00:00:00', '2015-01-01 00:00:00',
 '2015-02-01 00:00:00', '2015-03-01 00:00:00', '2015-04-01 00:00:00']
Length: 12, dtype: datetime64[ns]
<DatetimeArray>
['2015-01-01 00:00:00', '2015-02-01 00:00:00', '2015-03-01 00:00:00',
 '2015-04-01 00:00:00', '2015-05-01 00:00:00', '2015-06-01 00:00:00

In [40]:
# Résumé des périodes DF1, DF2, DF3 avec volume et taux de risque (H12_CTX_R3)
def _summarize_period(nom, start, end):
    subset = data_default[(data_default["Gen_Demande"] >= start) & (data_default["Gen_Demande"] <= end)]
    total = len(subset)
    bad = (subset["H12_CTX_R3"] == 1).sum()
    taux = bad / total if total else float("nan")
    return {
        "DataFrame": nom,
        "Periode": f"{start.date()} -> {end.date()}",
        "Total_Dossiers": total,
        "Mauvais_Payeurs": bad,
        "Taux_Risque": taux
    }

periodes = [
    ("DF1", DF1["Gen_Demande"].min(), DF1["Gen_Demande"].max()),
    ("DF2", DF2["Gen_Demande"].min(), DF2["Gen_Demande"].max()),
    ("DF3", DF3["Gen_Demande"].min(), DF3["Gen_Demande"].max())
]

resume_df = pd.DataFrame([_summarize_period(*p) for p in periodes])
resume_df["Taux_Risque"] = resume_df["Taux_Risque"].map(lambda x: f"{x:.2%}")
resume_df

Unnamed: 0,DataFrame,Periode,Total_Dossiers,Mauvais_Payeurs,Taux_Risque
0,DF1,2014-05-01 -> 2015-12-01,17978,658,3.66%
1,DF2,2014-05-01 -> 2015-04-01,9594,389,4.05%
2,DF3,2015-01-01 -> 2015-12-01,10946,365,3.33%


In [41]:
# Modelling sample: DF1 (Gen_Demande from 2014-05-01 to 2015-12-01). This is an alias of DF1, not a copy.
df_model = DF1

In [42]:
# Monitoring sample: DF4 (Gen_Demande from 2016-01-01 to 2016-12-01). This is an alias of DF4, not a copy.
df_monitoring = DF4

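**TODO :** justifier le choix de DF1 (2014-05 → 2015-12) comme période de modélisation et de DF4 (2016) comme période de monitoring, en s'appuyant sur les volumes et les taux de risque du tableau ci-dessus.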

## Save

In [43]:
# Write the modelling sample to CSV without the index column.
df_model.to_csv("../data/model_data.csv", index=False)

In [44]:
# Write the monitoring sample to CSV without the index column.
df_monitoring.to_csv("../data/monitoring_data.csv", index=False)