In [42]:
from pycaret.classification import *
import pandas as pd
import numpy as np
import os

## Funciones de monitoreo

In [43]:
def calcular_psi(serie_original, serie_nueva, bins=10):
    # Esta es una función de ejemplo para calcular el PSI
    # Deberías adaptarla según tus necesidades específicas
    conteo_original, _ = np.histogram(serie_original, bins=bins)
    conteo_nuevo, _ = np.histogram(serie_nueva, bins=bins)
    psi_values = np.where(conteo_original == 0, 0, (conteo_nuevo - conteo_original) / conteo_original) ** 2
    return np.sum(psi_values)


In [44]:
def monitoring_num_vars(data:pd.DataFrame, var_group, f_ini, f_fin) -> None:
    data_filtrado = data[(data[var_group] >= f_ini) & (data[var_group] <= f_fin)]

    # Calcular la tasa de variación, PSI (requiere implementación adicional), y desviación estándar
    resultados = pd.DataFrame(index=data.columns)

    # Tasa de variación (Ejemplo simplificado, adaptar según sea necesario)
    resultados['tasa_variacion'] = (data_filtrado.groupby(var_group).mean().pct_change().iloc[-1] * 100).fillna(0)

    # Desviación estándar
    resultados['desviacion_estandar'] = data_filtrado.std()
    
    # Calculo PSI
    for col in data.columns:
        resultados.loc[col, 'psi'] = calcular_psi(data[data[var_group] == f_ini][col],
                                                data[data[var_group] == f_fin][col])

    return resultados

In [45]:
def monitoring_cat_vars(data: pd.DataFrame, vars: list, var_group: list, f_ini: str, f_fin: str) -> None:
    # Filtrar el DataFrame por el rango de fechas
    data_filtered = data[(data[var_group] >= f_ini) & (data[var_group] <= f_fin)]
    
    # Iterar sobre cada variable categórica en var_group
    for var in vars:
        print(f"Análisis de la variable: {var}")
        
        # Agrupar por la variable categórica y contar registros
        counts_before = data_filtered[data_filtered[var_group] == f_ini].groupby(var).size()
        counts_after = data_filtered[data_filtered[var_group] == f_fin].groupby(var).size()
        
        # Comparar los conteos antes y después para cada categoría
        comparison_df = pd.DataFrame({'Antes': counts_before, 'Después': counts_after})
        comparison_df['Variación'] = comparison_df['Después'] - comparison_df['Antes']
        
        # Mostrar los resultados
        print(comparison_df)
        print("\n")  # Añadir una línea en blanco entre los análisis de cada variable


## Ejecución Ejemplo

In [46]:
data  = pd.read_csv("monitoring_example.csv")
data.head()


Unnamed: 0,nit_enmascarado,num_oblig_enmascarado,num_oblig_orig_enmascarado,fecha_var_rpta_alt,var_rpta_alt,marca_pago,ajustes_banco,lote,avg_sld_cap_final_1,avg_sld_cap_final_stddev_ult3,...,prob_propension_1,prob_propension_stddev_ult3,prob_propension_stddev_ult6,prob_propension_max_ult6,prob_alrt_temprana_1,prob_alrt_temprana_stddev_ult3,prob_alrt_temprana_stddev_ult6,prob_auto_cura_1,prob_auto_cura_stddev_ult3,prob_auto_cura_stddev_ult6
0,61793,580684,499658,202401,,FACTURACION_MES_SGTE,NO,2,1809671.0,16154.520176,...,0.925839,0.00306,0.002408,0.925839,0.378441,0.091614,0.085733,0.622551,0.013563,0.056557
1,229509,483252,597090,202312,0.0,NO_PAGO,NO,3,964859.0,536.884826,...,0.910972,0.004644,0.026044,0.921502,0.163812,0.044245,0.033903,0.828343,0.005885,0.005462
2,263991,746818,333761,202312,1.0,PAGO_MENOS,NO,1,11825930.0,59862.688105,...,0.707389,0.060488,0.047178,0.839524,0.551831,0.130113,0.106325,0.310606,0.05385,0.05171
3,434373,4039,1076303,202401,,PAGO_MENOS,NO,2,4087871.0,0.0,...,0.461028,0.137048,0.145342,0.892483,0.718311,0.030133,0.13544,0.217583,0.047001,0.135671
4,379328,577317,503025,202401,,NO_PAGO,NO,1,5000845.0,0.0,...,0.581595,0.130925,0.119128,0.925459,0.617891,0.224445,0.199087,0.261496,0.240356,0.214561


In [50]:
data_num_monitoring = data.drop(columns=["nit_enmascarado",
                                         "num_oblig_enmascarado",
                                         "num_oblig_orig_enmascarado",
                                         "var_rpta_alt",
                                         "marca_pago",
                                         "ajustes_banco",
                                         "lote"])
cat_vars = ["marca_pago",
            "ajustes_banco",
            "lote"]

In [51]:
monitoring_num_vars(data_num_monitoring, "fecha_var_rpta_alt", 202312, 202401)

Unnamed: 0,tasa_variacion,desviacion_estandar,psi
fecha_var_rpta_alt,,44.42846,0.01446476
avg_sld_cap_final_1,1.034826,23721240.0,4.642107
avg_sld_cap_final_stddev_ult3,17.265766,2122726.0,6.87511
avg_sld_cap_final_stddev_ult6,3.817695,5325889.0,1.618952
avg_nueva_altura_mora_1,-6.563946,26.18273,4655151.0
avg_nueva_altura_mora_stddev_ult3,-1.88133,9.71999,196.9179
min_vlr_obligacion_stddev_ult3,-0.239754,3974945.0,1.638887
avg_vlr_vencido_1,-3.547942,1628561.0,9.21533
valor_cuota_mes_1,1.463419,2391458.0,147.9013
valor_cuota_mes_stddev_ult3,-2.44292,719492.5,138.1234


In [52]:
monitoring_cat_vars(data, cat_vars, "fecha_var_rpta_alt", 202312, 202401)

Análisis de la variable: marca_pago
                      Antes  Después  Variación
marca_pago                                     
CANCELADO              1413      141      -1272
FACTURACION_MES_SGTE  14796    32193      17397
IGUAL                   249      131       -118
NO_DATA                 117      195         78
NO_PAGO               36192    32824      -3368
PAGO_MAS                227      145        -82
PAGO_MENOS            47472    46920       -552


Análisis de la variable: ajustes_banco
                Antes  Después  Variación
ajustes_banco                            
NO             100349   112354      12005
NO_DATA           117      195         78


Análisis de la variable: lote
      Antes  Después  Variación
lote                           
-99     396      260       -136
 1    58034    63402       5368
 2    31548    37176       5628
 3    10488    11711       1223


