In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# === File Paths ===
# Credits to simulate; read into input_df.
INPUT_FILE = "test2025.csv"
# Per-credit metrics; only idCredito and repayment_ratio are used from it.
METRICS_FILE = "data/output/credit_repayment_metrics.csv"
# Strategy table consulted by select_strategy, filtered per bank.
STRATEGY_FILE = "data/raw/CatEmisora_Merged.csv"
# Destination CSV for the simulation result.
OUTPUT_FILE = "repayment_simulation_test_1.csv"

In [3]:
# === Load Input Data ===
# Parse idCredito as text at read time instead of casting afterwards.
# .astype(str) converts whatever dtype pandas inferred: if one file's id column
# is inferred as float (for example because it contains a blank), its keys
# become "123.0" and no longer match "123" from the other file. The late cast
# also turns blank ids into the literal string "nan" and drops zero padding.
# With dtype=str the ids stay exactly as written and blanks stay missing (NaN).
input_df = pd.read_csv(INPUT_FILE, dtype={"idCredito": str})

metrics_df = pd.read_csv(METRICS_FILE, dtype={"idCredito": str})

In [4]:

# === Merge Repayment Ratio ===
# Left join: every input credit is kept; credits without a metrics row get a
# NaN repayment_ratio, which makes expected_repayment NaN as well.
merged = input_df.merge(
    metrics_df[["idCredito", "repayment_ratio"]],
    how="left",
    on="idCredito",
)
merged["expected_repayment"] = merged["montoExigible"].mul(merged["repayment_ratio"])


In [5]:
# === Compute Seasonality Score ===
def seasonality_score(day, period=30, cycles=3):
    """Map a day number to a cosine-shaped score between 0 and 1.

    The score is 1 at day 0 and completes `cycles` full oscillations every
    `period` days. With the defaults it peaks at days 0, 10, 20 and 30 and
    bottoms out at days 5, 15 and 25.

    Args:
        day: Day number (scalar or NumPy/pandas array-like). NaN yields NaN.
        period: Number of days over which `cycles` oscillations complete.
        cycles: Number of full cosine cycles per `period`.

    Returns:
        The score, with the same shape as `day`.
    """
    # Operation order is kept as-is so default results match bit-for-bit.
    radians = (day / period) * 2 * np.pi * cycles
    # cos is in [-1, 1]; shift and halve to land in [0, 1].
    return (np.cos(radians) + 1) / 2

# Unparseable send dates are coerced to NaT rather than raising.
merged["fechaEnvioCobro"] = pd.to_datetime(merged["fechaEnvioCobro"], errors="coerce")
merged["day_of_month"] = merged["fechaEnvioCobro"].dt.day
# Rows without a valid date keep a NaN score (and a NaN adjusted repayment).
merged["seasonality_score"] = merged["day_of_month"].apply(
    lambda day: np.nan if pd.isna(day) else seasonality_score(day)
)
merged["adjusted_expected_repayment"] = merged["expected_repayment"].mul(
    merged["seasonality_score"]
)


In [6]:
# === Classify Risk ===
def classify_risk(ratio):
    """Bucket a repayment ratio into a risk label.

    Returns "unknown" for a missing ratio, "highrisk" for a ratio strictly
    below 0.5, and "lowrisk" otherwise.
    """
    if not pd.isna(ratio):
        if ratio < 0.5:
            return "highrisk"
        return "lowrisk"
    return "unknown"

# Label every credit; a missing repayment_ratio is labelled "unknown".
merged["risk_level"] = merged["repayment_ratio"].apply(classify_risk)


In [7]:
# === Load Strategy Table ===
# select_strategy reads the IdBanco, Riesgo, EmisoraCostoAceptado and idEmisora
# columns of this table.
strategy_df = pd.read_csv(STRATEGY_FILE)

In [8]:
# === Strategy Selection Function ===
def select_strategy(row, strategy_df):
    """Pick one strategy row for a credit, based on its bank and risk level.

    Candidates are the rows of `strategy_df` whose "IdBanco" equals the
    credit's "idBanco".

    * risk_level == "highrisk": the candidate with the largest "Riesgo" wins;
      ties go to the lowest "EmisoraCostoAceptado".
    * any other risk_level (including "unknown"): the candidate with the
      smallest "Riesgo" wins; ties go to the lowest "EmisoraCostoAceptado".

    Args:
        row: A credit record exposing "idBanco" and "risk_level".
        strategy_df: DataFrame with "IdBanco", "Riesgo",
            "EmisoraCostoAceptado" and "idEmisora" columns.

    Returns:
        A two-element pd.Series: [idEmisora, EmisoraCostoAceptado] of the chosen
        strategy, or [NaN, NaN] when the bank id is missing or has no
        candidates.
    """
    bank_id = row["idBanco"]
    if pd.isna(bank_id):
        return pd.Series([np.nan, np.nan])

    strategies = strategy_df[strategy_df["IdBanco"] == bank_id]
    if strategies.empty:
        return pd.Series([np.nan, np.nan])

    sort_keys = ["Riesgo", "EmisoraCostoAceptado"]
    if row["risk_level"] == "highrisk":
        # Sorting on Riesgo alone left ties to the (unstable) sort order and
        # could pick a more expensive strategy; cost ascending breaks ties the
        # same way the low-risk branch does.
        ordered = strategies.sort_values(by=sort_keys, ascending=[False, True])
    else:
        ordered = strategies.sort_values(by=sort_keys)

    chosen = ordered.iloc[0]
    return pd.Series([chosen["idEmisora"], chosen["EmisoraCostoAceptado"]])


In [9]:
# === Apply Strategy Selection ===
# Row-wise: each credit gets the idEmisora and cost of its selected strategy
# (NaN for both when none applies). strategy_df is forwarded as the second
# positional argument of select_strategy.
merged[["idEmisora", "operation_cost"]] = merged.apply(
    select_strategy,
    axis=1,
    args=(strategy_df,),
)


In [10]:

# === Save Final Output ===
# Write only the report columns, in this order, without the row index.
merged.to_csv(
    OUTPUT_FILE,
    columns=[
        "idCredito",
        "montoExigible",
        "fechaEnvioCobro",
        "idEmisora",
        "idBanco",
        "operation_cost",
        "seasonality_score",
        "adjusted_expected_repayment",
    ],
    index=False,
)


In [13]:
# === Print Results ===
# Baseline cost that the strategy total is compared against. It is a fixed
# figure that is not derived anywhere in this notebook -- TODO confirm its
# source and refresh it whenever the input data changes.
BASELINE_TOTAL_COST = 1931427.17

# Series.sum() skips NaN, so credits with no selected strategy add nothing here.
total_cost = merged["operation_cost"].sum()
print(f"✅ Final strategy-based results saved to {OUTPUT_FILE}")
print(f"💰 Total operational cost for 2025: {BASELINE_TOTAL_COST:.2f}")
print(f"💰 Total operational cost for 2025 with applied strategy: {total_cost:.2f}")


✅ Final strategy-based results saved to repayment_simulation_test_1.csv
💰 Total operational cost for 2025: 1931427.17
💰 Total operational cost for 2025 with applied strategy: 1090893.42
