In [1]:
import os
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
import psycopg2
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [2]:
# Parámetros desde .env
POSTGRES_USER = "mlflow_user"
POSTGRES_PASSWORD = "mlflow_pass"
POSTGRES_DB = "mlflowdb"
POSTGRES_HOST = "postgres"  # nombre del servicio Docker
POSTGRES_PORT = "5432"

# URL de conexión para SQLAlchemy
db_uri = f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"

engine = create_engine(db_uri)

# Cargar datos desde tabla
df = pd.read_sql("SELECT * FROM credit_data", engine)

df.head()


Unnamed: 0,id,age,income,education_level,credit_score
0,1,59,87761.0,Bachelor,740
1,2,49,66368.0,Master,725
2,3,35,36783.0,Master,661
3,4,28,71914.0,Bachelor,687
4,5,41,97526.0,Bachelor,755


In [3]:
# Define las categorías esperadas explícitamente
expected_levels = ["High School", "Bachelor", "Master", "PhD"]

# One-hot encoding SIN drop_first
df = pd.get_dummies(df, columns=["education_level"], prefix="education_level")

# Agrega columnas faltantes
for level in expected_levels:
    col = f"education_level_{level}"
    if col not in df.columns:
        df[col] = 0

# Asegura el orden correcto
ordered_cols = ["age", "income"] + [f"education_level_{lvl}" for lvl in expected_levels]
X = df[ordered_cols]
y = df["credit_score"]


# Dividir datos
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [4]:
from sqlalchemy import create_engine

engine = create_engine("postgresql://mlflow_user:mlflow_pass@postgres:5432/mlflowdb")

# Guardar splits
X_train.to_sql("credit_data_train", engine, if_exists="replace", index=False)
X_val.to_sql("credit_data_val", engine, if_exists="replace", index=False)
X_test.to_sql("credit_data_test", engine, if_exists="replace", index=False)


150

In [5]:
y_train.to_frame("credit_score").to_sql("credit_score_train", engine, if_exists="replace", index=False)
y_val.to_frame("credit_score").to_sql("credit_score_val", engine, if_exists="replace", index=False)
y_test.to_frame("credit_score").to_sql("credit_score_test", engine, if_exists="replace", index=False)


150

In [6]:
def eval_model(model, X_val, y_val):
    y_pred = model.predict(X_val)
    return {
        "mae": mean_absolute_error(y_val, y_pred),
        "mse": mean_squared_error(y_val, y_pred),
        "r2": r2_score(y_val, y_pred)
    }

def train_model(model_name, model_class, params_grid):
    for params in params_grid:
        with mlflow.start_run(run_name=f"{model_name}_{params}"):
            model = model_class(**params)
            model.fit(X_train, y_train)

            metrics = eval_model(model, X_val, y_val)

            # Log params and metrics
            mlflow.log_params(params)
            mlflow.log_metrics(metrics)

            # Log model artifact
            mlflow.sklearn.log_model(model, "model", registered_model_name="CreditScoreModel")

            print(f"Run {mlflow.active_run().info.run_id} - {model_name} - {params} - R2: {metrics['r2']:.3f}")


In [7]:
import boto3
import os

bucket_name = "mlflow-artifacts"

s3 = boto3.client(
    "s3",
    endpoint_url=os.environ.get("MLFLOW_S3_ENDPOINT_URL"),
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)

# Crear el bucket si no existe
buckets = s3.list_buckets()
if not any(b['Name'] == bucket_name for b in buckets['Buckets']):
    s3.create_bucket(Bucket=bucket_name)
    print(f"✅ Bucket '{bucket_name}' creado.")
else:
    print(f"📦 Bucket '{bucket_name}' ya existe.")


📦 Bucket 'mlflow-artifacts' ya existe.


In [8]:
# Configuración de MLflow (ya viene de variables de entorno en el contenedor)
mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("CreditScorePrediction")

# Rejilla de hiperparámetros (mínimo 20 combinaciones)
rf_params = [
    {"n_estimators": n, "max_depth": d}
    for n in [10, 50, 100, 200]
    for d in [3, 5, 10, None]
]

train_model("RandomForest", RandomForestRegressor, rf_params[:20])


2025/09/22 05:13:01 INFO mlflow.tracking.fluent: Experiment with name 'CreditScorePrediction' does not exist. Creating a new experiment.
The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet

Successfully registered model 'CreditScoreModel'.
2025/09/22 05:13:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 1
Created version '1' of model 'CreditScoreMode

Run 15f2c16ecfb446f8b4bbd7c89fb4ea77 - RandomForest - {'n_estimators': 10, 'max_depth': 3} - R2: 0.699
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': 3} at: http://mlflow:5000/#/experiments/654921044916849395/runs/15f2c16ecfb446f8b4bbd7c89fb4ea77
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 2
Created version '2' of model 'CreditScoreModel'.


Run c43940c5b8a04a35bcbd3cc2693b9187 - RandomForest - {'n_estimators': 10, 'max_depth': 5} - R2: 0.869
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': 5} at: http://mlflow:5000/#/experiments/654921044916849395/runs/c43940c5b8a04a35bcbd3cc2693b9187
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 3
Created version '3' of model 'CreditScoreModel'.


Run 805b07d1db39480a8cae3fdc90afec55 - RandomForest - {'n_estimators': 10, 'max_depth': 10} - R2: 0.874
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': 10} at: http://mlflow:5000/#/experiments/654921044916849395/runs/805b07d1db39480a8cae3fdc90afec55
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 4
Created version '4' of model 'CreditScoreModel'.


Run bd302ebbd37a429fa1c70a9ad404bd4f - RandomForest - {'n_estimators': 10, 'max_depth': None} - R2: 0.862
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': None} at: http://mlflow:5000/#/experiments/654921044916849395/runs/bd302ebbd37a429fa1c70a9ad404bd4f
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:12 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 5
Created version '5' of model 'CreditScoreModel'.


Run 922eabf3adb74bef878eb23642845a25 - RandomForest - {'n_estimators': 50, 'max_depth': 3} - R2: 0.713
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': 3} at: http://mlflow:5000/#/experiments/654921044916849395/runs/922eabf3adb74bef878eb23642845a25
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 6
Created version '6' of model 'CreditScoreModel'.


Run 2ce35774533b46c8bd9c2909a118617c - RandomForest - {'n_estimators': 50, 'max_depth': 5} - R2: 0.866
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': 5} at: http://mlflow:5000/#/experiments/654921044916849395/runs/2ce35774533b46c8bd9c2909a118617c
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:16 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 7
Created version '7' of model 'CreditScoreModel'.


Run a44867f824f6401493844aff64b42e2d - RandomForest - {'n_estimators': 50, 'max_depth': 10} - R2: 0.885
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': 10} at: http://mlflow:5000/#/experiments/654921044916849395/runs/a44867f824f6401493844aff64b42e2d
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 8
Created version '8' of model 'CreditScoreModel'.


Run 8ff1ed1519b4434b9c8912e920a5731e - RandomForest - {'n_estimators': 50, 'max_depth': None} - R2: 0.878
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': None} at: http://mlflow:5000/#/experiments/654921044916849395/runs/8ff1ed1519b4434b9c8912e920a5731e
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:21 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 9
Created version '9' of model 'CreditScoreModel'.


Run 0121980c760c425bb6e3494298636b5d - RandomForest - {'n_estimators': 100, 'max_depth': 3} - R2: 0.715
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': 3} at: http://mlflow:5000/#/experiments/654921044916849395/runs/0121980c760c425bb6e3494298636b5d
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:23 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 10
Created version '10' of model 'CreditScoreModel'.


Run 26e250c1e64545098b818923b53b99f8 - RandomForest - {'n_estimators': 100, 'max_depth': 5} - R2: 0.870
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': 5} at: http://mlflow:5000/#/experiments/654921044916849395/runs/26e250c1e64545098b818923b53b99f8
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:25 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 11
Created version '11' of model 'CreditScoreModel'.


Run 80c9b0d44edf47d0931b039b64b9887e - RandomForest - {'n_estimators': 100, 'max_depth': 10} - R2: 0.886
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': 10} at: http://mlflow:5000/#/experiments/654921044916849395/runs/80c9b0d44edf47d0931b039b64b9887e
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 12
Created version '12' of model 'CreditScoreModel'.


Run 0f5f734e106e4987b92c86496a884de3 - RandomForest - {'n_estimators': 100, 'max_depth': None} - R2: 0.878
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': None} at: http://mlflow:5000/#/experiments/654921044916849395/runs/0f5f734e106e4987b92c86496a884de3
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 13
Created version '13' of model 'CreditScoreModel'.


Run f939fe32a34a40129add04855819bd1a - RandomForest - {'n_estimators': 200, 'max_depth': 3} - R2: 0.720
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': 3} at: http://mlflow:5000/#/experiments/654921044916849395/runs/f939fe32a34a40129add04855819bd1a
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:32 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 14
Created version '14' of model 'CreditScoreModel'.


Run 8797dbb5189a477186bfed695451dfec - RandomForest - {'n_estimators': 200, 'max_depth': 5} - R2: 0.869
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': 5} at: http://mlflow:5000/#/experiments/654921044916849395/runs/8797dbb5189a477186bfed695451dfec
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 15
Created version '15' of model 'CreditScoreModel'.


Run 41acb004a47645a8aafba6fd8317396b - RandomForest - {'n_estimators': 200, 'max_depth': 10} - R2: 0.884
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': 10} at: http://mlflow:5000/#/experiments/654921044916849395/runs/41acb004a47645a8aafba6fd8317396b
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 05:13:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 16


Run ec0735347b33477f833c8a9927b225d9 - RandomForest - {'n_estimators': 200, 'max_depth': None} - R2: 0.880
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': None} at: http://mlflow:5000/#/experiments/654921044916849395/runs/ec0735347b33477f833c8a9927b225d9
🧪 View experiment at: http://mlflow:5000/#/experiments/654921044916849395


Created version '16' of model 'CreditScoreModel'.


In [9]:
from mlflow.tracking import MlflowClient

client = MlflowClient()

experiment = client.get_experiment_by_name("CreditScorePrediction")
if experiment is None:
    raise ValueError("El experimento no existe.")

runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["metrics.r2 DESC"],
)

if not runs:
    raise ValueError("❌ No hay ejecuciones registradas en este experimento.")

best_run = runs[0]
print(f"✅ Best run ID: {best_run.info.run_id}")
print("📊 Métricas:", best_run.data.metrics)


✅ Best run ID: 80c9b0d44edf47d0931b039b64b9887e
📊 Métricas: {'mae': 9.4997764062455, 'r2': 0.8859520933451851, 'mse': 140.7286490285353}
