In [1]:
import os
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
import psycopg2
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [2]:
# Parámetros desde .env
POSTGRES_USER = "mlflow_user"
POSTGRES_PASSWORD = "mlflow_pass"
POSTGRES_DB = "mlflowdb"
POSTGRES_HOST = "postgres"  # nombre del servicio Docker
POSTGRES_PORT = "5432"

# URL de conexión para SQLAlchemy
db_uri = f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"

engine = create_engine(db_uri)

# Cargar datos desde tabla
df = pd.read_sql("SELECT * FROM credit_data", engine)

df.head()


Unnamed: 0,id,age,income,education_level,credit_score
0,1,59,87761.0,Bachelor,740
1,2,49,66368.0,Master,725
2,3,35,36783.0,Master,661
3,4,28,71914.0,Bachelor,687
4,5,41,97526.0,Bachelor,755


In [3]:
# One-hot encoding simple para education_level
df = pd.get_dummies(df, columns=["education_level"], drop_first=True)

# Separar features y target
X = df.drop(columns=["id", "credit_score"])
y = df["credit_score"]

# Dividir datos
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [4]:
from sqlalchemy import create_engine

engine = create_engine("postgresql://mlflow_user:mlflow_pass@postgres:5432/mlflowdb")

# Guardar splits
X_train.to_sql("credit_data_train", engine, if_exists="replace", index=False)
X_val.to_sql("credit_data_val", engine, if_exists="replace", index=False)
X_test.to_sql("credit_data_test", engine, if_exists="replace", index=False)


150

In [5]:
y_train.to_frame("credit_score").to_sql("credit_score_train", engine, if_exists="replace", index=False)
y_val.to_frame("credit_score").to_sql("credit_score_val", engine, if_exists="replace", index=False)
y_test.to_frame("credit_score").to_sql("credit_score_test", engine, if_exists="replace", index=False)


150

In [6]:
def eval_model(model, X_val, y_val):
    y_pred = model.predict(X_val)
    return {
        "mae": mean_absolute_error(y_val, y_pred),
        "mse": mean_squared_error(y_val, y_pred),
        "r2": r2_score(y_val, y_pred)
    }

def train_model(model_name, model_class, params_grid):
    for params in params_grid:
        with mlflow.start_run(run_name=f"{model_name}_{params}"):
            model = model_class(**params)
            model.fit(X_train, y_train)

            metrics = eval_model(model, X_val, y_val)

            # Log params and metrics
            mlflow.log_params(params)
            mlflow.log_metrics(metrics)

            # Log model artifact
            mlflow.sklearn.log_model(model, "model", registered_model_name="CreditScoreModel")

            print(f"Run {mlflow.active_run().info.run_id} - {model_name} - {params} - R2: {metrics['r2']:.3f}")


In [7]:
import boto3
import os

bucket_name = "mlflow-artifacts"

s3 = boto3.client(
    "s3",
    endpoint_url=os.environ.get("MLFLOW_S3_ENDPOINT_URL"),
    aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)

# Crear el bucket si no existe
buckets = s3.list_buckets()
if not any(b['Name'] == bucket_name for b in buckets['Buckets']):
    s3.create_bucket(Bucket=bucket_name)
    print(f"✅ Bucket '{bucket_name}' creado.")
else:
    print(f"📦 Bucket '{bucket_name}' ya existe.")


📦 Bucket 'mlflow-artifacts' ya existe.


In [8]:
# Configuración de MLflow (ya viene de variables de entorno en el contenedor)
mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("CreditScorePrediction")

# Rejilla de hiperparámetros (mínimo 20 combinaciones)
rf_params = [
    {"n_estimators": n, "max_depth": d}
    for n in [10, 50, 100, 200]
    for d in [3, 5, 10, None]
]

train_model("RandomForest", RandomForestRegressor, rf_params[:20])


2025/09/22 04:48:22 INFO mlflow.tracking.fluent: Experiment with name 'CreditScorePrediction' does not exist. Creating a new experiment.
The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet

Successfully registered model 'CreditScoreModel'.
2025/09/22 04:48:25 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 1
Created version '1' of model 'CreditScoreMode

Run aa4d5008b1cd43838b8ca5f5988e3742 - RandomForest - {'n_estimators': 10, 'max_depth': 3} - R2: 0.711
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': 3} at: http://mlflow:5000/#/experiments/696005550256114257/runs/aa4d5008b1cd43838b8ca5f5988e3742
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 2
Created version '2' of model 'CreditScoreModel'.


Run 7863faf6610b443983c1aa48081301e6 - RandomForest - {'n_estimators': 10, 'max_depth': 5} - R2: 0.847
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': 5} at: http://mlflow:5000/#/experiments/696005550256114257/runs/7863faf6610b443983c1aa48081301e6
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:29 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 3
Created version '3' of model 'CreditScoreModel'.


Run 809f23079cdd4f83a05fbfbe17163159 - RandomForest - {'n_estimators': 10, 'max_depth': 10} - R2: 0.872
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': 10} at: http://mlflow:5000/#/experiments/696005550256114257/runs/809f23079cdd4f83a05fbfbe17163159
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 4
Created version '4' of model 'CreditScoreModel'.


Run b1fb3ec2b2f44c4e9ebdcb63d942b72d - RandomForest - {'n_estimators': 10, 'max_depth': None} - R2: 0.874
🏃 View run RandomForest_{'n_estimators': 10, 'max_depth': None} at: http://mlflow:5000/#/experiments/696005550256114257/runs/b1fb3ec2b2f44c4e9ebdcb63d942b72d
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:33 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 5
Created version '5' of model 'CreditScoreModel'.


Run 7e1ca20b421048f09e08dfe36a942f05 - RandomForest - {'n_estimators': 50, 'max_depth': 3} - R2: 0.724
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': 3} at: http://mlflow:5000/#/experiments/696005550256114257/runs/7e1ca20b421048f09e08dfe36a942f05
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:35 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 6
Created version '6' of model 'CreditScoreModel'.


Run 2df170c6ce404bd7b9afcedf1253a2a3 - RandomForest - {'n_estimators': 50, 'max_depth': 5} - R2: 0.859
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': 5} at: http://mlflow:5000/#/experiments/696005550256114257/runs/2df170c6ce404bd7b9afcedf1253a2a3
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 7
Created version '7' of model 'CreditScoreModel'.


Run dfad2a8614514e719b40c9ce7b53ee83 - RandomForest - {'n_estimators': 50, 'max_depth': 10} - R2: 0.884
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': 10} at: http://mlflow:5000/#/experiments/696005550256114257/runs/dfad2a8614514e719b40c9ce7b53ee83
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:39 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 8
Created version '8' of model 'CreditScoreModel'.


Run e253a126ca2d482b8b9388c218128b40 - RandomForest - {'n_estimators': 50, 'max_depth': None} - R2: 0.885
🏃 View run RandomForest_{'n_estimators': 50, 'max_depth': None} at: http://mlflow:5000/#/experiments/696005550256114257/runs/e253a126ca2d482b8b9388c218128b40
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:42 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 9
Created version '9' of model 'CreditScoreModel'.


Run 40671e4c012f450390202a6ae3a29d03 - RandomForest - {'n_estimators': 100, 'max_depth': 3} - R2: 0.724
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': 3} at: http://mlflow:5000/#/experiments/696005550256114257/runs/40671e4c012f450390202a6ae3a29d03
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:44 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 10
Created version '10' of model 'CreditScoreModel'.


Run 0403562c6e2c476b8431c12677acd92a - RandomForest - {'n_estimators': 100, 'max_depth': 5} - R2: 0.857
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': 5} at: http://mlflow:5000/#/experiments/696005550256114257/runs/0403562c6e2c476b8431c12677acd92a
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 11
Created version '11' of model 'CreditScoreModel'.


Run e0389a71413b4314b3591e73314d21af - RandomForest - {'n_estimators': 100, 'max_depth': 10} - R2: 0.884
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': 10} at: http://mlflow:5000/#/experiments/696005550256114257/runs/e0389a71413b4314b3591e73314d21af
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:48 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 12
Created version '12' of model 'CreditScoreModel'.


Run 416d5db14ff444d9b494dfc0ee066e09 - RandomForest - {'n_estimators': 100, 'max_depth': None} - R2: 0.881
🏃 View run RandomForest_{'n_estimators': 100, 'max_depth': None} at: http://mlflow:5000/#/experiments/696005550256114257/runs/416d5db14ff444d9b494dfc0ee066e09
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 13
Created version '13' of model 'CreditScoreModel'.


Run 5fec9f304e7b43bcbc938b801f40e008 - RandomForest - {'n_estimators': 200, 'max_depth': 3} - R2: 0.718
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': 3} at: http://mlflow:5000/#/experiments/696005550256114257/runs/5fec9f304e7b43bcbc938b801f40e008
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 14
Created version '14' of model 'CreditScoreModel'.


Run c5639025fea54724bd755f4ac6ec3495 - RandomForest - {'n_estimators': 200, 'max_depth': 5} - R2: 0.857
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': 5} at: http://mlflow:5000/#/experiments/696005550256114257/runs/c5639025fea54724bd755f4ac6ec3495
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 15
Created version '15' of model 'CreditScoreModel'.


Run af06c5cbd21c4236b9dc5a95593cc27f - RandomForest - {'n_estimators': 200, 'max_depth': 10} - R2: 0.885
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': 10} at: http://mlflow:5000/#/experiments/696005550256114257/runs/af06c5cbd21c4236b9dc5a95593cc27f
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Registered model 'CreditScoreModel' already exists. Creating a new version of this model...
2025/09/22 04:48:57 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 16


Run fba2e5b3db0a410495e25af6a50ed516 - RandomForest - {'n_estimators': 200, 'max_depth': None} - R2: 0.880
🏃 View run RandomForest_{'n_estimators': 200, 'max_depth': None} at: http://mlflow:5000/#/experiments/696005550256114257/runs/fba2e5b3db0a410495e25af6a50ed516
🧪 View experiment at: http://mlflow:5000/#/experiments/696005550256114257


Created version '16' of model 'CreditScoreModel'.


In [10]:
experiment = client.get_experiment_by_name("CreditScorePrediction")
if experiment is None:
    raise ValueError("El experimento no existe.")

runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["metrics.r2 DESC"],
)

if not runs:
    raise ValueError("❌ No hay ejecuciones registradas en este experimento.")

best_run = runs[0]
print(f"✅ Best run ID: {best_run.info.run_id}")
print("📊 Métricas:", best_run.data.metrics)


NameError: name 'client' is not defined