In [16]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
import mlflow
import mlflow.sklearn
import pickle

In [17]:
train_df = pd.read_csv("data/train.csv")
test_df = pd.read_csv("data/test.csv")

X_train = train_df.drop("label", axis=1)
y_train = train_df["label"]

X_test = test_df.drop("label", axis=1)
y_test = test_df["label"]

In [18]:
train_df.isna().sum().sum()

np.int64(0)

In [19]:
round((train_df['label'] == 'A').mean(), 2)

np.float64(0.33)

In [27]:
X_train = train_df.drop("label", axis=1)
y_train = train_df["label"]

# Cerrar cualquier corrida activa previa
if mlflow.active_run():
    mlflow.end_run()

# Configurar nombre del experimento
mlflow.set_experiment("decision_tree_training")

with mlflow.start_run():
    # Entrenar el modelo
    model = DecisionTreeClassifier(
        max_depth=3,
        min_samples_split=4,
        random_state=42
    )
    model.fit(X_train, y_train)

    # Predicciones y métricas
    y_pred = model.predict(X_train)
    accuracy = accuracy_score(y_train, y_pred)
    f1 = f1_score(y_train, y_pred, average="weighted")

    # Registrar parámetros y métricas
    mlflow.log_param("max_depth", 3)
    mlflow.log_param("min_samples_split", 4)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)

    # Guardar y registrar el modelo como artefacto
    with open("model_1.pkl", "wb") as f:
        pickle.dump(model, f)
    mlflow.log_artifact("model_1.pkl")

    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1 Score: {f1:.2f}")

Accuracy: 0.82
F1 Score: 0.82


In [28]:
X_train = train_df.drop("label", axis=1)
y_train = train_df["label"]

# Cerrar cualquier corrida activa previa
if mlflow.active_run():
    mlflow.end_run()

# Configurar nombre del experimento
mlflow.set_experiment("decision_tree_training")

with mlflow.start_run():
    # Entrenar el modelo
    model = DecisionTreeClassifier(
        max_depth=5,
        min_samples_split=2,
        random_state=42
    )
    model.fit(X_train, y_train)

    # Predicciones y métricas
    y_pred = model.predict(X_train)
    accuracy = accuracy_score(y_train, y_pred)
    f1 = f1_score(y_train, y_pred, average="weighted")

    # Registrar parámetros y métricas
    mlflow.log_param("max_depth", 5)
    mlflow.log_param("min_samples_split", 2)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)

    # Guardar y registrar el modelo como artefacto
    with open("model_2.pkl", "wb") as f:
        pickle.dump(model, f)
    mlflow.log_artifact("model_2.pkl")

    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1 Score: {f1:.2f}")

Accuracy: 0.93
F1 Score: 0.93


In [29]:
from mlflow import get_artifact_uri 

uri = get_artifact_uri("model_1.pkl") 

print(uri)

file:///c:/Users/Federico%20Rusi/github/MLOpsISTEA/mlruns/195105915392587647/24f284eb726a4c0ca6793a9869dcc841/artifacts/model_1.pkl
