In [1]:
import mlflow
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score
import os

In [2]:
mlflow.set_tracking_uri('https://mlflow-service-982318063078.us-west2.run.app')
mlflow.set_experiment('lab5-experiment')

2025/04/16 17:57:23 INFO mlflow.tracking.fluent: Experiment with name 'lab5-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/2', creation_time=1744851443790, experiment_id='2', last_update_time=1744851443790, lifecycle_stage='active', name='lab5-experiment', tags={}>

In [3]:
column_names = ['class', 'alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 
                'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols',
                'proanthocyanins', 'color_intensity', 'hue', 
                'od280/od315_of_diluted_wines', 'proline']

df = pd.read_csv('data/wine/wine.data', header=None, names=column_names)

df.head()

Unnamed: 0,class,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [4]:
df.describe()

Unnamed: 0,class,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
count,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0
mean,1.938202,13.000618,2.336348,2.366517,19.494944,99.741573,2.295112,2.02927,0.361854,1.590899,5.05809,0.957449,2.611685,746.893258
std,0.775035,0.811827,1.117146,0.274344,3.339564,14.282484,0.625851,0.998859,0.124453,0.572359,2.318286,0.228572,0.70999,314.907474
min,1.0,11.03,0.74,1.36,10.6,70.0,0.98,0.34,0.13,0.41,1.28,0.48,1.27,278.0
25%,1.0,12.3625,1.6025,2.21,17.2,88.0,1.7425,1.205,0.27,1.25,3.22,0.7825,1.9375,500.5
50%,2.0,13.05,1.865,2.36,19.5,98.0,2.355,2.135,0.34,1.555,4.69,0.965,2.78,673.5
75%,3.0,13.6775,3.0825,2.5575,21.5,107.0,2.8,2.875,0.4375,1.95,6.2,1.12,3.17,985.0
max,3.0,14.83,5.8,3.23,30.0,162.0,3.88,5.08,0.66,3.58,13.0,1.71,4.0,1680.0


In [5]:
y = df['class']
X = df.drop('class', axis=1)

In [6]:
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, shuffle=True)

In [7]:
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns,index=X_train.index)

X_val_scaled = pd.DataFrame(scaler.transform(X_val), columns=X_val.columns,index=X_val.index)

X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

In [8]:
os.makedirs('save_data_lab2', exist_ok=True)

X_train_scaled.to_parquet('save_data_lab2/x_train.parquet')
X_val_scaled.to_parquet('save_data_lab2/x_val.parquet')
X_test_scaled.to_parquet('save_data_lab2/x_test.parquet')

pd.DataFrame(y_train, columns=['class']).to_parquet('save_data_lab2/y_train.parquet')
pd.DataFrame(y_val, columns=['class']).to_parquet('save_data_lab2/y_val.parquet')
pd.DataFrame(y_test, columns=['class']).to_parquet('save_data_lab2/y_test.parquet')

In [15]:
def objective(params):
    classifier_type = params.pop("type")

    if classifier_type == "dt":
        clf = DecisionTreeClassifier(**params)
        model_name = "decision_tree_wine"
    elif classifier_type == "rf":
        clf = RandomForestClassifier(**params)
        model_name = "random_forest_wine"
    else:  # lr
        clf = LogisticRegression(**params, max_iter=2000)
        model_name = "logistic_regression_wine"

    with mlflow.start_run():
        acc = cross_val_score(clf, X_train_scaled, y_train, cv=5).mean()
        clf.fit(X_train_scaled, y_train)

        mlflow.set_tag("model_type", classifier_type)
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)

        mlflow.sklearn.log_model(
            clf, artifact_path="model",
            registered_model_name=model_name
        )

        return {"loss": -acc, "status": STATUS_OK}

In [16]:
search_space = hp.choice('classifier_type', [
    {
        'type': 'dt',
        'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
        'max_depth': hp.choice('dtree_max_depth', [None] + list(range(1, 10))),
        'min_samples_split': hp.randint('dtree_min_samples_split', 2, 10),
        'random_state': 24
    },
    {
        'type': 'rf',
        'n_estimators': hp.randint('rf_n_estimators', 20, 500),
        'max_features': hp.randint('rf_max_features', 2, 9),
        'criterion': hp.choice('rf_criterion', ['gini', 'entropy']),
        'random_state': 24
    },
    {
        'type': 'lr',
        'C': hp.loguniform('lr_C', np.log(0.01), np.log(10.0)),
        'random_state': 24
    }
])

In [17]:
algo = tpe.suggest
trials = Trials()
best_result = fmin(
        fn=objective, 
        space=search_space,
        algo=algo,
        max_evals=32,
        trials=trials)

  0%|          | 0/32 [00:00<?, ?trial/s, best loss=?]

Successfully registered model 'decision_tree_wine'.
2025/04/17 10:30:24 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 1

Created version '1' of model 'decision_tree_wine'.

2025/04/17 10:30:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run unique-wolf-355 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/3ea42d16afd0485db1bacbc43010b1ed.

2025/04/17 10:30:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



  3%|▎         | 1/32 [00:04<02:11,  4.23s/trial, best loss: -0.8837944664031621]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:27 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 2

Created version '2' of model 'decision_tree_wine'.

2025/04/17 10:30:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run redolent-bird-293 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/a3885b327caa4ea2a2c2c17153bd4a4f.

2025/04/17 10:30:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



  6%|▋         | 2/32 [00:07<01:52,  3.74s/trial, best loss: -0.8837944664031621]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:30 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 3

Created version '3' of model 'decision_tree_wine'.

2025/04/17 10:30:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run learned-penguin-731 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/41785548d9604513b360868c06ff90c7.

2025/04/17 10:30:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



  9%|▉         | 3/32 [00:10<01:39,  3.42s/trial, best loss: -0.8837944664031621]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 4

Created version '4' of model 'decision_tree_wine'.

2025/04/17 10:30:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run lyrical-lynx-422 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/0337937b3f324d79837d4d163288bb4a.

2025/04/17 10:30:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 12%|█▎        | 4/32 [00:13<01:32,  3.30s/trial, best loss: -0.8837944664031621]

Successfully registered model 'random_forest_wine'.
2025/04/17 10:30:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: random_forest_wine, version 1

Created version '1' of model 'random_forest_wine'.

2025/04/17 10:30:37 INFO mlflow.tracking._tracking_service.client: 🏃 View run hilarious-eel-178 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/40bd3d66ac254824979a47411c1137e3.

2025/04/17 10:30:37 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 16%|█▌        | 5/32 [00:17<01:30,  3.34s/trial, best loss: -0.9739130434782609]

Registered model 'random_forest_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:40 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: random_forest_wine, version 2

Created version '2' of model 'random_forest_wine'.

2025/04/17 10:30:40 INFO mlflow.tracking._tracking_service.client: 🏃 View run legendary-steed-488 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/640f40e326db4e18b41dcdf204ffec1b.

2025/04/17 10:30:40 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 19%|█▉        | 6/32 [00:20<01:27,  3.37s/trial, best loss: -0.9739130434782609]

Successfully registered model 'logistic_regression_wine'.
2025/04/17 10:30:43 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 1

Created version '1' of model 'logistic_regression_wine'.

2025/04/17 10:30:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run spiffy-shrimp-834 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/56a7e9b358c749db8e02038238e16f14.

2025/04/17 10:30:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 22%|██▏       | 7/32 [00:23<01:21,  3.25s/trial, best loss: -0.9739130434782609]

Registered model 'random_forest_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:47 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: random_forest_wine, version 3

Created version '3' of model 'random_forest_wine'.

2025/04/17 10:30:47 INFO mlflow.tracking._tracking_service.client: 🏃 View run entertaining-roo-524 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/4a2d8f45d8cf48568a11bfffe043c965.

2025/04/17 10:30:47 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 25%|██▌       | 8/32 [00:27<01:22,  3.44s/trial, best loss: -0.9739130434782609]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 2

Created version '2' of model 'logistic_regression_wine'.

2025/04/17 10:30:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run brawny-bat-763 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/8d8b17c1c61041e7bb732b20998e9bbd.

2025/04/17 10:30:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 28%|██▊       | 9/32 [00:30<01:16,  3.33s/trial, best loss: -0.9913043478260869]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:53 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 5

Created version '5' of model 'decision_tree_wine'.

2025/04/17 10:30:53 INFO mlflow.tracking._tracking_service.client: 🏃 View run amazing-frog-224 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/a778139a57984d91b800f44e323a1203.

2025/04/17 10:30:53 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 31%|███▏      | 10/32 [00:33<01:11,  3.25s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:30:57 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 3

Created version '3' of model 'logistic_regression_wine'.

2025/04/17 10:30:57 INFO mlflow.tracking._tracking_service.client: 🏃 View run suave-bug-202 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/f207e54d9c3d4e06a3ac3ceb160bccb4.

2025/04/17 10:30:57 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 34%|███▍      | 11/32 [00:37<01:09,  3.29s/trial, best loss: -0.9913043478260869]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:00 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 6

Created version '6' of model 'decision_tree_wine'.

2025/04/17 10:31:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run bustling-newt-120 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/543971640d1341298e5dc284ef3ac380.

2025/04/17 10:31:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 38%|███▊      | 12/32 [00:40<01:05,  3.29s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 4

Created version '4' of model 'logistic_regression_wine'.

2025/04/17 10:31:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run victorious-seal-971 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/d130e4f5aa6d48248db2954467211151.

2025/04/17 10:31:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 41%|████      | 13/32 [00:43<01:00,  3.20s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 5

Created version '5' of model 'logistic_regression_wine'.

2025/04/17 10:31:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run intrigued-stork-317 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/dc873a9aaf914285ac16818a0eaea9c6.

2025/04/17 10:31:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 44%|████▍     | 14/32 [00:46<00:56,  3.15s/trial, best loss: -0.9913043478260869]

Registered model 'random_forest_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: random_forest_wine, version 4

Created version '4' of model 'random_forest_wine'.

2025/04/17 10:31:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run inquisitive-mink-971 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/4f2ad39410ea4f71988a92ff03b05c75.

2025/04/17 10:31:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 47%|████▋     | 15/32 [00:49<00:55,  3.28s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 6

Created version '6' of model 'logistic_regression_wine'.

2025/04/17 10:31:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run redolent-cod-471 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/edf4b3cb2ff74c008460979012a12efe.

2025/04/17 10:31:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 50%|█████     | 16/32 [00:53<00:51,  3.24s/trial, best loss: -0.9913043478260869]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:16 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 7

Created version '7' of model 'decision_tree_wine'.

2025/04/17 10:31:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run sincere-crow-714 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/712a70f5f79e4725ad8ea3442c80aa0d.

2025/04/17 10:31:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 53%|█████▎    | 17/32 [00:56<00:48,  3.22s/trial, best loss: -0.9913043478260869]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 8

Created version '8' of model 'decision_tree_wine'.

2025/04/17 10:31:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run funny-hawk-1 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/2b4fa19fe7894ee4acee767837b00ef0.

2025/04/17 10:31:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 56%|█████▋    | 18/32 [00:59<00:44,  3.19s/trial, best loss: -0.9913043478260869]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 9

Created version '9' of model 'decision_tree_wine'.

2025/04/17 10:31:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run auspicious-smelt-246 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/8ed99954bc304a7d856220781397aa9f.

2025/04/17 10:31:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 59%|█████▉    | 19/32 [01:02<00:41,  3.17s/trial, best loss: -0.9913043478260869]

Registered model 'decision_tree_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:25 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree_wine, version 10

Created version '10' of model 'decision_tree_wine'.

2025/04/17 10:31:25 INFO mlflow.tracking._tracking_service.client: 🏃 View run nebulous-smelt-807 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/d00a41c0ad3d484282184ad218c5847b.

2025/04/17 10:31:25 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 62%|██████▎   | 20/32 [01:05<00:37,  3.11s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:28 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 7

Created version '7' of model 'logistic_regression_wine'.

2025/04/17 10:31:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run funny-slug-870 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/35bc58f005d5423d8f3b33579e92e71a.

2025/04/17 10:31:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 66%|██████▌   | 21/32 [01:08<00:34,  3.11s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 8

Created version '8' of model 'logistic_regression_wine'.

2025/04/17 10:31:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run victorious-whale-175 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/1ae8e9fa51e54046823a9ace4a02e31f.

2025/04/17 10:31:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 69%|██████▉   | 22/32 [01:11<00:30,  3.09s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 9

Created version '9' of model 'logistic_regression_wine'.

2025/04/17 10:31:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run nebulous-ape-307 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/fb0e0f8468774d68bb040ba7581291ce.

2025/04/17 10:31:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 72%|███████▏  | 23/32 [01:14<00:27,  3.03s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 10

Created version '10' of model 'logistic_regression_wine'.

2025/04/17 10:31:37 INFO mlflow.tracking._tracking_service.client: 🏃 View run abundant-vole-493 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/06696fde39304900836b50d4a9dd646e.

2025/04/17 10:31:37 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 75%|███████▌  | 24/32 [01:17<00:24,  3.00s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:40 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 11

Created version '11' of model 'logistic_regression_wine'.

2025/04/17 10:31:40 INFO mlflow.tracking._tracking_service.client: 🏃 View run trusting-squirrel-38 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/cc62124214bf4f5d9919cd13e5a0fce5.

2025/04/17 10:31:40 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 78%|███████▊  | 25/32 [01:20<00:20,  2.97s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:43 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 12

Created version '12' of model 'logistic_regression_wine'.

2025/04/17 10:31:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run handsome-loon-280 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/caad33a02f1d45528e578c5fd03a5dad.

2025/04/17 10:31:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 81%|████████▏ | 26/32 [01:23<00:17,  2.95s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 13

Created version '13' of model 'logistic_regression_wine'.

2025/04/17 10:31:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run enthused-fowl-841 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/1d2bc93b2a7e477d9f20dc377c0ed822.

2025/04/17 10:31:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 84%|████████▍ | 27/32 [01:26<00:15,  3.01s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 14

Created version '14' of model 'logistic_regression_wine'.

2025/04/17 10:31:49 INFO mlflow.tracking._tracking_service.client: 🏃 View run puzzled-deer-213 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/5f24bbb7541449db85ed73983388f68f.

2025/04/17 10:31:49 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 88%|████████▊ | 28/32 [01:29<00:12,  3.06s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 15

Created version '15' of model 'logistic_regression_wine'.

2025/04/17 10:31:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run glamorous-loon-557 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/0c54db1ebc174f4985b8824996bbbf12.

2025/04/17 10:31:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 91%|█████████ | 29/32 [01:32<00:09,  3.07s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:56 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 16

Created version '16' of model 'logistic_regression_wine'.

2025/04/17 10:31:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run puzzled-midge-114 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/2a5390ccd915498e975e6168e6270e53.

2025/04/17 10:31:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 94%|█████████▍| 30/32 [01:35<00:06,  3.09s/trial, best loss: -0.9913043478260869]

Registered model 'random_forest_wine' already exists. Creating a new version of this model...
2025/04/17 10:31:59 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: random_forest_wine, version 5

Created version '5' of model 'random_forest_wine'.

2025/04/17 10:31:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run mysterious-squid-387 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/ae014de3122449e8a1de8af955b9508a.

2025/04/17 10:31:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



 97%|█████████▋| 31/32 [01:38<00:03,  3.12s/trial, best loss: -0.9913043478260869]

Registered model 'logistic_regression_wine' already exists. Creating a new version of this model...
2025/04/17 10:32:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_wine, version 17

Created version '17' of model 'logistic_regression_wine'.

2025/04/17 10:32:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run loud-koi-409 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/95ce6fe6884b4c2da13ae4aafe8908a8.

2025/04/17 10:32:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.



100%|██████████| 32/32 [01:42<00:00,  3.19s/trial, best loss: -0.9913043478260869]


In [13]:
with mlflow.start_run(run_name="feature_selection"):
    rf_selector = RandomForestClassifier(n_estimators=100, random_state=24)
    rf_selector.fit(X_train_scaled, y_train)
    
    feature_importance = pd.DataFrame({
        'feature': X_train_scaled.columns,
        'importance': rf_selector.feature_importances_
    }).sort_values('importance', ascending=False)
    
    top_features = feature_importance.head(7)['feature'].tolist()

    X_train_selected = X_train_scaled[top_features]
    X_val_selected = X_val_scaled[top_features]
    X_test_selected = X_test_scaled[top_features]

    X_train_selected.to_parquet('save_data_lab2/x_train_selected.parquet')
    X_val_selected.to_parquet('save_data_lab2/x_val_selected.parquet')
    X_test_selected.to_parquet('save_data_lab2/x_test_selected.parquet')
    
    rf_selected = RandomForestClassifier(n_estimators=100, random_state=24)
    rf_selected.fit(X_train_selected, y_train)
    acc_selected = cross_val_score(rf_selected, X_train_selected, y_train, cv=5).mean()
    mlflow.log_metric('cv_accuracy_selected_features', acc_selected)
    
    mlflow.sklearn.log_model(
        rf_selector,
        artifact_path='feature_selector',
        registered_model_name='wine_feature_selector'
    )

    mlflow.sklearn.log_model(
        rf_selected,
        artifact_path='selected_features_model',
        registered_model_name='wine_classifier_selected_features'
    )
    
    mlflow.end_run()

Successfully registered model 'wine_feature_selector'.
2025/04/17 10:21:33 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: wine_feature_selector, version 1
Created version '1' of model 'wine_feature_selector'.
Successfully registered model 'wine_classifier_selected_features'.
2025/04/17 10:21:36 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: wine_classifier_selected_features, version 1
Created version '1' of model 'wine_classifier_selected_features'.
2025/04/17 10:21:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run feature_selection at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/6f9f314e2eb842329232a729f1cc9f46.
2025/04/17 10:21:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.


Top 3 models are random forest with max_features = 5 and n_estimators = 429, random forest model with max_features = 2 and n_estimators = 290, and random forest with max_features = 2 and n_estimators = 342.

In [14]:
with mlflow.start_run():
    mlflow.set_tags({
        "Model": "random-forest",
        "Data": "final-comprehensive-logging"
    })
    
    mlflow.log_params({
        "n_estimators": 429,
        "max_features": 5
    })
    
    rf = RandomForestClassifier(n_estimators=429, max_features=5, random_state=24)
    rf.fit(X_train_scaled, y_train)

    mlflow.log_artifact('save_data_lab2/x_train.parquet', 'training_data')
    mlflow.log_artifact('save_data_lab2/y_train.parquet', 'training_data')
    
    mlflow.log_artifact('save_data_lab2/x_val.parquet', 'validation_data')
    mlflow.log_artifact('save_data_lab2/y_val.parquet', 'validation_data')
    
    mlflow.log_artifact('save_data_lab2/x_test.parquet', 'test_data')
    mlflow.log_artifact('save_data_lab2/y_test.parquet', 'test_data')
    
    train_acc = accuracy_score(y_train, rf.predict(X_train_scaled))
    val_acc = accuracy_score(y_val, rf.predict(X_val_scaled))
    test_acc = accuracy_score(y_test, rf.predict(X_test_scaled))
    
    mlflow.log_metrics({
        'train_accuracy': train_acc,
        'validation_accuracy': val_acc,
        'test_accuracy': test_acc
    })
    
    mlflow.sklearn.log_model(
        rf, 
        artifact_path="random_forest_model",
        registered_model_name="wine_classifier_final"
    )
    
    print(f"Training Accuracy: {train_acc:.4f}")
    print(f"Validation Accuracy: {val_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")

mlflow.end_run()

Successfully registered model 'wine_classifier_final'.
2025/04/17 10:22:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: wine_classifier_final, version 1
Created version '1' of model 'wine_classifier_final'.
2025/04/17 10:22:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run bouncy-stoat-140 at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2/runs/7a04b29aa37b4c208f09e4dcbcc2aa00.
2025/04/17 10:22:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://mlflow-service-982318063078.us-west2.run.app/#/experiments/2.


Training Accuracy: 1.0000
Validation Accuracy: 0.9655
Test Accuracy: 0.9444
