In [0]:
# Import Libraries

import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [0]:

# Step 1: Generate diabetes data from sklearn.dataset
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

In [0]:
# Step 2: Build a random forest model with different parameters
param_grid = {
    "n_estimators": [100, 200],
    "max_depth": [5, 10]
}


In [0]:
experiment_name = "/Shared/MLFlow_Training"

In [0]:
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    experiment = mlflow.create_experiment(experiment_name)

experiment

Out[16]: <Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/3203432005005608', creation_time=1688717112813, experiment_id='3203432005005608', last_update_time=1688717112813, lifecycle_stage='active', name='/Shared/MLFlow_Training', tags={'mlflow.experiment.sourceName': '/Shared/MLFlow_Training',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'vivekananthan_ms@hotmail.com',
 'mlflow.ownerId': '2563259287553374'}>

In [0]:
mlflow.set_experiment(experiment_id=experiment.experiment_id)

Out[17]: <Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/3203432005005608', creation_time=1688717112813, experiment_id='3203432005005608', last_update_time=1688717112813, lifecycle_stage='active', name='/Shared/MLFlow_Training', tags={'mlflow.experiment.sourceName': '/Shared/MLFlow_Training',
 'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'vivekananthan_ms@hotmail.com',
 'mlflow.ownerId': '2563259287553374'}>

In [0]:


# Step 3: Log each run into mlflow
for n_estimators in param_grid['n_estimators']:
    for max_depth in param_grid['max_depth']:
        with mlflow.start_run():
            # Split data into train and test sets
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Build the model
            model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
            model.fit(X_train, y_train)

            # Step 4: Visualize the error metrics
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            print(f"Model with n_estimators={n_estimators}, max_depth={max_depth}: Accuracy={accuracy}")

            # Step 5: Log accuracy metric
            mlflow.log_metric("accuracy", accuracy)
            mlflow.log_params({"n_estimator":n_estimators, "max_depth":max_depth})
            mlflow.sklearn.log_model(model, "RF_model")



Model with n_estimators=100, max_depth=5: Accuracy=0.0
Model with n_estimators=100, max_depth=10: Accuracy=0.0
Model with n_estimators=200, max_depth=5: Accuracy=0.0
Model with n_estimators=200, max_depth=10: Accuracy=0.0


In [0]:
runs = mlflow.search_runs(experiment_names = [experiment_name])
runs 

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy,params.n_estimator,params.max_depth,tags.mlflow.databricks.cluster.id,tags.mlflow.databricks.notebookRevisionID,tags.mlflow.user,tags.mlflow.databricks.workspaceID,tags.mlflow.databricks.workspaceURL,tags.mlflow.databricks.notebookPath,tags.mlflow.source.name,tags.mlflow.runName,tags.mlflow.databricks.notebookID,tags.mlflow.source.type,tags.mlflow.log-model.history,tags.mlflow.databricks.cluster.info,tags.mlflow.databricks.notebook.commandID,tags.mlflow.databricks.webappURL,tags.mlflow.databricks.cluster.libraries
0,2e2dc3a939c5426eb300efdc2ad28cda,3203432005005608,FINISHED,dbfs:/databricks/mlflow-tracking/3203432005005...,2023-07-07 08:09:25.046000+00:00,2023-07-07 08:09:29.502000+00:00,0.0,200.0,10.0,0707-055810-1kgkevzv,1688717369686,vivekananthan_ms@hotmail.com,3203338864279735,https://community.cloud.databricks.com,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,agreeable-kit-238,2797004045850863,NOTEBOOK,"[{""artifact_path"":""RF_model"",""flavors"":{""pytho...","{""cluster_name"":""MLOps_Training"",""spark_versio...",7494652566083212984_8030526791293572849_d19e08...,https://community.cloud.databricks.com,"{""installable"":[],""redacted"":[]}"
1,44f914a719754c3a8e995cadc9da919f,3203432005005608,FINISHED,dbfs:/databricks/mlflow-tracking/3203432005005...,2023-07-07 08:09:20.811000+00:00,2023-07-07 08:09:24.913000+00:00,0.0,200.0,5.0,0707-055810-1kgkevzv,1688717365097,vivekananthan_ms@hotmail.com,3203338864279735,https://community.cloud.databricks.com,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,dapper-foal-859,2797004045850863,NOTEBOOK,"[{""artifact_path"":""RF_model"",""flavors"":{""pytho...","{""cluster_name"":""MLOps_Training"",""spark_versio...",7494652566083212984_8030526791293572849_d19e08...,https://community.cloud.databricks.com,"{""installable"":[],""redacted"":[]}"
2,d774cf9050bd4cb0885c33cbe45d4fbf,3203432005005608,FINISHED,dbfs:/databricks/mlflow-tracking/3203432005005...,2023-07-07 08:09:16.810000+00:00,2023-07-07 08:09:20.671000+00:00,0.0,100.0,10.0,0707-055810-1kgkevzv,1688717360878,vivekananthan_ms@hotmail.com,3203338864279735,https://community.cloud.databricks.com,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,intrigued-lamb-223,2797004045850863,NOTEBOOK,"[{""artifact_path"":""RF_model"",""flavors"":{""pytho...","{""cluster_name"":""MLOps_Training"",""spark_versio...",7494652566083212984_8030526791293572849_d19e08...,https://community.cloud.databricks.com,"{""installable"":[],""redacted"":[]}"
3,0623aafdcfbf417e8bdea43ae0ee7aaf,3203432005005608,FINISHED,dbfs:/databricks/mlflow-tracking/3203432005005...,2023-07-07 08:09:13.090000+00:00,2023-07-07 08:09:16.707000+00:00,0.0,100.0,5.0,0707-055810-1kgkevzv,1688717356869,vivekananthan_ms@hotmail.com,3203338864279735,https://community.cloud.databricks.com,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,carefree-tern-361,2797004045850863,NOTEBOOK,"[{""artifact_path"":""RF_model"",""flavors"":{""pytho...","{""cluster_name"":""MLOps_Training"",""spark_versio...",7494652566083212984_8030526791293572849_d19e08...,https://community.cloud.databricks.com,"{""installable"":[],""redacted"":[]}"
4,2e4c13a2400d4d4493c3d3b3459808a9,3203432005005608,FAILED,dbfs:/databricks/mlflow-tracking/3203432005005...,2023-07-07 08:09:01.207000+00:00,2023-07-07 08:09:01.956000+00:00,0.0,,,0707-055810-1kgkevzv,1688717342148,vivekananthan_ms@hotmail.com,3203338864279735,https://community.cloud.databricks.com,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,/Users/vivekananthan_ms@hotmail.com/MLOps Trai...,upbeat-swan-787,2797004045850863,NOTEBOOK,,"{""cluster_name"":""MLOps_Training"",""spark_versio...",7494652566083212984_8077908717670377400_38354f...,https://community.cloud.databricks.com,"{""installable"":[],""redacted"":[]}"


In [0]:
# Choose the best model 

best_run = runs.sort_values('metrics.accuracy').iloc[0]

In [0]:
# Register the best model

mlflow.register_model("runs:/"+best_run.run_id + "/RF_model","best_model")

[0;31m---------------------------------------------------------------------------[0m
[0;31mRestException[0m                             Traceback (most recent call last)
File [0;32m<command-3203432005005611>:3[0m
[1;32m      1[0m [38;5;66;03m# Register the best model[39;00m
[0;32m----> 3[0m [43mmlflow[49m[38;5;241;43m.[39;49m[43mregister_model[49m[43m([49m[38;5;124;43m"[39;49m[38;5;124;43mruns:/[39;49m[38;5;124;43m"[39;49m[38;5;241;43m+[39;49m[43mbest_run[49m[38;5;241;43m.[39;49m[43mrun_id[49m[43m [49m[38;5;241;43m+[39;49m[43m [49m[38;5;124;43m"[39;49m[38;5;124;43m/RF_model[39;49m[38;5;124;43m"[39;49m[43m,[49m[38;5;124;43m"[39;49m[38;5;124;43mbest_model[39;49m[38;5;124;43m"[39;49m[43m)[49m

File [0;32m/databricks/python/lib/python3.9/site-packages/mlflow/tracking/_model_registry/fluent.py:77[0m, in [0;36mregister_model[0;34m(model_uri, name, await_registration_for, tags)[0m
[1;32m     72[0m         eprint(
[1;32m     73[0