In [0]:
# Installing MLflow 
%pip install mlflow

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
Collecting mlflow
  Downloading mlflow-2.15.1-py3-none-any.whl (26.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.3/26.3 MB 39.0 MB/s eta 0:00:00
Collecting markdown<4,>=3.3
  Downloading Markdown-3.7-py3-none-any.whl (106 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 106.3/106.3 kB 14.5 MB/s eta 0:00:00
Collecting querystring-parser<2
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)
Collecting Flask<4
  Downloading flask-3.0.3-py3-none-any.whl (101 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.7/101.7 kB 15.1 MB/s eta 0:00:00
Collecting alembic!=1.10.0,<2
  Downloading alembic-1.13.2-py3-none-any.whl (232 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 233.0/233.0 kB 31.1 MB/s eta 0:00:00
Collecting sqlalchemy<3,>=1.4.0
  Downloading SQLAlchemy-2.0.32-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
     ━━━━━━━━━━━━━━━

In [0]:
# Importing required libraries
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from mlflow.models import infer_signature

In [0]:
# Read winequality-red.csv from the UCI repository into a DataFrame.
# The file uses ';' as its field separator rather than ','.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(filepath_or_buffer=url, delimiter=";")

In [0]:
# Separate the target column ("quality") from the remaining feature columns
y = data["quality"]
X = data.drop(columns=["quality"])

In [0]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [0]:
# Enable automatic MLflow logging for scikit-learn estimators.
# log_models=False: the training cell saves the fitted model explicitly with
# mlflow.sklearn.log_model(...), so letting autolog persist it as well would
# store the same model twice in every run.
mlflow.sklearn.autolog(log_models=False)

In [0]:
# Train, evaluate and record a RandomForest regressor inside one MLflow run.
# Everything logged inside the `with` block is attached to that run.
with mlflow.start_run():

    # Hyperparameters for this run
    n_estimators, random_state = 100, 42

    # Record each hyperparameter on the run
    for param_name, param_value in (
        ("n_estimators", n_estimators),
        ("random_state", random_state),
    ):
        mlflow.log_param(param_name, param_value)

    # Fit the regressor on the training split
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    model.fit(X_train, y_train)

    # Score the held-out test split
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    # Record the evaluation metrics on the run
    for metric_name, metric_value in {
        "mean_squared_error": mse,
        "r2_score": r2,
    }.items():
        mlflow.log_metric(metric_name, metric_value)

    # Derive the model's input/output schema from the training features and
    # the model's predictions on them, then save the model with that schema.
    signature = infer_signature(X_train, model.predict(X_train))
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="random_forest_model",
        signature=signature,
    )

    # Echo the metrics in the cell output
    print(f"Mean Squared Error: {mse}", f"R2 Score: {r2}", sep="\n")

2024/08/16 21:15:59 INFO mlflow.utils.databricks_utils: Failed to create databricks SDK workspace client, error: ValueError('default auth: cannot configure default credentials')
2024/08/16 21:16:00 INFO mlflow.utils.databricks_utils: Failed to create databricks SDK workspace client, error: ValueError('default auth: cannot configure default credentials')
2024/08/16 21:16:00 INFO mlflow.utils.databricks_utils: Failed to create databricks SDK workspace client, error: ValueError('default auth: cannot configure default credentials')
2024/08/16 21:16:01 INFO mlflow.utils.databricks_utils: Failed to create databricks SDK workspace client, error: ValueError('default auth: cannot configure default credentials')
2024/08/16 21:16:01 INFO mlflow.utils.databricks_utils: Failed to create databricks SDK workspace client, error: ValueError('default auth: cannot configure default credentials')
2024/08/16 21:16:02 INFO mlflow.utils.databricks_utils: Failed to create databricks SDK workspace client, erro

Mean Squared Error: 0.30123812499999997
R2 Score: 0.5390429623873638
