In [4]:
!pip install google-cloud-storage



In [6]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import os 

# --- Environment / tracking configuration ---------------------------------

# GCS access: point the Google client libraries at the service-account key file.
# The path is relative, so it is presumably resolved against the kernel's working directory.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "cs777.json"

# Send all MLflow runs to the tracking server (the Minikube service, reached locally).
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

# --- Data -------------------------------------------------------------------

# Iris dataset: feature matrix and class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

# Relies on X_train, y_train, X_test, y_test from the train/test split cell.

# Model 1: deliberately weak baseline.
# It underfits on purpose: very strong regularization (tiny C) and only a
# small slice of the training data.
C_poor = 0.001
solver_poor = 'liblinear'

with mlflow.start_run(run_name="Poor_Model"):
    # Fit on just the first 30 training rows to keep the model weak.
    clf_poor = LogisticRegression(
        C=C_poor,
        solver=solver_poor,
        random_state=42,
    ).fit(X_train[:30], y_train[:30])

    # Score against the held-out split.
    predictions_poor = clf_poor.predict(X_test)
    accuracy_poor = accuracy_score(y_test, predictions_poor)

    # Record hyperparameters, the metric, and the fitted model on this run.
    # Passing registered_model_name also registers the model under "demo_model".
    mlflow.log_params({"C": C_poor, "solver": solver_poor})
    mlflow.log_metric("accuracy", accuracy_poor)
    mlflow.sklearn.log_model(
        clf_poor,
        artifact_path="logistic-regression-model-poor",
        registered_model_name="demo_model",
    )

    print(f"Poor Logistic Regression model accuracy: {accuracy_poor:.2f}")



Registered model 'demo_model' already exists. Creating a new version of this model...
2023/11/08 15:16:04 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: demo_model, version 1
Created version '1' of model 'demo_model'.


Poor Logistic Regression model accuracy: 0.63


In [8]:
# Model 2: the well-performing counterpart.
# Default-strength regularization (C = 1.0) and the complete training split.
C_good = 1.0
solver_good = 'liblinear'

with mlflow.start_run(run_name="Good_Model"):
    # Train on every row of the training split.
    clf_good = LogisticRegression(
        C=C_good,
        solver=solver_good,
        random_state=42,
    ).fit(X_train, y_train)

    # Score against the held-out split.
    predictions_good = clf_good.predict(X_test)
    accuracy_good = accuracy_score(y_test, predictions_good)

    # Record hyperparameters, the metric, and the fitted model on this run.
    # Passing registered_model_name also registers the model under "demo_model".
    mlflow.log_params({"C": C_good, "solver": solver_good})
    mlflow.log_metric("accuracy", accuracy_good)
    mlflow.sklearn.log_model(
        clf_good,
        artifact_path="logistic-regression-model-good",
        registered_model_name="demo_model",
    )

    print(f"Good Logistic Regression model accuracy: {accuracy_good:.2f}")


Registered model 'demo_model' already exists. Creating a new version of this model...
2023/11/08 15:16:16 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: demo_model, version 2
Created version '2' of model 'demo_model'.


Good Logistic Regression model accuracy: 1.00
