In [0]:
df_clean = spark.read.parquet("/Volumes/workspace/default/projects/cleaned_data/mouse_viral_study_cleaned.parquet")

df_clean = df_clean.withColumn("virus_present", df_clean["virus_present"].cast("boolean"))

# display(df_clean)

df_pd = df_clean.toPandas()

In [0]:
import mlflow


In [0]:
CATALOG_NAME = "workspace"
SCHEMA_NAME = "default"

In [0]:
mvis_table = f"{CATALOG_NAME}.{SCHEMA_NAME}.mvis"
spark.sql(f"DROP TABLE IF EXISTS {mvis_table}")
df_clean.write.format("delta").saveAsTable(mvis_table)

In [0]:
import numpy as np
import pandas as pd
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.ensemble
import sklearn.svm

import matplotlib.pyplot as plt

from hyperopt import fmin, tpe, hp, SparkTrials, Trials, STATUS_OK
from hyperopt.pyll import scope

In [0]:
data_labels = df_pd['virus_present']
df_pd = df_pd.drop(['virus_present'], axis=1)

In [0]:
# Split 80/20 train-test
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
  df_pd,
  data_labels,
  test_size=0.2,
  random_state=1
)

In [0]:
mlflow.autolog()

2025/07/05 13:44:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [0]:
import mlflow
import mlflow.sklearn
import sklearn
from sklearn import svm, metrics
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

import databricks.connect as db_connect
import mlflow.tracking._model_registry.utils

mlflow.tracking._model_registry.utils._get_registry_uri_from_spark_session = lambda: "databricks-uc"

mlflow.login() 
spark_ctx = db_connect.DatabricksSession.builder.serverless(True).getOrCreate()

search_space = {
    'C': hp.loguniform('C', -3, 3),
    'kernel': hp.choice('kernel', ['linear', 'rbf', 'poly']),
    'gamma': hp.choice('gamma', ['scale', 'auto'])
}

def train_model(params):
    mlflow.sklearn.autolog()
    
    with mlflow.start_run(nested=True):
        model = svm.SVC(
            C=params['C'],
            kernel=params['kernel'],
            gamma=params['gamma'],
            probability=True,
            random_state=0
        )
        
        model.fit(X_train, y_train)
        
        predicted_probs = model.predict_proba(X_test)
        auc = sklearn.metrics.roc_auc_score(y_test, predicted_probs[:, 1])
        
        mlflow.log_metric('test_auc', auc)
        mlflow.sklearn.log_model(model, "model")
        
        return {'status': STATUS_OK, 'loss': -1 * auc, 'model':model}

trials = Trials()

with mlflow.start_run(run_name='svm_hyperopt') as main_run:
    best_params = fmin(
        fn=train_model,
        space=search_space,
        algo=tpe.suggest,
        max_evals=32,
        trials=trials
    )
    
    print("Best hyperparameters found:", best_params)

2025/07/05 13:44:56 INFO mlflow.utils.credentials: Successfully connected to MLflow hosted tracking server! Host: https://dbc-499a9713-b50f.cloud.databricks.com.


  0%|          | 0/32 [00:00<?, ?trial/s, best loss=?]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

  3%|▎         | 1/32 [00:16<08:38, 16.71s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

  6%|▋         | 2/32 [00:24<05:39, 11.33s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

  9%|▉         | 3/32 [00:32<04:52, 10.09s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 12%|█▎        | 4/32 [00:40<04:15,  9.12s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 16%|█▌        | 5/32 [00:47<03:46,  8.41s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 19%|█▉        | 6/32 [00:55<03:35,  8.28s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 22%|██▏       | 7/32 [01:03<03:24,  8.17s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 25%|██▌       | 8/32 [01:11<03:11,  7.96s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 28%|██▊       | 9/32 [01:18<02:57,  7.72s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 31%|███▏      | 10/32 [01:26<02:50,  7.77s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 34%|███▍      | 11/32 [01:37<03:02,  8.70s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 38%|███▊      | 12/32 [01:44<02:45,  8.27s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 41%|████      | 13/32 [01:50<02:27,  7.75s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 44%|████▍     | 14/32 [01:57<02:15,  7.53s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 47%|████▋     | 15/32 [02:04<02:04,  7.35s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 50%|█████     | 16/32 [02:11<01:56,  7.28s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 53%|█████▎    | 17/32 [02:18<01:46,  7.11s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 56%|█████▋    | 18/32 [02:25<01:37,  6.97s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 59%|█████▉    | 19/32 [02:33<01:33,  7.20s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 62%|██████▎   | 20/32 [02:40<01:25,  7.16s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 66%|██████▌   | 21/32 [02:46<01:16,  7.00s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 69%|██████▉   | 22/32 [02:53<01:10,  7.05s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 72%|███████▏  | 23/32 [03:00<01:02,  6.99s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 75%|███████▌  | 24/32 [03:11<01:04,  8.03s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 78%|███████▊  | 25/32 [03:17<00:52,  7.55s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 81%|████████▏ | 26/32 [03:24<00:43,  7.30s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 84%|████████▍ | 27/32 [03:30<00:35,  7.09s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 88%|████████▊ | 28/32 [03:37<00:27,  6.92s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 91%|█████████ | 29/32 [03:45<00:21,  7.12s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 94%|█████████▍| 30/32 [03:52<00:14,  7.16s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 97%|█████████▋| 31/32 [04:00<00:07,  7.50s/trial, best loss: -1.0]

Uploading artifacts:   0%|          | 0/3 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]




Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

100%|██████████| 32/32 [04:08<00:00,  7.49s/trial, best loss: -1.0]100%|██████████| 32/32 [04:08<00:00,  7.75s/trial, best loss: -1.0]
Best hyperparameters found: {'C': 0.19525541873031102, 'gamma': 1, 'kernel': 0}


In [0]:
best_run = mlflow.search_runs(
  order_by=['metrics.test_auc DESC', 'start_time DESC'],
  max_results=10,
).iloc[0]
print('Best Run')
print('AUC: {}'.format(best_run["metrics.test_auc"]))
print('Run ID : {}'.format(best_run["run_id"]))

Best Run
AUC: 1.0
Run ID : 8b0e74a62db6487d8978a18e59377c1d


In [0]:
import mlflow
from mlflow.models.signature import infer_signature
import mlflow.sklearn

model_uri = 'runs:/8b0e74a62db6487d8978a18e59377c1d/model'

# Load the model
model = mlflow.sklearn.load_model(model_uri)

if mlflow.active_run() is not None:
    mlflow.end_run()

# Start a new run
with mlflow.start_run() as run:
    # (Optional) Retrain if needed
    # model.fit(X_train, y_train)

    # Infer signature
    signature = infer_signature(X_train, model.predict(X_train))

    # Log model with signature
    mlflow.sklearn.log_model(model, "model", signature=signature)

    # New URI for registration
    new_model_uri = f"runs:/{run.info.run_id}/model"




Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

Registered model 'workspace.default.mvis_model_m' already exists. Creating a new version of this model...


Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

[0;31m---------------------------------------------------------------------------[0m
[0;31mMlflowException[0m                           Traceback (most recent call last)
File [0;32m<command-6879499967690092>, line 28[0m
[1;32m     25[0m     new_model_uri [38;5;241m=[39m [38;5;124mf[39m[38;5;124m"[39m[38;5;124mruns:/[39m[38;5;132;01m{[39;00mrun[38;5;241m.[39minfo[38;5;241m.[39mrun_id[38;5;132;01m}[39;00m[38;5;124m/model[39m[38;5;124m"[39m
[1;32m     27[0m [38;5;66;03m# Register using new URI[39;00m
[0;32m---> 28[0m mlflow[38;5;241m.[39mregister_model(new_model_uri, [38;5;124mf[39m[38;5;124m"[39m[38;5;132;01m{[39;00mCATALOG_NAME[38;5;132;01m}[39;00m[38;5;124m.[39m[38;5;132;01m{[39;00mSCHEMA_NAME[38;5;132;01m}[39;00m[38;5;124m.mvis_model_m[39m[38;5;124m"[39m)

File [0;32m/databricks/python/lib/python3.11/site-packages/mlflow/tracking/_model_registry/fluent.py:77[0m, in [0;36mregister_model[0;34m(model_uri, name, await_registration_

In [0]:
import mlflow

mlflow.artifacts.download_artifacts('runs:/8b0e74a62db6487d8978a18e59377c1d/model', dst_path="./svm")


  from google.protobuf import service as _service


Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

'/Workspace/Users/dhinaz2025@gmail.com/Mouse Viral Infection Study/svm/model'