Databricks notebook source
============================================================
JOB 2: INFERENCE NOTEBOOK
- Loads registered model from MLflow Model Registry
- Loads input data (could be from CSV, DBFS, or sklearn dataset)
- Runs inference
- Saves results with timestamp
============================================================

In [None]:
# COMMAND ----------
from datetime import datetime, timezone

import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from pyspark.sql import SparkSession
from sklearn.datasets import load_iris

In [None]:
# Obtain the Spark session (reusing an existing one if present); it is used
# further down to convert the pandas results into a Spark DataFrame.
spark = SparkSession.builder.getOrCreate()

In [None]:
# ------------------------------------------------------------
# 1️⃣ Load data for inference
# ------------------------------------------------------------
# The sklearn iris dataset stands in for real input data. Work on a copy of
# its frame so the column renaming does not touch the object held by `iris`.
iris = load_iris(as_frame=True)
df = iris.frame.copy()

# Positional rename: four measurement columns followed by the label column.
df.columns = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "target",
]

In [None]:
# Derived area features (length x width per flower part). According to the
# original note these mirror the features created at training time.
df["petal_area"] = df["petal_length"].mul(df["petal_width"])
df["sepal_area"] = df["sepal_length"].mul(df["sepal_width"])

In [None]:
# Remove the label column so the frame looks like unlabeled data to score.
X_new = df.drop(columns="target")

In [None]:
# Preview the first rows of the feature frame. `display` is not imported in
# this file; presumably it is supplied by the notebook runtime — confirm.
display(X_new.head())

In [None]:
# COMMAND ----------
# ------------------------------------------------------------
# 2️⃣ Load model by alias
# ------------------------------------------------------------
# Coordinates of the registered model; they are combined into the model URI
# in the next cell. The catalog/schema/name split looks like a Unity Catalog
# three-level namespace — TODO confirm against the training job.
CATALOG = "main"
SCHEMA = "default"
MODEL_NAME = "IrisClassifier"
# Registry alias selecting which model version to load.
ALIAS = "production"

In [None]:
# A registered model with a three-level name is addressed as
# <catalog>.<schema>.<model> (dot-separated), with the alias appended after
# "@". Separating the parts with "/" does not form a valid `models:/` URI and
# the model fails to resolve.
# NOTE(review): on clusters where the MLflow registry does not default to
# Unity Catalog, mlflow.set_registry_uri("databricks-uc") may be required
# before this call — confirm for the target runtime.
model_uri = f"models:/{CATALOG}.{SCHEMA}.{MODEL_NAME}@{ALIAS}"

# Load through the generic pyfunc flavor so inference only depends on the
# model's predict() interface.
model = mlflow.pyfunc.load_model(model_uri=model_uri)
print(f"✅ Loaded model from MLflow registry: {model_uri}")

In [None]:
# Second confirmation message, naming the model rather than the full URI.
print(f"✅ Loaded model '{MODEL_NAME}' from MLflow registry.")

In [None]:
# COMMAND ----------
# ------------------------------------------------------------
# 3️⃣ Perform inference
# ------------------------------------------------------------
# Score every row of the feature frame with the loaded model.
preds = model.predict(X_new)

# Keep the inputs next to their predictions; copy so X_new stays untouched.
results_df = X_new.copy()
results_df["prediction"] = preds

# Stamp the whole batch with one UTC time, stored as a formatted string.
# datetime.utcnow() is deprecated since Python 3.12, so an aware "now" in UTC
# is used instead; the resulting string is identical.
results_df["inference_timestamp"] = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

In [None]:
# Preview the first rows of the scored results (features, prediction and
# inference timestamp).
display(results_df.head())

COMMAND ----------
------------------------------------------------------------
4️⃣ Save inference results
------------------------------------------------------------

In [None]:
# Option 1: Save to DBFS CSV
# The UTC timestamp (second resolution) in the file name distinguishes the
# output of separate runs. datetime.utcnow() is deprecated since Python 3.12;
# an aware UTC "now" produces the same formatted value.
run_stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
output_path = f"/dbfs/tmp/inference_results_{run_stamp}.csv"

# index=False: the positional row index carries no information worth saving.
results_df.to_csv(output_path, index=False)
print(f"✅ Inference results saved to: {output_path}")

In [None]:
# Option 2 (optional): keep a history of runs in a Delta table.
# Convert the pandas results to a Spark DataFrame, then append so earlier
# runs already in the table are preserved.
# NOTE(review): the table name is unqualified, so it presumably lands in the
# session's current catalog/schema — confirm that is intended.
spark_df = spark.createDataFrame(results_df)
(
    spark_df.write
    .format("delta")
    .mode("append")
    .saveAsTable("mlops_inference_results")
)

In [None]:
# Confirmation message for the Delta append performed in the previous cell.
print("✅ Inference results appended to Delta table: mlops_inference_results")