In [0]:
%pip install --upgrade threadpoolctl torch
# Restart the Python process after the install above — presumably so the upgraded packages are
# the ones picked up by the imports in later cells.
# NOTE(review): the versions are unpinned, so each run may resolve different releases; consider pinning.
dbutils.library.restartPython()

In [0]:
# Expose the raw test table name as a notebook widget (default: dp_ml_titanic_test_raw).
dbutils.widgets.text("test_table_raw", "dp_ml_titanic_test_raw")
test_table_raw = dbutils.widgets.get("test_table_raw").strip()
if not test_table_raw:
    raise ValueError("Widget 'test_table_raw' must name a table in dp_ml_raw.dp_ml_titanic")

# The widget value is free text, so it must never be spliced into a SQL statement verbatim.
# Quote it as ONE identifier (embedded backticks are doubled) and resolve it with spark.table,
# which only parses a table name — extra SQL smuggled into the widget can no longer execute.
quoted_table = "`" + test_table_raw.replace("`", "``") + "`"
pdf = spark.table(f"dp_ml_raw.dp_ml_titanic.{quoted_table}").toPandas()

In [0]:
# Sanity-check the load: report the frame's size, then show only the first rows.
# Ending the cell with the expression lets the notebook render it as a table instead of
# printing the whole DataFrame as plain text.
print(f"Loaded {pdf.shape[0]} rows x {pdf.shape[1]} columns")
pdf.head()

In [0]:
# Work on an independent copy. A plain `test_pdf = pdf` only aliases the same DataFrame, so any
# column added to `test_pdf` afterwards would silently appear on the raw `pdf` as well and make
# re-running the cells non-idempotent.
test_pdf = pdf.copy()

In [0]:
# Add a `Survived` column filled with None to the test frame.
# NOTE(review): presumably the feature pipeline expects this column to be present at transform
# time (a `num__Survived` output column is dropped right after the transform) — confirm.
test_pdf['Survived'] = None

In [0]:
import mlflow
from mlflow import spark as mlflow_spark  # NOTE(review): not referenced in the cells visible here
from mlflow.tracking import MlflowClient

# Make the catalog/schema that holds the Titanic assets the session default.
spark.sql("USE CATALOG `dp_ml_raw`")
spark.sql("USE SCHEMA `dp_ml_titanic`")

NAME = "dp_ml_raw.dp_ml_titanic.titanic_feature_pipeline"
client = MlflowClient()

# Resolve the highest registered version number. Collect the versions first so that a model
# with no registered versions fails with a clear message instead of the opaque
# "max() arg is an empty sequence".
registered_versions = [int(mv.version) for mv in client.search_model_versions(f"name='{NAME}'")]
if not registered_versions:
    raise ValueError(f"No registered versions found for model '{NAME}'")
latest_ver = max(registered_versions)
print(latest_ver)

# Load that version through the sklearn flavor and run the feature transform on the test frame.
uri = f"models:/{NAME}/{latest_ver}"
pipeline = mlflow.sklearn.load_model(uri)
X_test_features = pipeline.transform(test_pdf)


In [0]:
# Show the transformed features together with their column index.
print(X_test_features, X_test_features.columns)

# Remove the feature derived from the placeholder `Survived` column; errors="ignore" keeps this
# a no-op when the column is absent, so the cell can be re-run safely.
X_test_features = X_test_features.drop(columns=["num__Survived"], errors="ignore")

In [0]:
# Load the registered scaler model and apply it to the engineered test features.
from mlflow import spark as mlflow_spark  # NOTE(review): not referenced in the cells visible here

NAME = "dp_ml_raw.dp_ml_titanic.titanic-scaler"
client = MlflowClient()

# Resolve the highest registered version number. Collect the versions first so that a model
# with no registered versions fails with a clear message instead of the opaque
# "max() arg is an empty sequence".
registered_versions = [int(mv.version) for mv in client.search_model_versions(f"name='{NAME}'")]
if not registered_versions:
    raise ValueError(f"No registered versions found for model '{NAME}'")
latest_ver = max(registered_versions)
print(latest_ver)

# Load that version through the sklearn flavor and scale the features.
uri = f"models:/{NAME}/{latest_ver}"
pipeline = mlflow.sklearn.load_model(uri)
X_test = pipeline.transform(X_test_features)
print(X_test)

In [0]:
import mlflow
import numpy as np
import torch
from mlflow.tracking import MlflowClient

NAME = "dp_ml_raw.dp_ml_titanic.titanic-pytorch"
client = MlflowClient()

# Resolve the model through its "champion" alias rather than a version number.
uri = f"models:/{NAME}@champion"

# Load the PyTorch model and switch it to evaluation mode.
model = mlflow.pytorch.load_model(uri)
model.eval()

# Normalise the scaled features to a float32 array before building the tensor: torch.tensor
# cannot infer a shape from a pandas DataFrame, which is what the scaler returns when
# scikit-learn is configured for pandas output. np.asarray accepts both ndarray and DataFrame.
X_test_tensor = torch.tensor(np.asarray(X_test, dtype=np.float32), dtype=torch.float32)

# Run the input on the same device as the model's weights; a model restored onto an accelerator
# would otherwise fail with a device mismatch. This is a no-op for a CPU model.
first_param = next(model.parameters(), None)
if first_param is not None:
    X_test_tensor = X_test_tensor.to(first_param.device)

# Inference only: no gradients are needed.
with torch.no_grad():
    y_pred = model(X_test_tensor)

print(y_pred[:5])


In [0]:
import pandas as pd

# Move the model output to host memory as a NumPy array, logging its type and shape on the way.
print(type(y_pred))
y_np = y_pred.detach().cpu().numpy()
print(y_np.shape)

# One flat "prediction" column, one row per flattened output value.
df = pd.DataFrame({"prediction": y_np.ravel()})
print(df)

# Promote the positional index to a primary-key-like `row_id` column, stored as int64 so it
# maps onto a BIGINT column.
df = (
    df.rename_axis("row_id")
    .reset_index()
    .astype({"row_id": "int64"})
)
print(df)

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import LongType

# Relies on the `spark` session that is already in scope (it is used from the first query cell
# onward); no new session is created here.
spark.sql("USE CATALOG dp_ml_raw")
spark.sql("CREATE SCHEMA IF NOT EXISTS dp_ml_raw.inference")

full_table_name = "dp_ml_raw.inference.titanic_pytorch_predictions"

# Convert the pandas predictions to a Spark DataFrame and pin `row_id` to LongType explicitly.
sdf = spark.createDataFrame(df)
sdf = sdf.withColumn("row_id", sdf["row_id"].cast(LongType()))

# Replace the table (and its schema) with the latest predictions; switch the mode to "append"
# to accumulate runs instead.
delta_writer = sdf.write.format("delta").mode("overwrite").option("overwriteSchema", "true")
delta_writer.saveAsTable(full_table_name)