In [1]:
import mlflow
import pandas as pd
import numpy as np
import os
import random
import datetime
import yaml
import tempfile
from mlflow.tracking import MlflowClient

# =====================================================================
# 1. Configuration (Keep the same)
# =====================================================================

MLFLOW_TRACKING_URI = "http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

REGISTERED_MODEL_NAME = "Meralco_Classification_Model"

print(f"MLflow URI set to: {MLFLOW_TRACKING_URI}")

# =====================================================================
# 2. Extracting Required Schema by Downloading Artifacts (FINALIZED STEP)
# =====================================================================

print("\n--- Downloading and Inspecting Artifacts ---")
try:
    client = MlflowClient()
    
    # Get the latest model version
    model_version = client.get_latest_versions(REGISTERED_MODEL_NAME, stages=None)[0]
    run_id = model_version.run_id
    
    # **FINAL ATTEMPT FIX**: Assume the artifact path is the common PyCaret/MLflow default "model"
    artifact_subdir = 'model' 
        
    print(f"Inferred Run ID: {run_id}")
    print(f"Assumed Artifact Subdirectory: {artifact_subdir}")
    
    # 1. Define local path and download the artifact (model folder)
    local_dir = tempfile.mkdtemp()
    print(f"Downloading artifact to temporary local directory: {local_dir}")

    # Use the full URI for maximum compatibility
    full_artifact_uri = f"runs:/{run_id}/{artifact_subdir}"
    
    mlflow.artifacts.download_artifacts(
        artifact_uri=full_artifact_uri, 
        dst_path=local_dir
    )
    
    # 2. Find the MLmodel file in the downloaded artifacts
    mlmodel_path = None
    found = False
    for root, dirs, files in os.walk(local_dir):
        if "MLmodel" in files:
            mlmodel_path = os.path.join(root, "MLmodel")
            found = True
            print(f"MLmodel file found at: {mlmodel_path.replace(local_dir, '...')}")
            break
            
    if not found:
        raise FileNotFoundError("Could not locate the MLmodel file after downloading artifacts.")

    # 3. Read and parse the MLmodel YAML file
    with open(mlmodel_path, 'r') as f:
        mlmodel_config = yaml.safe_load(f)

    # 4. Extract the required column order from the signature
    import json
    if 'signature' not in mlmodel_config or 'inputs' not in mlmodel_config['signature']:
        # This covers cases where signature is missing, but fields is in 'databricks_model_input_schema'
        # Not common, but handles potential PyCaret/MLflow version quirks.
        raise KeyError("MLmodel file does not contain a valid input signature.")
        
    input_schema_json = mlmodel_config['signature']['inputs']
    schema_data = json.loads(input_schema_json)
    
    # The columns array is usually nested inside 'fields'
    REQUIRED_FEATURES = [col['name'] for col in schema_data['fields']]

    print("SUCCESS: Retrieved Model Input Schema from MLmodel artifact.")
    print(f"Total Columns Required: {len(REQUIRED_FEATURES)}")
    print("--- Exact Required Feature Order ---")
    for i, col_name in enumerate(REQUIRED_FEATURES):
        print(f"  {i+1}. {col_name}")
        
except Exception as e:
    print(f"FATAL ERROR: Failed to download, locate, or parse MLmodel file.")
    print(f"Details: {e}")
    # We must stop here if the schema cannot be retrieved automatically
    raise

# =====================================================================
# 3. Create Sample Input Data (USING EXTRACTED ORDER)
# =====================================================================

N_SAMPLES = 5 
sample_rows = []

for i in range(N_SAMPLES):
    row_data = {}
    for col in REQUIRED_FEATURES:
        # Determine data type based on our knowledge of the 26 columns
        if col == 'Cluster':
            row_data[col] = 0
        elif col == 'tln':
            row_data[col] = f"TLN{random.randint(100, 999)}"
        elif col == 'with_inc':
            row_data[col] = random.choice([0, 1])
        elif col == 'incident_date':
            # Use Pandas Timestamps for date compatibility
            row_data[col] = pd.Timestamp(datetime.datetime(2025, 10, i + 1))
        else:
            # Assume all other features are the 22 numerical features (float)
            row_data[col] = random.uniform(0.1, 1000)

    sample_rows.append(row_data)

# Create DataFrame, forcing the column order using the 'columns' argument
sample_input_df = pd.DataFrame(sample_rows, columns=REQUIRED_FEATURES)

print("\n--- Sample Input Data (Generated with Schema Order) ---")
print(f"Columns in test data: {list(sample_input_df.columns)}")
print(sample_input_df.head())

# =====================================================================
# 4. Run Prediction (The Test)
# =====================================================================

MODEL_URI = f"models:/{REGISTERED_MODEL_NAME}/latest" 

print("\n--- Starting Model Load ---")
try:
    loaded_model = mlflow.pyfunc.load_model(MODEL_URI)
    print("SUCCESS: Model successfully loaded from MLflow Registry!")
except Exception as e:
    print(f"ERROR: Failed to load model from registry.")
    raise

print("\n--- Running Prediction Test ---")
predictions = loaded_model.predict(sample_input_df)

print("\nPrediction Results:")
print(predictions)

print("\nTEST COMPLETE: Model is functional and available for deployment.")

MLflow URI set to: http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80

--- Downloading and Inspecting Artifacts ---
Inferred Run ID: f274ee64dd8741ffa7e28e70e99cc0d1
Assumed Artifact Subdirectory: model
Downloading artifact to temporary local directory: /tmp/tmphy53q_f1


  model_version = client.get_latest_versions(REGISTERED_MODEL_NAME, stages=None)[0]


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

FATAL ERROR: Failed to download, locate, or parse MLmodel file.
Details: The following failures occurred while downloading one or more artifacts from http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80/api/2.0/mlflow-artifacts/artifacts/371010811712805945/f274ee64dd8741ffa7e28e70e99cc0d1/artifacts:
##### File model #####
API request to http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80/api/2.0/mlflow-artifacts/artifacts/371010811712805945/f274ee64dd8741ffa7e28e70e99cc0d1/artifacts/model failed with exception HTTPConnectionPool(host='asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local', port=80): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/371010811712805945/f274ee64dd8741ffa7e28e70e99cc0d1/artifacts/model (Caused by ResponseError('too many 500 error responses'))


MlflowException: The following failures occurred while downloading one or more artifacts from http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80/api/2.0/mlflow-artifacts/artifacts/371010811712805945/f274ee64dd8741ffa7e28e70e99cc0d1/artifacts:
##### File model #####
API request to http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80/api/2.0/mlflow-artifacts/artifacts/371010811712805945/f274ee64dd8741ffa7e28e70e99cc0d1/artifacts/model failed with exception HTTPConnectionPool(host='asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local', port=80): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/371010811712805945/f274ee64dd8741ffa7e28e70e99cc0d1/artifacts/model (Caused by ResponseError('too many 500 error responses'))