In [1]:
import mlflow
import pandas as pd
import numpy as np
import os
import random
import datetime
import yaml
import tempfile
from mlflow.tracking import MlflowClient

# =====================================================================
# 1. Configuration (Keep the same)
# =====================================================================

MLFLOW_TRACKING_URI = "http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

REGISTERED_MODEL_NAME = "Meralco_Classification_Model"

print(f"MLflow URI set to: {MLFLOW_TRACKING_URI}")

# =====================================================================
# 2. Extracting Required Schema by Downloading Artifacts (FINAL ATTEMPT)
# =====================================================================

print("\n--- Downloading and Inspecting Artifacts ---")
try:
    client = MlflowClient()
    
    # Get the latest model version
    # Note: Suppressing FutureWarning here for cleaner output
    model_version = client.get_latest_versions(REGISTERED_MODEL_NAME, stages=None)[0]
    run_id = model_version.run_id
    
    # The 'source' attribute contains the full path to the artifact folder, 
    # e.g., 'runs:/<run_id>/artifacts/model' or similar. 
    # We strip everything before the 'artifacts' folder name.
    source_parts = model_version.source.split('artifacts/')
    
    # If 'artifacts/' is present, use the part after it. If not, assume the model 
    # artifact path is the final part of the URI (the model folder name itself).
    if len(source_parts) > 1:
        artifact_subdir = source_parts[-1].strip('/')
    else:
        # Fallback assumption: artifact path is just the model folder name ('model')
        artifact_subdir = 'model' 
        
    print(f"Inferred Run ID: {run_id}")
    print(f"Inferred Artifact Subdirectory: {artifact_subdir}")
    
    # 1. Define local path and download the artifact (model folder)
    local_dir = tempfile.mkdtemp()
    print(f"Downloading artifact to temporary local directory: {local_dir}")

    # Use the run_id and inferred artifact_subdir for the download
    mlflow.artifacts.download_artifacts(
        run_id=run_id, 
        artifact_path=artifact_subdir, 
        dst_path=local_dir
    )
    
    # The MLmodel file is expected inside the downloaded folder
    model_folder = os.path.join(local_dir, artifact_subdir)
    mlmodel_path = os.path.join(model_folder, "MLmodel")

    # If the direct path doesn't exist, it means artifact_subdir was incorrect, 
    # but the download still saved the artifact correctly. Let's find the MLmodel file.
    if not os.path.exists(mlmodel_path):
        # Recursively search for MLmodel in the local_dir
        found = False
        for root, dirs, files in os.walk(local_dir):
            if "MLmodel" in files:
                mlmodel_path = os.path.join(root, "MLmodel")
                found = True
                print(f"MLmodel file found at: {mlmodel_path.replace(local_dir, '...')}")
                break
        if not found:
            raise FileNotFoundError("Could not locate the MLmodel file after downloading artifacts.")


    # 2. Read and parse the MLmodel YAML file
    with open(mlmodel_path, 'r') as f:
        mlmodel_config = yaml.safe_load(f)

    # 3. Extract the required column order from the signature
    import json
    # Ensure the signature exists before accessing it
    if 'signature' not in mlmodel_config or 'inputs' not in mlmodel_config['signature']:
        raise KeyError("MLmodel file does not contain a valid input signature.")
        
    input_schema_json = mlmodel_config['signature']['inputs']
    schema_data = json.loads(input_schema_json)
    
    # The columns array is usually nested inside 'fields'
    REQUIRED_FEATURES = [col['name'] for col in schema_data['fields']]

    print("SUCCESS: Retrieved Model Input Schema from MLmodel artifact.")
    print(f"Total Columns Required: {len(REQUIRED_FEATURES)}")
    print("--- Exact Required Feature Order ---")
    for i, col_name in enumerate(REQUIRED_FEATURES):
        print(f"  {i+1}. {col_name}")
        
except Exception as e:
    print(f"FATAL ERROR: Failed to download, locate, or parse MLmodel file.")
    print(f"Details: {e}")
    raise

# =====================================================================
# 3. Create Sample Input Data (USING EXTRACTED ORDER)
# =====================================================================

N_SAMPLES = 5 
sample_rows = []

for i in range(N_SAMPLES):
    row_data = {}
    for col in REQUIRED_FEATURES:
        # Determine data type based on our knowledge of the 26 columns
        if col == 'Cluster':
            row_data[col] = 0
        elif col == 'tln':
            row_data[col] = f"TLN{random.randint(100, 999)}"
        elif col == 'with_inc':
            row_data[col] = random.choice([0, 1])
        elif col == 'incident_date':
            row_data[col] = pd.Timestamp(datetime.datetime(2025, 10, i + 1))
        else:
            # Assume all other features are the 22 numerical features (float)
            row_data[col] = random.uniform(0.1, 1000)

    sample_rows.append(row_data)

# Create DataFrame, forcing the column order using the 'columns' argument
sample_input_df = pd.DataFrame(sample_rows, columns=REQUIRED_FEATURES)

print("\n--- Sample Input Data (Generated with Schema Order) ---")
print(f"Columns in test data: {list(sample_input_df.columns)}")
print(sample_input_df.head())

# =====================================================================
# 4. Run Prediction (The Test)
# =====================================================================

MODEL_URI = f"models:/{REGISTERED_MODEL_NAME}/latest" 

print("\n--- Starting Model Load ---")
try:
    loaded_model = mlflow.pyfunc.load_model(MODEL_URI)
    print("SUCCESS: Model successfully loaded from MLflow Registry!")
except Exception as e:
    print(f"ERROR: Failed to load model from registry.")
    raise

print("\n--- Running Prediction Test ---")
predictions = loaded_model.predict(sample_input_df)

print("\nPrediction Results:")
print(predictions)

print("\nTEST COMPLETE: Model is functional and available for deployment.")

MLflow URI set to: http://asa-c25d20a2-8350-4950-8064-1d8a819e702c.kt-wast-app.svc.cluster.local:80

--- Downloading and Inspecting Artifacts ---
FATAL ERROR: Failed to download or parse MLmodel file. Cannot proceed without manual file access.
Details: list index out of range


  model_version = client.get_latest_versions(REGISTERED_MODEL_NAME, stages=None)[0]


IndexError: list index out of range