In [2]:
import pandas as pd
import numpy as np
import joblib
from sklearn.impute import SimpleImputer

def inference(csv_file_path, model_path='stacking_ensemble_model.pkl'):
    """
    Make predictions using the trained stacking ensemble model.

    Args:
        csv_file_path (str): Path to the CSV file containing the data to predict on.
            It must contain an 'ID' column in addition to the feature columns.
        model_path (str): Path to the joblib-serialised model. Defaults to
            'stacking_ensemble_model.pkl' (resolved against the working directory).
    Returns:
        output (pd.DataFrame): DataFrame containing 'ID' and 'result' columns,
            one row per row of the input file.
    Raises:
        ValueError: If the 'ID' column is missing, the file has no rows, a feature
            listed in the model's ``feature_names_in_`` is absent from the file, or
            a feature column has no observed value at all (no mean can be computed).
    """
    # NOTE(security): joblib.load unpickles the file, which can execute arbitrary
    # code. Only load model files that come from a trusted source.
    stacking_ensemble = joblib.load(model_path)

    # Columns that are removed before the remaining features are passed to the model.
    columns_to_drop = [
        'Column17', 'Column3', 'Column6', 'Column4', 'Column8', 'Column14',
        'Column2', 'Column5', 'Column0', 'Column19', 'Column15', 'Column12',
        'Column20', 'Column11', 'Column10', 'Column9', 'Column13', 'Column16', 'Column21'
    ]

    # Read the new data
    data = pd.read_csv(csv_file_path)

    if 'ID' not in data.columns:
        raise ValueError("Input data must contain an 'ID' column.")
    if len(data) == 0:
        raise ValueError("Input data contains no rows to predict on.")

    # Keep the IDs for the output frame
    IDs = data['ID']

    # Remove the identifier and every unwanted column that is actually present.
    X_inference = data.drop(columns=['ID'] + columns_to_drop, errors='ignore')

    # Put the features in the order the model reports it was fitted with.
    # feature_names_in_ only exists when the model was fitted on named columns,
    # so fall back to the file's own column order when it is not available.
    expected_features = getattr(stacking_ensemble, 'feature_names_in_', None)
    if expected_features is not None:
        expected_features = list(expected_features)
        missing_features = [col for col in expected_features if col not in X_inference.columns]
        if missing_features:
            raise ValueError(
                f"Input data is missing feature columns required by the model: {missing_features}"
            )
        # Columns the model does not list are left out of the selection.
        X_inference = X_inference[expected_features]

    # A column without a single observed value has no mean; SimpleImputer would
    # silently drop it and the column labels below would no longer fit the array.
    all_missing = X_inference.isna().all()
    empty_columns = all_missing[all_missing].index.tolist()
    if empty_columns:
        raise ValueError(
            f"Cannot mean-impute columns that are entirely missing: {empty_columns}"
        )

    # Handle missing values by imputing with the column mean.
    # NOTE(review): the means are computed from the inference data itself; if
    # training used statistics from the training set, persist that imputer and
    # reuse it here instead -- confirm against the training code.
    imputer = SimpleImputer(strategy='mean')
    X_inference_imputed = pd.DataFrame(
        imputer.fit_transform(X_inference),
        columns=X_inference.columns,
        index=X_inference.index,
    )

    # Make predictions
    y_pred = stacking_ensemble.predict(X_inference_imputed)

    # Output predictions with IDs
    output = pd.DataFrame({'ID': IDs, 'result': y_pred})

    return output


In [3]:
# Score the test file; the trailing expression makes the notebook render the table.
test_predictions = inference('/content/X_Test_Data_Input.csv')
test_predictions

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



Unnamed: 0,ID,result
0,07cf2025382f6325b316e128b1b90999,0
1,eb972eb3a1f8d0d1a13f45e7c07d37d4,0
2,ee35e164b3ddc25a9f40243b81ad290d,0
3,28229ccd7bad7dd83324a4175a7e0531,0
4,2f94873da2c332d28f111742818e0fbb,0
...,...,...
261707,03439d629190be73a4b382325ea46547,0
261708,10d17d09c22e9c2f77f0667a17b00d0e,0
261709,c22938dab5fc0cfae07f3a910f2eb177,0
261710,2bda3964c16fe191fd8735850c454d26,0
