In [None]:
import os
from pathlib import Path
import sys

import pandas as pd
import polars as pl
import numpy as np
import joblib

# Import the competition inference server. Some runtimes do not expose
# `kaggle_evaluation` on sys.path; in that case look for a `kaggle_evaluation`
# folder inside the datasets attached under /kaggle/input, make the folders
# that contain one importable, and retry the import.
try:
    import kaggle_evaluation.nfl_inference_server as nfl_inf
except ModuleNotFoundError:
    _input_root = Path('/kaggle/input')
    _attached = (
        sorted((p for p in _input_root.iterdir() if p.is_dir()), key=lambda p: p.name)
        if _input_root.exists()
        else []
    )
    # Competition folder(s) first, then every other attached dataset.
    _by_priority = (
        [p for p in _attached if 'nfl-big-data-bowl-2026-prediction' in p.name]
        + [p for p in _attached if 'nfl-big-data-bowl-2026-prediction' not in p.name]
    )
    # Each insert(0, ...) pushes earlier entries back, so walk the list in
    # reverse: the highest-priority folder is inserted last and ends up first
    # on sys.path. Each folder is inserted at most once.
    for _dataset in reversed(_by_priority):
        if (_dataset / 'kaggle_evaluation').exists():
            sys.path.insert(0, str(_dataset))
    # If nothing usable was found this raises ModuleNotFoundError again.
    import kaggle_evaluation.nfl_inference_server as nfl_inf

# Global cache so the model and feature functions are loaded once, on the
# first predict() call. Every key that _lazy_load_model_and_modules() writes
# and predict() reads is declared here; all values except 'loaded' stay None
# until the loader fills them in.
_MODEL_CACHE = {
    'loaded': False,                 # flipped to True once loading succeeded
    'mx': None,                      # meta['models']['x'] from the model file
    'my': None,                      # meta['models']['y'] from the model file
    'feat_cols': None,               # meta['feature_columns']
    'player_pos_vals': None,         # meta.get('player_position_values')
    'attached_root': None,           # dataset folder the model was found in
    'model_path': None,              # path of the loaded best_model.pkl
    # Callables imported from the attached features.py
    'add_time_lag_features': None,
    'prepare_features': None,
    'transform_for_inference': None,
}


def _find_features_module(model_root=None, input_root=None):
    """Search all attached datasets for features.py

    Search order:
      1. Three fixed locations inside ``model_root`` (when given).
      2. ``model_root`` itself, recursively.
      3. Every other dataset folder under the input root in name order,
         skipping the competition data folder. Within a dataset the
         top-level ``features.py`` wins over nested ones; nested matches are
         taken in sorted path order so the result is deterministic.

    Args:
        model_root: Optional path where model was found, to search there first
        input_root: Optional directory that holds the attached datasets.
            Defaults to ``/kaggle/input``.

    Returns:
        Path of the first ``features.py`` found, or ``None`` if the input
        root does not exist or no dataset contains the file.
    """
    root = Path('/kaggle/input') if input_root is None else Path(input_root)
    if not root.exists():
        print(f"⚠️  {root} does not exist")
        return None

    print(f"\nSearching for features.py in {root}")

    search_order = []

    # First check if features.py is in the same location as the model
    if model_root:
        model_root = Path(model_root)
        print(f"  Checking model location: {model_root.name}")
        for candidate in (
            model_root / 'features.py',
            model_root / 'scikitlearn' / 'default' / '1' / 'features.py',
            model_root / 'for_kaggle' / 'features.py',
        ):
            if candidate.exists():
                print(f"✓ Found features.py with model at: {candidate}")
                return candidate
        # The fixed locations above are not exhaustive, so the model dataset
        # still takes part in the recursive search below (and goes first).
        if model_root.is_dir():
            search_order.append(model_root)

    # Remaining datasets in a stable order; the competition folder is skipped.
    search_order.extend(
        p
        for p in sorted(root.iterdir(), key=lambda p: p.name)
        if p.is_dir()
        and 'nfl-big-data-bowl-2026-prediction' not in p.name
        and p != model_root
    )

    for dataset in search_order:
        print(f"  Checking: {dataset.name}")
        # Check root level
        top_level = dataset / 'features.py'
        if top_level.exists():
            print(f"✓ Found features.py in: {dataset}")
            return top_level
        # Check nested locations
        nested = sorted(dataset.rglob('features.py'))
        if nested:
            print(f"✓ Found features.py at: {nested[0]}")
            return nested[0]

    print("⚠️  No features.py found in any dataset")
    return None


def _find_model_file(input_root=None):
    """Search all attached datasets for best_model.pkl

    Dataset folders are visited in name order and the competition data
    folder is skipped. Inside each dataset the known layouts are tried first
    (Kaggle Model Registry, ``models/``, ``for_kaggle/models/``); if none
    matches, the dataset is searched recursively and the first match in
    sorted path order is used, so the result is deterministic.

    Args:
        input_root: Optional directory that holds the attached datasets.
            Defaults to ``/kaggle/input``.

    Returns:
        ``(dataset_dir, model_path)`` for the first model found, or
        ``(None, None)`` if the input root is missing or no model exists.
    """
    root = Path('/kaggle/input') if input_root is None else Path(input_root)
    if not root.exists():
        print(f"⚠️  {root} does not exist")
        return None, None

    datasets = sorted((p for p in root.iterdir() if p.is_dir()), key=lambda p: p.name)

    print("\n=== Searching for Model ===")
    print(f"Available folders: {[p.name for p in datasets]}")

    # (sub-directory holding best_model.pkl, message printed on a hit),
    # listed in priority order.
    known_layouts = (
        (Path('scikitlearn', 'default', '1'), "✓ Found Kaggle model registry"),
        (Path('models'), "✓ Found model dataset"),
        (Path('for_kaggle', 'models'), "✓ Found model in for_kaggle subfolder"),
    )

    for dataset in datasets:
        # Skip the competition data folder
        if 'nfl-big-data-bowl-2026-prediction' in dataset.name:
            print(f"  Skipping competition folder: {dataset.name}")
            continue

        print(f"  Checking dataset: {dataset.name}")

        for subdir, found_msg in known_layouts:
            model_path = dataset / subdir / 'best_model.pkl'
            if model_path.is_file():
                print(f"{found_msg}: {dataset}")
                print(f"  Model at: {model_path}")
                return dataset, model_path

        # Search for any best_model.pkl; sort so the pick does not depend on
        # filesystem enumeration order, and ignore non-file matches.
        matches = sorted(f for f in dataset.glob('**/best_model.pkl') if f.is_file())
        if matches:
            print(f"✓ Found model in: {dataset}")
            print(f"  Model at: {matches[0]}")
            return dataset, matches[0]

        print(f"  No model found in {dataset.name}")

    print("\n⚠️  No model found in any attached dataset")
    return None, None


def _lazy_load_model_and_modules():
    """Populate ``_MODEL_CACHE`` with the model and feature functions, once.

    Does nothing if the cache is already marked as loaded. Otherwise it
    locates ``best_model.pkl`` and ``features.py`` among the attached
    datasets, imports ``add_time_lag_features``, ``prepare_features`` and
    ``transform_for_inference`` from ``features``, and loads the model file.
    The loaded object is read as a mapping with ``['models']['x']``,
    ``['models']['y']``, ``['feature_columns']`` and an optional
    ``'player_position_values'`` entry.

    Raises:
        FileNotFoundError: if no model file or no features.py is found.
    """
    if _MODEL_CACHE['loaded']:
        return

    print("\n" + "=" * 50)
    print("LOADING MODEL AND FEATURES")
    print("=" * 50)

    # Find model file
    attached, model_path = _find_model_file()
    if not attached or not model_path:
        raise FileNotFoundError(
            "No attached model found. Please attach a dataset containing best_model.pkl. "
            "Make sure the dataset is attached in the notebook's 'Input' section."
        )

    _MODEL_CACHE['attached_root'] = attached
    _MODEL_CACHE['model_path'] = model_path

    # Find features.py (may be in different dataset than model, or with the model)
    features_path = _find_features_module(model_root=attached)
    if not features_path:
        raise FileNotFoundError(
            "\nfeatures.py not found in any attached dataset.\n"
            "Solutions:\n"
            "  1. Upload features.py as a separate dataset and attach it, OR\n"
            "  2. Upload for_kaggle.zip as a dataset (not Model Registry) - it contains both model and features.py"
        )

    # Add features location to path. Guarded so that a retry after a failed
    # load does not stack duplicate entries on sys.path.
    features_dir = str(features_path.parent)
    if features_dir not in sys.path:
        sys.path.insert(0, features_dir)
    print(f"\n✓ Loading features from: {features_path}")

    from features import add_time_lag_features, prepare_features, transform_for_inference  # noqa: F401
    _MODEL_CACHE['add_time_lag_features'] = add_time_lag_features
    _MODEL_CACHE['prepare_features'] = prepare_features
    _MODEL_CACHE['transform_for_inference'] = transform_for_inference

    # Load model
    # NOTE: joblib.load unpickles the file and can therefore execute arbitrary
    # code; only attach model files from a source you trust.
    print(f"✓ Loading model from: {model_path}")
    meta = joblib.load(model_path)
    _MODEL_CACHE['mx'] = meta['models']['x']
    _MODEL_CACHE['my'] = meta['models']['y']
    _MODEL_CACHE['feat_cols'] = meta['feature_columns']
    _MODEL_CACHE['player_pos_vals'] = meta.get('player_position_values', None)

    print(f"✓ Model loaded successfully with {len(_MODEL_CACHE['feat_cols'])} features")
    # Not every estimator exposes `random_state`; this is log output only, so
    # fall back to a placeholder instead of failing the whole load.
    print(f"✓ X model random_state: {getattr(_MODEL_CACHE['mx'], 'random_state', 'n/a')}")
    print(f"✓ Y model random_state: {getattr(_MODEL_CACHE['my'], 'random_state', 'n/a')}")
    print("=" * 50 + "\n")

    # Mark as loaded only after every step above has succeeded.
    _MODEL_CACHE['loaded'] = True


def _to_pandas(df):
    """Return ``df`` as pandas: Polars DataFrames are converted via
    ``to_pandas()``; any other object is handed back unchanged.
    """
    return df.to_pandas() if isinstance(df, pl.DataFrame) else df


def predict(test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame | pd.DataFrame:
    """Inference callback handed to the NFL inference server.

    On the first call the model and the feature functions are loaded into
    ``_MODEL_CACHE``; every call then builds features for the incoming batch
    and predicts x and y with the two cached models.

    Args:
        test: Rows to predict for.
        test_input: Companion frame that is left-joined onto ``test`` on
            whichever of game_id / play_id / nfl_id / frame_id both share.

    Returns:
        A pandas DataFrame with columns ``x`` and ``y`` and the same number
        of rows as ``test`` (checked with an assertion).
    """
    _lazy_load_model_and_modules()
    cache = _MODEL_CACHE

    # The feature pipeline is called with pandas frames.
    targets = _to_pandas(test)
    history = _to_pandas(test_input)

    # Join on the identifier columns present in both frames; with no shared
    # identifiers the targets are used on their own.
    shared_keys = [
        name
        for name in ('game_id', 'play_id', 'nfl_id', 'frame_id')
        if name in targets.columns and name in history.columns
    ]
    if not shared_keys:
        frame = targets.copy()
    else:
        frame = pd.merge(targets, history, on=shared_keys, how='left', suffixes=(None, '_in'))

    # Feature engineering for inference. prepare_features' return value is
    # discarded, exactly as before.
    frame = cache['add_time_lag_features'](frame)
    cache['prepare_features'](frame)
    features = cache['transform_for_inference'](
        frame, cache['feat_cols'], cache['player_pos_vals']
    )

    # x is predicted before y, each by its own model.
    result = pd.DataFrame({
        'x': cache['mx'].predict(features),
        'y': cache['my'].predict(features),
    })
    assert len(result) == len(targets)
    return result


# Wrap predict() in the competition inference server, then start it: outside
# a competition rerun use the local gateway, during a rerun serve requests.
inference_server = nfl_inf.NFLInferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Local run: point the gateway at the published public competition files.
    inference_server.run_local_gateway(('/kaggle/input/nfl-big-data-bowl-2026-prediction/',))
else:
    inference_server.serve()
