In [None]:
import os
from pathlib import Path
import sys
from datetime import datetime
import json

import pandas as pd
import polars as pl
import numpy as np
import joblib

# Import the competition's inference-server module. If it is not already
# importable, look for a `kaggle_evaluation` directory inside the datasets
# attached under /kaggle/input and put its parent directory on sys.path.
try:
    import kaggle_evaluation.nfl_inference_server as nfl_inf
except ModuleNotFoundError:
    root = Path('/kaggle/input')
    datasets = [p for p in root.iterdir() if p.is_dir()] if root.exists() else []
    # Stable sort: folders whose name contains the competition slug come
    # first; every other folder keeps its listing order behind them.
    datasets.sort(key=lambda p: 'nfl-big-data-bowl-2026-prediction' not in p.name)
    search_dirs = [str(p) for p in datasets if (p / 'kaggle_evaluation').exists()]
    # Prepend the whole group at once. Inserting one entry at a time at
    # index 0 would reverse the order and push the competition folder
    # behind the other datasets.
    sys.path[:0] = search_dirs
    # Retry; if the package is still missing this raises ModuleNotFoundError.
    import kaggle_evaluation.nfl_inference_server as nfl_inf

# Submission tracking info.
# Read the clock once so the human-readable timestamp and the ID always
# describe the same instant (two separate now() calls can straddle a
# second boundary and disagree).
_submission_time = datetime.now()
SUBMISSION_CREATED = _submission_time.strftime("%Y-%m-%d %H:%M:%S")
SUBMISSION_ID = _submission_time.strftime("%Y%m%d_%H%M%S")

print("\n" + "=" * 60)
print("LOADING MODEL AND FEATURES")
print("=" * 60)
print(f"Submission Created: {SUBMISSION_CREATED}")
print(f"Submission ID: {SUBMISSION_ID}")


def _find_model():
    """Search all attached datasets for nfl_model_v*.pkl files.
    Returns path to the FIRST model found that matches the pattern.
    """
    root = Path('/kaggle/input')
    if not root.exists():
        print("⚠️  /kaggle/input does not exist")
        return None
    
    print("\n=== Searching for Model ===")
    folders = [p.name for p in root.iterdir() if p.is_dir()]
    print(f"Available folders: {folders}")
    print("POLICY: Only accepting nfl_model_v*.pkl (NEW pattern)")
    print("        Rejecting best_model_*.pkl (OLD pattern with NumPy issues)")
    
    for dataset in root.iterdir():
        if not dataset.is_dir():
            continue
        
        # Skip the competition data folder
        if 'nfl-big-data-bowl-2026-prediction' in dataset.name.lower():
            print(f"  Skipping competition folder: {dataset.name}")
            continue
        
        print(f"  Checking dataset: {dataset.name}")
        
        # Search for nfl_model_v*.pkl files
        model_candidates = list(dataset.rglob('nfl_model_v*.pkl'))
        
        if model_candidates:
            model_path = model_candidates[0]
            print(f"✓ Found model in: {dataset}")
            print(f"  Model at: {model_path}")
            return model_path
    
    return None


def _find_features_module(model_root=None):
    """Search all attached datasets for features.py
    
    Args:
        model_root: Optional path where model was found, to search there first
    """
    root = Path('/kaggle/input')
    if not root.exists():
        print("⚠️  /kaggle/input does not exist")
        return None
    
    print(f"\nSearching for features.py in {root}")
    
    # First check if features.py is in the same location as the model
    if model_root:
        print(f"  Checking model location: {model_root.name}")
        for candidate in [
            model_root / 'features.py',
            model_root / 'scikitlearn' / 'default' / '1' / 'features.py',
            model_root / 'for_kaggle' / 'features.py'
        ]:
            if candidate.exists():
                print(f"✓ Found features.py with model at: {candidate}")
                return candidate
    
    # Search all folders for features.py
    for dataset in root.iterdir():
        if not dataset.is_dir():
            continue
        features_candidates = list(dataset.rglob('features.py'))
        if features_candidates:
            print(f"✓ Found features.py in: {dataset.name}")
            return features_candidates[0]
    
    return None


def _to_pandas(df):
    """Return ``df`` as a pandas frame.

    A polars ``DataFrame`` is converted with ``to_pandas()``; any other
    object is handed back unchanged.
    """
    return df.to_pandas() if isinstance(df, pl.DataFrame) else df


# Locate the model artifact first; nothing below can run without it.
model_path = _find_model()
if model_path is None:
    raise FileNotFoundError(
        "❌ No valid model found (nfl_model_v*.pkl pattern required).\n"
        "The old best_model_*.pkl files are rejected due to NumPy PCG64 compatibility issues.\n\n"
        "To fix:\n"
        "1. Delete old 'nfl-model-v*' datasets from Kaggle (without time component)\n"
        "2. Re-upload using timestamped dataset name: nfl-model-v{YYYYMMDD-HHMMSS}\n"
        "3. Ensure for_kaggle.zip contains nfl_model_v*.pkl (not best_model_*.pkl)\n"
        "4. Re-run the notebook"
    )

# The directory two levels above the model file is offered as the preferred
# place to look for features.py.
# NOTE(review): this presumably assumes the model sits one folder below the
# dataset root (e.g. <dataset>/for_kaggle/) - confirm against the upload layout.
features_path = _find_features_module(model_root=model_path.parent.parent)
if features_path is None:
    raise FileNotFoundError(
        "\nfeatures.py not found in any attached dataset.\n"
        "Solutions:\n"
        "  1. Upload features.py as a separate dataset and attach it, OR\n"
        "  2. Upload for_kaggle.zip as a dataset (not Model Registry) - it contains both model and features.py"
    )

print(f"\n✓ Found features at: {features_path}")
print(f"✓ Found model at: {model_path}")

# Hand both locations to predict() through the process environment as plain
# strings, so that it does not have to read them from module globals.
os.environ.update(
    NFL_MODEL_PATH=str(model_path),
    NFL_FEATURES_PATH=str(features_path),
)

print("✓ Stored paths in environment")
print("=" * 60, end="\n\n")


def predict(test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame | pd.DataFrame:
    """
    Inference function used by the NFL evaluation gateway.

    CRITICAL: This function is called IN THE SERVER PROCESS via gRPC.
    We read paths from environment variables and load the model fresh here.
    NO global variables are used to avoid pickle serialization issues.

    Args:
        test: Frame to predict for; the result has one row per row of it.
        test_input: Frame left-joined onto ``test`` using whichever of
            ``game_id``, ``play_id``, ``nfl_id`` and ``frame_id`` appear in
            both frames.

    Returns:
        A pandas DataFrame with columns ``x`` and ``y``.

    Raises:
        RuntimeError: If the ``NFL_MODEL_PATH`` / ``NFL_FEATURES_PATH``
            environment variables are unset, or if the number of
            predictions differs from the number of rows in ``test``.
    """
    # Read paths from environment (set in notebook process)
    model_path_str = os.environ.get('NFL_MODEL_PATH')
    features_path_str = os.environ.get('NFL_FEATURES_PATH')

    if not model_path_str or not features_path_str:
        raise RuntimeError("Model/features paths not found in environment")

    # Make the directory holding features.py importable.
    features_parent = str(Path(features_path_str).parent)
    if features_parent not in sys.path:
        sys.path.insert(0, features_parent)

    from features import add_time_lag_features, prepare_features, transform_for_inference

    # Load model in this process. The artifact is a mapping that holds one
    # estimator per coordinate plus the feature metadata used below.
    meta = joblib.load(model_path_str)
    mx = meta['models']['x']
    my = meta['models']['y']
    feat_cols = meta['feature_columns']
    player_pos_vals = meta.get('player_position_values', None)

    # Convert inputs to pandas for feature pipeline
    test_pd = _to_pandas(test)
    test_in_pd = _to_pandas(test_input)

    # Merge like training: left join on identifiers if available
    key_cols = [
        c for c in ['game_id', 'play_id', 'nfl_id', 'frame_id']
        if c in test_pd.columns and c in test_in_pd.columns
    ]
    if key_cols:
        df = pd.merge(test_pd, test_in_pd, on=key_cols, how='left', suffixes=(None, '_in'))
    else:
        df = test_pd.copy()

    # Feature engineering for inference
    df = add_time_lag_features(df)
    # Return value is intentionally discarded.
    # NOTE(review): presumably called for an in-place effect on df - confirm.
    _ = prepare_features(df)
    X_pred = transform_for_inference(df, feat_cols, player_pos_vals)

    # Predict
    px = mx.predict(X_pred)
    py = my.predict(X_pred)

    predictions = pd.DataFrame({'x': px, 'y': py})

    # Explicit check instead of `assert`: it survives `python -O` and reports
    # both counts, e.g. when the left join above duplicated rows.
    if len(predictions) != len(test_pd):
        raise RuntimeError(
            f"Prediction row count mismatch: produced {len(predictions)} rows "
            f"for {len(test_pd)} test rows"
        )
    return predictions


# Wrap predict() in the competition's inference server object.
inference_server = nfl_inf.NFLInferenceServer(predict)

# Without a non-empty KAGGLE_IS_COMPETITION_RERUN variable, drive the server
# through the local gateway against the published competition files;
# otherwise serve requests (the hidden-test run).
if not os.environ.get('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.run_local_gateway(('/kaggle/input/nfl-big-data-bowl-2026-prediction/',))
else:
    inference_server.serve()