# AutoGluon IEEE Fraud Detection (Kaggle)

Execute in Colab or a local `.venv` with Kaggle credentials configured. This notebook clones the AutoGluon tabular Kaggle tutorial and adds runtime-friendly sampling controls. Do not commit Kaggle data; commit only the executed notebook with its outputs.

## 1) Install & Imports

In [None]:
# If running in Colab, uncomment:
# !pip install -q autogluon.tabular kaggle

import os, zipfile, pandas as pd
from pathlib import Path
from autogluon.tabular import TabularDataset, TabularPredictor


## 2) Kaggle download (requires credentials)

In [None]:
# Kaggle credentials must come from the environment or an uploaded
# kaggle.json -- never hardcode them in the repository.
# os.environ['KAGGLE_USERNAME'] = 'YOUR_USERNAME'
# os.environ['KAGGLE_KEY'] = 'YOUR_KEY'

# Local directory that receives the competition files; created on first run
# and left untouched when it already exists.
data_dir = Path('data')
data_dir.mkdir(parents=False, exist_ok=True)

# Competition slug used by the (commented-out) download command below.
competition = 'ieee-fraud-detection'

# Uncomment to download and unpack the archive where the Kaggle API/CLI is
# available (Colab or local):
# !kaggle competitions download -c {competition} -p {data_dir}
# with zipfile.ZipFile(data_dir / f'{competition}.zip', 'r') as zf:
#     zf.extractall(data_dir)


## 3) Load data (supports sampling for speed)

In [None]:
train_path = Path('data/train.csv')
test_path = Path('data/test.csv')

# Fraction of training rows to keep; read from the environment so runtime can
# be tuned without editing the notebook. Values >= 1.0 keep every row.
SAMPLE_FRACTION = float(os.environ.get('SAMPLE_FRACTION', '0.1'))  # adjust for runtime
if SAMPLE_FRACTION <= 0:
    # Fail before reading any CSV: a zero fraction would give an empty frame
    # and a negative one an unrelated pandas error.
    raise ValueError(f'SAMPLE_FRACTION must be > 0, got {SAMPLE_FRACTION}')


def _load_train_frame(csv_path: Path) -> pd.DataFrame:
    """Return the training frame from *csv_path* or from the raw Kaggle pair.

    A pre-built ``train.csv`` is used when it exists. Otherwise the
    ``train_transaction.csv`` and ``train_identity.csv`` files in the same
    directory are left-joined on ``TransactionID``, which is how the
    competition archive ships the data.

    Raises:
        FileNotFoundError: if neither the combined file nor both raw files
            are present.
    """
    if csv_path.exists():
        return pd.read_csv(csv_path)

    transaction_path = csv_path.with_name('train_transaction.csv')
    identity_path = csv_path.with_name('train_identity.csv')
    missing = [str(p) for p in (transaction_path, identity_path) if not p.exists()]
    if missing:
        raise FileNotFoundError(
            f'{csv_path.name} not found and raw Kaggle file(s) missing: '
            f"{', '.join(missing)}; download with Kaggle API first"
        )

    transactions = pd.read_csv(transaction_path)
    identity = pd.read_csv(identity_path)
    # Left join keeps every transaction, including those with no identity row.
    return transactions.merge(identity, on='TransactionID', how='left')


train_df = _load_train_frame(train_path)
if SAMPLE_FRACTION < 1.0:
    # Fixed seed keeps the subsample reproducible across runs.
    train_df = train_df.sample(frac=SAMPLE_FRACTION, random_state=42)

print('Train shape:', train_df.shape)
# Relative frequency of each label value in the (possibly sampled) frame.
print(train_df['isFraud'].value_counts(normalize=True).head())


## 4) Fit AutoGluon predictor

In [None]:
# Target column to predict.
label = 'isFraud'

# Models are scored with AUC and written under ./ag_ieee_models.
predictor = TabularPredictor(
    label=label,
    eval_metric='auc',
    path='ag_ieee_models',
)

# Adjust time_limit as needed (AutoGluon documents it in seconds, so 1800
# is a 30-minute budget).
predictor.fit(
    train_df,
    presets='best_quality',
    time_limit=1800,
)


## 5) Leaderboard & feature importance

In [None]:
# Per-model summary table; silent=True asks AutoGluon not to print it so the
# head can be printed explicitly below.
lb = predictor.leaderboard(silent=True)

# NOTE(review): importance is computed on train_df, the same frame passed to
# fit() earlier in this notebook. AutoGluon's documentation recommends
# held-out data for this call -- consider carving out a hold-out split.
fi = predictor.feature_importance(train_df)

# Show the first rows of each table, leaderboard first.
print(lb.head(), fi.head(), sep='\n')


## 6) Predict on test and save (not committed)

In [None]:
def _load_test_frame(csv_path: Path) -> pd.DataFrame:
    """Return the test frame from *csv_path* or from the raw Kaggle pair.

    A pre-built ``test.csv`` is used when it exists. Otherwise the
    ``test_transaction.csv`` and ``test_identity.csv`` files in the same
    directory are left-joined on ``TransactionID``.

    Raises:
        FileNotFoundError: if neither the combined file nor both raw files
            are present.
    """
    if csv_path.exists():
        return pd.read_csv(csv_path)

    transaction_path = csv_path.with_name('test_transaction.csv')
    identity_path = csv_path.with_name('test_identity.csv')
    missing = [str(p) for p in (transaction_path, identity_path) if not p.exists()]
    if missing:
        raise FileNotFoundError(
            f'{csv_path.name} not found and raw Kaggle file(s) missing: '
            f"{', '.join(missing)}; download with Kaggle API first"
        )

    identity = pd.read_csv(identity_path)
    # NOTE(review): the Kaggle test identity file is known to spell its
    # columns `id-XX` while the train file uses `id_XX`; normalise so the
    # feature names line up. This is a no-op if no such columns exist.
    identity = identity.rename(
        columns=lambda col: col.replace('id-', 'id_', 1) if col.startswith('id-') else col
    )
    # Left join keeps every transaction, including those with no identity row.
    return pd.read_csv(transaction_path).merge(identity, on='TransactionID', how='left')


test_df = _load_test_frame(test_path)

# NOTE(review): `preds` is not used for the submission below; kept in case
# later cells rely on it, but it costs a second inference pass.
preds = predictor.predict(test_df)
probs = predictor.predict_proba(test_df)

out_dir = Path('outputs')
out_dir.mkdir(exist_ok=True)

# The submission holds the probability of the positive class, not hard labels.
submission = pd.DataFrame({
    'TransactionID': test_df['TransactionID'],
    'isFraud': probs[predictor.positive_class],
})
submission_path = out_dir / 'submission.csv'
submission.to_csv(submission_path, index=False)
print('Saved submission to', submission_path)
