# LightGBM Classification Template (Kaggle)
Run parameters are injected via environment variables: `RUN_ID`, `RUN_CONFIG_JSON`, `ARTIFACT_PUT_URLS_JSON`, `API_BASE`, and `ARTIFACT_BASE_URI`.


In [None]:
import os, json, pandas as pd, numpy as np, requests
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import lightgbm as lgb

# Run settings are read once from the process environment.
_env = os.environ

# Identifier of this run; None when the variable is not set.
RUN_ID = _env.get('RUN_ID')

# JSON-encoded run configuration and artifact upload URLs; each falls back
# to an empty JSON object when its variable is not set.
CONFIG = json.loads(_env.get('RUN_CONFIG_JSON', '{}'))
PUTS = json.loads(_env.get('ARTIFACT_PUT_URLS_JSON', '{}'))

# Base URL of the API that receives the completion call, and the base URI
# used to build the artifact locations reported to it.
API = _env.get('API_BASE', 'http://localhost:8000')
BASE_URI = _env.get('ARTIFACT_BASE_URI', 's3://lab-artifacts/')

# Replace with competition-specific paths
train = pd.read_csv('/kaggle/input/train.csv')
test  = pd.read_csv('/kaggle/input/test.csv')

# Labels as a NumPy array; features are every column except the row id and
# (for train) the target.
y = train['target'].values
X = train.drop(columns=['id','target'])
X_test = test.drop(columns=['id'])

# Impute missing numeric values with medians computed from the training
# features only, and apply those same values to the test features. Both
# frames must go through identical preprocessing: imputing train alone would
# make the model score test rows containing NaNs it never saw during training.
train_medians = X.median(numeric_only=True)
X = X.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Stratified K-fold cross-validation. The number of splits and the shuffle
# seed come from CONFIG, defaulting to 5 folds and seed 42.
skf = StratifiedKFold(n_splits=CONFIG.get('cv',{}).get('n_splits',5), shuffle=True, random_state=CONFIG.get('seed',42))

# oof collects each training row's prediction from the fold in which it was
# held out; preds accumulates the fold-averaged prediction for the test rows.
oof = np.zeros(len(train)); preds = np.zeros(len(test))

# Model parameters do not change between folds, so resolve them once.
# The fallback sets objective='binary' explicitly: LightGBM's default
# objective is regression, which is not what a classification template wants.
# Parameters supplied through CONFIG are used as given.
params = CONFIG.get('model',{}).get('params',{'objective':'binary','n_estimators':500,'learning_rate':0.05})

for tr, va in skf.split(X, y):
    trn = lgb.Dataset(X.iloc[tr], label=y[tr])
    val = lgb.Dataset(X.iloc[va], label=y[va])
    # Log validation metrics every 200 iterations through the callback API;
    # the verbose_eval keyword was removed from lgb.train in LightGBM 4.0.
    model = lgb.train(params, trn, valid_sets=[val], callbacks=[lgb.log_evaluation(period=200)])
    oof[va] = model.predict(X.iloc[va])
    # Each fold contributes an equal share to the averaged test prediction.
    preds += model.predict(X_test)/skf.n_splits

# ROC AUC of the out-of-fold predictions against the training labels,
# converted to a plain float so it can be JSON-serialised.
auc = float(roc_auc_score(y_true=y, y_score=oof))
print('AUC:', auc)

# Artifacts to publish: the metrics dict, the per-row out-of-fold
# predictions, and the submission built from the averaged test predictions.
metrics = {'auc': auc}
oof_df = pd.DataFrame({'id': train['id'], 'oof': oof})
sub = pd.DataFrame({'id': test['id'], 'target': preds})

# Upload each artifact to the PUT URL registered for it in PUTS.
# Each entry is (key in PUTS, request body bytes, Content-Type header).
_uploads = (
    ('submission', sub.to_csv(index=False).encode(), 'text/csv'),
    ('oof', oof_df.to_csv(index=False).encode(), 'text/csv'),
    ('metrics', json.dumps(metrics).encode(), 'application/json'),
)
for _artifact_key, _body, _content_type in _uploads:
    # requests applies no timeout by default, so a stalled connection would
    # hang the run forever; use the same 60 s limit as the completion call.
    # raise_for_status() aborts the run on any non-2xx response.
    requests.put(
        PUTS[_artifact_key],
        data=_body,
        headers={'Content-Type': _content_type},
        timeout=60,
    ).raise_for_status()

# Build artifact URIs from BASE_URI. Guarantee a single '/' between the base
# and the file name so a base configured without a trailing slash (for
# example 's3://bucket/prefix') does not produce 's3://bucket/prefixoof.csv'.
# An empty base is left untouched.
_artifact_base = BASE_URI if (not BASE_URI or BASE_URI.endswith('/')) else BASE_URI + '/'

# Completion report sent to the API for this run.
payload = {
  'score': auc,
  # NOTE(review): per-fold scores are not computed by this script, so this
  # list is always empty -- confirm the API accepts that.
  'cv_scores': [],
  'artifacts': {
    'oof': _artifact_base + 'oof.csv',
    'submission': _artifact_base + 'submission.csv',
    # NOTE(review): no model file is uploaded in the visible part of this
    # script -- confirm something else writes model.bin to this location.
    'model': _artifact_base + 'model.bin',
    'metrics': _artifact_base + 'metrics.json',
    'plots': []
  },
  'env': {'docker':'kaggle/lightgbm','lib':'lightgbm'}
}

# Mark the run complete; any non-2xx response raises and fails the run.
requests.post(f"{API}/runs/{RUN_ID}/complete", json=payload, timeout=60).raise_for_status()
print('DONE')
