In [20]:
import torch
from pytorch_tabnet.tab_model import TabNetRegressor
import numpy as np
import pandas as pd
import lightgbm as lgb
import tensorflow as tf

from catboost import CatBoostRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold, train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# Check GPU availability
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("Device name:", torch.cuda.get_device_name(0))

PyTorch version: 2.4.1+cpu
CUDA available: False


In [2]:
# Load datasets
train = pd.read_csv("train.csv", low_memory=False)
test = pd.read_csv("test.csv", low_memory=False)

# Identify numeric columns
numeric_cols = train.select_dtypes(include=[np.number]).columns.tolist()
feature_cols = [col for col in numeric_cols if col != 'bg+1:00']
target_col = 'bg+1:00'
group_col = 'p_num'

# Impute missing values for numeric columns
imputer = SimpleImputer(strategy="mean")
train[feature_cols] = imputer.fit_transform(train[feature_cols])
test[feature_cols] = imputer.transform(test[feature_cols])

# Normalize features
scaler = StandardScaler()
train[feature_cols] = scaler.fit_transform(train[feature_cols])
test[feature_cols] = scaler.transform(test[feature_cols])

In [22]:
def tabnet_model(train, test, feature_cols, target_col, group_col):
    X = train[feature_cols].values
    y = train[target_col].values.reshape(-1, 1)
    X_test = test[feature_cols].values

    group_kfold = GroupKFold(n_splits=5)
    predictions = np.zeros(X_test.shape[0])

    for train_idx, valid_idx in group_kfold.split(X, y, groups=train[group_col].values):
        X_train, X_valid = X[train_idx], X[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]

        model = TabNetRegressor(device_name="cuda" if torch.cuda.is_available() else "cpu")
        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            eval_metric=['rmse'],
            max_epochs=50,
            patience=20,
            batch_size=1024,
        )
        predictions += model.predict(X_test).flatten() / 5

    return predictions

def yunbase_nn(train, test, feature_cols, target_col):
    X_train = train[feature_cols].values
    y_train = train[target_col].values
    X_test = test[feature_cols].values

    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, callbacks=[early_stopping])

    predictions = model.predict(X_test).flatten()
    return predictions

import lightgbm as lgb
from sklearn.model_selection import train_test_split
from lightgbm import early_stopping, log_evaluation

def lgb_model(train, test, feature_cols, target_col):
    # Split data for validation
    train_data, valid_data = train_test_split(train, test_size=0.2, random_state=42)
    
    # Prepare LightGBM datasets
    lgb_train = lgb.Dataset(train_data[feature_cols], label=train_data[target_col])
    lgb_valid = lgb.Dataset(valid_data[feature_cols], label=valid_data[target_col])
    
    lgb_params = {
        "objective": "regression",
        "metric": "rmse",
        "learning_rate": 0.1,
        "max_depth": 10,
        "num_leaves": 64,
        "feature_fraction": 0.8,
        "bagging_fraction": 0.8,
        "device": "gpu" if tf.config.list_physical_devices('GPU') else "cpu",
    }

    # Train LightGBM model with callbacks
    model = lgb.train(
        lgb_params,
        lgb_train,
        num_boost_round=1000,
        valid_sets=[lgb_valid],
        callbacks=[
            early_stopping(stopping_rounds=50),
            log_evaluation(100)
        ]
    )

    # Generate predictions
    predictions = model.predict(test[feature_cols])
    return predictions

def catboost_model(train, test, feature_cols, target_col):
    # Initialize CatBoost model
    model = CatBoostRegressor(
        iterations=1000,
        learning_rate=0.1,
        depth=10,
        loss_function="RMSE",
        task_type="GPU" if tf.config.list_physical_devices('GPU') else "CPU",
        verbose=100
    )

    # Train the model
    model.fit(train[feature_cols], train[target_col], verbose=False)

    # Generate predictions
    predictions = model.predict(test[feature_cols])
    return predictions

def sgr_model(train, test, feature_cols, target_col):
    X_train = train[feature_cols].values
    y_train = train[target_col].values
    X_test = test[feature_cols].values

    model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, max_depth=5, random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return predictions

In [4]:
# TabNet predictions
tabnet_preds = tabnet_model(train, test, feature_cols, target_col, group_col)



epoch 0  | loss: 10.27476| val_0_rmse: 2.64077 |  0:01:28s
epoch 1  | loss: 5.28056 | val_0_rmse: 2.46149 |  0:02:52s
epoch 2  | loss: 4.60472 | val_0_rmse: 2.33214 |  0:04:34s
epoch 3  | loss: 4.36062 | val_0_rmse: 2.28479 |  0:06:06s
epoch 4  | loss: 4.3236  | val_0_rmse: 2.28897 |  0:07:32s
epoch 5  | loss: 4.2754  | val_0_rmse: 2.29858 |  0:09:04s
epoch 6  | loss: 4.2117  | val_0_rmse: 2.28033 |  0:10:37s
epoch 7  | loss: 4.15906 | val_0_rmse: 2.34124 |  0:12:10s
epoch 8  | loss: 4.17542 | val_0_rmse: 2.25751 |  0:13:42s
epoch 9  | loss: 4.06884 | val_0_rmse: 2.33603 |  0:15:14s
epoch 10 | loss: 4.03338 | val_0_rmse: 2.2851  |  0:16:42s
epoch 11 | loss: 4.02795 | val_0_rmse: 2.30888 |  0:17:32s
epoch 12 | loss: 3.97227 | val_0_rmse: 2.31063 |  0:17:48s
epoch 13 | loss: 3.98021 | val_0_rmse: 2.27069 |  0:18:03s
epoch 14 | loss: 3.91915 | val_0_rmse: 2.27146 |  0:18:19s
epoch 15 | loss: 3.91246 | val_0_rmse: 2.26136 |  0:18:35s
epoch 16 | loss: 3.84274 | val_0_rmse: 2.26362 |  0:18:5



epoch 0  | loss: 10.25345| val_0_rmse: 2.49453 |  0:00:15s
epoch 1  | loss: 5.29427 | val_0_rmse: 2.39362 |  0:00:30s
epoch 2  | loss: 5.13644 | val_0_rmse: 2.43655 |  0:00:45s
epoch 3  | loss: 5.06647 | val_0_rmse: 2.43491 |  0:01:00s
epoch 4  | loss: 5.06909 | val_0_rmse: 2.38869 |  0:01:15s
epoch 5  | loss: 5.03058 | val_0_rmse: 2.45223 |  0:01:31s
epoch 6  | loss: 4.9676  | val_0_rmse: 2.363   |  0:01:46s
epoch 7  | loss: 4.69186 | val_0_rmse: 2.30285 |  0:02:01s
epoch 8  | loss: 4.35378 | val_0_rmse: 2.22566 |  0:02:16s
epoch 9  | loss: 4.17563 | val_0_rmse: 2.21567 |  0:02:31s
epoch 10 | loss: 4.04393 | val_0_rmse: 2.2044  |  0:02:47s
epoch 11 | loss: 3.97763 | val_0_rmse: 2.19529 |  0:03:02s
epoch 12 | loss: 3.94464 | val_0_rmse: 2.2012  |  0:03:17s
epoch 13 | loss: 3.85714 | val_0_rmse: 2.20254 |  0:03:32s
epoch 14 | loss: 3.82084 | val_0_rmse: 2.19319 |  0:03:47s
epoch 15 | loss: 3.79887 | val_0_rmse: 2.21226 |  0:04:02s
epoch 16 | loss: 3.75147 | val_0_rmse: 2.26436 |  0:04:1



epoch 0  | loss: 11.11654| val_0_rmse: 2.08152 |  0:00:15s
epoch 1  | loss: 5.64528 | val_0_rmse: 2.06827 |  0:00:30s
epoch 2  | loss: 5.01677 | val_0_rmse: 1.89839 |  0:00:45s
epoch 3  | loss: 4.77792 | val_0_rmse: 1.81785 |  0:01:00s
epoch 4  | loss: 4.69025 | val_0_rmse: 1.80823 |  0:01:16s
epoch 5  | loss: 4.63154 | val_0_rmse: 1.91896 |  0:01:31s
epoch 6  | loss: 4.56385 | val_0_rmse: 1.94633 |  0:01:46s
epoch 7  | loss: 4.52681 | val_0_rmse: 1.8348  |  0:02:01s
epoch 8  | loss: 4.48963 | val_0_rmse: 1.8738  |  0:02:17s
epoch 9  | loss: 4.42463 | val_0_rmse: 1.85631 |  0:02:32s
epoch 10 | loss: 4.37893 | val_0_rmse: 1.907   |  0:02:47s
epoch 11 | loss: 4.25305 | val_0_rmse: 1.99869 |  0:03:02s
epoch 12 | loss: 4.16621 | val_0_rmse: 1.86764 |  0:03:18s
epoch 13 | loss: 4.11412 | val_0_rmse: 1.94137 |  0:03:33s
epoch 14 | loss: 4.06038 | val_0_rmse: 1.93596 |  0:03:48s
epoch 15 | loss: 4.02017 | val_0_rmse: 1.9601  |  0:04:03s
epoch 16 | loss: 3.98633 | val_0_rmse: 1.91553 |  0:04:1



epoch 0  | loss: 9.99032 | val_0_rmse: 2.36111 |  0:00:15s
epoch 1  | loss: 5.27914 | val_0_rmse: 2.3259  |  0:00:30s
epoch 2  | loss: 5.22515 | val_0_rmse: 2.32255 |  0:00:45s
epoch 3  | loss: 5.11846 | val_0_rmse: 2.31827 |  0:01:01s
epoch 4  | loss: 5.00457 | val_0_rmse: 2.33011 |  0:01:16s
epoch 5  | loss: 4.71044 | val_0_rmse: 2.22624 |  0:01:31s
epoch 6  | loss: 4.38749 | val_0_rmse: 2.18385 |  0:01:46s
epoch 7  | loss: 4.19971 | val_0_rmse: 2.1236  |  0:02:02s
epoch 8  | loss: 4.08276 | val_0_rmse: 2.09931 |  0:02:17s
epoch 9  | loss: 4.02336 | val_0_rmse: 2.12265 |  0:02:32s
epoch 10 | loss: 3.98757 | val_0_rmse: 2.1114  |  0:02:47s
epoch 11 | loss: 3.92725 | val_0_rmse: 2.14636 |  0:03:03s
epoch 12 | loss: 3.89578 | val_0_rmse: 2.1045  |  0:03:18s
epoch 13 | loss: 3.90659 | val_0_rmse: 2.15902 |  0:03:33s
epoch 14 | loss: 3.85468 | val_0_rmse: 2.16719 |  0:03:48s
epoch 15 | loss: 3.84163 | val_0_rmse: 2.1268  |  0:04:03s
epoch 16 | loss: 3.81611 | val_0_rmse: 2.1341  |  0:04:1



epoch 0  | loss: 11.55169| val_0_rmse: 2.58821 |  0:00:14s
epoch 1  | loss: 6.0079  | val_0_rmse: 2.33141 |  0:00:28s
epoch 2  | loss: 4.96097 | val_0_rmse: 2.20619 |  0:00:43s
epoch 3  | loss: 4.37417 | val_0_rmse: 2.16693 |  0:00:57s
epoch 4  | loss: 4.16872 | val_0_rmse: 2.14295 |  0:01:11s
epoch 5  | loss: 4.06708 | val_0_rmse: 2.14969 |  0:01:26s
epoch 6  | loss: 4.00028 | val_0_rmse: 2.14008 |  0:01:40s
epoch 7  | loss: 3.95202 | val_0_rmse: 2.11359 |  0:01:54s
epoch 8  | loss: 3.8993  | val_0_rmse: 2.11435 |  0:02:09s
epoch 9  | loss: 3.87154 | val_0_rmse: 2.09696 |  0:02:23s
epoch 10 | loss: 3.82289 | val_0_rmse: 2.1125  |  0:02:37s
epoch 11 | loss: 3.78092 | val_0_rmse: 2.12987 |  0:02:51s
epoch 12 | loss: 3.73877 | val_0_rmse: 2.15083 |  0:03:06s
epoch 13 | loss: 3.7257  | val_0_rmse: 2.15017 |  0:03:20s
epoch 14 | loss: 3.68751 | val_0_rmse: 2.12396 |  0:03:34s
epoch 15 | loss: 3.64989 | val_0_rmse: 2.14791 |  0:03:49s
epoch 16 | loss: 3.59984 | val_0_rmse: 2.15719 |  0:04:0



In [5]:
# YunBase predictions
yunbase_preds = yunbase_nn(train, test, feature_cols, target_col)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


In [6]:
# SGR predictions
sgr_preds = sgr_model(train, test, feature_cols, target_col)

In [23]:
lgb_preds = lgb_model(train, test, feature_cols, target_col)

LightGBMError: Do not support special JSON characters in feature name.

In [24]:
catboost_preds = catboost_model(train, test, feature_cols, target_col)



In [25]:
sample_submission = pd.read_csv("sample_submission.csv")

In [26]:
# Save predictions to files
sample_submission['bg+1:00'] = tabnet_preds
sample_submission.to_csv("tabnet_predictions.csv", index=False)

sample_submission['bg+1:00'] = yunbase_preds
sample_submission.to_csv("yunbase_predictions.csv", index=False)

# sample_submission['bg+1:00'] = lgb_preds
# sample_submission.to_csv("lgb_predictions.csv", index=False)

sample_submission['bg+1:00'] = catboost_preds
sample_submission.to_csv("catboost_predictions.csv", index=False)

sample_submission['bg+1:00'] = sgr_preds
sample_submission.to_csv("sgr_predictions.csv", index=False)

In [27]:
sample_submission

Unnamed: 0,id,bg+1:00
0,p01_8459,9.426122
1,p01_8460,5.844817
2,p01_8461,7.755239
3,p01_8462,9.456155
4,p01_8463,6.253992
...,...,...
3639,p24_256,7.380559
3640,p24_257,9.294625
3641,p24_258,8.344094
3642,p24_259,8.288723
