In [14]:
import numpy as np
import pandas as pd
import torch

from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GroupKFold
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

In [15]:
torch.cuda.is_available()

False

In [3]:
# Load Train and Test Datasets with Adjustments
# Paths to train, test, and submission files
train_path = 'train.csv'
test_path = 'test.csv'
submission_path = 'sample_submission.csv'

# Define data types for known columns
dtype_dict = {
    "p_num": "category",  # Participant number can be treated as a categorical feature
    **{f"bg-{i}:{j:02d}": "float64" for i in range(6) for j in range(0, 60, 5)},  # Blood glucose readings
    **{f"insulin-{i}:{j:02d}": "float64" for i in range(6) for j in range(0, 60, 5)},  # Insulin readings
    **{f"steps-{i}:{j:02d}": "float64" for i in range(6) for j in range(0, 60, 5)},  # Steps data
    **{f"cals-{i}:{j:02d}": "float64" for i in range(6) for j in range(0, 60, 5)},  # Calories data
    **{f"activity-{i}:{j:02d}": "object" for i in range(6) for j in range(0, 60, 5)},  # Activity data (strings or None)
    "bg+1:00": "float64"  # Target column
}

# Load datasets with adjustments
train = pd.read_csv(
    train_path, index_col="id", parse_dates=["time"], 
    infer_datetime_format=True, low_memory=False, dtype=dtype_dict
)
test = pd.read_csv(
    test_path, index_col="id", parse_dates=["time"], 
    infer_datetime_format=True, low_memory=False, dtype=dtype_dict
)

# Print basic information to confirm successful loading
print("Train Dataset Shape:", train.shape)
print("Test Dataset Shape:", test.shape)

submission_path = 'sample_submission.csv'
submission = pd.read_csv(submission_path)

  train = pd.read_csv(
  train = pd.read_csv(
  test = pd.read_csv(
  test = pd.read_csv(


Train Dataset Shape: (177024, 507)
Test Dataset Shape: (3644, 506)


In [4]:
# Define target and group columns
target_col = 'bg+1:00'
group_col = 'p_num'

# Define feature columns
feature_cols = [col for col in train.columns if col.startswith(('bg-', 'insulin-', 'hr-', 'steps-', 'cals-'))]

# Fill missing values
imputer = SimpleImputer()
train[feature_cols] = imputer.fit_transform(train[feature_cols])
test[feature_cols] = imputer.transform(test[feature_cols])

In [11]:
def tabnet_model(train, test, feature_cols, target_col, group_col):
    from pytorch_tabnet.tab_model import TabNetRegressor
    from sklearn.model_selection import GroupKFold
    import numpy as np
    import torch

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    X = train[feature_cols].values
    y = train[target_col].values.reshape(-1, 1)  # Ensure y is 2D
    groups = train[group_col].values
    X_test = test[feature_cols].values

    predictions = np.zeros(X_test.shape[0])
    cv = GroupKFold(n_splits=5)

    for train_idx, valid_idx in cv.split(X, y, groups):
        X_train, X_valid = X[train_idx], X[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]

        tabnet = TabNetRegressor(device_name=device)
        tabnet.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            eval_metric=['rmse'],
            batch_size=1024,
            patience=20,
            max_epochs=50
        )
        predictions += tabnet.predict(X_test).flatten() / 5

    return predictions

In [16]:
def lgb_model(train, test, feature_cols, target_col):
    from lightgbm import LGBMRegressor
    import numpy as np

    X_train = train[feature_cols]
    y_train = train[target_col]
    X_test = test[feature_cols]

    model = LGBMRegressor(
        boosting_type='gbdt',
        device='gpu',  # Use GPU for LightGBM
        gpu_use_dp=True,
        n_estimators=1000,
        learning_rate=0.05,
        max_depth=-1
    )

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    return predictions

In [17]:
def catboost_model(train, test, feature_cols, target_col):
    from catboost import CatBoostRegressor
    import numpy as np

    X_train = train[feature_cols]
    y_train = train[target_col]
    X_test = test[feature_cols]

    model = CatBoostRegressor(
        iterations=1000,
        learning_rate=0.05,
        depth=10,
        loss_function='RMSE',
        task_type='GPU',  # Use GPU for CatBoost
        verbose=100
    )

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    return predictions

In [12]:
# Generate predictions for each model
tabnet_preds = tabnet_model(train, test, feature_cols, target_col, group_col)

Using device: cpu




KeyboardInterrupt: 

In [18]:
lgb_preds = lgb_model(train, test, feature_cols, target_col)

LightGBMError: Do not support special JSON characters in feature name.

In [19]:
catboost_preds = catboost_model(train, test, feature_cols, target_col)

0:	learn: 2.9238802	total: 101ms	remaining: 1m 40s
100:	learn: 1.9326349	total: 3.52s	remaining: 31.4s
200:	learn: 1.8361224	total: 7.25s	remaining: 28.8s
300:	learn: 1.7618788	total: 11s	remaining: 25.6s
400:	learn: 1.6976377	total: 14.7s	remaining: 22s
500:	learn: 1.6347449	total: 18.6s	remaining: 18.5s
600:	learn: 1.5806175	total: 22.3s	remaining: 14.8s
700:	learn: 1.5338074	total: 26.1s	remaining: 11.1s
800:	learn: 1.4879511	total: 29.9s	remaining: 7.43s
900:	learn: 1.4454125	total: 33.8s	remaining: 3.71s
999:	learn: 1.4067595	total: 37.5s	remaining: 0us


In [20]:
catboost_preds

array([8.87102238, 6.01995321, 8.39841594, ..., 7.7637816 , 8.69742785,
       7.94606009])

In [None]:
# Combine predictions using simple average
simple_avg_preds = (tabnet_preds + lgb_preds + catboost_preds) / 3

# Combine predictions using weighted average
weights = {'tabnet': 0.4, 'lgb': 0.3, 'catboost': 0.3}
weighted_avg_preds = (
    weights['tabnet'] * tabnet_preds +
    weights['lgb'] * lgb_preds +
    weights['catboost'] * catboost_preds
)

In [None]:
# Save simple average predictions
submission['bg+1:00'] = simple_avg_preds
submission.to_csv('submission_simple_avg.csv', index=False)

# Save weighted average predictions
submission['bg+1:00'] = weighted_avg_preds
submission.to_csv('submission_weighted_avg.csv', index=False)

print("Submission files generated: 'submission_simple_avg.csv' and 'submission_weighted_avg.csv'")