Based on single LightGBM baseline LB **0.447**:

https://www.kaggle.com/code/snufkin77/mcts-strength-relevant-baseline

DeepTables: Deep-learning Toolkit for Tabular data

https://github.com/DataCanvasIO/DeepTables

https://deeptables.readthedocs.io/en/latest/model_config.html#parameters

**Version 1**: single DeepTables NN baseline LB **0.462**.

**Version 6**: single DeepTables NN LB **0.448**: `ModelConfig(apply_gbm_features=True)`.

**Version 7**: single DeepTables NN with `ModelConfig(apply_gbm_features=True, nets=['dnn_nets'] + ['fm_nets'] + ['cin_nets'])`.

In [None]:
# Install TensorFlow 2.15.0 offline: --no-index keeps pip away from PyPI and
# --find-links points it at the wheel directory under /kaggle/input.
!pip install --no-index -U --find-links=/kaggle/input/tensorflow-2-15/tensorflow tensorflow==2.15.0

In [None]:
# Install DeepTables 0.2.5 offline: --no-index keeps pip away from PyPI and
# --find-links points it at the wheel directory under /kaggle/input.
!pip install --no-index -U --find-links=/kaggle/input/deeptables-v0-2-5/deeptables-0.2.5 deeptables==0.2.5

In [None]:
import os
import math
import random
import warnings
import matplotlib.pyplot as plt
import numpy as np, pandas as pd, polars as pl
from sklearn.model_selection import GroupKFold
from colorama import Fore, Style

import tensorflow as tf, deeptables as dt
from tensorflow.keras import backend as K
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers.legacy import Adam
from deeptables.models import DeepTable, ModelConfig
from deeptables.models import deepnets

import kaggle_evaluation.mcts_inference_server

# Silence Python warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Report the library versions and whether a GPU is visible.
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices
# is the supported replacement and keeps the printed format unchanged.
print('TensorFlow version:', tf.__version__ + ',',
      'GPU =', bool(tf.config.list_physical_devices('GPU')))
print('DeepTables version:', dt.__version__)

In [None]:
seed = 42


def seed_everything(seed):
    """Seed every random number source this notebook relies on.

    Sets PYTHONHASHSEED in the environment, then seeds Python's ``random``,
    NumPy's global generator and TensorFlow's global generator, in that order.

    Args:
        seed: Integer seed applied to all of the generators above.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


seed_everything(seed=seed)

In [None]:
# Column bookkeeping shared by preprocess_data, train_nn and predict.
target_col = 'utility_agent1'        # regression target
game_col = 'GameRulesetName'         # grouping key for GroupKFold; never fed to the model
agent_cols = ['agent1', 'agent2']    # agent description strings
game_rule_cols = ['EnglishRules', 'LudRules']
output_cols = ['num_wins_agent1', 'num_draws_agent1', 'num_losses_agent1']

# The header row of constant_columns.csv supplies the list of column names
# (presumably features that carry a single value -- inferred from the dataset name).
constant_cols = pd.read_csv(
    '/kaggle/input/um-gps-of-mcts-variants-constant-columns/constant_columns.csv'
).columns.to_list()

# Everything removed from a frame before modelling.
dropped_cols = ['Id', *constant_cols, *game_rule_cols, *output_cols]

def preprocess_data(df):
    """Turn a raw polars frame into the pandas frame used for training/inference.

    Steps, in order:
      1. Drop every column listed in ``dropped_cols`` that is present in ``df``.
      2. If ``CFG.split_agent_features`` is set, split each agent column on "-"
         into ``<col>_<idx>`` columns and discard the ``<col>_0`` piece.
      3. Cast columns whose first six characters match an entry of
         ``agent_cols`` to Categorical.
      4. Cast every remaining column except ``game_col`` to Float32.

    Args:
        df: polars DataFrame holding the raw competition columns.

    Returns:
        A pandas DataFrame (the shape is printed as a side effect).
    """
    def _is_agent_feature(name):
        # Prefix test on the first six characters, matching 'agent1'/'agent2'.
        return name[:6] in agent_cols

    removable = [name for name in dropped_cols if name in df.columns]
    df = df.drop(removable)

    if CFG.split_agent_features:
        for col in agent_cols:
            pieces = (
                pl.col(col)
                .str.split(by="-")
                .list.to_struct(fields=lambda idx: f"{col}_{idx}")
            )
            df = df.with_columns(pieces).unnest(col).drop(f"{col}_0")

    categorical_casts = [
        pl.col(name).cast(pl.Categorical)
        for name in df.columns
        if _is_agent_feature(name)
    ]
    df = df.with_columns(categorical_casts)

    float_casts = [
        pl.col(name).cast(pl.Float32)
        for name in df.columns
        if not _is_agent_feature(name) and name != game_col
    ]
    df = df.with_columns(float_casts)

    print(f'Data shape: {df.shape}\n')
    return df.to_pandas()

In [None]:
# https://www.kaggle.com/code/cdeotte/tensorflow-transformer-0-790/notebook
# Schedule shape: optional linear ramp-up, a plateau at LR_MAX, then cosine
# decay down to LR_MIN on the final epoch.
LR_START = 1e-3
LR_MAX = 1e-3
LR_MIN = 1e-4
LR_RAMPUP_EPOCHS = 0
LR_SUSTAIN_EPOCHS = 2
EPOCHS = 7


def lrfn(epoch):
    """Return the learning rate for a zero-based ``epoch``.

    * ``epoch < LR_RAMPUP_EPOCHS``: linear ramp from LR_START towards LR_MAX.
    * next ``LR_SUSTAIN_EPOCHS`` epochs: constant LR_MAX.
    * afterwards: cosine decay from LR_MAX to LR_MIN, reaching LR_MIN at
      epoch ``EPOCHS - 1``.

    Epochs at or beyond the end of the decay window return LR_MIN. This also
    covers configurations where the decay window has zero length, which
    previously raised ZeroDivisionError, and stops the cosine from climbing
    back up when training runs past ``EPOCHS``.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate as a float.
    """
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX

    decay_total_epochs = EPOCHS - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS - 1
    decay_epoch_index = epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS
    # Clamp: nothing left to decay over, or already past the schedule's end.
    if decay_total_epochs <= 0 or decay_epoch_index >= decay_total_epochs:
        return LR_MIN

    phase = math.pi * decay_epoch_index / decay_total_epochs
    cosine_decay = 0.5 * (1 + math.cos(phase))
    return (LR_MAX - LR_MIN) * cosine_decay + LR_MIN

# Preview the schedule over the configured number of epochs.
rng = list(range(EPOCHS))
lr_y = list(map(lrfn, rng))

plt.figure(figsize=(10, 4))
plt.plot(rng, lr_y, '-o')
plt.xlabel('Epoch')
plt.ylabel('LR')

# Summarise the first, peak and last learning rates.
summary = "Learning rate schedule: {:.3g} to {:.3g} to {:.3g}"
print(summary.format(lr_y[0], max(lr_y), lr_y[-1]))

# Keras callback that applies lrfn at the start of every epoch.
LR_Scheduler = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

In [None]:
class CFG:
    """Static configuration for data loading, cross-validation and the DeepTables model."""

    # --- data ---
    train_path = '/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv'
    split_agent_features = True  # read by preprocess_data

    # --- training loop ---
    folds = 6          # number of GroupKFold splits
    epochs = 7         # passed to fit(); lrfn's decay span is driven by the separate EPOCHS constant
    batch_size = 128
    LR_Scheduler = []  # [LR_Scheduler]
    optimizer = Adam(learning_rate=1e-3)  # one instance, reused by every fold

    # --- DeepTables model definition ---
    conf = ModelConfig(
        auto_imputation=False,
        auto_discrete=False,
        auto_discard_unique=True,
        categorical_columns='auto',
        apply_gbm_features=True,
        fixed_embedding_dim=True,
        embeddings_output_dim=4,
        embedding_dropout=0.2,
        nets=['dnn_nets', 'fm_nets', 'cin_nets'],
        dnn_params={
            # One (1024 / 512 / 256 / 128, 0.0, True) entry per dense layer.
            'hidden_units': tuple(
                (units, 0.0, True) for units in (1024, 512, 256, 128)
            ),
            'dnn_activation': 'relu',
        },
        stacking_op='concat',
        output_use_bias=False,
        optimizer=optimizer,
        task='regression',
        loss='auto',
        metrics=["RootMeanSquaredError"],
        earlystopping_patience=1,
    )

In [None]:
def train_nn(data):
    """Train one DeepTables model per GroupKFold fold and report out-of-fold RMSE.

    Folds are grouped by ``game_col``, so rows sharing a value of that column
    never appear in both the training and validation part of a fold. Both
    ``target_col`` and ``game_col`` are removed from the feature matrix.

    A fold whose validation RMSE exceeds 1.0 contributes the mean of its
    validation targets to the overall CV score instead of its predictions.
    NOTE(review): the model of such a fold is still appended to the returned
    list and therefore still takes part in ``infer_nn`` -- confirm this is intended.

    Args:
        data: pandas DataFrame as produced by ``preprocess_data`` for the
            training set; must contain ``target_col`` and ``game_col``.

    Returns:
        List of fitted ``DeepTable`` models, one per fold.
    """
    cv = GroupKFold(n_splits=CFG.folds)
    groups = data[game_col]
    X = data.drop([target_col, game_col], axis=1)
    y = data[target_col]
    oof = np.zeros(len(data))
    models = []

    for fi, (train_idx, valid_idx) in enumerate(cv.split(X, y, groups)):
        print("#"*25)
        print(f"### Fold {fi+1}/{CFG.folds} ...")
        print("#"*25)
        K.clear_session()
        model = DeepTable(config=CFG.conf)
        model.fit(X.iloc[train_idx], y.iloc[train_idx],
                  validation_data=(X.iloc[valid_idx], y.iloc[valid_idx]),
                  callbacks=CFG.LR_Scheduler,
                  batch_size=CFG.batch_size, epochs=CFG.epochs, verbose=2)
        models.append(model)

        # Workaround kept from the original ("Avoid some errors"): the single
        # CFG.optimizer instance is shared by every fold, so its weights are
        # re-created as fresh tf.Variable objects (initialised from the current
        # values) and the config is re-pointed at the optimizer before the next
        # DeepTable is built.
        # NOTE(review): presumably this avoids variable clashes after
        # K.clear_session() -- confirm before changing.
        with K.name_scope(CFG.optimizer.__class__.__name__):
            for j, var in enumerate(CFG.optimizer.weights):
                name = 'variable{}'.format(j)
                CFG.optimizer.weights[j] = tf.Variable(var, name=name)
        CFG.conf = CFG.conf._replace(optimizer=CFG.optimizer)

        oof_preds = model.predict(X.iloc[valid_idx], verbose=1, batch_size=512).flatten()
        rmse = np.round(np.sqrt(np.mean((oof_preds - y.iloc[valid_idx])**2)),4)
        print(f'{Fore.GREEN}{Style.BRIGHT}\nFold {fi+1} | rmse: {rmse}\n')
        # Each validation row belongs to exactly one fold, so a plain assignment
        # is enough; the former `fi < CFG.folds` check was always true and its
        # accumulate branch could never run.
        if rmse>1.0:
            print(f'{Fore.GREEN}{Style.BRIGHT}Replace Fold {fi+1} oof_preds values with mean of y_valid in Overall CV rmse calculation.\n')
            oof[valid_idx] = np.mean(y.iloc[valid_idx])
        else:
            oof[valid_idx] = oof_preds

    rmse = np.round(np.sqrt(np.mean((oof - y)**2)),4)
    print(f'{Fore.BLUE}{Style.BRIGHT}Overall CV rmse: {rmse}\n')
    # Render the architecture of the last fold's model.
    plot_model(model.get_model().model)
    return models

def infer_nn(data, models):
    """Average the flattened predictions of every model in ``models``.

    Args:
        data: Feature frame passed unchanged to each model's ``predict``.
        models: Iterable of fitted models exposing
            ``predict(data, verbose=..., batch_size=...)``.

    Returns:
        1-D array holding the element-wise mean prediction across models.
    """
    per_model = []
    for fitted in models:
        raw = fitted.predict(data, verbose=1, batch_size=512)
        per_model.append(raw.flatten())
    return np.mean(per_model, axis=0)

In [None]:
%%time
run_i = 0  # number of predict() calls served so far


def predict(test_data, submission):
    """Prediction callback handed to the MCTS inference server.

    On the very first call the training set is read from ``CFG.train_path``,
    preprocessed and used to fit the fold models, which are cached in the
    module-level ``models``. Every call then preprocesses ``test_data``, drops
    ``game_col`` and writes the averaged model predictions into ``submission``.

    Args:
        test_data: polars DataFrame with the rows to score.
        submission: polars DataFrame that receives the ``target_col`` column.

    Returns:
        ``submission`` with ``target_col`` set to the predictions.
    """
    global run_i, models
    needs_training = run_i == 0
    if needs_training:
        models = train_nn(preprocess_data(pl.read_csv(CFG.train_path)))
    run_i += 1

    features = preprocess_data(test_data).drop(columns=game_col)
    predictions = infer_nn(features, models)
    return submission.with_columns(pl.Series(target_col, predictions))

# Wrap predict() in the competition's inference server.
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Interactive / local run: replay the public test file through predict().
    inference_server.run_local_gateway(
        (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv',
        )
    )
else:
    # Competition rerun: serve requests coming from the evaluation gateway.
    inference_server.serve()