# **FOREWORD**

This kernel builds on the following references: <br>
- https://www.kaggle.com/code/jaejohn/mcts-strength-relevant-baseline-lb-0-422 <br>
- https://www.kaggle.com/code/snufkin77/mcts-strength-relevant-baseline <br>

### **MY CONTRIBUTION-**
- I add xgboost and catboost to the existing lightgbm <br>
- I use the latest versions of these packages instead of the older ones in the Kaggle image <br>
- I use a GPU-based training process and bypass the training process for the dummy submission <br>
- I use a GroupKFold CV scheme instead of the KFold used in many kernels <br>
- I added a syntax check option to train sample models on 100 rows of the data <br>
- I also added a provision to calculate the OOF score using the **model - best score** method <br>
- I also post-process the test set predictions to align with the limits in the train data- we clip values between -1 and 1 <br>
- I also suppress unnecessary logs in lightgbm using the logger module inheritance and my custom logging <br>

Wishing you all the best for the competition! <br>

This kernel scores around 0.46xx on the leaderboard, but its CV score is comparable to that of public kernels that score better on the leaderboard. Please feel free to explore this kernel and use it in your processes! 

# **IMPORTS**

In [None]:
%%capture

!pip install lightgbm==4.5.0 --force-reinstall --no-index --find-links=/kaggle/input/mcts2024-packages-v1/Packages
!pip install xgboost==2.1.1 --force-reinstall --no-index --find-links=/kaggle/input/mcts2024-packages-v1/Packages
!pip install scikit-learn==1.3.2 --force-reinstall --no-index --find-links=/kaggle/input/mcts2024-packages-v1/Packages

In [None]:
%%time 

import os, sys, warnings, joblib
warnings.filterwarnings('ignore')
from IPython.display import clear_output

from gc import collect
import numpy as np
import polars as pl, pandas as pd
from sklearn.model_selection import GroupKFold, KFold

import lightgbm as lgb, logging
from lightgbm import early_stopping, log_evaluation, LGBMRegressor as LGBMR
from xgboost import XGBRegressor as XGBR
from catboost import CatBoostRegressor as CBR

import kaggle_evaluation.mcts_inference_server

In [None]:
%%capture

# Customizing logging for LGBM
class MyLogger:
    """
    Logger shim that discards info/warning messages and forwards only errors.

    Originally written to suppress logs in lightgbm and Optuna.
    Source - https://github.com/microsoft/LightGBM/issues/6014

    The instance is fully configured on construction. The historical
    two-step usage (``MyLogger()`` followed by ``.init(label)``) keeps
    working and simply re-binds the underlying logger.
    """

    def __init__(self, logging_lbl: str = "lightgbm_custom"):
        # Configure eagerly so `error` never hits a missing `self.logger`
        # when the legacy `init` call is forgotten.
        self.init(logging_lbl)

    def init(self, logging_lbl: str):
        """Bind this object to the logger named ``logging_lbl`` at ERROR level."""
        self.logger = logging.getLogger(logging_lbl)
        self.logger.setLevel(logging.ERROR)

    def info(self, message):
        """Discard informational messages."""
        pass

    def warning(self, message):
        """Discard warning messages."""
        pass

    def error(self, message):
        """Forward error messages to the underlying ``logging`` logger."""
        self.logger.error(message)

# Create the silencing logger, bind it to the "lightgbm_custom" logger name
# and hand it to LightGBM via `register_logger`.
l = MyLogger()
l.init(logging_lbl = "lightgbm_custom")
lgb.register_logger(l)

# **CONFIGURATION**

In [None]:
# Columns treated as irrelevant; they are part of `dropped_cols` and are
# therefore removed from every frame that goes through `process_data`.
irrelevant_cols = ['Id', 'Properties', 'Format', 'Time', 'Discrete', 'Realtime', 'Turns', 'Alternating', 'Simultaneous', 'HiddenInformation', 'Match', 'AsymmetricRules', 'AsymmetricPlayRules', 'AsymmetricEndRules', 'AsymmetricSetup', 'Players', 'NumPlayers', 'Simulation', 'Solitaire', 'TwoPlayer', 'Multiplayer', 'Coalition', 'Puzzle', 'DeductionPuzzle', 'PlanningPuzzle', 'Equipment', 'Container', 'Board', 'PrismShape', 'ParallelogramShape', 'RectanglePyramidalShape', 'TargetShape', 'BrickTiling', 'CelticTiling', 'QuadHexTiling', 'Hints', 'PlayableSites', 'Component', 'DiceD3', 'BiasedDice', 'Card', 'Domino', 'Rules', 'SituationalTurnKo', 'SituationalSuperko', 'InitialAmount', 'InitialPot', 'Play', 'BetDecision', 'BetDecisionFrequency', 'VoteDecisionFrequency', 'ChooseTrumpSuitDecision', 'ChooseTrumpSuitDecisionFrequency', 'LeapDecisionToFriend', 'LeapDecisionToFriendFrequency', 'HopDecisionEnemyToFriend', 'HopDecisionEnemyToFriendFrequency', 'HopDecisionFriendToFriend', 'FromToDecisionWithinBoard', 'FromToDecisionBetweenContainers', 'BetEffect', 'BetEffectFrequency', 'VoteEffectFrequency', 'SwapPlayersEffectFrequency', 'TakeControl', 'TakeControlFrequency', 'PassEffectFrequency', 'SetCost', 'SetCostFrequency', 'SetPhase', 'SetPhaseFrequency', 'SetTrumpSuit', 'SetTrumpSuitFrequency', 'StepEffectFrequency', 'SlideEffectFrequency', 'LeapEffectFrequency', 'HopEffectFrequency', 'FromToEffectFrequency', 'SwapPiecesEffect', 'SwapPiecesEffectFrequency', 'ShootEffect', 'ShootEffectFrequency', 'MaxCapture', 'OffDiagonalDirection', 'Information', 'HidePieceType', 'HidePieceOwner', 'HidePieceCount', 'HidePieceRotation', 'HidePieceValue', 'HidePieceState', 'InvisiblePiece', 'End', 'LineDrawFrequency', 'ConnectionDraw', 'ConnectionDrawFrequency', 'GroupLossFrequency', 'GroupDrawFrequency', 'LoopLossFrequency', 'LoopDraw', 'LoopDrawFrequency', 'PatternLoss', 'PatternLossFrequency', 'PatternDraw', 'PatternDrawFrequency', 'PathExtentEndFrequency', 'PathExtentWinFrequency', 
'PathExtentLossFrequency', 'PathExtentDraw', 'PathExtentDrawFrequency', 'TerritoryLoss', 'TerritoryLossFrequency', 'TerritoryDraw', 'TerritoryDrawFrequency', 'CheckmateLoss', 'CheckmateLossFrequency', 'CheckmateDraw', 'CheckmateDrawFrequency', 'NoTargetPieceLoss', 'NoTargetPieceLossFrequency', 'NoTargetPieceDraw', 'NoTargetPieceDrawFrequency', 'NoOwnPiecesDraw', 'NoOwnPiecesDrawFrequency', 'FillLoss', 'FillLossFrequency', 'FillDraw', 'FillDrawFrequency', 'ScoringDrawFrequency', 'NoProgressWin', 'NoProgressWinFrequency', 'NoProgressLoss', 'NoProgressLossFrequency', 'SolvedEnd', 'Behaviour', 'StateRepetition', 'PositionalRepetition', 'SituationalRepetition', 'Duration', 'Complexity', 'BoardCoverage', 'GameOutcome', 'StateEvaluation', 'Clarity', 'Narrowness', 'Variance', 'Decisiveness', 'DecisivenessMoves', 'DecisivenessThreshold', 'LeadChange', 'Stability', 'Drama', 'DramaAverage', 'DramaMedian', 'DramaMaximum', 'DramaMinimum', 'DramaVariance', 'DramaChangeAverage', 'DramaChangeSign', 'DramaChangeLineBestFit', 'DramaChangeNumTimes', 'DramaMaxIncrease', 'DramaMaxDecrease', 'MoveEvaluation', 'MoveEvaluationAverage', 'MoveEvaluationMedian', 'MoveEvaluationMaximum', 'MoveEvaluationMinimum', 'MoveEvaluationVariance', 'MoveEvaluationChangeAverage', 'MoveEvaluationChangeSign', 'MoveEvaluationChangeLineBestFit', 'MoveEvaluationChangeNumTimes', 'MoveEvaluationMaxIncrease', 'MoveEvaluationMaxDecrease', 'StateEvaluationDifference', 'StateEvaluationDifferenceAverage', 'StateEvaluationDifferenceMedian', 'StateEvaluationDifferenceMaximum', 'StateEvaluationDifferenceMinimum', 'StateEvaluationDifferenceVariance', 'StateEvaluationDifferenceChangeAverage', 'StateEvaluationDifferenceChangeSign', 'StateEvaluationDifferenceChangeLineBestFit', 'StateEvaluationDifferenceChangeNumTimes', 'StateEvaluationDifferenceMaxIncrease', 'StateEvaluationDifferenceMaxDecrease', 'BoardSitesOccupied', 'BoardSitesOccupiedMinimum', 'BranchingFactor', 'BranchingFactorMinimum', 'DecisionFactor', 
'DecisionFactorMinimum', 'MoveDistance', 'MoveDistanceMinimum', 'PieceNumber', 'PieceNumberMinimum', 'ScoreDifference', 'ScoreDifferenceMinimum', 'ScoreDifferenceChangeNumTimes', 'Roots', 'Cosine', 'Sine', 'Tangent', 'Exponential', 'Logarithm', 'ExclusiveDisjunction', 'Float', 'HandComponent', 'SetHidden', 'SetInvisible', 'SetHiddenCount', 'SetHiddenRotation', 'SetHiddenState', 'SetHiddenValue', 'SetHiddenWhat', 'SetHiddenWho']
# Game-description columns; also dropped before modelling.
game_cols       = ['GameRulesetName', 'EnglishRules', 'LudRules']
# Outcome-count columns; also dropped before modelling.
output_cols     = ['num_wins_agent1', 'num_draws_agent1', 'num_losses_agent1']
# Agent descriptor columns; `process_data` splits them on "-" (when enabled)
# and casts the resulting columns to categorical.
agent_cols      = ['agent1', 'agent2']
# Full list of columns that `process_data` removes when present.
dropped_cols    = output_cols + irrelevant_cols + game_cols

In [None]:
%%time 

class Config:
    """Notebook-wide settings read by the data, training and inference code."""

    # Input data ------------------------------------------------------------
    # CSV that `predict` loads on its first call to train the models.
    train_path = '/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv'

    # Feature engineering ---------------------------------------------------
    # When True, `process_data` splits each agent string into one column per part.
    split_agent_features = True

    # Cross-validation and training -----------------------------------------
    n_splits = 5        # number of GroupKFold folds
    early_stop = 50     # early-stopping patience passed to every booster
    seed = 42

    # Runtime ---------------------------------------------------------------
    # Anything other than "cpu" selects the GPU back-end of each library.
    device = "gpu"
    # When True, each fold is trained on its first 100 rows only (syntax check).
    test_req = False


# **DATA TRANSFORMS**

In [None]:
%%time 

# Category levels recorded per categorical column the first time that column
# is seen. `predict` processes the training frame before any test batch, so
# these are the training levels; later frames are re-encoded against them.
_AGENT_CATEGORY_LEVELS = {}


def process_data(df):
    """
    Turn a raw polars frame into the pandas frame used for modelling.

    Steps:
      1. drop every column listed in ``dropped_cols`` that is present;
      2. if ``Config.split_agent_features`` is set, split each agent column on
         "-" into ``<col>_<idx>`` columns and discard the ``<col>_0`` part;
      3. cast agent-derived columns to categorical and all others to Float32;
      4. convert to pandas and align categorical levels with those recorded
         on the first call.

    Why step 4: category codes are otherwise derived independently for each
    frame, so the same agent string could receive different integer codes in
    the training frame and in a test batch. A model that consumes the codes
    directly would then see scrambled categories at inference time.
    Levels not seen on the first call are appended after the known ones, so
    known values keep their codes and unseen values remain non-missing.

    Args:
        df: polars DataFrame with the raw competition columns.

    Returns:
        pandas DataFrame with categorical agent columns and Float32 features.
    """
    removable = set(dropped_cols)
    df = df.drop([name for name in df.columns if name in removable])

    if Config.split_agent_features:
        for col in agent_cols:
            df = (
                df.with_columns(
                    pl.col(col)
                    .str.split(by="-")
                    .list.to_struct(fields=lambda idx: f"{col}_{idx}")
                )
                .unnest(col)
                .drop(f"{col}_0")
            )

    # Agent columns (split or not) are exactly those whose name starts with
    # one of the entries in `agent_cols`.
    agent_prefixes = tuple(agent_cols)
    df = df.with_columns(
        [pl.col(name).cast(pl.Categorical) for name in df.columns if name.startswith(agent_prefixes)]
    )
    df = df.with_columns(
        [pl.col(name).cast(pl.Float32) for name in df.columns if not name.startswith(agent_prefixes)]
    )

    print(f'---> Shapes = {df.shape}')
    pdf = df.to_pandas()

    for name in pdf.select_dtypes("category").columns:
        current = list(pdf[name].cat.categories)
        known = _AGENT_CATEGORY_LEVELS.get(name)
        if known is None:
            # First sighting of this column: remember its encoding as-is.
            _AGENT_CATEGORY_LEVELS[name] = current
            continue
        unseen = [level for level in current if level not in known]
        pdf[name] = pdf[name].cat.set_categories(known + unseen)

    return pdf


# **MODEL TRAINING**

In [None]:
%%time 

def _show_scores(frame, caption):
    """Display ``frame`` as a styled table with the row-wise minimum highlighted."""
    display(
        frame.style
        .set_caption(caption)
        .format(precision=5)
        .set_properties(
            subset=frame.columns,
            # `set_properties` takes CSS property/value pairs as keyword
            # arguments; a single `props=` string is emitted as an invalid
            # "props: ..." declaration.
            **{
                "background-color": "white",
                "font-weight": "bold",
                "border": "dashed maroon 1.5px",
            },
        )
        .highlight_min(color="#ccf2ff", axis=1)
    )


def train_models(
    data, ygrp
):
    """
    Fit LightGBM, XGBoost and CatBoost regressors with grouped cross-validation.

    For every GroupKFold fold the three models are trained on the fold's
    training rows with early stopping on the held-out rows. Each model's best
    validation RMSE is stored per fold and shown as styled tables at the end.

    Args:
        data: pandas DataFrame of features plus the ``utility_agent1`` target.
        ygrp: group label per row of ``data``, used to keep groups from being
            split across folds.

    Returns:
        Tuple ``(lgb_models, xgb_models, cb_models)``; each is a list with one
        fitted model per fold.
    """
    X        = data.drop(['utility_agent1'], axis=1)
    y        = data['utility_agent1']
    cv       = GroupKFold(n_splits=Config.n_splits)
    cat_cols = list(X.select_dtypes("category").columns)
    use_gpu  = Config.device != "cpu"

    lgb_models, xgb_models, cb_models = [], [], []

    scores = pd.DataFrame(
        index=range(0, Config.n_splits, 1),
        columns=["LGBM", "XGB", "CB"],
        dtype=np.float32,
    )

    for fold_nb, (train_idx, valid_idx) in enumerate(cv.split(X, y, groups=ygrp)):

        print(f"\n {'=' * 15} TRAINING FOLD {fold_nb + 1} {'=' * 15}")

        Xtr, ytr   = X.iloc[train_idx], y.iloc[train_idx]
        Xdev, ydev = X.iloc[valid_idx], y.iloc[valid_idx]

        if Config.test_req:
            # Syntax-check mode: train on a tiny slice so the cell runs quickly.
            Xtr, ytr = Xtr.iloc[0:100], ytr.iloc[0:100]
            print(f'\n---> Syntax check = {Xtr.shape} {ytr.shape}\n')

        # ---------------------------- LightGBM ----------------------------
        lgb_model = LGBMR(
            **{
                'objective'    : 'regression_l2',
                'metric'       : 'rmse',
                'device'       : 'gpu' if use_gpu else "cpu",
                'n_estimators' : 10_000,
                'random_state' : Config.seed,
                'verbosity'    : -1,
                'num_leaves'   : 42,
                'learning_rate': 0.035,
                'max_depth'    : 8,
                'reg_lambda'   : 1.0,
                # NOTE(review): kept exactly as written; confirm LightGBM
                # honours this "name: <col>" spelling rather than ignoring it.
                'categorical_feature' : [f"name: {c}" for c in cat_cols],
            }
        )
        lgb_model.fit(
            Xtr, ytr,
            eval_set=[(Xdev, ydev)],
            eval_names=["Dev"],
            callbacks=[
                early_stopping(Config.early_stop, verbose=False),
                log_evaluation(0),
            ],
        )
        lgb_models.append(lgb_model)

        best_score = lgb_model.best_score_["Dev"]["rmse"]
        print(f'---> OOF = {best_score:.5f} | LGBM')
        scores.at[fold_nb, "LGBM"] = best_score

        # ---------------------------- XGBoost -----------------------------
        xgb_model = XGBR(
            **{
                'tree_method'           : "hist",
                "device"                : "cuda" if use_gpu else "cpu",
                'objective'             : "reg:squarederror",
                'eval_metric'           : "rmse",
                'random_state'          : Config.seed,
                'colsample_bytree'      : 0.60,
                'learning_rate'         : 0.03,
                'max_depth'             : 7,
                'n_estimators'          : 3000,
                'reg_alpha'             : 0.001,
                'reg_lambda'            : 0.95,
                'min_child_weight'      : 18,
                'early_stopping_rounds' : Config.early_stop,
                'verbosity'             : 0,
                'enable_categorical'    : True,
            }
        )
        xgb_model.fit(
            Xtr, ytr,
            eval_set=[(Xdev, ydev)],
            verbose=0,
        )
        xgb_models.append(xgb_model)

        best_score = xgb_model.best_score
        print(f'---> OOF = {best_score:.5f} | XgBoost')
        scores.at[fold_nb, "XGB"] = best_score

        # ---------------------------- CatBoost ----------------------------
        cb_model = CBR(
            **{
                'task_type'           : "GPU" if use_gpu else "CPU",
                'loss_function'       : 'RMSE',
                'eval_metric'         : "RMSE",
                'bagging_temperature' : 0.25,
                'iterations'          : 3000,
                'learning_rate'       : 0.04,
                'max_depth'           : 8,
                'l2_leaf_reg'         : 1.25,
                'min_data_in_leaf'    : 15,
                'random_strength'     : 0.25,
                'verbose'             : 0,
                'use_best_model'      : True,
                'cat_features'        : cat_cols,
            }
        )
        cb_model.fit(
            Xtr, ytr,
            eval_set=[(Xdev, ydev)],
            verbose=0,
            early_stopping_rounds=Config.early_stop,
        )
        cb_models.append(cb_model)

        best_score = cb_model.get_best_score()["validation"]["RMSE"]
        print(f'---> OOF = {best_score:.5f} | CatBoost')
        scores.at[fold_nb, "CB"] = best_score

    # Replace the per-fold log lines with the summary tables.
    clear_output()
    _show_scores(scores, f"OOF scores across folds")

    print("\n\n")
    _show_scores(scores.mean().to_frame().transpose(), f"Mean OOF scores across folds")

    return (lgb_models, xgb_models, cb_models)

def make_preds(data, models):
    """Return the element-wise mean of ``model.predict(data)`` over all ``models``."""
    per_fold = []
    for fitted in models:
        per_fold.append(fitted.predict(data))

    # Stack the fold predictions row-wise and average down the fold axis.
    return np.asanyarray(per_fold).mean(axis=0)

# **SUBMISSION**
    

In [None]:
%%time 

# Number of times `predict` has been invoked; 0 means models are not trained yet.
run_i = 0


def predict(
    test_data, submission
):
    """
    Inference callback handed to the competition server.

    On the first invocation the training CSV is loaded, processed and used to
    fit the three model families, which are kept in module-level globals.
    Every invocation then processes ``test_data``, blends the per-family
    fold-averaged predictions with fixed weights, clips the result to
    [-1, 1] and returns ``submission`` with a ``utility_agent1`` column.
    """
    global run_i, lgb_models, xgb_models, cb_models

    needs_training = run_i == 0
    if needs_training:
        print(f"\n---> Training models with counter = 0\n")

        train_df = pl.read_csv(Config.train_path)
        # Grouping key for GroupKFold, taken before the column is dropped.
        ygrp  = train_df.select(pl.col('GameRulesetName')).to_pandas().squeeze()
        train = process_data(train_df)

        fitted = train_models(train, ygrp)
        lgb_models, xgb_models, cb_models = fitted
        del train, ygrp
        print()

    run_i += 1
    features = process_data(test_data)

    # One column per model family, in the same order as the blend weights.
    family_preds = [
        make_preds(features, lgb_models),
        make_preds(features, xgb_models),
        make_preds(features, cb_models),
    ]
    blend_weights = [0.40, 0.30, 0.30]
    blended = np.average(
        np.stack(family_preds, axis=1),
        axis=1,
        weights=blend_weights,
    )

    bounded = np.clip(blended, a_min=-1.0, a_max=1.0)
    return submission.with_columns(pl.Series('utility_agent1', bounded))


In [None]:
%%time 

# Wrap the `predict` callback in the competition's inference server.
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)

# If the KAGGLE_IS_COMPETITION_RERUN environment variable is set, serve
# requests; otherwise run the local gateway against the test and
# sample-submission CSV files listed below.
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
    
else:
    inference_server.run_local_gateway(
        (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv'
        )
    )
    
print()
# Run a garbage-collection pass once serving / the local run has finished.
collect();