In [1]:
import warnings
warnings.simplefilter('ignore')
import math
import pandas as pd
import numpy as np
import sys
import time
import datetime
from contextlib import contextmanager
import logging
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, average_precision_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold, KFold,GroupKFold,StratifiedGroupKFold
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder
from torch.nn import TransformerDecoder
from torch.nn import LayerNorm
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torch.optim import lr_scheduler
from transformers import AdamW, get_linear_schedule_with_warmup
import gc
import random
import os
%matplotlib inline
import logging
pd.set_option('display.max_columns', 300)

def set_seed(seed: int = 42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), exports ``PYTHONHASHSEED`` and switches cuDNN
    to deterministic, non-benchmarking mode so repeated runs can match.

    Args:
        seed: Value used to seed all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs, not only the current one.
    torch.cuda.manual_seed_all(seed)
    # A seeding helper should ask cuDNN for deterministic kernels; leaving this
    # False allows run-to-run differences even with identical seeds.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [2]:
debug = False
exp = "002"
# exist_ok keeps the cell re-runnable and also creates the model sub-directory
# when the experiment directory already exists without it.
os.makedirs(f"../out/exp/exp{exp}/exp{exp}_model", exist_ok=True)
logger_path = f"../out/exp/exp{exp}/exp_{exp}.txt"
model_path =f"../out/exp/exp{exp}/exp{exp}_model/exp{exp}.pth"
LOGGER = logging.getLogger(__name__)
# Without an explicit level the logger falls back to the root logger's level
# (WARNING by default), so LOGGER.info(...) would never reach the file handler.
LOGGER.setLevel(logging.INFO)
# Re-running this cell must not stack file handlers, otherwise every record is
# written once per previous execution.
for stale_handler in [h for h in LOGGER.handlers if isinstance(h, logging.FileHandler)]:
    LOGGER.removeHandler(stale_handler)
    stale_handler.close()
file_handler = logging.FileHandler(logger_path)
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
LOGGER.addHandler(file_handler)

# ---- experiment-wide settings ----
seed = 0
n_splits = 5
shuffle = True
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ---- optimisation hyper-parameters ----
lr = 1e-4
weight_decay = 0.05
batch_size = 32
n_epochs = 5
num_warmup_steps = 10

# ---- arrays written by feature-engineering step fe002 ----
id_path = "../out/fe/fe002/id_list.npy"
player_path = "../out/fe/fe002/player_list.npy"
feature_arr_path = "../out/fe/fe002/feature_arr.npy"
target_arr_path = "../out/fe/fe002/target_arr.npy"

In [3]:
# Features and targets are cast to float32 right after loading; the id and
# player arrays are kept in whatever dtype they were saved with.
feature_arr = np.load(feature_arr_path).astype(np.float32)
target_arr = np.load(target_arr_path).astype(np.float32)
id_list = np.load(id_path)
player_list = np.load(player_path)

In [4]:
class SwingDataset(Dataset):
    """Map-style dataset over an indexable feature container, optionally with targets.

    Args:
        X: Indexable collection of samples; ``len(X)`` is the dataset size.
        train: When True, items are ``(X[idx], y[idx])`` pairs; when False,
            items are ``X[idx]`` alone.
        y: Indexable collection of targets. Required when ``train`` is True.

    Raises:
        ValueError: If ``train`` is True but ``y`` is None.
    """
    def __init__(self, X, 
                 train = True, y = None):
        # Fail at construction time instead of with an opaque
        # "'NoneType' object is not subscriptable" inside a DataLoader worker.
        if train and y is None:
            raise ValueError("y must be provided when train=True")
        self.X = X
        self.train = train
        self.y = y
    
    def __len__(self):
        """Return the number of samples, i.e. ``len(X)``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(X[idx], y[idx])`` in train mode, otherwise ``X[idx]``."""
        if self.train:
            return self.X[idx], self.y[idx]
        else:
            return self.X[idx]

In [11]:
class SwingGRU(nn.Module):
    """Two-layer bidirectional GRU classifier that outputs per-class probabilities.

    Pipeline: Linear + LayerNorm projection of every time step, a 2-layer
    bidirectional GRU, then an MLP head ending in a sigmoid applied to the
    GRU output at the last time step.

    Args:
        dropout: Dropout probability used inside the output head.
        input_dim: Number of features per time step.
        hidden_dim: Width of the per-step projection fed to the GRU.
        model_dim: GRU hidden size per direction.
        out_size: Number of output units.
    """
    def __init__(
        self, dropout=0.2,
        input_dim = 24,
        hidden_dim = 64,
        model_dim = 128,
        out_size = 11
        ):
        super(SwingGRU, self).__init__()
        self.numerical_linear  = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.LayerNorm(hidden_dim)
            )
        
        self.rnn = nn.GRU(hidden_dim, model_dim,
                            num_layers = 2, 
                            batch_first=True,
                            bidirectional=True)
                
        # The GRU is bidirectional, so its output width is 2 * model_dim.
        self.linear_out  = nn.Sequential(
                nn.Linear(model_dim * 2, 
                          model_dim),
                nn.LayerNorm(model_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(model_dim, 
                          out_size),
                nn.Sigmoid(),
        )
        self._reinitialize()
        
    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization of the recurrent layer.

        Input-to-hidden weights use Xavier-uniform, hidden-to-hidden weights
        are orthogonal, and all recurrent biases are zero except the
        update-gate part of ``bias_ih``, which is set to 1.
        """
        for name, p in self.named_parameters():
            if 'rnn' in name:
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(p.data)
                elif 'bias_ih' in name:
                    p.data.fill_(0)
                    # A GRU packs three gates (reset | update | new) into this
                    # vector, so each gate owns one third of it. The previous
                    # LSTM-style slice n//4 : n//2 straddled the reset and
                    # update gates. The update gate is the GRU counterpart of
                    # an LSTM forget gate: a positive bias favours keeping the
                    # previous hidden state.
                    gate = p.size(0) // 3
                    p.data[gate:2 * gate].fill_(1)
                elif 'bias_hh' in name:
                    p.data.fill_(0)
    
    def forward(self, numerical_array):
        """Return sigmoid outputs of shape ``(batch, out_size)``.

        Args:
            numerical_array: Float tensor laid out as
                ``(batch, seq_len, input_dim)`` (the GRU is ``batch_first``).
        """
        numerical_embedding = self.numerical_linear(numerical_array)
        output,_ = self.rnn(numerical_embedding)
        # NOTE(review): at the last time step the reverse direction has only
        # processed one element; consider pooling or using the final hidden
        # states if the backward pass should summarise the whole sequence.
        output = self.linear_out(output[:, -1, :])
        return output

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a batch-first sequence.

    Even feature indices receive ``sin(pos * w_k)`` and odd indices receive
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / d_model)``. Both even and
    odd ``d_model`` are supported.

    Args:
        d_model: Feature width of the sequences that will be encoded.
        max_len: Longest sequence length for which encodings are precomputed.
    """
    def __init__(self, d_model, max_len=1000):
        super().__init__()
        
        # Create sinusoidal positional encoding
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so
        # trim the angles to the number of cosine slots (no-op for even d_model).
        pe[0, :, 1::2] = torch.cos(angles[:, : d_model // 2])
        
        # Register as buffer (not a parameter but should be saved and loaded with the model)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        Args:
            x: Tensor of shape ``(batch_size, seq_len, d_model)`` with
                ``seq_len`` no larger than ``max_len``.
        """
        # x: [batch_size, seq_len, d_model]
        x = x + self.pe[:, :x.size(1), :]
        return x

class EncoderOnlyClassifier(nn.Module):
    """Transformer-encoder classifier that reads out the final time step.

    Each time step is linearly projected to ``d_model``, sinusoidal positional
    encodings are added, the sequence passes through a pre-norm Transformer
    encoder, and the representation at the last position goes through
    LayerNorm -> Linear -> Sigmoid.

    Args:
        input_dim: Number of features per time step.
        n_enc: Number of stacked encoder layers.
        nhead: Attention heads per encoder layer.
        d_model: Model width (the feed-forward width is ``4 * d_model``).
        max_seq_len: Longest sequence the positional encoding supports.
        out_size: Number of output units (default 11, the previous fixed width).
    """
    def __init__(self, input_dim=24, n_enc=3, nhead=8, d_model=64, max_seq_len=1000, out_size=11):
        super().__init__()
        # Initialize Transformer model
        self.input_proj = nn.Sequential(
            nn.Linear(input_dim, d_model),
        )
        
        # Positional encoding
        self.pos_encoder = PositionalEncoding(d_model, max_seq_len)
        
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,
            batch_first=True,
            norm_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_enc)
        self.classifier = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, out_size),
            nn.Sigmoid(),
        )

    def forward(self, src):
        """Return sigmoid probabilities of shape ``(batch_size, out_size)``.

        Args:
            src: Float tensor of shape ``(batch_size, seq_len, input_dim)``.
        """
        # Project input to d_model dimension
        x = self.input_proj(src)  # -> (batch_size, seq_len, d_model)
        
        # Add positional encoding
        x = self.pos_encoder(x)
        
        # Pass through transformer encoder (no padding mask is supplied)
        memory = self.encoder(x)
        
        # Use the last time-step from encoder output
        last = memory[:, -1, :]  # shape: (batch_size, d_model)
        # The head ends in Sigmoid, so these are probabilities, not raw logits.
        probs = self.classifier(last)  # shape: (batch_size, out_size)
        
        return probs



In [12]:
# Shape smoke test: push a random batch of 2 sequences (1000 steps, 24
# features) through a freshly built model and show the output shape.
model = EncoderOnlyClassifier()
dummy_batch = torch.randn(2, 1000, 24)
y = model(dummy_batch)
print(y.shape)

torch.Size([2, 11])


In [6]:
# Collapse the last four target columns into one integer label per row
# (index of the largest entry).
target_labels = target_arr[:, -4:].argmax(axis=1)

In [7]:
# Use the config-cell constants instead of repeating their literal values, so
# changing n_splits / shuffle there actually affects this split.
gkf = StratifiedGroupKFold(n_splits=n_splits, shuffle=shuffle, random_state=seed)
# Stratify on target_labels while keeping rows that share a player_list value
# in the same fold.
iterator = gkf.split(feature_arr, y = target_labels, groups= player_list)

In [8]:
# Build a tiny loader over the full arrays and pull a single batch of two
# samples for interactive checks.
criterion = nn.BCELoss()
train_ds = SwingDataset(X=feature_arr, y=target_arr, train=True)
train_loader = DataLoader(dataset=train_ds, batch_size=2, pin_memory=True)
first_batch = next(iter(train_loader))
X, y = first_batch

In [9]:
# Rebinds `model` to a SwingGRU built with its default arguments.
model = SwingGRU()

In [10]:
out = model(X)
# nn.BCELoss is called as criterion(input, target): model probabilities first,
# ground-truth labels second. The arguments were previously swapped.
loss = criterion(out, y)

In [11]:
# Display the last four output columns for every sample in the batch.
out[:, -4:]

tensor([[0.5833, 0.6206, 0.6263, 0.5055],
        [0.8018, 0.6430, 0.4366, 0.4115]], grad_fn=<SliceBackward0>)

In [12]:
# NOTE(review): ad-hoc scratch arithmetic; the origin of these constants is not
# shown anywhere in the notebook — document or remove.
0.62496989 - 0.5 + 0.67610621

0.8010761000000001

In [7]:
def to_numpy(p: torch.Tensor):
    """Convert a tensor to a NumPy array on the CPU.

    Tensors that track gradients are detached from the autograd graph first;
    all others are converted directly.
    """
    source = p.detach() if p.requires_grad else p
    return source.cpu().numpy()

def metric_report(y_batch, out_batch):
    """Print ROC-AUC and average-precision scores for each target group.

    The columns are split into four consecutive groups: 0-1 (gender),
    2-3 (hand), 4-6 (year) and 7-10 (level). For every group one line with
    micro/macro ROC-AUC and micro/macro average precision is printed.

    Args:
        y_batch: 2-D array of ground-truth indicators.
        out_batch: 2-D array of predicted scores, with the same column layout.
    """
    boundaries = [0, 2, 4, 7, 11]
    group_names = ['gender', 'hand', 'year', 'level']
    for idx, cls in enumerate(group_names):
        cols = slice(boundaries[idx], boundaries[idx + 1])
        y_true = y_batch[:, cols]
        y_score = out_batch[:, cols]

        micro_roc_score = roc_auc_score(y_true, y_score, average='micro', multi_class='ovr')
        macro_roc_score = roc_auc_score(y_true, y_score, average='macro', multi_class='ovr')
        micro_presicion_score = average_precision_score(y_true, y_score, average='micro')
        macro_presicion_score = average_precision_score(y_true, y_score, average='macro')

        print(f"{cls} micro roc: {micro_roc_score:.4f}, macro roc: {macro_roc_score:.4f}, micro presci: {micro_presicion_score:.4f}, macro presci: {macro_presicion_score:.4f}")


In [14]:
    #AUC SCORE: 0.792(gender) + 0.998(hold) + 0.660(years) + 0.822(levels)


In [13]:
# Run-specific hyper-parameters. These re-assign names from the earlier config
# cell (n_epochs is 20 here versus 5 there).
batch_size = 32
n_epochs = 20
lr = 1e-4
weight_decay = 0.05
num_warmup_steps = 10
main_loss_weight = 0.8  # share of the total loss given to the main column group
class_start = 7         # first target column of the main (class-weighted) group
class_end = 11          # one past the last target column of the main group

# train levels first
# target_labels = target_arr[:, class_start:class_end]
# target_labels = np.argmax(target_labels, axis=1)

# gkf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state = seed)
# iterator = gkf.split(feature_arr, y = target_labels, groups = player_list)
# Samples that share a value in player_list always land in the same fold.
gf = GroupKFold(n_splits=5, shuffle=True, random_state=seed)
iterator = gf.split(feature_arr, target_arr, groups=player_list)

criterion = nn.BCELoss()

for fold, (train_idx, val_idx) in enumerate(iterator):
    print(f"start fold:{fold}, train size: {len(train_idx)}, val size: {len(val_idx)}")
        
    train_feature = feature_arr[train_idx]
    train_target = target_arr[train_idx]
    
    # Inverse positive count per target column, then normalised over the main
    # group so its weights sum to 1. A column with no positives in this fold
    # would produce inf/nan here.
    train_weights = 1 / np.sum(train_target, axis=0)
    level_weights = train_weights[class_start:class_end] / np.sum(train_weights[class_start:class_end])
    print(f"class weights: {level_weights}")
    
    val_feature = feature_arr[val_idx]
    val_target = target_arr[val_idx]

    train_ds = SwingDataset(train_feature, train=True, y = train_target)
    # train=True so the validation loader also yields (X, y) pairs.
    val_ds = SwingDataset(val_feature, train=True, y = val_target)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=True, num_workers=4)
    
    # model = SwingGRU()
    model = EncoderOnlyClassifier()
    model = model.to(device)
    param_optimizer = list(model.named_parameters())
    # Exempt biases and normalisation weights from weight decay. They are
    # selected by dimensionality (1-D tensors) because a name filter on
    # 'LayerNorm.*' never matches PyTorch parameter names such as 'norm1.weight'
    # or 'classifier.0.weight', which left LayerNorm weights being decayed.
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if p.ndim >= 2], 'weight_decay': weight_decay},
        {'params': [p for n, p in param_optimizer if p.ndim < 2], 'weight_decay': 0.0}
    ]
    # NOTE(review): transformers' AdamW is deprecated upstream in favour of
    # torch.optim.AdamW — confirm against the installed transformers version.
    optimizer = AdamW(optimizer_grouped_parameters,
                        lr=lr,
                        weight_decay=weight_decay,
                        )
    # The scheduler is stepped once per batch, so the horizon is batches * epochs.
    num_train_optimization_steps = int(len(train_loader) * n_epochs)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=num_warmup_steps,
                                                num_training_steps=num_train_optimization_steps)

    # Main group: class-weighted BCE; remaining (auxiliary) columns: plain BCE.
    main_criterion = nn.BCELoss(weight=torch.tensor(level_weights).to(device))
    aux_criterion = nn.BCELoss()
    # criterion = nn.BCELoss()
    
    for epoch in range(n_epochs):                
        train_out_batch = []
        val_out_batch = []
        train_y_batch = []
        val_y_batch = []
        train_loss = 0.0
        val_loss = 0.0
        
        model.train() 
        pbar = tqdm(train_loader, total=len(train_loader), leave = False)
        for d in pbar:
            X, y = d
            X, y = X.to(device), y.to(device)
            
            optimizer.zero_grad()
            
            out = model(X)
            # Slice with class_start/class_end so the loss split always matches
            # the columns the class weights were computed from.
            aux_loss = aux_criterion(out[:, :class_start], y[:, :class_start])
            main_loss = main_criterion(out[:, class_start:class_end], y[:, class_start:class_end])
            loss = (1 - main_loss_weight) * aux_loss + main_loss_weight * main_loss
            loss.backward()
            
            optimizer.step()
            scheduler.step()
            
            train_out_batch.append(to_numpy(out))
            train_y_batch.append(to_numpy(y))
            train_loss += loss.item()
        
        train_out_batch = np.concatenate(train_out_batch)
        train_y_batch = np.concatenate(train_y_batch)
        
        print(f"Train loss: {train_loss/len(train_loader):.4f}")
        # metric_report(train_y_batch, train_out_batch)
        # break
        
        model.eval()
        pbar = tqdm(val_loader, total=len(val_loader), leave = False)
        with torch.no_grad():
            for d in pbar:
                X, y = d
                X, y = X.to(device), y.to(device)
                
                out = model(X)
                aux_loss = aux_criterion(out[:, :class_start], y[:, :class_start])
                main_loss = main_criterion(out[:, class_start:class_end], y[:, class_start:class_end])
                loss = (1 - main_loss_weight) * aux_loss + main_loss_weight * main_loss

                val_out_batch.append(to_numpy(out))
                val_y_batch.append(to_numpy(y))
                val_loss += loss.item()
        
        val_out_batch = np.concatenate(val_out_batch)
        val_y_batch = np.concatenate(val_y_batch)
        
        print(f"Val loss: {val_loss/len(val_loader):.4f}")
        metric_report(val_y_batch, val_out_batch)
        
    # Only the first fold is trained; remove this break to run every fold.
    break
                        


start fold:0, train size: 6882, val size: 1520
class weights: [0.07866459 0.30622187 0.5473914  0.06772215]


  0%|          | 0/216 [00:00<?, ?it/s]

KeyboardInterrupt: 