In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, time

from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold, GroupKFold

from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_toolbelt import losses as L
from nn_utils import *

# Notebook display settings: show up to 1000 rows and never truncate columns.
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", None)

In [2]:
# Name of the label column.
TARGET = "open_channels"

# Load the pre-cleaned frames. NOTE(review): read_pickle executes pickled code,
# so these files must come from a trusted pipeline step.
df_train_raw = pd.read_pickle('../features/train_clean.pkl')
# The test frame gets a constant placeholder label so that it exposes the same
# TARGET column as the training frame.
df_test_raw = pd.read_pickle('../features/test_clean.pkl').assign(**{TARGET: 0})

print(df_train_raw.shape, df_test_raw.shape)
df_train_raw.head()

(4500000, 6) (2000000, 7)


Unnamed: 0,time,signal,open_channels,local_time,batch,mini_batch
0,0.0001,-2.76,0,0.0001,1.0,1.0
1,0.0002,-2.8557,0,0.0002,1.0,1.0
2,0.0003,-2.4074,0,0.0003,1.0,1.0
3,0.0004,-3.1404,0,0.0004,1.0,1.0
4,0.0005,-3.1525,0,0.0005,1.0,1.0


In [3]:
# Add the squared signal as a second input feature to both frames.
for raw_frame in (df_train_raw, df_test_raw):
    raw_frame["signal_pow_2"] = raw_frame["signal"].pow(2)

In [4]:
# Every training column that is not bookkeeping or the label becomes a model input.
non_feature_cols = ("time", "local_time", "open_channels", "batch", "mini_batch")
use_cols = [
    name for name in df_train_raw.columns
    if name not in non_feature_cols
]
print(use_cols)

['signal', 'signal_pow_2']


In [5]:
def chop_seq(df_batch_i, seq_len=2500, n_chunks=200, feature_cols=None, target_col=None):
    """Split one batch frame into consecutive fixed-length sequences.

    Parameters
    ----------
    df_batch_i : pandas.DataFrame
        Rows of a single batch, in time order.
    seq_len : int, default 2500
        Number of rows per sequence.
    n_chunks : int, default 200
        Number of consecutive sequences to cut from the start of the frame.
    feature_cols : list of str, optional
        Feature columns to extract; defaults to the notebook-level ``use_cols``.
    target_col : str, optional
        Label column to extract; defaults to the notebook-level ``TARGET``.

    Returns
    -------
    (list of ndarray, list of ndarray)
        ``n_chunks`` feature arrays of shape ``(seq_len, len(feature_cols))``
        and ``n_chunks`` label arrays of shape ``(seq_len,)``. Chunks that run
        past the end of the frame are shorter (possibly empty), exactly as
        with plain slicing.
    """
    # Fall back to the notebook globals only when the caller did not override them.
    if feature_cols is None:
        feature_cols = use_cols
    if target_col is None:
        target_col = TARGET

    df_batch_i_features = []
    df_batch_i_y = []

    for i in range(n_chunks):
        # Positional slice of rows [i * seq_len, (i + 1) * seq_len).
        chunk = df_batch_i.iloc[(seq_len * i):(seq_len * (i + 1))]
        df_batch_i_features.append(chunk[feature_cols].values)
        df_batch_i_y.append(chunk[target_col].values)

    return df_batch_i_features, df_batch_i_y

In [6]:
# TRAIN
# Cut each listed training batch into 2500-row sequences and collect all
# sequences in two flat lists (features / labels).
train_feature_chunks, train_target_chunks = [], []

for batch_i in [1, 2, 3, 4, 5, 6, 7, 9, 10]:
    chunk_features, chunk_targets = chop_seq(df_train_raw[df_train_raw.batch == batch_i])
    train_feature_chunks.extend(chunk_features)
    train_target_chunks.extend(chunk_targets)

# Stack to (n_sequences, 2500, n_features), then move features to axis 1.
df_train = np.stack(train_feature_chunks)
df_train = df_train.reshape([-1, 2500, df_train.shape[-1]]).transpose([0, 2, 1])
df_train_y = np.stack(train_target_chunks).reshape([-1, 2500])

print("TRAIN:", df_train.shape, df_train_y.shape)

TRAIN: (1800, 2, 2500) (1800, 2500)


In [7]:
# TEST
# Same chopping as for the training data, applied to the four test batches.
test_feature_chunks, test_target_chunks = [], []

for batch_i in [1, 2, 3, 4]:
    chunk_features, chunk_targets = chop_seq(df_test_raw[df_test_raw.batch == batch_i])
    test_feature_chunks.extend(chunk_features)
    test_target_chunks.extend(chunk_targets)

# Stack to (n_sequences, 2500, n_features), then move features to axis 1.
df_test = np.stack(test_feature_chunks)
df_test = df_test.reshape([-1, 2500, df_test.shape[-1]]).transpose([0, 2, 1])
df_test_y = np.stack(test_target_chunks).reshape([-1, 2500])

print("TEST:", df_test.shape, df_test_y.shape)

TEST: (800, 2, 2500) (800, 2500)


In [8]:
# Each sequence is its own group, so GroupKFold simply partitions the
# sequences into 3 folds (a shuffled KFold with random_state=42 was the
# alternative considered earlier).
group = [*range(len(df_train))]
skf = GroupKFold(n_splits=3)

In [9]:
class Seq2SeqRnn(nn.Module):
    """GRU encoder followed by a per-timestep fully connected head.

    ``forward`` takes a tensor of shape ``(batch, input_size, seq_len)`` and
    returns unnormalised class scores of shape ``(batch, seq_len, output_size)``.

    Parameters
    ----------
    input_size : int
        Number of input features per timestep.
    seq_len : int
        Stored on the instance; not used by the layers themselves.
    hidden_size : int
        GRU hidden size (per direction).
    output_size : int
        Number of output classes per timestep.
    num_layers : int, default 1
        Number of stacked GRU layers.
    bidirectional : bool, default False
        Whether the GRU is bidirectional (doubles the GRU output width).
    dropout : float, default 0.3
        Dropout probability applied after the GRU output and after every
        hidden linear layer.
    hidden_layers : list of int, default [100, 200]
        Widths of the linear layers between the GRU and the output layer.
        An empty list or ``None`` connects the GRU output directly to the
        output layer. The default list is never mutated.
    """

    def __init__(self, input_size, seq_len, hidden_size, output_size, num_layers=1, bidirectional=False, dropout=.3,
            hidden_layers = [100, 200]):

        super().__init__()
        self.input_size = input_size
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.num_layers=num_layers
        self.bidirectional=bidirectional
        self.output_size=output_size

        # Width of the GRU output per timestep (both directions concatenated).
        rnn_out_size = hidden_size*2 if bidirectional else hidden_size

        # Inter-layer GRU dropout stays at the original fixed 0.3. PyTorch only
        # applies it between stacked layers, so it is disabled for a single
        # layer, where it would have no effect and only trigger a warning.
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                          bidirectional=bidirectional, batch_first=True,
                          dropout=0.3 if num_layers > 1 else 0.0)

        if hidden_layers and len(hidden_layers):
            # Input layer of the head: GRU output -> first hidden width.
            first_layer = nn.Linear(rnn_out_size, hidden_layers[0])

            # Hidden layers
            self.hidden_layers = nn.ModuleList(
                [first_layer]+[nn.Linear(hidden_layers[i], hidden_layers[i+1]) for i in range(len(hidden_layers) - 1)]
            )
            for layer in self.hidden_layers: nn.init.kaiming_normal_(layer.weight.data)

            # Not used in forward(); kept so parameter names and initialisation
            # order stay compatible with existing checkpoints.
            self.intermediate_layer = nn.Linear(hidden_layers[-1], self.input_size)
            # Output layer
            self.output_layer = nn.Linear(hidden_layers[-1], output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)

        else:
            # No hidden layers: GRU output feeds the output layer directly.
            self.hidden_layers = nn.ModuleList()
            # Fixed: this line previously referenced the undefined name
            # `hidden_siz`, so this branch always raised NameError.
            self.intermediate_layer = nn.Linear(rnn_out_size, self.input_size)
            self.output_layer = nn.Linear(rnn_out_size, output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)

        self.activation_fn = torch.relu
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return per-timestep class scores for ``x`` of shape (batch, input_size, seq_len)."""
        # The GRU is batch_first, so move features to the last axis:
        # (batch, input_size, seq_len) -> (batch, seq_len, input_size).
        x = x.permute(0,2,1)

        outputs, _ = self.rnn(x)

        x = self.dropout(self.activation_fn(outputs))
        for hidden_layer in self.hidden_layers:
            x = self.activation_fn(hidden_layer(x))
            x = self.dropout(x)

        x = self.output_layer(x)

        return x


In [10]:
# Convert the stacked numpy feature arrays into float32 tensors for the model.
df_train = torch.tensor(df_train, dtype=torch.float32)
df_test = torch.tensor(df_test, dtype=torch.float32)

print(df_train.shape, df_test.shape)

torch.Size([1800, 2, 2500]) torch.Size([800, 2, 2500])


In [11]:
# Per-row accumulators with one column per class (11 classes).
# NOTE(review): no visible cell writes to val_preds_all; confirm whether
# out-of-fold predictions are meant to be stored here.
val_preds_all = np.zeros((len(df_train_raw), 11))
test_preds_all = np.zeros((len(df_test_raw), 11))

In [12]:
# Cross-validated training. For every fold: train a fresh Seq2SeqRnn, track the
# macro-F1 on the validation split, reload the checkpoint written for that fold
# and add its softmax test predictions to `test_preds_all`.
os.makedirs("./models", exist_ok=True)
os.makedirs("./predictions/test", exist_ok=True)
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

for index, (train_index, val_index ) in enumerate(skf.split(df_train, df_train_y, group)):
    print("Fold : {}".format(index))
    print("TRAIN:", train_index.shape, "TEST:", val_index.shape)

    batchsize = 128
    train_dataset = IonDataset(df_train[train_index],  df_train_y[train_index], flip=False, noise_level=0.0, class_split=0.0)
    train_dataloader = DataLoader(train_dataset, batchsize, shuffle=True, num_workers=16, pin_memory=True)

    valid_dataset = IonDataset(df_train[val_index],  df_train_y[val_index], flip=False)
    valid_dataloader = DataLoader(valid_dataset, batchsize, shuffle=False, num_workers=16, pin_memory=True)

    test_dataset = IonDataset(df_test,  df_test_y, flip=False, noise_level=0.0, class_split=0.0)
    test_dataloader = DataLoader(test_dataset, batchsize, shuffle=False, num_workers=16, pin_memory=True)
    # Sized from the test frame instead of a hard-coded row count.
    test_preds_iter = np.zeros((df_test_raw.shape[0], 11))

    for it in range(1):
        checkpoint_path = "./models/gru_clean_checkpoint_fold_{}_iter_{}.pt".format(index, it)
        model=Seq2SeqRnn(input_size=df_train.shape[1], seq_len=2500, hidden_size=128, output_size=11, num_layers=2, hidden_layers=[128,64,128],
                         bidirectional=True).to(device)

        no_of_epochs = 150
        # NOTE(review): EarlyStopping comes from nn_utils; it is presumably what
        # writes the best model to checkpoint_path, which is reloaded below.
        early_stopping = EarlyStopping(patience=20, is_maximize=True, checkpoint_path=checkpoint_path)
        criterion = L.FocalLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        schedular = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e2, max_lr=0.001, epochs=no_of_epochs,
                                                steps_per_epoch=len(train_dataloader))
        avg_train_losses, avg_valid_losses = [], []


        for epoch in range(no_of_epochs):
            start_time = time.time()

            print("Epoch : {}".format(epoch))
            # Read the current rate from the optimizer; scheduler.get_lr() is
            # not meant to be called outside of step().
            print( "learning_rate: {:0.9f}".format(optimizer.param_groups[0]["lr"]))
            train_losses, valid_losses = [], []

            model.train() # prep model for training
            # Per-batch tensors are collected in lists and concatenated once
            # per epoch instead of growing a tensor with torch.cat every step.
            train_preds, train_true = [], []

            for x, y in train_dataloader:

                x = x.to(device)
                y = y.to(device)

                optimizer.zero_grad()
                predictions = model(x[:, :df_train.shape[1], :])

                predictions_ = predictions.view(-1, predictions.shape[-1])
                y_ = y.view(-1)

                loss = criterion(predictions_, y_)
                # backward pass: compute gradient of the loss with respect to model parameters
                loss.backward()
                # perform a single optimization step (parameter update)
                optimizer.step()
                # OneCycleLR is stepped once per batch
                schedular.step()
                # record training loss
                train_losses.append(loss.item())

                train_true.append(y_)
                # detach so the stored logits do not keep the autograd history alive
                train_preds.append(predictions_.detach())

            model.eval() # prep model for evaluation
            val_preds, val_true = [], []
            with torch.no_grad():
                for x, y in valid_dataloader:
                    x = x.to(device)
                    y = y.to(device)

                    predictions = model(x[:,:df_train.shape[1],:])
                    predictions_ = predictions.view(-1, predictions.shape[-1])
                    y_ = y.view(-1)

                    loss = criterion(predictions_, y_)
                    valid_losses.append(loss.item())

                    val_true.append(y_)
                    val_preds.append(predictions_)

            # calculate average loss over an epoch
            train_loss = np.average(train_losses)
            valid_loss = np.average(valid_losses)
            avg_train_losses.append(train_loss)
            avg_valid_losses.append(valid_loss)

            print( "train_loss: {:0.6f}, valid_loss: {:0.6f}".format(train_loss, valid_loss))

            train_true = torch.cat(train_true, 0).cpu().numpy()
            train_preds = torch.cat(train_preds, 0).cpu().numpy()
            val_true = torch.cat(val_true, 0).cpu().numpy()
            val_preds = torch.cat(val_preds, 0).cpu().numpy()

            train_score = f1_score(train_true, train_preds.argmax(1), labels=list(range(11)), average='macro')
            val_score = f1_score(val_true, val_preds.argmax(1), labels=list(range(11)), average='macro')
            print( "train_f1: {:0.6f}, valid_f1: {:0.6f}".format(train_score, val_score))

            if early_stopping(val_score, model):
                print("Early Stopping...")
                print("Best Val Score: {:0.6f}".format(early_stopping.best_score))
                break

            print("--- %s seconds ---" % (time.time() - start_time))

        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        # Explicit eval mode: inference must not depend on the state the
        # epoch loop happened to leave the model in.
        model.eval()
        with torch.no_grad():
            pred_list = []
            for x, _ in test_dataloader:
                x = x.to(device)

                predictions = model(x[:,:df_train.shape[1],:])
                predictions_ = predictions.view(-1, predictions.shape[-1])

                pred_list.append(F.softmax(predictions_, dim=1).cpu().numpy())
            test_preds = np.vstack(pred_list)

        test_preds_iter += test_preds
        test_preds_all += test_preds
        # NOTE(review): the per-fold file receives the running sum over all
        # folds processed so far (test_preds_all) — confirm that is intended.
        np.save('./predictions/test/gru_clean_fold_{}_iter_{}_raw.npy'.format(index, it), arr=test_preds_iter)
        np.save('./predictions/test/gru_clean_fold_{}_raw.npy'.format(index), arr=test_preds_all)

Fold : 0
TRAIN: (1200,) TEST: (600,)
Epoch : 0
learning_rate: 0.000010000
train_loss: 2.348066, valid_loss: 1.647776
train_f1: 0.079300, valid_f1: 0.088224
--- 8.420817136764526 seconds ---
Epoch : 1
learning_rate: 0.000020962
train_loss: 2.138751, valid_loss: 1.464072
train_f1: 0.080650, valid_f1: 0.083408
--- 8.05654239654541 seconds ---
Epoch : 2
learning_rate: 0.000053363
train_loss: 1.830200, valid_loss: 1.268299
train_f1: 0.082924, valid_f1: 0.083895
--- 8.63381838798523 seconds ---
Epoch : 3
learning_rate: 0.000105767
train_loss: 1.537680, valid_loss: 1.097071
train_f1: 0.089340, valid_f1: 0.107533
--- 9.029078722000122 seconds ---
Epoch : 4
learning_rate: 0.000175854
train_loss: 1.315390, valid_loss: 0.896831
train_f1: 0.101633, valid_f1: 0.185827
--- 8.251904010772705 seconds ---
Epoch : 5
learning_rate: 0.000260519
train_loss: 1.129189, valid_loss: 0.755838
train_f1: 0.120186, valid_f1: 0.211913
--- 8.396852731704712 seconds ---
Epoch : 6
learning_rate: 0.000356012
train_loss

train_loss: 0.083771, valid_loss: 0.058769
train_f1: 0.887007, valid_f1: 0.935496
--- 7.917997121810913 seconds ---
Epoch : 54
learning_rate: 0.000806913
train_loss: 0.082773, valid_loss: 0.058649
train_f1: 0.889142, valid_f1: 0.934656
--- 7.913301467895508 seconds ---
Epoch : 55
learning_rate: 0.000797645
train_loss: 0.082462, valid_loss: 0.057966
train_f1: 0.892104, valid_f1: 0.936421
--- 7.957485914230347 seconds ---
Epoch : 56
learning_rate: 0.000788216
train_loss: 0.082270, valid_loss: 0.057880
train_f1: 0.893846, valid_f1: 0.935901
--- 7.915422677993774 seconds ---
Epoch : 57
learning_rate: 0.000778631
train_loss: 0.081519, valid_loss: 0.057414
train_f1: 0.895648, valid_f1: 0.937288
--- 7.9619176387786865 seconds ---
Epoch : 58
learning_rate: 0.000768895
train_loss: 0.082085, valid_loss: 0.057229
train_f1: 0.897612, valid_f1: 0.937634
--- 7.923636198043823 seconds ---
Epoch : 59
learning_rate: 0.000759014
train_loss: 0.080718, valid_loss: 0.057302
train_f1: 0.898577, valid_f1: 0.

train_f1: 0.919171, valid_f1: 0.938395
--- 7.916727542877197 seconds ---
Epoch : 107
learning_rate: 0.000229147
train_loss: 0.070545, valid_loss: 0.053603
train_f1: 0.919946, valid_f1: 0.938469
Early Stopping...
Best Val Score: 0.938671
Fold : 1
TRAIN: (1200,) TEST: (600,)
Epoch : 0
learning_rate: 0.000010000
train_loss: 2.486644, valid_loss: 1.832772
train_f1: 0.059246, valid_f1: 0.028780
--- 8.136149406433105 seconds ---
Epoch : 1
learning_rate: 0.000020962
train_loss: 2.292979, valid_loss: 1.691832
train_f1: 0.061995, valid_f1: 0.050412
--- 8.67445421218872 seconds ---
Epoch : 2
learning_rate: 0.000053363
train_loss: 1.982825, valid_loss: 1.549912
train_f1: 0.067221, valid_f1: 0.038794
--- 8.141481399536133 seconds ---
Epoch : 3
learning_rate: 0.000105767
train_loss: 1.696141, valid_loss: 1.344538
train_f1: 0.073735, valid_f1: 0.054497
--- 8.742886781692505 seconds ---
Epoch : 4
learning_rate: 0.000175854
train_loss: 1.453696, valid_loss: 1.047070
train_f1: 0.081109, valid_f1: 0.024

train_f1: 0.853990, valid_f1: 0.850474
--- 7.920761346817017 seconds ---
Epoch : 52
learning_rate: 0.000824945
train_loss: 0.087565, valid_loss: 0.062487
train_f1: 0.856770, valid_f1: 0.855565
--- 7.925373554229736 seconds ---
Epoch : 53
learning_rate: 0.000816015
train_loss: 0.086876, valid_loss: 0.062020
train_f1: 0.858865, valid_f1: 0.869903
--- 7.947523355484009 seconds ---
Epoch : 54
learning_rate: 0.000806913
train_loss: 0.087991, valid_loss: 0.061256
train_f1: 0.860415, valid_f1: 0.905121
--- 7.954632043838501 seconds ---
Epoch : 55
learning_rate: 0.000797645
train_loss: 0.088437, valid_loss: 0.060420
train_f1: 0.866560, valid_f1: 0.918865
--- 8.600877523422241 seconds ---
Epoch : 56
learning_rate: 0.000788216
train_loss: 0.087435, valid_loss: 0.060046
train_f1: 0.868978, valid_f1: 0.922863
--- 8.351093053817749 seconds ---
Epoch : 57
learning_rate: 0.000778631
train_loss: 0.084620, valid_loss: 0.059621
train_f1: 0.872748, valid_f1: 0.928052
--- 7.999316215515137 seconds ---
Epo

train_f1: 0.489555, valid_f1: 0.660609
--- 7.990440607070923 seconds ---
Epoch : 14
learning_rate: 0.000991114
train_loss: 0.319245, valid_loss: 0.188293
train_f1: 0.541753, valid_f1: 0.723606
--- 7.945739507675171 seconds ---
Epoch : 15
learning_rate: 0.000999999
train_loss: 0.281362, valid_loss: 0.153767
train_f1: 0.590758, valid_f1: 0.734528
--- 7.962782859802246 seconds ---
Epoch : 16
learning_rate: 0.000999836
train_loss: 0.251188, valid_loss: 0.129976
train_f1: 0.633441, valid_f1: 0.808507
--- 7.970511198043823 seconds ---
Epoch : 17
learning_rate: 0.000999403
train_loss: 0.219333, valid_loss: 0.112813
train_f1: 0.667280, valid_f1: 0.822847
--- 7.959457635879517 seconds ---
Epoch : 18
learning_rate: 0.000998700
train_loss: 0.199627, valid_loss: 0.101985
train_f1: 0.692233, valid_f1: 0.833781
--- 7.941775798797607 seconds ---
Epoch : 19
learning_rate: 0.000997726
train_loss: 0.182862, valid_loss: 0.093661
train_f1: 0.712813, valid_f1: 0.839780
--- 8.014472961425781 seconds ---
Epo

train_loss: 0.082365, valid_loss: 0.058581
train_f1: 0.865690, valid_f1: 0.893244
--- 7.976168870925903 seconds ---
Epoch : 68
learning_rate: 0.000664434
train_loss: 0.081257, valid_loss: 0.058553
train_f1: 0.867844, valid_f1: 0.918578
--- 8.010127544403076 seconds ---
Epoch : 69
learning_rate: 0.000653402
train_loss: 0.079998, valid_loss: 0.057914
train_f1: 0.872164, valid_f1: 0.920962
--- 7.961558103561401 seconds ---
Epoch : 70
learning_rate: 0.000642287
train_loss: 0.080355, valid_loss: 0.057588
train_f1: 0.874898, valid_f1: 0.930027
--- 7.95403265953064 seconds ---
Epoch : 71
learning_rate: 0.000631095
train_loss: 0.081754, valid_loss: 0.057407
train_f1: 0.877323, valid_f1: 0.932792
--- 7.972431421279907 seconds ---
Epoch : 72
learning_rate: 0.000619832
train_loss: 0.079072, valid_loss: 0.057111
train_f1: 0.879065, valid_f1: 0.935147
--- 7.945966482162476 seconds ---
Epoch : 73
learning_rate: 0.000608504
train_loss: 0.078847, valid_loss: 0.056828
train_f1: 0.882637, valid_f1: 0.93

train_f1: 0.911173, valid_f1: 0.938997
--- 7.982801198959351 seconds ---
Epoch : 121
learning_rate: 0.000108878
train_loss: 0.072534, valid_loss: 0.053448
train_f1: 0.910950, valid_f1: 0.939008
--- 7.953637361526489 seconds ---
Epoch : 122
learning_rate: 0.000101736
train_loss: 0.071912, valid_loss: 0.053490
train_f1: 0.910980, valid_f1: 0.938952
--- 7.9549171924591064 seconds ---
Epoch : 123
learning_rate: 0.000094810
train_loss: 0.071462, valid_loss: 0.053502
train_f1: 0.911763, valid_f1: 0.938955
--- 7.9355857372283936 seconds ---
Epoch : 124
learning_rate: 0.000088103
train_loss: 0.071690, valid_loss: 0.053443
train_f1: 0.911535, valid_f1: 0.939019
--- 8.041980504989624 seconds ---
Epoch : 125
learning_rate: 0.000081619
train_loss: 0.072744, valid_loss: 0.053431
train_f1: 0.911423, valid_f1: 0.938938
--- 7.966798305511475 seconds ---
Epoch : 126
learning_rate: 0.000075361
train_loss: 0.071123, valid_loss: 0.053437
train_f1: 0.911743, valid_f1: 0.938965
--- 7.95628023147583 seconds 

In [16]:
# Build the submission file from the accumulated fold predictions.
# (Best Val Score recorded for this run: 0.939033)
ss = pd.read_csv("../input/sample_submission.csv", dtype={'time':str})

# Rescale every row of summed probabilities so it sums to 1, then take the
# most probable class per row.
test_preds_all = test_preds_all / test_preds_all.sum(axis=1, keepdims=True)
test_pred_frame = pd.DataFrame({
    'time': ss['time'].astype(str),
    'open_channels': test_preds_all.argmax(axis=1),
})
test_pred_frame.to_csv("../submissions/gru_preds.csv", index=False)