In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, time

from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold, GroupKFold

from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_toolbelt import losses as L
from nn_utils import *

pd.options.display.max_rows = 1000
pd.options.display.max_columns = None

In [None]:
# Name of the label column; everything downstream refers to it via TARGET.
TARGET = "open_channels"

df_train_raw = pd.read_pickle('../features/train_clean.pkl')
# The test frame gets a constant placeholder label column set to 0 so that
# it exposes the same TARGET column as the training frame.
df_test_raw = pd.read_pickle('../features/test_clean.pkl').assign(**{TARGET: 0})

print(df_train_raw.shape, df_test_raw.shape)
df_train_raw.head()

In [None]:
# # feature engineering here
# df_train_raw["signal_pow_2"] = df_train_raw["signal"] ** 2
# df_test_raw["signal_pow_2"] = df_test_raw["signal"] ** 2

In [None]:
# Columns that are never fed to the model as input features.
non_feature_cols = {"time", "local_time", "open_channels", "batch", "mini_batch"}

# Keep the remaining columns in the order they appear in the training frame.
use_cols = [col for col in df_train_raw.columns if col not in non_feature_cols]
print(use_cols)

In [None]:
def chop_seq(df_batch_i):
    """Cut one batch frame into 200 consecutive windows of 2500 rows each.

    Args:
        df_batch_i: DataFrame holding the rows of a single batch. It is
            sliced positionally, so row order is preserved.

    Returns:
        A pair ``(features, targets)`` of lists with 200 entries each:
        ``features[k]`` is the ``use_cols`` values of window ``k`` and
        ``targets[k]`` is the ``TARGET`` values of the same window.
    """
    # Window k covers rows [2500 * k, 2500 * (k + 1)).
    windows = [df_batch_i[2500 * k:2500 * (k + 1)] for k in range(200)]

    feature_chunks = [window[use_cols].values for window in windows]
    target_chunks = [window[TARGET].values for window in windows]

    return feature_chunks, target_chunks

In [None]:
# TRAIN
# Chop every listed training batch into fixed-length windows and stack them.
# Note that batch 8 is not in the list below.
df_train = []
df_train_y = []

for batch_i in [1, 2, 3, 4, 5, 6, 7, 9, 10]:
    df_batch_i = df_train_raw[df_train_raw.batch == batch_i]
    df_batch_i_features, df_batch_i_y = chop_seq(df_batch_i)
    df_train.append(df_batch_i_features)
    df_train_y.append(df_batch_i_y)

# Convert the nested list to an array exactly once; this conversion copies
# every window, so doing it twice just to read the last dimension is wasteful.
train_features = np.array(df_train)

# Flatten (batch, window) into one sequence axis, then move the feature axis
# in front of the time axis: (n_sequences, n_features, 2500).
df_train = train_features.reshape([-1, 2500, train_features.shape[-1]]).transpose([0, 2, 1])
df_train_y = np.array(df_train_y).reshape([-1, 2500])

print("TRAIN:", df_train.shape, df_train_y.shape)

In [None]:
# TEST
# Chop every test batch into fixed-length windows and stack them.
df_test = []
df_test_y = []

for batch_i in [1, 2, 3, 4]:
    df_batch_i = df_test_raw[df_test_raw.batch == batch_i]
    df_batch_i_features, df_batch_i_y = chop_seq(df_batch_i)
    df_test.append(df_batch_i_features)
    df_test_y.append(df_batch_i_y)

# Convert the nested list to an array exactly once; this conversion copies
# every window, so doing it twice just to read the last dimension is wasteful.
test_features = np.array(df_test)

# Flatten (batch, window) into one sequence axis, then move the feature axis
# in front of the time axis: (n_sequences, n_features, 2500).
df_test = test_features.reshape([-1, 2500, test_features.shape[-1]]).transpose([0, 2, 1])
df_test_y = np.array(df_test_y).reshape([-1, 2500])

print("TEST:", df_test.shape, df_test_y.shape)

In [None]:
# Number of cross-validation folds.
cv = 3

# Alternative splitter kept for reference:
# kf = KFold(n_splits=3, random_state=42, shuffle=True)
skf = GroupKFold(n_splits=cv)

# One group id per sequence: every sequence forms its own group.
group = [seq_idx for seq_idx in range(len(df_train))]

In [None]:
class Seq2SeqRnn(nn.Module):
    """Per-timestep sequence classifier: two Conv1d layers, a GRU, then an MLP head.

    The input is expected as ``(batch, input_size, seq_len)`` (channels first,
    as required by ``nn.Conv1d``). The output has shape
    ``(batch, seq_len, output_size)`` and contains raw, un-normalised scores.

    Args:
        input_size: Number of input channels (features per timestep).
        seq_len: Sequence length; stored on the module but not used by the layers.
        hidden_size: GRU hidden size per direction.
        output_size: Number of output scores per timestep.
        num_layers: Number of stacked GRU layers.
        bidirectional: Whether the GRU runs in both directions.
        dropout: Dropout probability applied after the GRU activation and after
            every hidden linear layer.
        hidden_layers: Sizes of the linear layers between the GRU and the output
            layer. An empty sequence or ``None`` connects the GRU output
            directly to the output layer.
    """

    def __init__(self,
                 input_size,
                 seq_len,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 bidirectional=False,
                 dropout=0,
                 hidden_layers=(100, 200)):

        super().__init__()
        self.input_size = input_size
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.output_size = output_size

        # Width of the GRU output per timestep (both directions are concatenated).
        rnn_out_size = hidden_size * 2 if bidirectional else hidden_size

        # CNN: kernel_size=11 with padding=5 keeps the sequence length unchanged.
        self.cov1 = nn.Conv1d(in_channels=input_size,
                              out_channels=256,
                              kernel_size=11,
                              padding=5)
        self.cov2 = nn.Conv1d(in_channels=256,
                              out_channels=128,
                              kernel_size=11,
                              padding=5)

        # RNN: inter-layer dropout only exists between stacked layers, so it is
        # switched off for a single-layer GRU (PyTorch warns otherwise).
        self.rnn = nn.GRU(input_size=128,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          bidirectional=bidirectional,
                          batch_first=True,
                          dropout=0.3 if num_layers > 1 else 0.0)

        if hidden_layers:
            # MLP head: rnn_out_size -> hidden_layers[0] -> ... -> hidden_layers[-1]
            layer_sizes = [rnn_out_size] + list(hidden_layers)
            self.hidden_layers = nn.ModuleList([
                nn.Linear(n_in, n_out)
                for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
            ])
            for layer in self.hidden_layers:
                nn.init.kaiming_normal_(layer.weight.data)
            head_in_size = layer_sizes[-1]
        else:
            # No hidden layers: the heads read the GRU output directly.
            self.hidden_layers = nn.ModuleList()
            head_in_size = rnn_out_size

        # Not used in forward(); kept so existing checkpoints (state_dict keys)
        # continue to load.
        self.intermediate_layer = nn.Linear(head_in_size, self.input_size)

        # Output layer
        self.output_layer = nn.Linear(head_in_size, output_size)
        nn.init.kaiming_normal_(self.output_layer.weight.data)

        self.activation_fn = torch.relu
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map ``(batch, input_size, seq_len)`` to ``(batch, seq_len, output_size)`` scores."""
        x = self.cov1(x)
        x = self.cov2(x)

        # Conv1d is channels-first; the batch_first GRU wants (batch, seq, features).
        x = x.permute(0, 2, 1)

        outputs, _ = self.rnn(x)

        x = self.dropout(self.activation_fn(outputs))
        for hidden_layer in self.hidden_layers:
            x = self.activation_fn(hidden_layer(x))
            x = self.dropout(x)

        return self.output_layer(x)

In [None]:
# Wrap the feature arrays in torch tensors so the model can consume them.
df_train, df_test = torch.Tensor(df_train), torch.Tensor(df_test)

print(df_train.shape, df_test.shape)

In [None]:
# Accumulators: one row per raw sample with 11 score columns, plus one
# validation score per fold.
val_preds_all = np.zeros(shape=(len(df_train_raw), 11))
test_preds_all = np.zeros(shape=(len(df_test_raw), 11))
cv_score = np.zeros(shape=cv)

In [None]:
# Cross-validated training: for every fold, train with early stopping on the
# validation macro-F1, reload the best checkpoint and accumulate test-set
# class probabilities.
# NOTE(review): `val_preds_all` is allocated in an earlier cell but is never
# filled in here.

# Output folders are created up front so a missing directory cannot fail a
# fold after training has already finished.
os.makedirs("./models", exist_ok=True)
os.makedirs("./predictions/test", exist_ok=True)

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
n_features = df_train.shape[1]

for index, (train_index, val_index) in enumerate(skf.split(df_train, df_train_y, group)):

    print("Fold ", index, "TRAIN:", train_index.shape, "TEST:", val_index.shape)

    batchsize = 64
    train_dataset = IonDataset(df_train[train_index], df_train_y[train_index], flip=False, noise_level=0.0, class_split=0.0)
    train_dataloader = DataLoader(train_dataset, batchsize, shuffle=True, num_workers=16, pin_memory=True)

    valid_dataset = IonDataset(df_train[val_index], df_train_y[val_index], flip=False)
    valid_dataloader = DataLoader(valid_dataset, batchsize, shuffle=False, num_workers=16, pin_memory=True)

    test_dataset = IonDataset(df_test, df_test_y, flip=False, noise_level=0.0, class_split=0.0)
    test_dataloader = DataLoader(test_dataset, batchsize, shuffle=False, num_workers=16, pin_memory=True)

    # Per-fold accumulator; sized from the data so it always matches
    # `test_preds_all` instead of relying on a hard-coded row count.
    test_preds_iter = np.zeros((df_test_raw.shape[0], 11))

    for it in range(1):
        checkpoint_path = "./models/gru_clean_checkpoint_fold_{}_iter_{}.pt".format(index, it)

        model = Seq2SeqRnn(input_size=n_features, seq_len=2500, hidden_size=128, output_size=11, num_layers=2,
                           hidden_layers=[128, 64, 32], bidirectional=True).to(device)

        no_of_epochs = 200
        early_stopping = EarlyStopping(patience=20, is_maximize=True, checkpoint_path=checkpoint_path)
        criterion = L.FocalLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        schedular = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e2, max_lr=0.001,
                                                        epochs=no_of_epochs, steps_per_epoch=len(train_dataloader))
        avg_train_losses, avg_valid_losses = [], []

        for epoch in range(no_of_epochs):
            start_time = time.time()

            print("Epoch : {}".format(epoch))
            # Read the LR from the optimizer: the scheduler writes it there on
            # every step, and calling scheduler.get_lr() directly is deprecated.
            print("learning_rate: {:0.9f}".format(optimizer.param_groups[0]["lr"]))
            train_losses, valid_losses = [], []

            # ---- training pass ----
            model.train()
            train_true_chunks, train_pred_chunks = [], []

            for x, y in train_dataloader:
                x = x.to(device)
                y = y.to(device)

                optimizer.zero_grad()
                predictions = model(x[:, :n_features, :])

                predictions_ = predictions.view(-1, predictions.shape[-1])
                y_ = y.view(-1)

                loss = criterion(predictions_, y_)
                loss.backward()
                optimizer.step()
                # OneCycleLR is stepped once per batch.
                schedular.step()
                train_losses.append(loss.item())

                train_true_chunks.append(y_)
                # detach() drops the autograd graph; without it every batch's
                # graph stays alive until the end of the epoch. Only the
                # arg-max labels are needed for the F1 score.
                train_pred_chunks.append(predictions_.detach().argmax(1))

            # ---- validation pass ----
            model.eval()
            val_true_chunks, val_pred_chunks = [], []
            with torch.no_grad():
                for x, y in valid_dataloader:
                    x = x.to(device)
                    y = y.to(device)

                    # Same channel slicing as the training and test passes.
                    predictions = model(x[:, :n_features, :])
                    predictions_ = predictions.view(-1, predictions.shape[-1])
                    y_ = y.view(-1)

                    loss = criterion(predictions_, y_)
                    valid_losses.append(loss.item())

                    val_true_chunks.append(y_)
                    val_pred_chunks.append(predictions_.argmax(1))

            # Average loss over the epoch.
            train_loss = np.average(train_losses)
            valid_loss = np.average(valid_losses)
            avg_train_losses.append(train_loss)
            avg_valid_losses.append(valid_loss)

            print("train_loss: {:0.6f}, valid_loss: {:0.6f}".format(train_loss, valid_loss))

            train_score = f1_score(torch.cat(train_true_chunks, 0).cpu().numpy(),
                                   torch.cat(train_pred_chunks, 0).cpu().numpy(),
                                   labels=list(range(11)), average='macro')
            val_score = f1_score(torch.cat(val_true_chunks, 0).cpu().numpy(),
                                 torch.cat(val_pred_chunks, 0).cpu().numpy(),
                                 labels=list(range(11)), average='macro')
            print("train_f1: {:0.6f}, valid_f1: {:0.6f}".format(train_score, val_score))

            if early_stopping(val_score, model):
                print("Early Stopping...")
                print("Best Val Score: {:0.6f}".format(early_stopping.best_score))
                break

            print("--- %s seconds ---" % (time.time() - start_time))

        # The best checkpoint is what gets loaded below, so report its score
        # whether or not early stopping fired.
        # NOTE(review): assumes EarlyStopping.best_score tracks the best
        # validation score seen so far - confirm in nn_utils.
        cv_score[index] = early_stopping.best_score

        # ---- test inference with the best checkpoint ----
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model.eval()
        with torch.no_grad():
            pred_list = []
            for x, _ in test_dataloader:
                x = x.to(device)

                predictions = model(x[:, :n_features, :])
                predictions_ = predictions.view(-1, predictions.shape[-1])

                pred_list.append(F.softmax(predictions_, dim=1).cpu().numpy())
            test_preds = np.vstack(pred_list)

        test_preds_iter += test_preds
        test_preds_all += test_preds
        np.save('./predictions/test/gru_clean_fold_{}_iter_{}_raw.npy'.format(index, it), arr=test_preds_iter)
        # `test_preds_all` is the running sum over all folds processed so far.
        np.save('./predictions/test/gru_clean_fold_{}_raw.npy'.format(index), arr=test_preds_all)

In [None]:
# Mean validation score over all folds, rounded to four decimals.
mean_cv_score = cv_score.mean().round(4)
print("cross validation score is:", mean_cv_score)

In [None]:
# Best Val Score: 
sub = pd.read_csv("../input/sample_submission.csv", dtype={'time':str})

# Rescale every row of the summed fold predictions so it adds up to 1.
row_totals = test_preds_all.sum(axis=1, keepdims=True)
test_preds_all = test_preds_all / row_totals

# The predicted class is the column with the highest score in each row.
predicted_channels = test_preds_all.argmax(axis=1)
test_pred_frame = pd.DataFrame({'time': sub['time'].astype(str),
                                'open_channels': predicted_channels})
# test_pred_frame.to_csv("../submissions/cnn_rnn_preds.csv", index=False)