In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

from scipy.special import softmax

In [2]:
# General
from os import path
from random import randrange

from sklearn.model_selection import train_test_split, GridSearchCV #cross validation
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, make_scorer
from sklearn.metrics import accuracy_score, roc_auc_score, balanced_accuracy_score

from sklearn.preprocessing import LabelEncoder

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import xgboost as xgb

import pickle
import joblib 

In [3]:
trainDataFull = pd.read_csv("trainData.csv")
trainDataFull.head(3)

Unnamed: 0,v1,v10,v100,v101,v102,v103,v11,v12,v13,v14,...,v91,v92,v93,v94,v95,v96,v97,v98,v99,target
0,1.4,0.0,0.2,1.0,4.2,0.4,0.0,0.0,0.0,1.2,...,0.6,0.2,0.0,3.2,1.0,0.2,0.0,1.6,0.4,9
1,0.0,0.0,0.0,2.8,0.0,0.8,0.0,0.2,1.2,1.4,...,0.0,0.0,1.2,0.0,1.2,0.2,0.2,2.6,2.2,6
2,0.0,0.0,0.0,0.4,0.0,0.6,0.8,0.0,0.0,0.2,...,0.0,0.0,0.0,0.0,0.8,0.2,0.8,1.4,0.0,3


In [4]:
trainData = trainDataFull.loc[:,'v1':'v99']
trainData.head(3)

Unnamed: 0,v1,v10,v100,v101,v102,v103,v11,v12,v13,v14,...,v90,v91,v92,v93,v94,v95,v96,v97,v98,v99
0,1.4,0.0,0.2,1.0,4.2,0.4,0.0,0.0,0.0,1.2,...,0.2,0.6,0.2,0.0,3.2,1.0,0.2,0.0,1.6,0.4
1,0.0,0.0,0.0,2.8,0.0,0.8,0.0,0.2,1.2,1.4,...,0.0,0.0,0.0,1.2,0.0,1.2,0.2,0.2,2.6,2.2
2,0.0,0.0,0.0,0.4,0.0,0.6,0.8,0.0,0.0,0.2,...,0.0,0.0,0.0,0.0,0.0,0.8,0.2,0.8,1.4,0.0


In [5]:
trainLabels = trainDataFull.loc[:,'target']
trainLabels.unique()

array([9, 6, 3, 4, 2, 8, 7, 1, 5])

In [6]:
# encode string class values as integers
label_encoder = LabelEncoder()
label_encoder = label_encoder.fit(trainLabels)
label_encoded_y = label_encoder.transform(trainLabels)
label_encoded_y

array([8, 5, 2, ..., 7, 4, 2])

In [7]:
X_train, X_test, y_train, y_test = train_test_split(trainData.values, 
                                                    label_encoded_y, 
                                                    test_size = 0.05, 
                                                    random_state = 33,
                                                    shuffle = True,
                                                    stratify = label_encoded_y)

In [8]:
# scale
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
# y_train = torch.from_numpy(y_train.astype(np.float32))
# y_test = torch.from_numpy(y_test.astype(np.float32))

# y_train = y_train.view(y_train.shape[0],1) # from columns vector to row vector
# y_test = y_test.view(y_test.shape[0],1)

In [9]:
y_train

array([2, 2, 4, ..., 6, 1, 1])

In [10]:
len(X_train)

58784

In [11]:
class TrainDataset(Dataset):
    
    def __init__(self, data, labels):
        
        self.paths = data
        self.labels = labels
                                                 
    def __len__(self):
        return self.paths.shape[0]
    
    def __getitem__(self, i): 
        
        image = self.paths[i]
        label = torch.tensor(self.labels[i])
         
        return image, label

In [12]:
class ValidDataset(Dataset):
    
    def __init__(self, data, labels):
        
        self.paths = data
        self.labels = labels
                                                 
    def __len__(self):
        return self.paths.shape[0]
    
    def __getitem__(self, i): 
        
        image = self.paths[i]
        label = torch.tensor(self.labels[i])
         
        return image, label

In [15]:
# hyper parameters
input_size = 103 
num_classes = 9
num_epochs = 1000
batch_size = 100
learning_rate = 0.00001
device = 'cuda:0'

In [16]:
train_dataset = TrainDataset(X_train, y_train)
trainloader = DataLoader(train_dataset, shuffle=True, batch_size = batch_size, num_workers = 2)

valid_dataset = ValidDataset(X_test, y_test)
validloader = DataLoader(valid_dataset, shuffle=False, batch_size = batch_size, num_workers = 2)

In [17]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, 2000,bias = True)
        self.relu1 = nn.ReLU()
        self.l2 = nn.Linear(2000,1000,bias = True)
        self.relu2 = nn.ReLU()
        self.drop1 = nn.Dropout(p=0.3)
        self.l3 = nn.Linear(1000, num_classes,bias = True)
        
    def forward(self, x):
        out = self.l1(x)
        out = self.relu1(out)
        out = self.l2(out)
        out = self.relu2(out)
        out = self.drop1(out)
        out = self.l3(out)
        # It is no need Softmax. Because CrossEntropyLoss includes it
        return out

In [18]:
model = NeuralNet(input_size, num_classes)
model.cuda()

NeuralNet(
  (l1): Linear(in_features=103, out_features=2000, bias=True)
  (relu1): ReLU()
  (l2): Linear(in_features=2000, out_features=1000, bias=True)
  (relu2): ReLU()
  (drop1): Dropout(p=0.3, inplace=False)
  (l3): Linear(in_features=1000, out_features=9, bias=True)
)

In [19]:
# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

In [None]:
# training loop
n_total_steps = len(trainloader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(trainloader):
        # images.size = 100, 1, 28, 28
        # labels.size = 100, 784
        images = images.to(device)
        labels = labels.to(device)
        
        model.train()
        # forward
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        # backwards
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_total_steps}, loss = {loss.item():.4f}')
            
            # Valid score
            with torch.no_grad():
                n_correct = 0
                n_samples = 0
                for images, labels in validloader:
                    images = images.to(device)
                    labels = labels.to(device)
                    outputs = model(images)

                    # value, index
                    _, predictions = torch.max(outputs,1)
                    n_samples += labels.shape[0]
                    n_correct += (predictions == labels).sum().item()

                acc = 100.0 * n_correct / n_samples
                print(f'accuracy = {acc}') 

epoch 1/1000, step 100/588, loss = 1.6295
accuracy = 55.46218487394958
epoch 1/1000, step 200/588, loss = 1.4578
accuracy = 62.346477052359404
epoch 1/1000, step 300/588, loss = 1.1122
accuracy = 67.6470588235294
epoch 1/1000, step 400/588, loss = 0.9666
accuracy = 69.55397543632837
epoch 1/1000, step 500/588, loss = 1.0169
accuracy = 71.55785391079509
epoch 2/1000, step 100/588, loss = 0.9684
accuracy = 72.20426632191338
epoch 2/1000, step 200/588, loss = 1.0066
accuracy = 72.75371687136393
epoch 2/1000, step 300/588, loss = 0.7575
accuracy = 73.46477052359405
epoch 2/1000, step 400/588, loss = 0.9188
accuracy = 73.3678086619263
epoch 2/1000, step 500/588, loss = 0.9653
accuracy = 74.01422107304461
epoch 3/1000, step 100/588, loss = 0.7216
accuracy = 74.75759534583064
epoch 3/1000, step 200/588, loss = 0.8604
accuracy = 74.72527472527473
epoch 3/1000, step 300/588, loss = 0.7339
accuracy = 74.56367162249515
epoch 3/1000, step 400/588, loss = 0.6449
accuracy = 74.46670976082741
epoch 3

epoch 24/1000, step 100/588, loss = 0.5986
accuracy = 80.4137039431157
epoch 24/1000, step 200/588, loss = 0.4826
accuracy = 79.89657401422107
epoch 24/1000, step 300/588, loss = 0.5614
accuracy = 80.31674208144797
epoch 24/1000, step 400/588, loss = 0.6484
accuracy = 80.12281835811248
epoch 24/1000, step 500/588, loss = 0.6815
accuracy = 80.15513897866839
epoch 25/1000, step 100/588, loss = 0.4682
accuracy = 80.34906270200388
epoch 25/1000, step 200/588, loss = 0.7049
accuracy = 80.02585649644473
epoch 25/1000, step 300/588, loss = 0.4905
accuracy = 79.70265029088559
epoch 25/1000, step 400/588, loss = 0.4697
accuracy = 79.5087265675501
epoch 25/1000, step 500/588, loss = 0.5555
accuracy = 80.51066580478346
epoch 26/1000, step 100/588, loss = 0.5265
accuracy = 79.83193277310924
epoch 26/1000, step 200/588, loss = 0.4100
accuracy = 79.7349709114415
epoch 26/1000, step 300/588, loss = 0.4905
accuracy = 80.02585649644473
epoch 26/1000, step 400/588, loss = 0.4816
accuracy = 80.0581771170

accuracy = 81.18939883645766
epoch 47/1000, step 100/588, loss = 0.3528
accuracy = 80.8338720103426
epoch 47/1000, step 200/588, loss = 0.6122
accuracy = 80.67226890756302
epoch 47/1000, step 300/588, loss = 0.6415
accuracy = 80.73691014867485
epoch 47/1000, step 400/588, loss = 0.6304
accuracy = 80.54298642533936
epoch 47/1000, step 500/588, loss = 0.4826
accuracy = 81.02779573367809
epoch 48/1000, step 100/588, loss = 0.4777
accuracy = 80.80155138978668
epoch 48/1000, step 200/588, loss = 0.4341
accuracy = 81.09243697478992
epoch 48/1000, step 300/588, loss = 0.5434
accuracy = 81.22171945701358
epoch 48/1000, step 400/588, loss = 0.5644
accuracy = 80.93083387201034
epoch 48/1000, step 500/588, loss = 0.4525
accuracy = 80.93083387201034
epoch 49/1000, step 100/588, loss = 0.3716
accuracy = 81.09243697478992
epoch 49/1000, step 200/588, loss = 0.3814
accuracy = 80.80155138978668
epoch 49/1000, step 300/588, loss = 0.3975
accuracy = 80.8338720103426
epoch 49/1000, step 400/588, loss = 0

epoch 69/1000, step 500/588, loss = 0.3939
accuracy = 81.60956690368455
epoch 70/1000, step 100/588, loss = 0.4978
accuracy = 81.060116354234
epoch 70/1000, step 200/588, loss = 0.4930
accuracy = 81.83581124757595
epoch 70/1000, step 300/588, loss = 0.3206
accuracy = 81.4802844214609
epoch 70/1000, step 400/588, loss = 0.4005
accuracy = 81.4802844214609
epoch 70/1000, step 500/588, loss = 0.4575
accuracy = 81.35100193923724
epoch 71/1000, step 100/588, loss = 0.3015
accuracy = 81.25404007756948
epoch 71/1000, step 200/588, loss = 0.3841
accuracy = 80.76923076923077
epoch 71/1000, step 300/588, loss = 0.3892
accuracy = 81.31868131868131
epoch 71/1000, step 400/588, loss = 0.3842
accuracy = 81.25404007756948
epoch 71/1000, step 500/588, loss = 0.5500
accuracy = 81.44796380090497
epoch 72/1000, step 100/588, loss = 0.4186
accuracy = 81.70652876535229
epoch 72/1000, step 200/588, loss = 0.5430
accuracy = 81.09243697478992
epoch 72/1000, step 300/588, loss = 0.4119
accuracy = 80.83387201034

accuracy = 81.12475759534583
epoch 92/1000, step 500/588, loss = 0.3858
accuracy = 81.44796380090497
epoch 93/1000, step 100/588, loss = 0.3862
accuracy = 81.12475759534583
epoch 93/1000, step 200/588, loss = 0.3474
accuracy = 81.54492566257272
epoch 93/1000, step 300/588, loss = 0.4861
accuracy = 81.2863606981254
epoch 93/1000, step 400/588, loss = 0.4663
accuracy = 81.80349062702004
epoch 93/1000, step 500/588, loss = 0.3139
accuracy = 81.64188752424046
epoch 94/1000, step 100/588, loss = 0.3616
accuracy = 81.60956690368455
epoch 94/1000, step 200/588, loss = 0.3120
accuracy = 81.83581124757595
epoch 94/1000, step 300/588, loss = 0.3141
accuracy = 81.90045248868778
epoch 94/1000, step 400/588, loss = 0.3075
accuracy = 81.18939883645766
epoch 94/1000, step 500/588, loss = 0.4240
accuracy = 81.54492566257272
epoch 95/1000, step 100/588, loss = 0.3997
accuracy = 81.4802844214609
epoch 95/1000, step 200/588, loss = 0.5175
accuracy = 82.12669683257919
epoch 95/1000, step 300/588, loss = 0

accuracy = 81.64188752424046
epoch 115/1000, step 300/588, loss = 0.3662
accuracy = 81.5126050420168
epoch 115/1000, step 400/588, loss = 0.3017
accuracy = 81.57724628312863
epoch 115/1000, step 500/588, loss = 0.3065
accuracy = 81.80349062702004
epoch 116/1000, step 100/588, loss = 0.3611
accuracy = 81.2863606981254
epoch 116/1000, step 200/588, loss = 0.3304
accuracy = 81.9327731092437
epoch 116/1000, step 300/588, loss = 0.3817
accuracy = 81.09243697478992
epoch 116/1000, step 400/588, loss = 0.4431
accuracy = 81.80349062702004
epoch 116/1000, step 500/588, loss = 0.3630
accuracy = 81.70652876535229
epoch 117/1000, step 100/588, loss = 0.3799
accuracy = 81.38332255979314
epoch 117/1000, step 200/588, loss = 0.4509
accuracy = 81.64188752424046
epoch 117/1000, step 300/588, loss = 0.3471
accuracy = 82.12669683257919
epoch 117/1000, step 400/588, loss = 0.3281
accuracy = 81.44796380090497
epoch 117/1000, step 500/588, loss = 0.5062
accuracy = 81.44796380090497
epoch 118/1000, step 100/

accuracy = 81.57724628312863
epoch 138/1000, step 100/588, loss = 0.2814
accuracy = 81.57724628312863
epoch 138/1000, step 200/588, loss = 0.2835
accuracy = 81.64188752424046
epoch 138/1000, step 300/588, loss = 0.2733
accuracy = 81.15707821590175
epoch 138/1000, step 400/588, loss = 0.3183
accuracy = 81.64188752424046
epoch 138/1000, step 500/588, loss = 0.4200
accuracy = 81.80349062702004
epoch 139/1000, step 100/588, loss = 0.2858
accuracy = 81.77117000646412
epoch 139/1000, step 200/588, loss = 0.3155
accuracy = 81.4802844214609
epoch 139/1000, step 300/588, loss = 0.3173
accuracy = 82.09437621202328
epoch 139/1000, step 400/588, loss = 0.2725
accuracy = 81.67420814479638
epoch 139/1000, step 500/588, loss = 0.3813
accuracy = 81.57724628312863
epoch 140/1000, step 100/588, loss = 0.2542
accuracy = 81.90045248868778
epoch 140/1000, step 200/588, loss = 0.3057
accuracy = 81.25404007756948
epoch 140/1000, step 300/588, loss = 0.2731
accuracy = 81.38332255979314
epoch 140/1000, step 40

accuracy = 81.38332255979314
epoch 160/1000, step 400/588, loss = 0.2732
accuracy = 81.4802844214609
epoch 160/1000, step 500/588, loss = 0.3263
accuracy = 81.77117000646412
epoch 161/1000, step 100/588, loss = 0.3479
accuracy = 81.35100193923724
epoch 161/1000, step 200/588, loss = 0.3075
accuracy = 81.54492566257272
epoch 161/1000, step 300/588, loss = 0.2051
accuracy = 82.02973497091145
epoch 161/1000, step 400/588, loss = 0.2097
accuracy = 81.57724628312863
epoch 161/1000, step 500/588, loss = 0.2774
accuracy = 81.31868131868131
epoch 162/1000, step 100/588, loss = 0.3066
accuracy = 81.44796380090497
epoch 162/1000, step 200/588, loss = 0.2684
accuracy = 81.5126050420168
epoch 162/1000, step 300/588, loss = 0.3437
accuracy = 82.1590174531351
epoch 162/1000, step 400/588, loss = 0.2739
accuracy = 81.15707821590175
epoch 162/1000, step 500/588, loss = 0.3012
accuracy = 81.18939883645766
epoch 163/1000, step 100/588, loss = 0.3004
accuracy = 81.90045248868778
epoch 163/1000, step 200/

accuracy = 81.25404007756948
epoch 183/1000, step 200/588, loss = 0.2539
accuracy = 81.31868131868131
epoch 183/1000, step 300/588, loss = 0.2381
accuracy = 81.18939883645766
epoch 183/1000, step 400/588, loss = 0.2801
accuracy = 81.57724628312863
epoch 183/1000, step 500/588, loss = 0.1605
accuracy = 81.35100193923724
epoch 184/1000, step 100/588, loss = 0.2354
accuracy = 81.60956690368455
epoch 184/1000, step 200/588, loss = 0.2594
accuracy = 81.5126050420168
epoch 184/1000, step 300/588, loss = 0.2266
accuracy = 81.86813186813187
epoch 184/1000, step 400/588, loss = 0.4798
accuracy = 81.54492566257272
epoch 184/1000, step 500/588, loss = 0.2239
accuracy = 81.54492566257272
epoch 185/1000, step 100/588, loss = 0.2854
accuracy = 81.02779573367809
epoch 185/1000, step 200/588, loss = 0.3086
accuracy = 81.67420814479638
epoch 185/1000, step 300/588, loss = 0.2358
accuracy = 81.25404007756948
epoch 185/1000, step 400/588, loss = 0.4586
accuracy = 81.86813186813187
epoch 185/1000, step 50

accuracy = 81.18939883645766
epoch 205/1000, step 500/588, loss = 0.2816
accuracy = 81.60956690368455
epoch 206/1000, step 100/588, loss = 0.2413
accuracy = 81.41564318034906
epoch 206/1000, step 200/588, loss = 0.2094
accuracy = 81.25404007756948
epoch 206/1000, step 300/588, loss = 0.2344
accuracy = 81.57724628312863
epoch 206/1000, step 400/588, loss = 0.2672
accuracy = 80.93083387201034
epoch 206/1000, step 500/588, loss = 0.2268
accuracy = 81.54492566257272
epoch 207/1000, step 100/588, loss = 0.2462
accuracy = 81.67420814479638
epoch 207/1000, step 200/588, loss = 0.2420
accuracy = 81.67420814479638
epoch 207/1000, step 300/588, loss = 0.2868
accuracy = 81.80349062702004
epoch 207/1000, step 400/588, loss = 0.3764
accuracy = 81.44796380090497
epoch 207/1000, step 500/588, loss = 0.2577
accuracy = 81.35100193923724
epoch 208/1000, step 100/588, loss = 0.3099
accuracy = 81.18939883645766
epoch 208/1000, step 200/588, loss = 0.2834
accuracy = 82.25597931480284
epoch 208/1000, step 3

In [None]:
# Valid score
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in validloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        
        # value, index
        _, predictions = torch.max(outputs,1)
        n_samples += labels.shape[0]
        n_correct += (predictions == labels).sum().item()
        
    acc = 100.0 * n_correct / n_samples
    print(f'accuracy = {acc}')  

In [None]:
class TestDataset(Dataset):
    
    def __init__(self, data, labels):
        
        self.paths = data
        self.labels = labels
                                                 
    def __len__(self):
        return self.paths.shape[0]
    
    def __getitem__(self, i): 
        
        image = self.paths[i]
        label = torch.tensor(self.labels[i])
         
        return image, label

In [None]:
testData = pd.read_csv("testData.csv")
testData

In [None]:
test_normalized_standart = sc.transform(testData.values)
test_normalized_standart

In [None]:
testNN = torch.from_numpy(test_normalized_standart.astype(np.float32))
testNN[0]

In [None]:
preds_pair = []
for i in range(len(testNN)):
    print(i)
    outputs = model(testNN[i].to(device))
    
    predictions_normalize = softmax(outputs.cpu().detach().numpy(),0)
    preds_pair.append(list(predictions_normalize))   

In [None]:
result = pd.DataFrame(preds_pair, columns=['c1','c2','c3','c4','c5','c6','c7','c8','c9'])
result

In [None]:
result.to_csv('./results/test-submission-model-nn', index = False)

In [None]:
sum(predictions_normalize)

In [None]:
preds_pair = []
for i in range(len(X_test)):
    print(i)
    outputs = model(X_test[i].to(device))
    
    predictions_normalize = softmax(outputs.cpu().detach().numpy(),0)
    preds_pair.append(list(predictions_normalize))   