In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import numpy as np
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F
import torch.nn as nn 
from torch import optim

In [2]:
class TMDDataset(Dataset):
    """Map-style dataset backed by a CSV file whose last column is the label.

    All columns except the last are treated as the feature vector of a row;
    the last column is the target. The CSV is parsed once in ``__init__`` and
    the features/labels are kept as NumPy arrays so that fetching a sample is
    a plain array lookup instead of a pandas ``iloc`` call per item.

    Args:
        csv_file: Anything ``pandas.read_csv`` accepts (path, buffer, ...).
        transform: Optional callable applied to the feature array of each
            sample before it is returned.

    Attributes kept for callers: ``data_frame`` (the parsed CSV), ``labels``
    (last column as an ndarray), ``features`` (remaining columns as an
    ndarray) and ``transform``.
    """

    def __init__(self, csv_file, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        # Convert once up front; __getitem__ runs for every sample of every epoch.
        self.features = np.asarray(self.data_frame.iloc[:, :-1])
        self.labels = np.asarray(self.data_frame.iloc[:, -1])
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx``.

        ``idx`` may be an int, a list of ints or a tensor of indices (tensors
        are converted with ``tolist()`` first). The label is returned as a
        NumPy array and is never passed through ``transform``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # np.array copies, so a transform that mutates its input cannot
        # corrupt the cached feature matrix.
        sample_x = np.array(self.features[idx])
        sample_y = np.asarray(self.labels[idx])

        if self.transform:
            sample_x = self.transform(sample_x)

        return sample_x, sample_y
    

In [3]:
# torchvision's ToTensor only accepts PIL images or 2-D/3-D image arrays, so it
# rejects the 1-D feature row that TMDDataset hands to its transform.
# torch.tensor converts the NumPy row to a tensor directly.
tmd_train = TMDDataset('train.csv', transform=torch.tensor)

In [4]:
# torchvision's ToTensor only accepts PIL images or 2-D/3-D image arrays, so it
# rejects the 1-D feature row that TMDDataset hands to its transform.
# torch.tensor converts the NumPy row to a tensor directly.
tmd_test = TMDDataset('test.csv', transform=torch.tensor)

In [5]:
# Training batches of 64 rows, reshuffled every epoch so SGD does not see the
# CSV's row order (e.g. rows grouped by class) in the same sequence each time.
train_loader = DataLoader(dataset=tmd_train,
                          batch_size=64,
                          shuffle=True)

In [6]:
# Evaluation batches of 64 rows, served in CSV row order (no shuffling).
test_loader = DataLoader(tmd_test, batch_size=64, shuffle=False)

In [7]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [9]:
# Network dimensions: 37 input features, 5 output classes, hidden layers that
# alternate between the two widths in `hidden_sizes`.
input_size = 37
hidden_sizes = [800, 512]
output_size = 5

# The network ends in LogSoftmax, so negative log-likelihood is the matching loss.
criterion = nn.NLLLoss()

# Widths of the five hidden Linear layers: 37 -> 800 -> 512 -> 800 -> 512 -> 800.
widths = [input_size,
          hidden_sizes[0], hidden_sizes[1],
          hidden_sizes[0], hidden_sizes[1],
          hidden_sizes[0]]

# Each hidden stage is Linear -> ReLU -> Dropout(0.2).
layers = []
for fan_in, fan_out in zip(widths[:-1], widths[1:]):
    layers.extend([nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(p=0.2)])

# Classification head: project to the class count and emit log-probabilities.
layers.extend([nn.Linear(widths[-1], output_size), nn.LogSoftmax(dim=1)])

model = nn.Sequential(*layers)
optimizer = optim.SGD(model.parameters(), lr=0.003)
model.to(device)

Sequential(
  (0): Linear(in_features=37, out_features=800, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=800, out_features=512, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.2, inplace=False)
  (6): Linear(in_features=512, out_features=800, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.2, inplace=False)
  (9): Linear(in_features=800, out_features=512, bias=True)
  (10): ReLU()
  (11): Dropout(p=0.2, inplace=False)
  (12): Linear(in_features=512, out_features=800, bias=True)
  (13): ReLU()
  (14): Dropout(p=0.2, inplace=False)
  (15): Linear(in_features=800, out_features=5, bias=True)
  (16): LogSoftmax(dim=1)
)

In [None]:
def _run_epoch(net, loader, loss_fn, opt=None):
    """Make one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Both numbers are averaged over *samples*, not over batches, so a short
    final batch does not get the same weight as a full one.

    Args:
        net: Module returning per-class log-probabilities of shape (batch, classes).
        loader: Iterable yielding ``(features, labels)`` batches.
        loss_fn: Loss that averages over the batch (as ``nn.NLLLoss()`` does by default).
        opt: Optimizer. When given, gradients are computed and a step is taken
            per batch; when ``None`` the pass runs without autograd.

    The caller is responsible for putting ``net`` into train/eval mode.
    """
    updating = opt is not None
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(updating):
        for feature, labels in loader:
            feature, labels = feature.to(device), labels.to(device)

            if updating:
                opt.zero_grad()

            # Call the module (not .forward) so nn.Module.__call__ runs any hooks.
            log_ps = net(feature.float())
            loss = loss_fn(log_ps, labels)

            if updating:
                loss.backward()
                opt.step()

            batch_size = labels.size(0)
            # loss is a batch mean; scale back to a sum so the epoch mean is per-sample.
            loss_sum += loss.item() * batch_size
            # argmax over log-probabilities picks the same class as over probabilities.
            n_correct += (log_ps.argmax(dim=1) == labels.view(-1)).sum().item()
            n_seen += batch_size

    n_seen = max(n_seen, 1)  # an empty loader reports 0.0 instead of dividing by zero
    return loss_sum / n_seen, n_correct / n_seen


# Per-epoch history, stored as plain Python floats.
train_accuracies, test_accuracies, train_losses, test_losses = [], [], [], []
test_accuracy_checkpt = 0
epochs = 500

for e in range(epochs):
    # Training pass (dropout active, weights updated).
    model.train()
    train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, optimizer)

    # Evaluation pass (dropout disabled, no gradients), then back to train mode.
    model.eval()
    test_loss, test_accuracy = _run_epoch(model, test_loader, criterion)
    model.train()

    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)
    train_losses.append(train_loss)
    test_losses.append(test_loss)

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Accuracy: {:.3f}.. ".format(train_accuracies[-1]),
          "Training Loss: {:.3f}.. ".format(train_losses[-1]),
          "Test Accuracy: {:.3f}..".format(test_accuracies[-1]),
          "Test Loss: {:.3f}.. ".format(test_losses[-1])
    )

    # Keep only the weights of the best epoch seen so far.
    # NOTE(review): the checkpoint is selected on the test split, so the reported
    # test accuracy is optimistic; consider a separate validation split.
    if test_accuracy_checkpt < test_accuracies[-1]:
        torch.save(model.state_dict(), 'simplemodel.pth')
        test_accuracy_checkpt = test_accuracies[-1]
        print("check point epoch : {}".format(e+1))

In [15]:
# map_location remaps the stored tensors onto `device`, so a checkpoint written
# on a GPU machine still loads on a CPU-only one.
state_dict = torch.load('simplemodel.pth', map_location=device)

In [16]:
# Rebuild the same architecture as the trained network so the saved state_dict
# keys (layer indices 0..16) line up: five Linear -> ReLU -> Dropout(0.2) stages
# alternating between the two hidden widths, then a Linear + LogSoftmax head.
test_widths = [input_size,
               hidden_sizes[0], hidden_sizes[1],
               hidden_sizes[0], hidden_sizes[1],
               hidden_sizes[0]]

test_layers = []
for n_in, n_out in zip(test_widths[:-1], test_widths[1:]):
    test_layers.extend([nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(p=0.2)])
test_layers.extend([nn.Linear(test_widths[-1], output_size), nn.LogSoftmax(dim=1)])

test_model = nn.Sequential(*test_layers)
test_model.to(device)

Sequential(
  (0): Linear(in_features=37, out_features=800, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=800, out_features=512, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.2, inplace=False)
  (6): Linear(in_features=512, out_features=800, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.2, inplace=False)
  (9): Linear(in_features=800, out_features=512, bias=True)
  (10): ReLU()
  (11): Dropout(p=0.2, inplace=False)
  (12): Linear(in_features=512, out_features=800, bias=True)
  (13): ReLU()
  (14): Dropout(p=0.2, inplace=False)
  (15): Linear(in_features=800, out_features=5, bias=True)
  (16): LogSoftmax(dim=1)
)

In [17]:
# Copy the checkpointed weights into the freshly built network; strict matching
# (the default) requires every key to be present on both sides.
test_model.load_state_dict(state_dict, strict=True)

<All keys matched successfully>

In [18]:
from sklearn.metrics import classification_report

In [19]:
# Test-split feature matrix: every CSV column except the last, moved to `device`
# and cast to float32.
test_feature_frame = pd.read_csv('test.csv').iloc[:, :-1]
x_test_ch = torch.from_numpy(np.asarray(test_feature_frame)).to(device).float()

In [20]:
# Test-split labels: the last CSV column, moved to `device` with its dtype unchanged.
test_label_column = pd.read_csv('test.csv').iloc[:, -1]
y_test_ch = torch.from_numpy(np.asarray(test_label_column)).to(device)

In [21]:
# Inference only: eval() switches the Dropout layers off and no_grad() skips
# autograd bookkeeping for the whole-test-set forward pass.
test_model.eval()
with torch.no_grad():
    # Call the module rather than .forward() so nn.Module.__call__ runs any hooks.
    log_ps = test_model(x_test_ch)

In [22]:
# The network outputs log-probabilities; exponentiate to get class probabilities.
ps = log_ps.exp()

In [23]:
# Highest probability and its class index for every row (each has shape (N, 1)).
top_p, top_class = torch.topk(ps, k=1, dim=1)

In [24]:
# scikit-learn works on host memory, so bring both tensors back to the CPU first.
y_true_cpu = y_test_ch.cpu()
y_pred_cpu = top_class.cpu()
print(classification_report(y_true_cpu, y_pred_cpu))

              precision    recall  f1-score   support

           0       0.89      0.96      0.93       255
           1       0.95      0.90      0.93       228
           2       0.96      0.92      0.94       239
           3       0.94      0.94      0.94       239
           4       0.95      0.95      0.95       218

    accuracy                           0.94      1179
   macro avg       0.94      0.94      0.94      1179
weighted avg       0.94      0.94      0.94      1179

