## Binary Classification
https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data

In [1]:
!pip install pandas



In [None]:
!pip install numpy

In [None]:
!pip install torch

In [2]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.nn import functional as F

In [4]:
print(pd.__version__)

0.22.0


In [5]:
print(np.__version__)

1.16.1


In [6]:
print(torch.__version__)

0.4.0


In [3]:
columns = ['Class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']

In [4]:
wine_data = pd.read_csv('data/Wine.csv', names=columns)
wine_data.sample(5)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
130,3,12.86,1.35,2.32,18.0,122,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
98,2,12.37,1.07,2.1,18.5,88,3.52,3.75,0.24,1.95,4.5,1.04,2.77,660
102,2,12.34,2.45,2.46,21.0,98,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438
85,2,12.67,0.98,2.24,18.0,99,2.2,1.94,0.3,1.46,2.62,1.23,3.16,450


In [5]:
wine_data.shape

(178, 14)

## Data Pre-processing

#### Cleaning the data
* Removing all the records having NaN values
* Convert target values to number using label encoding

In [6]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()

wine_data['Class'] = le.fit_transform(wine_data['Class'])

wine_data.sample(10)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
102,1,12.34,2.45,2.46,21.0,98,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438
117,1,12.42,1.61,2.19,22.5,108,2.0,2.09,0.34,1.61,2.06,1.06,2.96,345
64,1,12.17,1.45,2.53,19.0,104,1.89,1.75,0.45,1.03,2.95,1.45,2.23,355
37,0,13.05,1.65,2.55,18.0,98,2.45,2.43,0.29,1.44,4.25,1.12,2.51,1105
7,0,14.06,2.15,2.61,17.6,121,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295
77,1,11.84,2.89,2.23,18.0,112,1.72,1.32,0.43,0.95,2.65,0.96,2.52,500
68,1,13.34,0.94,2.36,17.0,110,2.53,1.3,0.55,0.42,3.17,1.02,1.93,750
41,0,13.41,3.84,2.12,18.8,90,2.45,2.68,0.27,1.48,4.28,0.91,3.0,1035
58,0,13.72,1.43,2.5,16.7,108,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285
33,0,13.76,1.53,2.7,19.5,132,2.95,2.74,0.5,1.35,5.4,1.25,3.0,1235


In [7]:
wine_data.to_csv('data/wine_data.csv', index = False)

#### Collecting Features
* Create training and test data using train_test_split

In [9]:
wine_features = wine_data.drop('Class', axis = 1)
wine_features.sample(5)

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
80,12.0,0.92,2.0,19.0,86,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278
63,12.37,1.13,2.16,19.0,87,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420
13,14.75,1.73,2.39,11.4,91,3.1,3.69,0.43,2.81,5.4,1.25,2.73,1150
112,11.76,2.68,2.92,20.0,103,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607
174,13.4,3.91,2.48,23.0,102,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750


In [12]:
wine_target = wine_data[['Class']]
wine_target.sample(5)

Unnamed: 0,Class
25,0
27,0
132,2
164,2
90,1


In [13]:
from sklearn.model_selection import train_test_split

X_train, x_test, Y_train, y_test = train_test_split(wine_features,
                                                    wine_target,
                                                    test_size=0.4,
                                                    random_state=0)

In [14]:
X_train.shape, Y_train.shape

((106, 13), (106, 1))

#### Convert data into tensors

In [15]:
Xtrain_ = torch.from_numpy(X_train.values).float()
Xtest_ = torch.from_numpy(x_test.values).float()

In [16]:
Xtrain_.shape

torch.Size([106, 13])

Y parameters have shape 106 X 1 but we need those in shape 1 X 106<br>
Our loss function doesnt support multi-target, our target should be 1D Tensor
i.e 1 row containing all the labels<br>

<b>view: </b>with view we reshape the tensor <br>
view with -1<br>
If there is any situation that you don't know how many columns you want but are sure of the number of rows then you can mention it as -1, or vice-versa (You can extend this to tensors with more dimensions. Only one of the axis value can be -1).

In [17]:
Ytrain_ = torch.from_numpy(Y_train.values).view(1,-1)[0]
Ytest_ = torch.from_numpy(y_test.values).view(1,-1)[0]

In [18]:
Ytrain_.shape

torch.Size([106])

## Creating a classifier


we have 13 features therefore input size is 13 and we want 3 discrete outputs 

In [19]:
input_size = 13
output_size = 3
hidden_size = 100

#### Define a neural network class from which to create our model

We create a class named Net which inherits nn.Module(Base class for all neural network modules.)<br>

<b>super :</b> This is calling the \__init__() method of the parent class(nn.Module)

<b>fc1 to fc3 :</b>  Applies a linear transformation to the incoming data: y=Wx+b<br>
Parameters :<br>
in_features – size of each input sample<br>
out_features – size of each output sample<br>
bias – If set to False, the layer will not learn an additive bias. Default: True<br>

<b>Sigmoid : </b>Applies the element-wise function Sigmoid(x)= 1 / (1+exp(−x))

<b>log_softmax :</b>
Softmax applies the Softmax() function to an n-dimensional input Tensor rescaling them so that the elements of the n-dimensional output Tensor lie in the range (0,1) and sum to 1<br>
While mathematically equivalent to log(softmax(x)), doing these two operations separately is slower, and numerically unstable. This function uses an alternative formulation to compute the output and gradient correctly.<br>
Parameters:<br>
dim(int) – A dimension along which Softmax will be computed (so every slice along dim will sum to 1).


In [20]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        X = torch.sigmoid((self.fc1(X)))
        X = torch.sigmoid(self.fc2(X))
        X = self.fc3(X)

        return F.log_softmax(X, dim=-1)

In [21]:
model = Net()

#### Defining optimizer and loss function

In [22]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr = 0.01)

loss_fn = nn.NLLLoss()

#### Training the model

Optimizer.zero_grad:
* Before the backward pass, use the optimizer object to zero all of the gradients for the variables it will update (which are the learnable weights of the model)

Foward Pass:
* Predicting Y with input data X

Finding training Loss:
* Finding difference between Y_train_tensor and Y_pred using NLLLoss() function defined above


Back Propogation:
* back propogation is done by simply loss.backward() function

Working on test data<br>
predicting Y with X test data<br>
Finding test loss same as training loss, but we will not back propogate this loss<br>

Finding accuracy 
* we used .eq() function which computes element-wise equality
* returns 1 if element is equal else 0 (Hence its summation will give us total correct predictions)

We append all data in the form of list per epoch so that it will be easier for us to plot graphs


In [23]:
epochs = 1000

for epoch in range(epochs):

    optimizer.zero_grad()
    Ypred = model(Xtrain_)

    loss = loss_fn(Ypred , Ytrain_)
    loss.backward()

    optimizer.step()
        
    if epoch % 100 == 0:
        print ('Epoch', epoch, 'loss', loss.item())

Epoch 0 loss 1.2374279499053955
Epoch 100 loss 0.09370845556259155
Epoch 200 loss 0.33703213930130005
Epoch 300 loss 0.04161982983350754
Epoch 400 loss 0.03735429793596268
Epoch 500 loss 0.03491339087486267
Epoch 600 loss 0.03336271271109581
Epoch 700 loss 0.032282955944538116
Epoch 800 loss 0.03135195001959801
Epoch 900 loss 0.030197054147720337


## Saving the model
* saves and loads the entire model, all the intermediate variables as well, like intermediate outputs for back propagation use.

In [25]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.0773, -0.1097,  0.2621,  ..., -0.2325,  0.0918,  0.1510],
         [ 0.0688, -0.0662,  0.2192,  ..., -0.2004, -0.1044,  0.1772],
         [ 0.1836,  0.2435,  0.1673,  ...,  0.0829, -0.1532, -0.2352],
         ...,
         [-0.0864,  0.0541,  0.0614,  ...,  0.0902,  0.1654, -0.1038],
         [ 0.0527, -0.2339,  0.1291,  ..., -0.2668, -0.2348, -0.1155],
         [-0.1834,  0.0694,  0.2361,  ..., -0.0612, -0.2456, -0.0447]],
        requires_grad=True), Parameter containing:
 tensor([-0.2292,  0.2082,  0.1403, -0.0262,  0.1446, -0.2530, -0.1569,  0.1160,
          0.0215, -0.1281, -0.0627, -0.2238,  0.0942, -0.1678,  0.0757,  0.0058,
         -0.2191, -0.1466,  0.0339, -0.0829,  0.2397, -0.1417, -0.1658,  0.1268,
          0.1997,  0.1813, -0.2599,  0.2112, -0.1270,  0.2615, -0.1047, -0.1651,
          0.4089,  0.1107,  0.9022, -0.2060,  0.2283,  0.0341, -0.0320, -0.2728,
          0.0720, -0.1814,  0.0508, -0.2135,  0.0707, -0.0557, -0.1073, -0.1982,

In [26]:
torch.save(model, 'models/classifier.pt')

  "type " + obj.__name__ + ". It won't be checked "


In [27]:
!ls models

'ls' is not recognized as an internal or external command,
operable program or batch file.


In [28]:
!cat models/classifier.pt

'cat' is not recognized as an internal or external command,
operable program or batch file.


## Loading Model
* We saved the entire model and not the state dict. Loading requires the class definition to be in the same script as well.
* **Compute the accuracy, precision and recall from the loaded model**

In [29]:
new_model = torch.load('models/classifier.pt')

In [30]:
predict_out = new_model(Xtest_)
_, predict_y = torch.max(predict_out, 1)

In [32]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print ('prediction accuracy', accuracy_score(Ytest_.data, predict_y.data))
print ('micro precision', precision_score(Ytest_.data, predict_y.data, average='micro'))
print ('micro recall', recall_score(Ytest_.data, predict_y.data, average='micro'))

prediction accuracy 0.9305555555555556
micro precision 0.9305555555555556
micro recall 0.9305555555555556
