## Binary Classification
https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data

In [None]:
!pip install pandas

In [None]:
!pip install numpy

In [None]:
!pip install torch

In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.nn import functional as F

In [2]:
print(pd.__version__)

1.4.2


In [3]:
print(np.__version__)

1.24.1


In [4]:
print(torch.__version__)

2.0.1+cpu


In [5]:
columns = ['Class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']

In [8]:
wine_data = pd.read_csv('data/Wine.csv', names=columns)
wine_data.sample(5)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
148,3,13.32,3.24,2.38,21.5,92,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650
166,3,13.45,3.7,2.6,23.0,111,1.7,0.92,0.43,1.46,10.68,0.85,1.56,695
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740
15,1,13.63,1.81,2.7,17.2,112,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310
17,1,13.83,1.57,2.62,20.0,115,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130


In [9]:
wine_data.shape

(178, 14)

## Data Pre-processing

#### Cleaning the data
* Removing all the records having NaN values
* Convert target values to number using label encoding

In [5]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()

wine_data['Class'] = le.fit_transform(wine_data['Class'])

wine_data.sample(10)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
83,1,13.05,3.86,2.32,22.5,85,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515
138,2,13.49,3.59,2.19,19.5,88,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580
29,0,14.02,1.68,2.21,16.0,96,2.65,2.33,0.26,1.98,4.7,1.04,3.59,1035
67,1,12.37,1.17,1.92,19.6,78,2.11,2.0,0.27,1.04,4.68,1.12,3.48,510
131,2,12.88,2.99,2.4,20.0,104,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530
56,0,14.22,1.7,2.3,16.3,118,3.2,3.0,0.26,2.03,6.38,0.94,3.31,970
176,2,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840
153,2,13.23,3.3,2.28,18.5,98,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675
97,1,12.29,1.41,1.98,16.0,85,2.55,2.5,0.29,1.77,2.9,1.23,2.74,428
17,0,13.83,1.57,2.62,20.0,115,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130


In [6]:
wine_data.to_csv('data/wine_data.csv', index = False)

#### Collecting Features
* Create training and test data using train_test_split

In [7]:
wine_features = wine_data.drop('Class', axis = 1)
wine_features.sample(5)

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
26,13.39,1.77,2.62,16.1,93,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195
63,12.37,1.13,2.16,19.0,87,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420
154,12.58,1.29,2.1,20.0,103,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640
174,13.4,3.91,2.48,23.0,102,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750
150,13.5,3.12,2.62,24.0,123,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500


In [8]:
wine_target = wine_data[['Class']]
wine_target.sample(5)

Unnamed: 0,Class
108,1
9,0
139,2
56,0
157,2


In [9]:
from sklearn.model_selection import train_test_split

X_train, x_test, Y_train, y_test = train_test_split(wine_features,
                                                    wine_target,
                                                    test_size=0.4,
                                                    random_state=0)

In [10]:
X_train.shape, Y_train.shape

((106, 13), (106, 1))

#### Convert data into tensors

In [11]:
Xtrain_ = torch.from_numpy(X_train.values).float()
Xtest_ = torch.from_numpy(x_test.values).float()

In [12]:
Xtrain_.shape

torch.Size([106, 13])

Y parameters have shape 106 X 1 but we need those in shape 1 X 106<br>
Our loss function doesnt support multi-target, our target should be 1D Tensor
i.e 1 row containing all the labels<br>

<b>view: </b>with view we reshape the tensor <br>
view with -1<br>
If there is any situation that you don't know how many columns you want but are sure of the number of rows then you can mention it as -1, or vice-versa (You can extend this to tensors with more dimensions. Only one of the axis value can be -1).

In [13]:
Ytrain_ = torch.from_numpy(Y_train.values).view(1,-1)[0]
Ytest_ = torch.from_numpy(y_test.values).view(1,-1)[0]

In [14]:
Ytrain_.shape

torch.Size([106])

## Creating a classifier


we have 13 features therefore input size is 13 and we want 3 discrete outputs 

In [15]:
input_size = 13
output_size = 3
hidden_size = 100

#### Define a neural network class from which to create our model

We create a class named Net which inherits nn.Module(Base class for all neural network modules.)<br>

<b>super :</b> This is calling the \__init__() method of the parent class(nn.Module)

<b>fc1 to fc3 :</b>  Applies a linear transformation to the incoming data: y=Wx+b<br>
Parameters :<br>
in_features – size of each input sample<br>
out_features – size of each output sample<br>
bias – If set to False, the layer will not learn an additive bias. Default: True<br>

<b>Sigmoid : </b>Applies the element-wise function Sigmoid(x)= 1 / (1+exp(−x))

<b>log_softmax :</b>
Softmax applies the Softmax() function to an n-dimensional input Tensor rescaling them so that the elements of the n-dimensional output Tensor lie in the range (0,1) and sum to 1<br>
While mathematically equivalent to log(softmax(x)), doing these two operations separately is slower, and numerically unstable. This function uses an alternative formulation to compute the output and gradient correctly.<br>
Parameters:<br>
dim(int) – A dimension along which Softmax will be computed (so every slice along dim will sum to 1).


In [16]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        X = torch.sigmoid((self.fc1(X)))
        X = torch.sigmoid(self.fc2(X))
        X = self.fc3(X)

        return F.log_softmax(X, dim=-1)

In [17]:
model = Net()

#### Defining optimizer and loss function

In [18]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr = 0.01)

loss_fn = nn.NLLLoss()

#### Training the model

Optimizer.zero_grad:
* Before the backward pass, use the optimizer object to zero all of the gradients for the variables it will update (which are the learnable weights of the model)

Foward Pass:
* Predicting Y with input data X

Finding training Loss:
* Finding difference between Y_train_tensor and Y_pred using NLLLoss() function defined above


Back Propogation:
* back propogation is done by simply loss.backward() function

Working on test data<br>
predicting Y with X test data<br>
Finding test loss same as training loss, but we will not back propogate this loss<br>

Finding accuracy 
* we used .eq() function which computes element-wise equality
* returns 1 if element is equal else 0 (Hence its summation will give us total correct predictions)

We append all data in the form of list per epoch so that it will be easier for us to plot graphs


In [19]:
epochs = 1000

for epoch in range(epochs):

    optimizer.zero_grad()
    Ypred = model(Xtrain_)

    loss = loss_fn(Ypred , Ytrain_)
    loss.backward()

    optimizer.step()
        
    if epoch % 100 == 0:
        print ('Epoch', epoch, 'loss', loss.item())

Epoch 0 loss 1.1445715427398682
Epoch 100 loss 0.442806214094162
Epoch 200 loss 0.06015758588910103
Epoch 300 loss 0.05101180821657181
Epoch 400 loss 0.04862520471215248
Epoch 500 loss 0.047397345304489136
Epoch 600 loss 0.046599019318819046
Epoch 700 loss 0.04599539190530777
Epoch 800 loss 0.04547502100467682
Epoch 900 loss 0.044991228729486465


## Saving the model
* saves and loads the entire model, all the intermediate variables as well, like intermediate outputs for back propagation use.

In [20]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.2623, -0.1539,  0.1760,  ..., -0.2537,  0.1115, -0.2060],
         [-0.0221,  0.1724, -0.1271,  ...,  0.1819,  0.0640,  0.1022],
         [-0.1655,  0.2747,  0.1801,  ...,  0.1069, -0.2559, -0.2362],
         ...,
         [ 0.0056,  0.2334, -0.2648,  ...,  0.1256, -0.2578, -0.0056],
         [ 0.0960, -0.0973, -0.1034,  ...,  0.0225,  0.2414,  0.2521],
         [-0.1915, -0.2479, -0.2676,  ..., -0.1850, -0.2333, -0.0310]],
        requires_grad=True), Parameter containing:
 tensor([-4.2211e-02, -1.7053e-01, -4.5946e-03, -1.6043e-01,  2.6865e-01,
         -8.4923e-01,  2.4748e-01, -2.7346e-01,  7.0205e-03, -1.6623e-01,
          2.7120e-01,  1.0300e-02, -2.6202e-01, -1.5330e-01,  1.4735e-01,
          4.2576e-04, -6.5529e-02,  8.1007e-02,  6.9867e-02, -2.5786e-01,
          2.7681e-01,  1.4196e-01, -2.3173e-01, -2.2348e-02,  5.7680e-02,
         -4.2618e-02,  9.2887e-02,  1.1920e-01, -2.9528e-02,  2.8631e-01,
          6.4100e-02,  1.6135e-01,  2.162

In [21]:
torch.save(model, 'models/classifier.pt')

  "type " + obj.__name__ + ". It won't be checked "


In [22]:
!ls models

classifier.pt


In [23]:
!cat models/classifier.pt

��
l��F� j�P.�M�.�}q (X   protocol_versionqM�X   little_endianq�X
   type_sizesq}q(X   shortqKX   intqKX   longqKuu.�(X   moduleq c__main__
Net
qNNtqQ)�q}q(X   _backendqctorch.nn.backends.thnn
_get_thnn_function_backend
q)RqX   _parametersqccollections
OrderedDict
q	)Rq
X   _buffersqh	)RqX   _backward_hooksqh	)RqX   _forward_hooksqh	)RqX   _forward_pre_hooksqh	)RqX   _state_dict_hooksqh	)RqX   _load_state_dict_pre_hooksqh	)RqX   _modulesqh	)Rq(X   fc1q(h ctorch.nn.modules.linear
Linear
qXA   /anaconda3/lib/python3.6/site-packages/torch/nn/modules/linear.pyqXQ	  class Linear(Module):
    r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to False, the layer will not learn an additive bias.
            Default: ``True``

    Shape:
        - Input: :math:`(

## Loading Model
* We saved the entire model and not the state dict. Loading requires the class definition to be in the same script as well.
* **Compute the accuracy, precision and recall from the loaded model**

In [24]:
new_model = torch.load('models/classifier.pt')

In [25]:
predict_out = new_model(Xtest_)
_, predict_y = torch.max(predict_out, 1)

In [26]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

print ('prediction accuracy', accuracy_score(Ytest_.data, predict_y.data))
print ('micro precision', precision_score(Ytest_.data, predict_y.data, average='micro'))
print ('micro recall', recall_score(Ytest_.data, predict_y.data, average='micro'))

prediction accuracy 0.9305555555555556
micro precision 0.9305555555555556
micro recall 0.9305555555555556
