In [171]:
from typing import NewType

import torch
import numpy as np
import pandas as pd

from torch.utils.data import Dataset, DataLoader
import torch.nn as nn                 
import torch.nn.functional as F           
import torch.optim as optim

In [172]:
# Constants

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
lr = 0.1
batch_size = 10
epochs = 100
momentum = 0.9

In [173]:
# Loading the data

df = pd.read_csv('iris-dataset.csv')
print(df)

test = df.to_numpy()

y = test[:,4]
print(y.shape)

X = test[:,:4]
print(X.shape) ### Problem is that I need to create test and training sets for both X and y...




     sepal_length  sepal_width  petal_length  petal_width    species
0             5.1          3.5           1.4          0.2     setosa
1             4.9          3.0           1.4          0.2     setosa
2             4.7          3.2           1.3          0.2     setosa
3             4.6          3.1           1.5          0.2     setosa
4             5.0          3.6           1.4          0.2     setosa
..            ...          ...           ...          ...        ...
145           6.7          3.0           5.2          2.3  virginica
146           6.3          2.5           5.0          1.9  virginica
147           6.5          3.0           5.2          2.0  virginica
148           6.2          3.4           5.4          2.3  virginica
149           5.9          3.0           5.1          1.8  virginica

[150 rows x 5 columns]
(150,)
(150, 4)


In [174]:
#Loading the data using sk-learn

from sklearn.datasets import load_iris

iris=load_iris()

for keys in iris.keys() :
    print(keys)

data
target
frame
target_names
DESCR
feature_names
filename
data_module


In [175]:
X=iris.data
y=iris.target

print(type(X))
print(type(y))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [176]:
# Splitting the training and the testing data

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

print(f'X_train shape : {X_train.shape}')
print(f'X_test shape : {X_test.shape}')
print(f'y_train shape : {y_train.shape}')
print(f'y_test shape : {y_test.shape}')

print(y_test) # It does contain all the three types to my surprise

X_train shape : (120, 4)
X_test shape : (30, 4)
y_train shape : (120,)
y_test shape : (30,)
[1 1 1 1 2 1 0 1 1 1 1 1 2 0 0 0 2 0 2 0 0 1 2 2 0 1 1 1 1 2]


In [177]:
# Making it compatible with PyTorch

X_train, y_train, X_test, y_test = map(
    torch.tensor, (X_train, y_train, X_test, y_test)
)

In [178]:
print(type(X_train),type(X_test))
print(type(y_train),type(y_test))
print(y_train.min(), y_train.max())

<class 'torch.Tensor'> <class 'torch.Tensor'>
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor(0) tensor(2)


In [179]:
# Using TensorDataset and DataLoader

from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_ds = TensorDataset(X_train, y_train)
train_dl = DataLoader(train_ds, batch_size)

In [180]:
### GET THE DATA:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

def load_data(test_size=0.2):
    iris=load_iris()
    X=iris.data
    y=iris.target
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=test_size)
    X_train, y_train, X_test, y_test = map(
    torch.tensor, (X_train, y_train, X_test, y_test)
)
    return X_train,X_test,y_train,y_test

def get_data(train_ds : TensorDataset, test_ds : TensorDataset, batch_size : int):
    return (
        DataLoader(train_ds, batch_size, shuffle=True),
        DataLoader(test_ds, batch_size=batch_size * 2),
    )
    
    

In [181]:
# Neural network using nn.Module

class IrisClassifier(nn.Module):
    
    def __init__(self):
        super(IrisClassifier, self).__init__()
        self.layer = nn.Linear(4, 3)
        
    def forward(self, x):
        return self.layer(x)



In [182]:
my_model = IrisClassifier()

print(my_model)

IrisClassifier(
  (layer): Linear(in_features=4, out_features=3, bias=True)
)


In [183]:
loss_function = nn.CrossEntropyLoss()

optimizer = optim.SGD(my_model.parameters(), lr=0.01, momentum=0.9)

In [184]:
### Get the model

def get_model_with_optimizer(lr, momentum):
    model = IrisClassifier()
    return model, optim.SGD(model.parameters(), lr=lr, momentum=momentum)



In [185]:
### Coding the training loop i.e. the fit() function

def fit(model,opt,epochs,loss_function, train_dl, test_dl):
    for epoch in range(epochs):
        model.train()
        for x_training_tensor, y_label in train_dl:
            prediction = model(x_training_tensor.float())
            loss = loss_function(prediction, y_label)

            loss.backward()
            opt.step()
            opt.zero_grad()

        model.eval()
        with torch.no_grad():
            valid_loss = sum(loss_function(model(x_training_tensor.float()), y_label) for x_training_tensor, y_label in test_dl)

        print(epoch, valid_loss)

In [187]:
epochs = 100

X_train,X_test,y_train,y_test = load_data()
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)
train_dl, test_dl = get_data(train_ds, test_ds, batch_size)

model, opt = get_model_with_optimizer(lr, momentum)
loss_function = nn.CrossEntropyLoss()

fit(model,opt,epochs,loss_function, train_dl, test_dl)



0 tensor(0.8946)
1 tensor(9.6883)
2 tensor(3.6386)
3 tensor(1.8478)
4 tensor(0.2392)
5 tensor(0.1035)
6 tensor(0.0617)
7 tensor(2.0199)
8 tensor(9.3957)
9 tensor(1.0056)
10 tensor(0.2814)
11 tensor(0.0448)
12 tensor(0.5190)
13 tensor(2.2866)
14 tensor(0.2903)
15 tensor(0.0298)
16 tensor(0.4096)
17 tensor(0.7300)
18 tensor(0.0159)
19 tensor(0.0167)
20 tensor(0.0116)
21 tensor(0.0256)
22 tensor(0.0148)
23 tensor(0.0106)
24 tensor(0.5386)
25 tensor(0.2251)
26 tensor(0.0220)
27 tensor(0.2107)
28 tensor(0.1284)
29 tensor(0.0092)
30 tensor(0.0510)
31 tensor(0.0273)
32 tensor(0.0275)
33 tensor(0.0148)
34 tensor(0.2877)
35 tensor(0.0339)
36 tensor(0.0155)
37 tensor(0.0391)
38 tensor(0.0068)
39 tensor(0.0440)
40 tensor(1.0482)
41 tensor(0.8322)
42 tensor(0.2632)
43 tensor(0.0757)
44 tensor(0.0209)
45 tensor(0.0246)
46 tensor(0.0234)
47 tensor(0.0121)
48 tensor(0.0720)
49 tensor(0.0080)
50 tensor(0.0073)
51 tensor(0.0984)
52 tensor(0.2615)
53 tensor(0.1631)
54 tensor(0.0081)
55 tensor(0.0473)
56

In [None]:
def train(model, data, batch_size=64, num_epochs=1):
    train_loader = DataLoader(data, batch_size=batch_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    losses, train_acc, val_acc = [], [], []

    # training
    for epoch in range(num_epochs):
        for imgs, labels in iter(train_loader):
            out = model(imgs)             # forward pass
            loss = criterion(out, labels) # compute the total loss
            loss.backward()               # backward pass (compute parameter updates)
            optimizer.step()              # make the updates for each parameter
            optimizer.zero_grad()         # a clean up step for PyTorch

            # save the current training information
            losses.append(float(loss)/batch_size)             # compute *average* loss
            train_acc.append(get_accuracy(model, train=True)) # compute training accuracy 
            val_acc.append(get_accuracy(model, train=False))  # compute validation accuracy
            
def get_accuracy(model, train=False):
    if train:
        data = iris_train
    else:
        data = iris_test

    correct = 0
    total = 0
    for imgs, labels in DataLoader(data, batch_size=64):
        output = model(imgs) # We don't need to run F.softmax
        pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(labels.view_as(pred)).sum().item()
        total += imgs.shape[0]
    return correct / total