In [35]:
import torch
import torch.nn as nn

import pandas as pd

import sklearn as skl

In [89]:
class IrisModule(nn.Module):
    def __init__(self):
        super(IrisModule, self).__init__()
        
        self.layers = nn.Sequential(
            nn.BatchNorm1d(4),
            nn.Linear(4, 7),
            nn.ReLU(),
            nn.Linear(7, 3)
        )
    
    def forward(self, x):
        return self.layers(x)

In [40]:
data = pd.read_csv('../../data/Iris_Species/Iris.csv')
data = skl.utils.shuffle(data)

In [41]:
data.sample(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
69,70,5.6,2.5,3.9,1.1,Iris-versicolor
141,142,6.9,3.1,5.1,2.3,Iris-virginica
83,84,6.0,2.7,5.1,1.6,Iris-versicolor
26,27,5.0,3.4,1.6,0.4,Iris-setosa
81,82,5.5,2.4,3.7,1.0,Iris-versicolor


In [54]:
def to_numerical_category(name):
    if name == 'Iris-setosa':
        return 0
    elif name == 'Iris-versicolor':
        return 1
    elif name == 'Iris-virginica':
        return 2
    else:
        raise NotImplementedError

In [55]:
label = [to_numerical_category(name) for name in data['Species']]

In [56]:
list(zip(label, data['Species']))

[(2, 'Iris-virginica'),
 (1, 'Iris-versicolor'),
 (1, 'Iris-versicolor'),
 (2, 'Iris-virginica'),
 (0, 'Iris-setosa'),
 (0, 'Iris-setosa'),
 (2, 'Iris-virginica'),
 (1, 'Iris-versicolor'),
 (0, 'Iris-setosa'),
 (0, 'Iris-setosa'),
 (2, 'Iris-virginica'),
 (0, 'Iris-setosa'),
 (0, 'Iris-setosa'),
 (1, 'Iris-versicolor'),
 (2, 'Iris-virginica'),
 (0, 'Iris-setosa'),
 (1, 'Iris-versicolor'),
 (1, 'Iris-versicolor'),
 (0, 'Iris-setosa'),
 (0, 'Iris-setosa'),
 (2, 'Iris-virginica'),
 (1, 'Iris-versicolor'),
 (0, 'Iris-setosa'),
 (1, 'Iris-versicolor'),
 (2, 'Iris-virginica'),
 (2, 'Iris-virginica'),
 (1, 'Iris-versicolor'),
 (0, 'Iris-setosa'),
 (2, 'Iris-virginica'),
 (1, 'Iris-versicolor'),
 (0, 'Iris-setosa'),
 (0, 'Iris-setosa'),
 (0, 'Iris-setosa'),
 (2, 'Iris-virginica'),
 (0, 'Iris-setosa'),
 (1, 'Iris-versicolor'),
 (1, 'Iris-versicolor'),
 (2, 'Iris-virginica'),
 (1, 'Iris-versicolor'),
 (2, 'Iris-virginica'),
 (1, 'Iris-versicolor'),
 (1, 'Iris-versicolor'),
 (2, 'Iris-virginica')

In [67]:
data = data.drop(columns=['Id'])

KeyError: "['Id'] not found in axis"

In [78]:
train_size = int(len(data) * 0.8)
train_data, test_data = data[:train_size], data[train_size:]
train_label, test_label = label[:train_size], label[train_size:]

In [69]:
train_data.sample(5)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
61,5.9,3.0,4.2,1.5
29,4.7,3.2,1.6,0.2
48,5.3,3.7,1.5,0.2
42,4.4,3.2,1.3,0.2
46,5.1,3.8,1.6,0.2


In [90]:
model = IrisModule()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

In [96]:
train_torch = torch.tensor(train_data.values, dtype=torch.float32)
label_torch = torch.tensor(train_label)
for epoch in range(30):
    optimizer.zero_grad()
    output = model(train_torch)
    loss = criterion(output, label_torch)
    loss.backward()
    optimizer.step()
    
    display("{} > {}".format(epoch, loss))

'0 > 0.20615746080875397'

'1 > 0.20113803446292877'

'2 > 0.19611647725105286'

'3 > 0.19108709692955017'

'4 > 0.18606549501419067'

'5 > 0.18114441633224487'

'6 > 0.17622429132461548'

'7 > 0.17131291329860687'

'8 > 0.1664334386587143'

'9 > 0.161603182554245'

'10 > 0.15683294832706451'

'11 > 0.15213808417320251'

'12 > 0.14753474295139313'

'13 > 0.14303140342235565'

'14 > 0.13862791657447815'

'15 > 0.13433198630809784'

'16 > 0.1301514059305191'

'17 > 0.12610594928264618'

'18 > 0.12218931317329407'

'19 > 0.1184074878692627'

'20 > 0.11476050317287445'

'21 > 0.1112479716539383'

'22 > 0.10786759853363037'

'23 > 0.10462310165166855'

'24 > 0.101516492664814'

'25 > 0.09853838384151459'

'26 > 0.09569727629423141'

'27 > 0.09297801554203033'

'28 > 0.09037651866674423'

'29 > 0.08788833022117615'

In [97]:
test_data_tensor = torch.tensor(test_data.values, dtype=torch.float32)
test_label_tensor = torch.tensor(test_label)

test_output = model(test_data_tensor)
val, est_cat = test_output.max(1)

total = val.shape[0]
correct = (est_cat == test_label_tensor).sum().item()

print ("correct : {} // total : {}".format(correct, total))
print ("accuracy: {}".format(correct / total))

correct : 27 // total : 30
accuracy: 0.9
