In [1]:
# Implementation and exploration.

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.model_selection import KFold
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from typing import List, Dict, Tuple, Callable

In [3]:
class Model(nn.Module):
    def __init__(self, in_features=22, h1=8, h2=9, out_features=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.out = nn.Linear(h2, out_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.out(x))
        
        return x

In [4]:
class Model2(nn.Module):
    def __init__(self, in_features=22, h1=3, h2=4, out_features=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.out = nn.Linear(h2, out_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.out(x))
        
        return x

In [5]:
class Model3(nn.Module):
    def __init__(self, in_features=22, h1=2, h2=3, h3=2, out_features=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2,h3)
        self.out = nn.Linear(h3, out_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = torch.sigmoid(self.out(x))
        
        return x

In [6]:
class Model4(nn.Module):
    def __init__(self, in_features=22, h1=7, h2=8, h3=7, out_features=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2,h3)
        self.out = nn.Linear(h3, out_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = torch.sigmoid(self.out(x))
        
        return x

In [7]:
class Model5(nn.Module):
    def __init__(self, in_features=22, h1=2, h2=2, h3=2, h4=2, out_features=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2,h3)
        self.fc4 = nn.Linear(h3,h4)
        self.out = nn.Linear(h4, out_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = torch.sigmoid(self.out(x))
        
        return x

In [8]:
class Model6(nn.Module):
    def __init__(self, in_features=22, h1=7, h2=8, h3=8, h4=7, out_features=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2,h3)
        self.fc4 = nn.Linear(h3,h4)
        self.out = nn.Linear(h4, out_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = torch.sigmoid(self.out(x))
        
        return x

In [9]:
torch.manual_seed(41)
model = Model()
model2 = Model2()
model3 = Model3()
model4 = Model4()
model5 = Model5()
model6 = Model6()

In [10]:
def parse_data(file_name):
    data = []
    file = open(file_name, "r")
    for line in file:
        datum = [value for value in line.rstrip().split(",")]
        data.append(datum)
    random.shuffle(data)
    return data

In [11]:
attribute_names_mushroom = ["edible",'cap-shape',
                   'cap-surface',
                   'cap-color',
                   'bruises?',
                   'odor',
                   'gill-attachment',
                   'gill-spacing',
                   'gill-size',
                   'gill-color',
                   'stalk-shape',
                   'stalk-root',
                   'stalk-surface-above-ring',
                   'stalk-surface-below-ring',
                   'stalk-color-above-ring',
                   'stalk-color-below-ring',
                   'veil-type',
                   'veil-color',
                   'ring-number',
                   'ring-type',
                   'spore-print-color',
                   'population',
                   'habitat']

In [12]:
my_df = pd.read_csv("agaricus-lepiota.data", names=attribute_names_mushroom)

In [13]:
label_encoder = LabelEncoder()
for column in my_df.columns:
    my_df[column] = label_encoder.fit_transform(my_df[column])
    
X = my_df.drop(columns=['edible']) 
y = my_df['edible']

X_train, X_test_val, y_train, y_test_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=42)


X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values)

X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values)

In [14]:
def cross_validate(model, X_train, y_train, X_val, y_val, n_splits=10):
    kf = KFold(n_splits=n_splits, shuffle=True)
    train_errors = []
    val_errors = []
    
    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]
        
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        
        epochs = 100
        for i in range(epochs):
            model.train()
            optimizer.zero_grad()
            y_pred = model(X_train_fold)
            loss = criterion(y_pred, y_train_fold)
            loss.backward()
            optimizer.step()
        
        model.eval()
        train_pred = torch.argmax(model(X_train_fold), dim=1)
        val_pred = torch.argmax(model(X_val_fold), dim=1)
        
        train_error = (train_pred != y_train_fold).float().mean().item()
        val_error = (val_pred != y_val_fold).float().mean().item()
        
        train_errors.append(train_error)
        val_errors.append(val_error)
        
        print(f'Fold: {fold}\tTrain Error: {train_error*100:.2f}%\tValidation Error: {val_error*100:.2f}%')
    
    train_errors = np.array(train_errors)
    val_errors = np.array(val_errors)
    
    print("\nMean(Std. Dev.) over all folds:")
    print("-------------------------------")
    print(f"Train Error: {train_errors.mean()*100:.2f}% ({train_errors.std()*100:.2f}%)")
    print(f"Validation Error: {val_errors.mean()*100:.2f}% ({val_errors.std()*100:.2f}%)")

In [15]:
print("Model 1: ")
cross_validate(model, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)
print('\n')
print("Model 2: ")
cross_validate(model2, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)
print('\n')
print("Model 3: ")
cross_validate(model3, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)
print('\n')
print("Model 4: ")
cross_validate(model4, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)
print('\n')
print("Model 5: ")
cross_validate(model5, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)
print('\n')
print("Model 6: ")
cross_validate(model6, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)

Model 1: 
Fold: 0	Train Error: 5.23%	Validation Error: 5.54%
Fold: 1	Train Error: 5.28%	Validation Error: 4.77%
Fold: 2	Train Error: 5.23%	Validation Error: 5.23%
Fold: 3	Train Error: 5.25%	Validation Error: 5.08%
Fold: 4	Train Error: 5.20%	Validation Error: 5.54%
Fold: 5	Train Error: 5.16%	Validation Error: 5.85%
Fold: 6	Train Error: 5.18%	Validation Error: 5.69%
Fold: 7	Train Error: 5.32%	Validation Error: 4.46%
Fold: 8	Train Error: 5.23%	Validation Error: 5.23%
Fold: 9	Train Error: 5.26%	Validation Error: 4.93%

Mean(Std. Dev.) over all folds:
-------------------------------
Train Error: 5.23% (0.04%)
Validation Error: 5.23% (0.41%)


Model 2: 
Fold: 0	Train Error: 28.36%	Validation Error: 27.38%
Fold: 1	Train Error: 27.63%	Validation Error: 26.92%
Fold: 2	Train Error: 26.91%	Validation Error: 29.69%
Fold: 3	Train Error: 27.24%	Validation Error: 23.23%
Fold: 4	Train Error: 26.64%	Validation Error: 28.15%
Fold: 5	Train Error: 26.59%	Validation Error: 25.85%
Fold: 6	Train Error: 26.47

In [16]:
cross_validate(model, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor)

Fold: 0	Train Error: 5.20%	Validation Error: 5.54%
Fold: 1	Train Error: 5.27%	Validation Error: 4.92%
Fold: 2	Train Error: 5.25%	Validation Error: 5.08%
Fold: 3	Train Error: 5.18%	Validation Error: 5.69%
Fold: 4	Train Error: 5.16%	Validation Error: 5.85%
Fold: 5	Train Error: 5.21%	Validation Error: 5.38%
Fold: 6	Train Error: 5.27%	Validation Error: 4.92%
Fold: 7	Train Error: 5.35%	Validation Error: 4.15%
Fold: 8	Train Error: 5.09%	Validation Error: 6.46%
Fold: 9	Train Error: 5.33%	Validation Error: 4.31%

Mean(Std. Dev.) over all folds:
-------------------------------
Train Error: 5.23% (0.07%)
Validation Error: 5.23% (0.67%)


In [17]:
with torch.no_grad():
    correct = 0
    for i,data in enumerate(X_test_tensor):
        y_val = model.forward(data)

        if y_val.argmax().item() == y_test_tensor[i]:
            correct += 1
            
print(f'The model got {(correct / len(X_test_tensor)) * 100}% correct')
print(f'The model got {100 - ((correct / len(X_test_tensor)) * 100)}% incorrect')

The model got 94.08866995073892% correct
The model got 5.911330049261082% incorrect


In [18]:
criterion = nn.CrossEntropyLoss()
with torch.no_grad():
    y_eval = model.forward(X_test_tensor)
    loss = criterion(y_eval, y_test_tensor)
loss

tensor(0.3724)

### Configuration
I created 6 different models with varying number of layers and units per layer. <br />
Model 1: Number of layers: 2, Units per layer: 8,9 <br />
Model 2: Number of layers: 2, Units per layer: 3,4 <br />
Model 3: Number of layers: 3, Units per layer: 2,3,2 <br />
Model 4: Number of layers: 3, Units per layer: 7,8,7 <br />
Model 5: Number of layers: 4, Units per layer: 2,2,2,2 <br />
Model 6: Number of layers: 4, Units per layer: 7,8,8,7 <br />

I trained my models using a lr = 0.01 and epochs = 100. I also set `torch.manual_seed(41)` so that outputs are reproducable. 

I split my training, testing, and validation sets into a 8:1:1 ratio. 

I created different folds by using sklearn's library 'Kfold'.

### Winning Model / Results
My winning model would be Model 1 because I can see that the traing and validation errior were the lowest compared to all the other models. This is important because it determines which model has the optimal bias/variance tradeoff. We put strong attention to the validation error because it is a big indicator on how well the model will do on the test data. 

Model 1: Train Error: Train Error: 5.23% (0.04%) Validation Error: 5.23% (0.41%) <br />
Model 2: Train Error: 26.91% (0.64%) Validation Error: 26.97% (2.13%) <br />
Model 3: Train Error: 48.22% (0.15%) Validation Error: 48.22% (1.36%) <br />
Model 4: Train Error: 5.51% (0.10%) Validation Error: 5.52% (0.86%) <br />
Model 5: Train Error: 48.22% (0.17%) Validation Error: 48.22% (1.54%) <br />
Model 6: Train Error: Train Error: 5.28% (0.13%) Validation Error: 5.29% (1.04%) <br />

As we can see from the results, the models with greater units per layer do better than the models with less units per layer. We can also see that the number of layers is not as a determining factor compared to the units per layer as Model 1, 4, 6 tend to have similar errors. 

Final test error: 5.23% <br />
Loss: 0.3724


## Before You Submit...

1. Re-read the general instructions provided above, and
2. Hit "Kernel"->"Restart & Run All".