In [453]:
import torch
import pandas as pd
import numpy as np
from torch import nn
from tqdm import tqdm
from icecream import ic
from torch.utils.data import DataLoader

In [454]:
train_csv = pd.read_csv('train.csv')
test_csv = pd.read_csv('test.csv')

In [455]:
train_csv

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [456]:
unique = {}
for i, x in enumerate(train_csv['Ticket']):
    if x not in unique.keys():
        unique[x] = i
train_csv['Ticket'] = [unique[x] for x in train_csv['Ticket']]

unique = {}
for i, x in enumerate(test_csv['Ticket']):
    if x not in unique.keys():
        unique[x] = i
test_csv['Ticket'] = [unique[x] for x in test_csv['Ticket']]

In [457]:
useful_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Embarked', 'Survived']
print(train_csv[useful_columns].shape)
train_csv[useful_columns]

(891, 9)


Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,Survived
0,3,male,22.0,1,0,0,7.2500,S,0
1,1,female,38.0,1,0,1,71.2833,C,1
2,3,female,26.0,0,0,2,7.9250,S,1
3,1,female,35.0,1,0,3,53.1000,S,1
4,3,male,35.0,0,0,4,8.0500,S,0
...,...,...,...,...,...,...,...,...,...
886,2,male,27.0,0,0,886,13.0000,S,0
887,1,female,19.0,0,0,887,30.0000,S,1
888,3,female,,1,2,783,23.4500,S,0
889,1,male,26.0,0,0,889,30.0000,C,1


### Replace Embarked Values with numbers

In [458]:
letters = []
[letters.append(x) for x in train_csv['Embarked'] if x not in letters]
nan = letters[-1]
train_csv['Embarked'] = [letters.index(x) for x in train_csv['Embarked']]
test_csv['Embarked'] = [letters.index(x) for x in test_csv['Embarked']]
train_csv['Embarked'], test_csv['Embarked']

(0      0
 1      1
 2      0
 3      0
 4      0
       ..
 886    0
 887    0
 888    0
 889    1
 890    2
 Name: Embarked, Length: 891, dtype: int64,
 0      2
 1      0
 2      2
 3      0
 4      0
       ..
 413    0
 414    1
 415    0
 416    0
 417    1
 Name: Embarked, Length: 418, dtype: int64)

### Replace Sex values with numbers

In [459]:
train_csv['Sex'] = [(0 if x == 'male' else 1) for x in train_csv['Sex']]
test_csv['Sex'] = [(0 if x == 'male' else 1) for x in test_csv['Sex']]
train_csv['Sex'], test_csv['Sex']

(0      0
 1      1
 2      1
 3      1
 4      0
       ..
 886    0
 887    1
 888    1
 889    0
 890    0
 Name: Sex, Length: 891, dtype: int64,
 0      0
 1      1
 2      0
 3      0
 4      1
       ..
 413    0
 414    1
 415    0
 416    0
 417    0
 Name: Sex, Length: 418, dtype: int64)

### Clean out NaN values from Age

In [460]:
ages = []
[ages.append(x) for x in train_csv['Age'] if x not in ages]

train_csv['Age'] = [(x if x > 0 else -1) for x in train_csv['Age']]
test_csv['Age'] = [(x if x > 0 else -1) for x in test_csv['Age']]
train_csv['Age']

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888    -1.0
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

In [461]:
split = int(train_csv.shape[0] * .80)
data = train_csv[useful_columns].to_numpy()
np.random.shuffle(data)
train_dataset = data[:split]
val_dataset = data[split:]
print(train_dataset.shape)
test_dataset = test_csv[[x for x in useful_columns if x not in ['Survived']]].to_numpy()
print(val_dataset.shape)

(712, 9)
(179, 9)


In [462]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes=1, actFn=nn.Sigmoid()):
        super(NeuralNet, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.actFn = actFn
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        # out = self.actFn(out)
        out = self.linear2(out)
        # out = self.linear3(out)
        # out = self.actFn(out)
        return out

In [463]:
device = ic('cpu' if torch.cuda.is_available() else 'cpu')

ic| 'cpu' if torch.cuda.is_available() else 'cpu': 'cpu'


In [464]:
print(test_dataset.shape)

(418, 8)


In [465]:
class HyperParameters:
    var_script = 'input_size, hidden_sizes, num_classes, batch_sizes, nums_epochs, learning_rates, activation_functions'

    def __init__(self, input_size, hidden_sizes, num_classes, batch_sizes, nums_epochs, learning_rates, actFns):
        self.input_size = input_size
        self.hidden_sizes_list = hidden_sizes
        self.num_classes = num_classes
        self.batch_sizes_list = batch_sizes
        self.nums_epochs_list = nums_epochs
        self.learning_rates_list = learning_rates
        self.actFns = actFns
        self.parameters = input_size, hidden_sizes, num_classes, batch_sizes, nums_epochs, learning_rates, actFns

    def print(self, acc, params):
        print('Benchmark Test:')
        print('Input Size:', self.input_size)
        print('Hidden Size:', params[0])
        print('Number of Classes:', self.num_classes)
        print('Batch Size:', params[1])
        print('Number of Epochs:', params[2])
        print('Learning Rate:', params[3])
        print('Activation Function:', str(params[4]))
        print()
        print('Accuracy of model:', acc)
        print()
        print()


input_size = train_dataset.shape[1] - 1
hidden_sizes = [20, 30, 100, 500] #[30, 50, 100, 500]
num_classes = 2
batch_sizes = [30] #[30, 50, 100]
nums_epochs = [500] # [250, 500]
learning_rates = [.05] #[.01, .05]
activation_functions = [nn.Softmin()]#[nn.Sigmoid(), nn.Tanh(), nn.ReLU(), nn.ReLU6(), nn.Softmax(), nn.LeakyReLU(), nn.Softmin(), nn.Softshrink()]

hyper_params = HyperParameters(input_size, hidden_sizes, num_classes, batch_sizes, nums_epochs, learning_rates, activation_functions)

In [466]:
def get_accuracy(model, test_loader):
    with torch.no_grad():
        n_correct = 0
        n_samples = 0

        for sample in test_loader:
            inputs = sample[:, :-1].float().to(device)
            labels = sample[:, -1].long().to(device)

            inputs -= inputs.min()
            inputs /= inputs.max()

            outputs = model(inputs)

            _, pred = torch.max(outputs, 1)

            n_samples += labels.shape[0]
            n_correct += (pred == labels).sum().item()

        acc = 100 * n_correct / n_samples

        return acc

In [467]:
def BenchmarkTesting(hyper_parameters: HyperParameters):
    input_size, hidden_sizes, num_classes, batch_sizes, nums_epochs, learning_rates, activation_functions = hyper_parameters.parameters
    results = {}
    total_iterations = len(hidden_sizes) * len(batch_sizes) * len(nums_epochs) * len(learning_rates) * len(activation_functions)
    iteration = 0
    for hidden_size in hidden_sizes:
        for batch_size in batch_sizes:
            for num_epochs in nums_epochs:
                for learning_rate in learning_rates:
                    for activation_function in activation_functions:
                        model = NeuralNet(input_size, hidden_size, num_classes, activation_function).to(device)
                        criterion = nn.CrossEntropyLoss().to(device)
                        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
                        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
                        test_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)
                        for epoch in range(num_epochs):
                            for i, sample in enumerate(train_loader):
                                # extract data from sample
                                inputs = sample[..., :-1].float().to(device)
                                labels = sample[..., -1].long().to(device)

                                # inputs = normalize_hands(inputs).float()
                                inputs = inputs.reshape(-1, input_size)
                                inputs -= inputs.min()
                                inputs /= inputs.max()

                                # forward pass
                                outputs = model(inputs).float()
                                loss = criterion(outputs, labels)

                                # backward pass
                                loss.backward()
                                optimizer.step()
                                optimizer.zero_grad()

                        params = [hidden_size, batch_size, num_epochs, learning_rate, activation_function]
                        accuracy = np.median([get_accuracy(model, test_loader) for _ in range(300)])
                        results[accuracy] = params
                        iteration += 1
                        print(f'Completion: {(iteration / total_iterations * 100):.2f}%')
                        print(f'Iteration {iteration} out of {total_iterations}.\n')
                        hyper_parameters.print(accuracy, params)
    return results

In [468]:
results = BenchmarkTesting(hyper_params)

Completion: 25.00%
Iteration 1 out of 4.

Benchmark Test:
Input Size: 8
Hidden Size: 20
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 84.35754189944134


Completion: 50.00%
Iteration 2 out of 4.

Benchmark Test:
Input Size: 8
Hidden Size: 30
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 79.32960893854748


Completion: 75.00%
Iteration 3 out of 4.

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 84.91620111731844


Completion: 100.00%
Iteration 4 out of 4.

Benchmark Test:
Input Size: 8
Hidden Size: 500
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 81.00558659217877




In [469]:
highest_accuracy = max(list(results.keys()))
hyper_params.print(highest_accuracy, results[highest_accuracy])

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 84.91620111731844




In [470]:
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneOut
splits = 10
kf = KFold(n_splits=splits)
# kf = LeaveOneOut()
acc_list = []
for i, indices in tqdm(enumerate(kf.split(train_dataset))):
    train_index, test_index = indices
    train_dataset = data[train_index]
    val_dataset = data[test_index]
    hidden_size, batch_size, num_epochs, learning_rate, activation_function = results[highest_accuracy]
    model = NeuralNet(input_size, hidden_size, num_classes, activation_function).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    kf_train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    kf_test_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(num_epochs):
        for i, sample in enumerate(kf_train_loader):
            # extract data from sample
            inputs = sample[..., :-1].float().to(device)
            labels = sample[..., -1].long().to(device)

            # inputs = normalize_hands(inputs).float()
            inputs = inputs.reshape(-1, input_size)
            inputs -= inputs.min()
            inputs /= inputs.max()

            # forward pass
            outputs = model(inputs).float()
            loss = criterion(outputs, labels)

            # backward pass
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # if (epoch + 1) % (num_epochs / 10) == 0:
            # print(f'epoch {epoch + 1}/{num_epochs}, loss = {loss.item():.4f}')

    average = np.average([get_accuracy(model, kf_test_loader) for _ in range(500)])
    acc_list.append(average)
    hyper_params.print(average, results[highest_accuracy])



0it [00:00, ?it/s]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 73.54722222222222






1it [00:15, 15.08s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 76.23055555555557






2it [00:30, 15.08s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 73.0450704225352






3it [00:45, 15.14s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 85.90422535211266






4it [01:00, 15.07s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 74.90985915492958






5it [01:15, 15.14s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 76.05070422535209






6it [01:30, 15.15s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 73.78309859154929






7it [01:45, 15.16s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 68.24225352112676






8it [02:01, 15.15s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 80.70985915492957






9it [02:16, 15.15s/it]

Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 80.05633802816901






10it [02:31, 15.16s/it]


In [471]:
import winsound
frequency = 440  # Set Frequency To 2500 Hertz
duration = 1000  # Set Duration To 1000 ms == 1 second
winsound.Beep(frequency, duration)

test_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
ic(np.average(acc_list))
print(test_loader.dataset.shape)
hyper_params.print(np.average([get_accuracy(model, test_loader) for _ in range(5000)]), results[highest_accuracy])

ic| np.average(acc_list): 76.24791862284819


(71, 9)
Benchmark Test:
Input Size: 8
Hidden Size: 100
Number of Classes: 2
Batch Size: 30
Number of Epochs: 500
Learning Rate: 0.05
Activation Function: Softmin(dim=None)

Accuracy of model: 80.08112676056338




In [472]:
test_csv[[x for x in useful_columns if x not in ['Survived']]].shape

(418, 8)

In [474]:


# def divide(start, end, steps):
#     assert start < end
#     step = (end - start) / steps
#
#     return np.array([int(start + step * i) for i in range(steps + 1)])
# print(divide(1, 10, 5))
#
# frequencies = divide(440, 880, 12)
# print(frequencies)
#
# scale = np.array([1, 3, 4, 6, 8, 9, 11, 13]) - 1
# # for frequency in frequencies[scale]:

[ 1  2  4  6  8 10]
[440 476 513 550 586 623 660 696 733 770 806 843 880]
