In [1]:
import matplotlib.pyplot as plt
from nn import NN 
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# load data 
data_path = "./data/breast_cancer.csv"
data = pd.read_csv(data_path)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             5

In [3]:
Y = data['diagnosis']
X = data.drop(['id', 'diagnosis', 'Unnamed: 32'], axis=1)

In [4]:
# Y to T/F
Y = Y.map({'M': True, 'B': False})

In [5]:
# split data into train, test, validation (70, 20, 10) randomly 
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
X_test, X_val, Y_test, Y_val = train_test_split(X_test, Y_test, test_size=0.33, random_state=42)

# normalize data
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
X_val = (X_val - mean) / std

X_train = X_train.values
Y_train = Y_train.values
X_test = X_test.values
Y_test = Y_test.values
X_val = X_val.values
Y_val = Y_val.values



In [6]:
print("Train data shape: ", X_train.shape)
print("Test data shape: ", X_test.shape)
print("Validation data shape: ", X_val.shape)

Train data shape:  (398, 30)
Test data shape:  (114, 30)
Validation data shape:  (57, 30)


In [7]:

# neurons: 10 relu -> 5 relu -> 1 sigmoid
input_dim = X_train.shape[1]
print("Input dimension: ", input_dim)
output_dim = 1
hidden_dims = [10, 5, 1]

class model(torch.nn.Module):
    def __init__(self): 
        super(model, self).__init__()
        self.fc1 = torch.nn.Linear(input_dim, hidden_dims[0])
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_dims[0], hidden_dims[1])
        self.fc3 = torch.nn.Linear(hidden_dims[1], output_dim)
        self.sigmoid = torch.nn.Sigmoid()
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x
    
model = model()



        

Input dimension:  30


In [8]:
# convert to tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
# change shape to (n, 1)
Y_train_tensor = Y_train_tensor.view(-1, 1)

In [9]:
loss_fn = torch.nn.BCELoss()


In [10]:
train_data = TensorDataset(X_train_tensor, Y_train_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

# train model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0)

epochs = 180

for epoch in range(epochs):
    for x, y in train_loader:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, loss: {loss.item()}")



  from .autonotebook import tqdm as notebook_tqdm


Epoch 1, loss: 0.6853297352790833
Epoch 2, loss: 0.6834248304367065
Epoch 3, loss: 0.6815492510795593
Epoch 4, loss: 0.6792163848876953
Epoch 5, loss: 0.676249086856842
Epoch 6, loss: 0.6728821396827698
Epoch 7, loss: 0.6693319082260132
Epoch 8, loss: 0.6655139923095703
Epoch 9, loss: 0.6613584756851196
Epoch 10, loss: 0.6569510102272034
Epoch 11, loss: 0.6522315740585327
Epoch 12, loss: 0.6470612287521362
Epoch 13, loss: 0.6413851976394653
Epoch 14, loss: 0.635166585445404
Epoch 15, loss: 0.628170907497406
Epoch 16, loss: 0.6205064654350281
Epoch 17, loss: 0.6121031641960144
Epoch 18, loss: 0.6028543710708618
Epoch 19, loss: 0.592679500579834
Epoch 20, loss: 0.5814942717552185
Epoch 21, loss: 0.5692687630653381
Epoch 22, loss: 0.5559239387512207
Epoch 23, loss: 0.5413933992385864
Epoch 24, loss: 0.5257412195205688
Epoch 25, loss: 0.5090298652648926
Epoch 26, loss: 0.4913555681705475
Epoch 27, loss: 0.4728664755821228
Epoch 28, loss: 0.4535691440105438
Epoch 29, loss: 0.433760166168212

In [11]:
X_train_tensor.shape

torch.Size([398, 30])

In [12]:
# test accuracy of training data
y_pred = model(X_train_tensor)
y_pred = y_pred.detach().numpy()
y_pred = y_pred > 0.5
y_pred = y_pred.astype(int)
y_pred = y_pred.flatten()
accuracy = np.mean(y_pred == Y_train)
print("Training accuracy: ", accuracy)



Training accuracy:  0.9798994974874372


In [13]:
# test accuracy of test data

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
Y_test_tensor = Y_test_tensor.view(-1, 1)

y_pred = model(X_test_tensor)
y_pred = y_pred.detach().numpy()
y_pred = y_pred > 0.5
y_pred = y_pred.astype(int)
y_pred = y_pred.flatten()
accuracy = np.mean(y_pred == Y_test)
print("Test accuracy: ", accuracy)


Test accuracy:  0.9912280701754386


In [14]:
# test accuracy of validation data
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32)
Y_val_tensor = Y_val_tensor.view(-1, 1)

y_pred = model(X_val_tensor)
y_pred = y_pred.detach().numpy()
y_pred = y_pred > 0.5
y_pred = y_pred.astype(int)
y_pred = y_pred.flatten()
accuracy = np.mean(y_pred == Y_val)
print("Validation accuracy: ", accuracy)


Validation accuracy:  0.9824561403508771


In [15]:
# train with self implemented NN
my_model = NN()



In [16]:
input_dim = X_train.shape[1]
print("Input dimension: ", input_dim)
output_dim = 1
hidden_dims = [10, 5, 1]
arch = [[input_dim, hidden_dims[0], 'relu'], [hidden_dims[0], hidden_dims[1], 'relu'], [hidden_dims[1], output_dim, 'sigmoid']]
my_model = NN()
my_model.get_input(X_train.T) 
# make sure Y_train is 2D
Y_train_2d = Y_train.reshape(-1, 1)
my_model.get_output(Y_train_2d.T)

my_model.get_nn_architecture(arch)

my_model.train(epochs=180, lr=0.01, batch_size=32)

Input dimension:  30
Epoch 0 loss:  0.6912476336503985
Epoch 1 loss:  0.689464560678536
Epoch 2 loss:  0.6877914014619478
Epoch 3 loss:  0.6862210387499054
Epoch 4 loss:  0.6847469415201028
Epoch 5 loss:  0.6833630804244377
Epoch 6 loss:  0.682063835303898
Epoch 7 loss:  0.6808439038669659
Epoch 8 loss:  0.6796982851418807
Epoch 9 loss:  0.6786223302425334
Epoch 10 loss:  0.6776116942279946
Epoch 11 loss:  0.6766623044666848
Epoch 12 loss:  0.6757703661199095
Epoch 13 loss:  0.674932260868304
Epoch 14 loss:  0.6741446021549439
Epoch 15 loss:  0.673404285190266
Epoch 16 loss:  0.6727083469456397
Epoch 17 loss:  0.6720540351825897
Epoch 18 loss:  0.6714387734291489
Epoch 19 loss:  0.6708601306357853
Epoch 20 loss:  0.6703158294013936
Epoch 21 loss:  0.6698037360415986
Epoch 22 loss:  0.6693218677981453
Epoch 23 loss:  0.6688683558744392
Epoch 24 loss:  0.6684414410119454
Epoch 25 loss:  0.6680394585539063
Epoch 26 loss:  0.6676608768632822
Epoch 27 loss:  0.6673042401789658
Epoch 28 loss

In [17]:
# test on validation set
y_pred = my_model.predict()
y_pred = y_pred > 0.5
print("Accuracy: ", np.mean(y_pred == Y_train_2d.T))

Accuracy:  0.9798994974874372


In [18]:
# test on test set
my_model.get_input(X_test.T)
my_model.get_output(Y_test.reshape(-1, 1).T)
y_pred = my_model.predict()
y_pred = y_pred > 0.5
print("Accuracy (test set): ", np.mean(y_pred == Y_test.reshape(-1, 1).T))

Accuracy (test set):  0.9824561403508771


In [19]:
my_model.get_input(X_val.T)
my_model.get_output(Y_val.reshape(-1, 1).T)
y_pred = my_model.predict()
y_pred = y_pred > 0.5
print("Accuracy (validation set): ", np.mean(y_pred == Y_val.reshape(-1, 1).T))

Accuracy (validation set):  1.0
