In this notebook we use the Breast Cancer Wisconsin data set to predict whether a tumor is benign or malignant. To accomplish this, we will build a neural network from scratch in Python.

# Read Data

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# Load the data set from a CSV file located relative to the notebook's working directory.
df = pd.read_csv('../data/data.csv')
# Show summary statistics for the numeric columns (rendered as the cell output).
df.describe()

Unnamed: 0,id,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,0.0
mean,30371830.0,14.127292,19.289649,91.969033,654.889104,0.09636,0.104341,0.088799,0.048919,0.181162,...,25.677223,107.261213,880.583128,0.132369,0.254265,0.272188,0.114606,0.290076,0.083946,
std,125020600.0,3.524049,4.301036,24.298981,351.914129,0.014064,0.052813,0.07972,0.038803,0.027414,...,6.146258,33.602542,569.356993,0.022832,0.157336,0.208624,0.065732,0.061867,0.018061,
min,8670.0,6.981,9.71,43.79,143.5,0.05263,0.01938,0.0,0.0,0.106,...,12.02,50.41,185.2,0.07117,0.02729,0.0,0.0,0.1565,0.05504,
25%,869218.0,11.7,16.17,75.17,420.3,0.08637,0.06492,0.02956,0.02031,0.1619,...,21.08,84.11,515.3,0.1166,0.1472,0.1145,0.06493,0.2504,0.07146,
50%,906024.0,13.37,18.84,86.24,551.1,0.09587,0.09263,0.06154,0.0335,0.1792,...,25.41,97.66,686.5,0.1313,0.2119,0.2267,0.09993,0.2822,0.08004,
75%,8813129.0,15.78,21.8,104.1,782.7,0.1053,0.1304,0.1307,0.074,0.1957,...,29.72,125.4,1084.0,0.146,0.3391,0.3829,0.1614,0.3179,0.09208,
max,911320500.0,28.11,39.28,188.5,2501.0,0.1634,0.3454,0.4268,0.2012,0.304,...,49.54,251.2,4254.0,0.2226,1.058,1.252,0.291,0.6638,0.2075,


Now that we've read in our data, we will clean it up so that it is suitable as input to the neural network. First, we will drop the `id` column and the empty `Unnamed: 32` column (its count of 0 above shows it holds no values).

In [5]:
# Remove the identifier column and the empty trailing column; neither is used as a feature.
df = df.drop(columns=['id', 'Unnamed: 32'])
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


We need the diagnosis field to work as a binary output. To do this we will use a label encoder, which maps M (malignant) to 1 and B (benign) to 0.

In [9]:
from sklearn import preprocessing

# Replace the diagnosis letters with integer codes, fitting and transforming in one step.
le = preprocessing.LabelEncoder()
df['diagnosis'] = le.fit_transform(df['diagnosis'])

df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,1,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,1,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


To improve the performance of the neural network in predicting the type of tumor, we will need to normalize the data

In [12]:
# The 30 feature columns are named "<measurement>_<statistic>": all "_mean" columns
# first, then all "_se" columns, then all "_worst" columns.
measurements = [
    'radius', 'texture', 'perimeter', 'area', 'smoothness',
    'compactness', 'concavity', 'concave points', 'symmetry',
    'fractal_dimension',
]
feature_columns = [
    name + '_' + statistic
    for statistic in ('mean', 'se', 'worst')
    for name in measurements
]


def min_max_scale(column):
    """Rescale one column linearly so its minimum maps to 0 and its maximum to 1."""
    lowest = column.min()
    spread = column.max() - lowest
    return (column - lowest) / spread


# NOTE(review): the minimum and maximum are taken over every row of df, i.e. before
# the train/test split that happens later in the notebook.
dfNorm = df[feature_columns].apply(min_max_scale)

In [13]:
# Summary statistics of the scaled features (rendered as the cell output).
dfNorm.describe()

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0
mean,0.338222,0.323965,0.332935,0.21692,0.394785,0.260601,0.208058,0.243137,0.379605,0.270379,...,0.296663,0.363998,0.283138,0.170906,0.404138,0.220212,0.217403,0.393836,0.263307,0.189596
std,0.166787,0.145453,0.167915,0.149274,0.126967,0.161992,0.186785,0.192857,0.138456,0.148702,...,0.17194,0.163813,0.167352,0.139932,0.150779,0.152649,0.166633,0.225884,0.121954,0.118466
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.223342,0.218465,0.216847,0.117413,0.304595,0.139685,0.06926,0.100944,0.282323,0.163016,...,0.180719,0.241471,0.167837,0.08113,0.300007,0.116337,0.091454,0.223127,0.185098,0.1077
50%,0.302381,0.308759,0.293345,0.172895,0.390358,0.224679,0.144189,0.166501,0.369697,0.243892,...,0.250445,0.356876,0.23532,0.123206,0.397081,0.17911,0.18107,0.343402,0.247782,0.163977
75%,0.416442,0.40886,0.416765,0.271135,0.47549,0.340531,0.306232,0.367793,0.45303,0.340354,...,0.386339,0.471748,0.373475,0.220901,0.494156,0.30252,0.305831,0.554639,0.318155,0.242949
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [16]:
# Put the encoded diagnosis back next to the scaled features, as the last column.
df_target = df['diagnosis']
df = dfNorm.join(df_target)
df.describe()

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,diagnosis
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0
mean,0.338222,0.323965,0.332935,0.21692,0.394785,0.260601,0.208058,0.243137,0.379605,0.270379,...,0.363998,0.283138,0.170906,0.404138,0.220212,0.217403,0.393836,0.263307,0.189596,0.372583
std,0.166787,0.145453,0.167915,0.149274,0.126967,0.161992,0.186785,0.192857,0.138456,0.148702,...,0.163813,0.167352,0.139932,0.150779,0.152649,0.166633,0.225884,0.121954,0.118466,0.483918
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.223342,0.218465,0.216847,0.117413,0.304595,0.139685,0.06926,0.100944,0.282323,0.163016,...,0.241471,0.167837,0.08113,0.300007,0.116337,0.091454,0.223127,0.185098,0.1077,0.0
50%,0.302381,0.308759,0.293345,0.172895,0.390358,0.224679,0.144189,0.166501,0.369697,0.243892,...,0.356876,0.23532,0.123206,0.397081,0.17911,0.18107,0.343402,0.247782,0.163977,0.0
75%,0.416442,0.40886,0.416765,0.271135,0.47549,0.340531,0.306232,0.367793,0.45303,0.340354,...,0.471748,0.373475,0.220901,0.494156,0.30252,0.305831,0.554639,0.318155,0.242949,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Now that we have normalized the data, we will split it into the training and test sets to feed into the neural network

In [28]:
# Separate the data into a training set and a test set.
# Each row is assigned to the training set when its random draw is below 0.8,
# so the split is roughly (not exactly) 80% train / 20% test.
np.random.seed(0)  # fixed seed so the same rows are selected on every run
mask = np.random.rand(df.shape[0]) < 0.8

df_train, df_test = df[mask], df[~mask]

In [29]:
# Display 10 randomly sampled rows of the training set (rendered as the cell output).
df_train.sample(n=10)

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,diagnosis
207,0.474656,0.356781,0.455463,0.322715,0.316783,0.164591,0.16284,0.267893,0.487879,0.04781,...,0.347281,0.396384,0.251868,0.263686,0.117696,0.154313,0.376632,0.337079,0.063295,1
54,0.384259,0.4163,0.369498,0.241485,0.342421,0.157751,0.123079,0.165706,0.280808,0.144903,...,0.524254,0.335126,0.207629,0.447269,0.173094,0.216613,0.525773,0.218805,0.155385,1
285,0.264991,0.293879,0.24905,0.146554,0.282567,0.069873,0.004358,0.014533,0.321717,0.180918,...,0.294776,0.175059,0.093123,0.215479,0.037789,0.004456,0.030144,0.185295,0.060803,0
433,0.56032,0.414609,0.552208,0.409968,0.443893,0.366603,0.373477,0.434592,0.44596,0.239259,...,0.503998,0.472583,0.348457,0.44793,0.309505,0.31246,0.586942,0.28425,0.184311,1
154,0.291968,0.190396,0.286919,0.16772,0.372032,0.201215,0.217737,0.173111,0.384848,0.255055,...,0.226013,0.23537,0.120945,0.506042,0.192401,0.240335,0.334089,0.450227,0.205234,0
305,0.218609,0.499831,0.210352,0.116098,0.199603,0.115024,0.046251,0.065258,0.441919,0.185762,...,0.522388,0.154291,0.071594,0.160338,0.105568,0.057819,0.165464,0.330968,0.081398,0
528,0.329358,0.11701,0.321471,0.191177,0.65153,0.239771,0.236645,0.328777,0.462626,0.307709,...,0.089552,0.219682,0.115046,0.450571,0.105859,0.124521,0.348797,0.117288,0.114719,0
476,0.341663,0.365911,0.335982,0.201442,0.331137,0.280412,0.118627,0.151988,0.225253,0.213353,...,0.406183,0.307236,0.158106,0.291422,0.306206,0.200639,0.460137,0.191011,0.154401,0
61,0.07653,0.38113,0.075116,0.033213,0.647016,0.215478,0.070291,0.046019,0.387879,0.370893,...,0.40032,0.058021,0.0218,0.612362,0.138264,0.071893,0.095464,0.277351,0.144825,0
525,0.075252,0.114643,0.074217,0.033001,0.460143,0.174652,0.060098,0.07505,0.312121,0.44861,...,0.171375,0.064196,0.022218,0.613683,0.190364,0.140096,0.292509,0.279519,0.327037,0


In [30]:
# Display 10 randomly sampled rows of the test set (rendered as the cell output).
df_test.sample(n=10)

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,diagnosis
66,0.117564,0.382144,0.112777,0.053404,0.467365,0.178977,0.05089,0.074751,0.331818,0.4008,...,0.520789,0.082773,0.03576,0.552268,0.134965,0.075176,0.223952,0.258821,0.243146,0
111,0.267358,0.37369,0.265082,0.142906,0.421594,0.311392,0.249531,0.299254,0.340909,0.436816,...,0.358475,0.192191,0.084103,0.379912,0.191819,0.176997,0.379725,0.130298,0.195592,0
441,0.486961,0.531282,0.474121,0.333107,0.27697,0.280719,0.282099,0.285089,0.205556,0.086563,...,0.624733,0.410329,0.270055,0.478307,0.373442,0.402236,0.597595,0.184309,0.160042,1
449,0.668229,0.365573,0.651717,0.526193,0.399115,0.300963,0.368322,0.574056,0.249495,0.140059,...,0.534382,0.586633,0.451435,0.433402,0.274384,0.351358,0.783505,0.138577,0.126,1
27,0.550381,0.356442,0.541151,0.403181,0.377088,0.26753,0.34911,0.384245,0.321717,0.148062,...,0.406183,0.44569,0.299302,0.41359,0.178916,0.27524,0.512027,0.152967,0.125738,1
384,0.298121,0.13561,0.290236,0.16895,0.279859,0.203576,0.118955,0.142346,0.281313,0.125948,...,0.142591,0.229992,0.107771,0.300007,0.234023,0.228914,0.315223,0.23083,0.119113,0
385,0.360594,0.459249,0.346762,0.221082,0.308658,0.144102,0.196579,0.261978,0.286364,0.088458,...,0.524787,0.257931,0.140828,0.396421,0.126913,0.213658,0.46701,0.179775,0.087367,1
459,0.131289,0.625296,0.123627,0.062524,0.245644,0.082449,0.036106,0.051839,0.283333,0.201348,...,0.663646,0.087753,0.040479,0.263026,0.081119,0.057428,0.167216,0.149024,0.111964,0
164,0.770931,0.416977,0.748462,0.654295,0.28672,0.291761,0.310216,0.482207,0.374242,0.117313,...,0.43177,0.666318,0.545075,0.34095,0.321148,0.315335,0.806186,0.398975,0.241572,1
471,0.239434,0.623267,0.228457,0.129968,0.314977,0.124594,0.055459,0.118141,0.40101,0.147852,...,0.567964,0.183425,0.093983,0.21746,0.067885,0.044121,0.190619,0.165385,0.074446,0


In [46]:
train_values = df_train.values

# NOTE(review): the slice below keeps columns 0..30, and column 30 is the encoded
# diagnosis that is also used as the label further down, so the label is part of
# the network input. Confirm whether [:, :30] was intended; changing it also
# requires the same change in the test cell.
Input = train_values[:, :31]
print(Input[:32])

# Two-element target rows selected by the encoded label:
# label 0 -> [1, 0], label 1 -> [0, 1].
targets = [[1,0], [0,1]]
output = np.array([targets[int(label)] for label in train_values[:, 30]])
print(output[:32])

[[0.52103744 0.0226581  0.54598853 0.36373277 0.59375282 0.7920373
  0.70313964 0.73111332 0.68636364 0.60551811 0.35614702 0.12046941
  0.3690336  0.27381126 0.15929565 0.35139844 0.13568182 0.30062512
  0.31164518 0.18304244 0.62077552 0.14152452 0.66831017 0.45069799
  0.60113584 0.61929156 0.56861022 0.91202749 0.59846245 0.41886396
  1.        ]
 [0.64314449 0.27257355 0.61578329 0.50159067 0.28987993 0.18176799
  0.20360825 0.34875746 0.37979798 0.14132266 0.15643672 0.08258929
  0.12444047 0.12565979 0.11938675 0.08132304 0.0469697  0.25383595
  0.08453875 0.0911101  0.60690146 0.30357143 0.53981772 0.43521431
  0.34755332 0.15456336 0.19297125 0.63917526 0.23358959 0.22287813
  1.        ]
 [0.60149557 0.3902604  0.59574321 0.44941676 0.51430893 0.4310165
  0.46251172 0.63568588 0.50959596 0.21124684 0.22962158 0.09430251
  0.18037035 0.16292179 0.15083115 0.2839547  0.09676768 0.38984656
  0.20569032 0.12700551 0.55638563 0.36007463 0.50844166 0.37450845
  0.48358978 0.3853751

# Backpropagation

This neural network has three layers: an input layer, a hidden layer, and an output layer. The input layer consists of thirty-one nodes, one for each column of `Input` (the thirty normalized features plus the encoded diagnosis column, which the `[:, :31]` slice also includes). The hidden layer also has thirty-one nodes. The output layer consists of 2 nodes, one for each class, malignant or benign. Because `targets = [[1,0], [0,1]]` is indexed by the encoded label, a value of [1,0] corresponds to a benign tumor (label 0), whereas a value of [0,1] corresponds to a malignant tumor (label 1).

In [37]:
hiddenNodes = 31
inputNodes = Input.shape[1]  # one input node per column of Input

# Input-to-hidden weights, drawn uniformly from [-1, 1).
weight1 = np.random.random((inputNodes, hiddenNodes)) * 2 - 1
print(weight1)

[[ 0.11834756  0.84459701 -0.01527719  0.74766436  0.66796329 -0.57232931
   0.54245093 -0.97565769 -0.35434092 -0.54086511  0.01372592  0.47370632
  -0.80464727  0.0298444   0.87682404 -0.5427069   0.35428229  0.18576054
  -0.97987261 -0.04834761  0.41754078 -0.91204914  0.75904297  0.04016283
  -0.9386779  -0.55117278  0.90735139  0.16463947 -0.78505486 -0.424911
  -0.08659275]
 [-0.95809986 -0.17676897 -0.02108273 -0.51264425  0.177278    0.50648024
  -0.52833155  0.2409998   0.27924449  0.8970806   0.55655233  0.69669054
  -0.01916018 -0.62930283  0.99163059 -0.74128848 -0.05708536 -0.8638138
   0.88770171  0.92984988  0.43877812 -0.30001431 -0.4912352  -0.46939335
  -0.74541195  0.05161791 -0.71636545 -0.36653867  0.25341295  0.45508722
  -0.95145459]
 [-0.13976803  0.30424919  0.70649195 -0.04935044  0.93841174 -0.4687349
  -0.97298259 -0.03249427 -0.48777241  0.64743534 -0.53445466 -0.37874156
   0.58245486  0.4302865   0.11610247  0.40989612 -0.16272627 -0.9893799
  -0.97728974

In [38]:
outputNodes = output.shape[1]  # one output node per entry of a target row

# Hidden-to-output weights, drawn uniformly from [-1, 1).
weight2 = np.random.random((hiddenNodes, outputNodes)) * 2 - 1
print(weight2)

[[ 0.2181878  -0.62976722]
 [-0.98759317 -0.9814311 ]
 [ 0.06418482  0.88555882]
 [ 0.28859725  0.42859969]
 [-0.01226903  0.16377789]
 [-0.74726495  0.75364124]
 [ 0.52158526  0.99639791]
 [-0.4045541  -0.54596446]
 [-0.74967668  0.92841951]
 [ 0.56177037 -0.66735077]
 [ 0.10537294 -0.17246358]
 [-0.69702798 -0.67585403]
 [ 0.92693999 -0.39007163]
 [ 0.88287858 -0.84877865]
 [-0.07839392 -0.7407619 ]
 [-0.99042523  0.10753215]
 [-0.77221179  0.44404906]
 [ 0.39623275 -0.64733419]
 [ 0.88348429  0.44208682]
 [-0.40405947  0.41846753]
 [ 0.46386056 -0.31554734]
 [-0.24882288 -0.28178698]
 [ 0.23323689  0.80082029]
 [-0.65361353  0.75039922]
 [-0.94469369  0.32067719]
 [-0.17112225  0.5825631 ]
 [ 0.44239623 -0.03978439]
 [ 0.28772807  0.00354626]
 [ 0.62303694 -0.04783203]
 [ 0.04631198 -0.49895883]
 [ 0.21008603 -0.39419038]]


# Activation Function
This neural network will utilize a sigmoid activation function

In [39]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise to x."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator
def sigmoidDerivative(x):
    """Return x * (1 - x).

    This equals the sigmoid's derivative when x is already a sigmoid output;
    the training loop calls it with the layer activations, not the raw sums.
    """
    complement = 1.0 - x
    return complement * x
rate = 0.01  # step size applied to every weight update

for _ in range(10000):
    # Forward pass: input -> hidden activations -> output activations.
    sig1 = sigmoid(Input.dot(weight1))
    sig2 = sigmoid(sig1.dot(weight2))

    # Difference between the target rows and the current network output.
    outputError = output - sig2
    totalErr = np.abs(outputError).mean()

    # Backward pass: the hidden-layer delta is computed with weight2 as it was
    # during the forward pass, so both deltas are formed before any update.
    deltaSig2 = outputError * sigmoidDerivative(sig2)
    deltaSig1 = deltaSig2.dot(weight2.T) * sigmoidDerivative(sig1)

    weight2 += rate * sig1.T.dot(deltaSig2)
    weight1 += rate * Input.T.dot(deltaSig1)

# totalErr is the mean absolute difference from the final iteration's forward
# pass, measured on the training data before that iteration's weight update.
print('Error Rate: ', totalErr)

Error Rate:  0.005896418400051682


# Measuring the accuracy of the model

In [49]:
# Rebuild the input and target arrays from the held-out test rows, using the same
# column layout as the training cell so the trained weights can be applied.
# NOTE(review): as in the training cell, the [:, :31] slice includes column 30,
# the encoded diagnosis, among the inputs. Confirm whether [:, :30] was intended;
# changing it requires the same change where the training Input is built.
Input = df_test.values[:,:31]
# Index column 30 directly so each x is a scalar. The previous [:, 30:31] slice
# produced one-element arrays, and int() on those is deprecated in recent NumPy.
output = np.array([targets[int(x)] for x in df_test.values[:, 30]])

# Forward pass only: no weight updates are made on the test data.
sig1 = sigmoid(np.dot(Input, weight1))
sig2 = sigmoid(np.dot(sig1, weight2))

In [50]:
# The predicted / actual class is the index of the larger entry in each two-element row.
PredictedY = sig2.argmax(axis=1)
ActualY = output.argmax(axis=1)
residual = PredictedY == ActualY  # True where the prediction matches the label

# Accuracy is the fraction of test rows that were classified correctly.
counter = int(np.count_nonzero(residual))
accuracy = counter / len(residual)
print("Accuracy: ", accuracy)

Accuracy:  0.6756756756756757


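Our model predicts whether tumors are malignant or benign with about 67.6 percent accuracy on the test set, while its mean absolute error on the training set is about .006. The gap between the very low training error and the much lower test accuracy suggests that the model does not generalize well to unseen data.
Given the simplicity of the neural network we built, an accuracy of about 68 percent is unsurprising given the complexity behind identifying the malignancy of a tumor. That being said, an accuracy of 67.6 percent does indicate our model does better than just blindly guessing, although only modestly: roughly 63 percent of the samples are benign, so always predicting "benign" would already be right about 63 percent of the time. To improve the model, we could add more hidden layers, while also using a different activation function on the network.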