In [111]:
import os
import pandas as pd
import numpy as np
import pickle
import time 

In [113]:
# Relative location of the breast-cancer-wisconsin CSV file.
DATASET_PATH = "../input/breast-cancer-wisconsin/breast-cancer-wisconsin.csv"

# Read the whole CSV into a DataFrame and echo it as a quick sanity check.
dataset = pd.read_csv(filepath_or_buffer=DATASET_PATH)
print(dataset)

           id  clump  unif_size  unif_shape  marg_adh  secs bare_nucl  \
0     1000025      5          1           1         1     2         1   
1     1002945      5          4           4         5     7        10   
2     1015425      3          1           1         1     2         2   
3     1016277      6          8           8         1     3         4   
4     1017023      4          1           1         3     2         1   
...       ...    ...        ...         ...       ...   ...       ...   
6286   776715      3          1           1         1     3         2   
6287   841769      2          1           1         1     2         1   
6288   888820      5         10          10         3     7         3   
6289   897471      4          8           6         4     3         4   
6290   897471      4          8           8         5     4         5   

      bl_chro  norm_nucl  motises  class  
0           3          1        1      2  
1           3          2        1    

In [114]:
# Preprocessing
# ---------------------------
# Turns the raw `dataset` frame into scaled feature / label arrays, balances
# the two classes, and holds out the last TEST_SET_SIZE samples as a test set.

SHUFFLE_SEED = 42  # fixed seed so the shuffle below is repeatable on a fresh kernel

# Re-assign instead of mutating in place; errors='ignore' keeps the cell
# re-runnable once the 'id' column has already been removed.
dataset = dataset.drop(columns=['id'], errors='ignore')
# '?' entries (presumably missing values - confirm against the data source)
# are replaced by the string '0' so the column can be cast to int below.
dataset = dataset.replace('?', '0')

# Shuffling: one seeded permutation of all rows.
dataset = dataset.sample(frac=1, random_state=SHUFFLE_SEED)

# Every column except the label is a feature. `columns=` is used because the
# positional axis argument of DataFrame.drop is no longer accepted by pandas 2.x.
X = dataset.drop(columns='class')
X = (X.astype(int)) / 10.03125 # 0.03125 scale bias

# Recode the label column: 2 -> 0 and 4 -> 1. Working on the returned frame
# (rather than replace(..., inplace=True) on a selection) avoids
# SettingWithCopy warnings. The double brackets keep y two-dimensional (n, 1).
y = dataset[['class']].replace({2: 0, 4: 1})

X = np.array(X)
y = np.array(y)

Xs = []
ys = []

# Balancing
# Streaming rule: every class-1 row is kept; a class-0 row is kept only while
# fewer class-0 rows than class-1 rows have been kept so far.
count = [0, 0]
for features, label in zip(X, y):
    if label[0] == 1:
        count[1] += 1
        Xs.append(features)
        ys.append(label)
    elif label[0] == 0 and count[0] < count[1]:
        count[0] += 1
        Xs.append(features)
        ys.append(label)

Xs = np.array(Xs)
ys = np.array(ys)

TEST_SET_SIZE = 100

# Without this guard a too-small balanced set would make the slice bound
# below negative and silently produce a wrong train/test split.
if len(Xs) <= TEST_SET_SIZE:
    raise ValueError(
        "Balanced dataset has %d samples; need more than TEST_SET_SIZE=%d"
        % (len(Xs), TEST_SET_SIZE)
    )

# Everything except the last TEST_SET_SIZE rows is used for training.
train_X = Xs[:len(Xs)-TEST_SET_SIZE]
train_y = ys[:len(ys)-TEST_SET_SIZE]

# The last TEST_SET_SIZE rows are held out for testing.
test_X = Xs[-TEST_SET_SIZE:]
test_y = ys[-TEST_SET_SIZE:]

print(str(len(ys)) + " samples with " + str(Xs[1].shape) + " features")

4338 samples with (9,) features


In [115]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Activation
from tensorflow.keras.callbacks import TensorBoard

# Build a TF1-compat session whose GPU options request at most half of the
# GPU memory for this process.
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.5)
session_config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
sess = tf.compat.v1.Session(config=session_config)

In [116]:
# Keras model definition
# Feed-forward stack with Dense widths 27 -> 81 -> 729 -> 27 -> 1
# ---------------------------

model = Sequential([
    # Input layer: Dense without an activation, followed by dropout
    Dense(27, input_shape=train_X.shape[1:]),
    Dropout(rate=0.15),

    # Hidden layer 1
    Dense(81),
    Activation('relu'),

    # Hidden layer 2
    Dense(729),
    Activation('relu'),

    # Hidden layer 3
    Dense(27),
    Activation('relu'),

    # Output layer: a single sigmoid unit for the binary label
    Dense(1),
    Activation('sigmoid'),
])

# Binary cross-entropy loss, Adam optimizer, accuracy reported during training.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [117]:
# Unique run name: architecture tag plus the current Unix timestamp.
ModelName = "27x81x729x27-Keras-" + str(int(time.time()))
print(ModelName)

# Log each run into its own sub-directory of logs/. The format string needs
# the "{}" placeholder; without it ModelName is discarded and every run
# writes into the same "logs/" directory.
tensorboard = TensorBoard(log_dir="logs/{}".format(ModelName))

# Train for 10 epochs with mini-batches of 16, holding out 25% of the
# training arrays for validation and streaming metrics to TensorBoard.
model.fit(train_X, train_y, batch_size=16, validation_split=0.25, epochs=10, callbacks=[tensorboard])

27x81x729x27-Keras-1586044092
Train on 3178 samples, validate on 1060 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7fc02803b5f8>

In [119]:
# Evaluate the trained model on the held-out test arrays.

# A sample is labelled 1 only when the sigmoid output is at least 0.75,
# which is what truncating (output + 0.25) to an integer amounts to.
POSITIVE_THRESHOLD = 0.75

# One batched predict() call instead of one call per sample; this also
# removes the hard-coded feature count from the per-sample reshape.
probabilities = model.predict(test_X)[:, 0]
predicted = (probabilities >= POSITIVE_THRESHOLD).astype(int)

# test_y has one label per row in column 0.
correct = int(np.sum(predicted == test_y[:, 0]))

print("Test Accuracy")
# Divide by the actual number of test rows rather than the configured size.
print((correct/len(test_y))*100, '%')

Test Accuracy
97.0 %
