# Breast Cancer Campaign

# 1 Reading the Dataset
Load the previously pre-processed Breast Cancer dataset saved in the CSV file “data_refined.csv” into a Pandas DataFrame.

In [1]:
import  pandas as pd
df = pd.read_csv("data_refined.csv")
df.shape

(569, 31)

# 2 -  Splitting the Data
Split your data as follows:
- 80% training set
- 10% validation set
- 10% test set

In [2]:
from sklearn.model_selection import train_test_split

y = df['diagnosis']
X = df.drop('diagnosis', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=0)

# 3  - Classification Using Artificial Neural Networks
Use the Scikit-Learn Multi-Layer Perceptron (MLP) Classifier.

Train your dataset.

Get accuracy scores and confusion matrix.

You need a minimum accuracy score of 94%.

In [3]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [4]:
mlp = MLPClassifier(max_iter=400)
mlp.fit(X_train, y_train)

y_pred = mlp.predict(X_test)

accuracy = mlp.score(X_val,y_val)
confusion = confusion_matrix(y_test, y_pred)

print("MLP Accuracy: ", accuracy)

print("MLP Confusion Matrix: \n", confusion)

MLP Accuracy:  0.9824561403508771
MLP Confusion Matrix: 
 [[30  2]
 [ 1 24]]




## Build another neural network using Keras.
Re-train your dataset.  
Get accuracy scores and confusion matrix.  
Compare results with the outputs of the classifiers in through project 2.

In [5]:
from keras.utils import to_categorical
import numpy as np

num_classes = np.max(y_train) + 1
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
num_classes

2

In [6]:
X_train.shape[1]

30

In [7]:
from keras import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

In [8]:
# compile the keras model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit the keras model on the dataset
model.fit(X_train, y_train, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f23586b9670>

In [9]:
y_val = to_categorical(y_val, num_classes)
_, accuracy_keras = model.evaluate(X_val, y_val)
print('Keras Model Accuracy:', accuracy_keras)

Keras Model Accuracy: 1.0


In [10]:
# MLP Accuracy:  0.9824561403508771 and Keras Model Accuracy: 0.9824561476707458. They are about the same

# 4 - Reading the Dataset
Load the previously pre-processed Insurance dataset.

In [17]:
df = pd.read_csv("insurance_pre.csv")
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges,clusters
0,19,0,27.9,0,1,3,16884.924,2
1,18,1,33.77,1,0,2,1725.5523,0
2,28,1,33.0,3,0,2,4449.462,0
3,33,1,22.705,0,0,1,21984.47061,2
4,32,1,28.88,0,0,1,3866.8552,0


# 5 - Splitting the Data
Split your data as follows:  
80% training set  
10% validation set  
10% test set

In [34]:
from sklearn.model_selection import train_test_split
import numpy as np
y = df['clusters']
X = df.drop('clusters', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=0)

In [36]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# 6 - Regression Using Artificial Neural Networks:
Use the Scikit-Learn Multi-Layer Perceptron (MLP) Regression.  
Fit your dataset.  
Get r2 score.  
You need a minimum r2 score of 82%.

In [39]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
mlp = MLPRegressor(hidden_layer_sizes=(100, 100), activation='relu', solver='adam', random_state=0, max_iter=20, batch_size=20)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
score = r2_score(y_test, y_pred)

print(f"Score: {score*100:.2f}%")

Score: 89.53%




## Build another neural network using Keras.
Re-train your dataset.  
Get r2 score.  
Compare results with the outputs of the regressors in through project 3.

In [40]:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=1, activation='relu'))

model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])

model.fit(X_train, y_train, batch_size = 20, epochs = 1000)

loss, accuracy = model.evaluate(X_test, y_test)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [45]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import r2_score

# Define the model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))


model.compile(loss='mean_squared_error', optimizer='adam')


model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)

# Print the r2 score
print(f"R2 Score: {r2*100:.2f}%")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
R2 Score: 88.12%
