# Predict whether a mammogram is benign or malignant
   1. BI-RADS assessment: 1 to 5 (ordinal)  
   2. Age: patient's age in years (integer)
   3. Shape: mass shape: round=1 oval=2 lobular=3 irregular=4 (nominal)
   4. Margin: mass margin: circumscribed=1 microlobulated=2 obscured=3 ill-defined=4 spiculated=5 (nominal)
   5. Density: mass density high=1 iso=2 low=3 fat-containing=4 (ordinal)
   6. Severity: benign=0 or malignant=1 (binominal)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Load the file with pandas' default parsing and preview the first rows.
df = pd.read_csv(filepath_or_buffer="mammographic_masses.data.txt")

df.head(n=5)

In [None]:
# Column names for the six fields of the data file, in file order.
header = ["BI_RADS", "age", "shape", "margin", "density", "severity"]

# Re-read the file with explicit column names; '?' is parsed as a missing value.
df = pd.read_csv(
    "mammographic_masses.data.txt",
    names=header,
    usecols=range(6),
    na_values="?",
)

# Separate the target column from the predictors.
y = df["severity"]
x = df.drop(columns="severity")

# NOTE: this split is taken from the frame as loaded, before the missing-value
# handling and scaling done further down, so it can still contain NaN.
# A fixed seed keeps the partition reproducible, like the later scaled split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

#num_classes =2
#convert class vectors to binary class matrices
#y_train = tf.keras.utils.to_categorical(y_train, num_classes)
#y_test = tf.keras.utils.to_categorical(y_test, num_classes)
#x_train.shape
#x_test.shape
#y_train.shape
#y_test.shape
#print(x_train.shape[0],"train samples")
#print(x_test.shape[0],"test samples")

In [None]:
# Summary statistics per numeric column, to spot out-of-range values and gaps.
df.describe()

In [None]:
# Show the rows whose BI-RADS assessment is above 5, the top of the 1-5 scale.
# The boolean mask is passed to .loc directly; indexing with the selected
# *values* would make pandas look them up as row labels instead.
df.loc[df['BI_RADS'] > 5]

In [None]:
# BI-RADS is described as a 1-5 scale, so treat anything above 5 as missing.
# A single .loc assignment writes to df itself; chained indexing
# (df[col][mask] = ...) may write to a temporary copy instead.
df.loc[df['BI_RADS'] > 5, 'BI_RADS'] = np.nan

# Show every row that is missing at least one of the five predictors.
predictor_columns = ['density', 'age', 'shape', 'margin', 'BI_RADS']
df.loc[df[predictor_columns].isnull().any(axis=1)]


In [None]:
# Fill missing ages with the mean age (pandas skips NaN when averaging).
import numpy as np

age_mean = df['age'].mean()
age_nan = df['age'].isna()
# Assign through .loc so the fill is written to df itself rather than to a
# temporary produced by chained indexing (df['age'][mask] = ...).
df.loc[age_nan, 'age'] = age_mean

In [None]:
# Column dtypes and non-null counts, to see which columns still have gaps.
df.info()

In [None]:
# Drop, in place, every row that still has a missing value in any column.
df.dropna(inplace = True)

In [None]:
# Re-check the summary statistics after the rows with missing values were dropped.
df.describe()

We can easily inspect how the columns correlate with each other using a heatmap. To do that, we import matplotlib.pyplot and seaborn.
Enthusiasts, you can read about both at the following links, respectively:
https://matplotlib.org/api/pyplot_api.html
https://seaborn.pydata.org/generated/seaborn.heatmap.html

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the pairwise column correlations.
fig, ax = plt.subplots(figsize=(10, 10))
correlations = df.corr()
sns.heatmap(correlations, ax=ax, annot=True, fmt='.2f', linewidths=0.5)
plt.show()

In [None]:
# Target vector and predictor matrix as plain NumPy arrays.
feature_columns = ['BI_RADS', 'age', 'shape', 'margin', 'density']

labels = df.loc[:, 'severity'].values
features = df.loc[:, feature_columns].values

features

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the predictor columns: learn the per-column statistics,
# then apply them to the same matrix.
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)
scaled_features

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the scaled samples for testing; the seed fixes the partition.
(features_train, features_test,
 labels_train, labels_test) = train_test_split(
    scaled_features,
    labels,
    test_size=0.25,
    random_state=1,
)


Wow! As simple as it sounds, it did not perform badly at all. So now let's try this last model, which is quite an interesting one.
# Neural Networks 
Here we are going to use Keras and sklearn, which will in turn use a TensorFlow backend. You can find more information about neural networks and Keras in the official Keras documentation: https://keras.io

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop
import tensorflow.keras.backend as K
import numpy as np
#from keras.layers import Dense, Dropout
#from keras.optimizers import RMSprop
#from keras.wrappers.scikit_learn import KerasClassifier
#from sklearn.model_selection import cross_val_score

In [None]:
def model_1(x_input):
    """Build the hidden stack of the first network on top of ``x_input``.

    The stack is two 6-unit ReLU layers with normally-initialised kernels,
    a dropout layer, a 256-unit ReLU layer, and a final dropout layer.

    Args:
        x_input: Keras tensor the stack is built on.

    Returns:
        The output tensor of the final dropout layer.
    """
    x = Dense(6, kernel_initializer='normal', activation='relu')(x_input)
    x = Dense(6, kernel_initializer='normal', activation='relu')(x)
    x = Dropout(0.5)(x)
    x_out = Dense(256, activation='relu')(x)
    # Apply dropout to the 256-unit layer's output; feeding `x` here instead
    # would leave that layer disconnected from the returned tensor.
    x_out = Dropout(0.5)(x_out)
    return x_out


In [None]:
def model_2(x_input):
    """Build the hidden stack of the second network on top of ``x_input``.

    Two rounds of a 4-unit sigmoid layer followed by dropout, then a
    12-unit layer with no activation argument.

    Args:
        x_input: Keras tensor the stack is built on.

    Returns:
        The output tensor of the final 12-unit layer.
    """
    hidden = x_input
    for _ in range(2):
        hidden = Dense(4, activation='sigmoid')(hidden)
        hidden = Dropout(0.5)(hidden)
    return Dense(12)(hidden)

In [None]:
# Assemble the network: five input features -> model_1 hidden stack ->
# a single bias-free linear unit named 'svm' that produces the score.
inputs = Input(shape=(5,))
hidden = model_1(inputs)
svm_head = Dense(1, use_bias=False, activation='linear', name='svm')
model = Model(inputs=inputs, outputs=svm_head(hidden))

In [None]:
def svm_loss(layer, reg_weight=0.5, hinge_weight=0.4):
    """Build an SVM-style loss tied to the first weight of ``layer``.

    The returned loss is
    ``reg_weight * sum(w ** 2) + hinge_weight * hinge``, where ``w`` is
    ``layer.weights[0]`` and ``hinge`` is the categorical hinge term
    ``max(0, neg - pos + 1)`` averaged over its last axis.

    Args:
        layer: Layer whose first weight is L2-regularised by the loss.
        reg_weight: Multiplier on the squared-weight term.
        hinge_weight: Multiplier on the hinge term.

    Returns:
        A ``loss(y_true, y_pred)`` callable suitable for ``model.compile``.
    """
    # Keep a reference to the variable itself and read it inside the loss.
    # Converting it to a tensor here would, under eager execution, freeze the
    # value at its initial state and cut the regulariser out of the gradient.
    weights = layer.weights[0]

    def categorical_hinge_loss(y_true, y_pred):
        pos = K.sum(y_true * y_pred, axis=-1)
        neg = K.max((1.0 - y_true) * y_pred, axis=-1)
        hinge_loss = K.mean(K.maximum(0.0, neg - pos + 1), axis=-1)
        regularization_loss = reg_weight * tf.reduce_sum(tf.square(weights))
        return regularization_loss + hinge_weight * hinge_loss

    return categorical_hinge_loss

In [None]:
metrics = ['accuracy']
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=2e-3, decay=1e-5)
#optimizer = tf.train.AdamOptimizer(1.e-3)

# The loss regularises the weights of the final 'svm' layer, so it is built
# from that layer.
model.compile(optimizer=optimizer, loss=svm_loss(model.get_layer('svm')), metrics=metrics)


In [None]:
batch_size = 64
epochs = 100

# Train and evaluate on the cleaned, standardised arrays. The earlier
# x_train/x_test split was taken from the raw frame, before missing values
# were handled and the features were scaled.
history = model.fit(features_train, labels_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(features_test, labels_test))
score = model.evaluate(features_test, labels_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])