<b> We train three ML models (Decision Tree, Logistic Regression, SVM) on two binary classification tasks. </b>

In [1]:
#import the useful libraries.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

## Normal vs Hyperchromasia

In [2]:
# Load the Normal-vs-Hyperchromasia table; later cells read its 'class' and
# 'class_label' columns plus the five feature columns.
data = pd.read_csv("New NormalVSHyperchromasia.csv")

In [3]:
# Row count per class name, to see how balanced the two classes are.
data['class'].value_counts()

Hyperchromasia    1618
Normal            1152
Name: class, dtype: int64

In [4]:
# Mean of the class_label column over the whole table, shown to two decimals.
class_ratio = sum(data['class_label']) / len(data)
print('Class Ratio:', '{:.2f}'.format(class_ratio))

Class Ratio: 0.58


### Stratified KFold

In [5]:
# Import Required Modules.
from statistics import mean, stdev
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold

# 10 shuffled, stratified folds; the fixed random_state keeps the split
# identical every time skf.split is called on the same data.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=30)

In [6]:
# Label column handed to skf.split so that folds are stratified on it.
target = data.loc[:,'class_label']

In [7]:
# Sanity check: the mean of class_label in every test fold should match the
# ratio computed on the whole table.
for fold_no, (train_index, test_index) in enumerate(skf.split(data, target), start=1):
    # skf.split yields positional row numbers, so rows are picked with .iloc
    # rather than the label-based .loc. Only the test part is needed here.
    test = data.iloc[test_index]
    class_ratio = test['class_label'].mean()
    print('Fold', str(fold_no), 'Class Ratio:','{:.2f}'.format(class_ratio))

Fold 1 Class Ratio: 0.58
Fold 2 Class Ratio: 0.58
Fold 3 Class Ratio: 0.58
Fold 4 Class Ratio: 0.58
Fold 5 Class Ratio: 0.58
Fold 6 Class Ratio: 0.58
Fold 7 Class Ratio: 0.58
Fold 8 Class Ratio: 0.58
Fold 9 Class Ratio: 0.58
Fold 10 Class Ratio: 0.58


### Decision Tree

<b>Balanced Accuracy</b>

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import balanced_accuracy_score

# create model
# A fixed random_state makes the fitted tree, and therefore the per-fold
# scores, repeatable across runs; 30 matches the seed already used for skf.
model = DecisionTreeClassifier(random_state=30)

In [9]:
def train_modelDT(train, test, fold_no):
    """Fit the notebook-level ``model`` on one fold and record its balanced accuracy.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and held-out partitions of one fold. Each must contain the
        five feature columns listed below and the ``class_label`` column.
    fold_no : int
        Fold number, used only in the printed progress line.

    Returns
    -------
    float
        Balanced accuracy of ``model`` on ``test``. The same value is printed
        and appended to the notebook-level ``accu_stratified`` list.
    """
    feature_cols = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
    X_train, X_test = train[feature_cols], test[feature_cols]
    # Hand the estimator a 1-D target array, as train_model and train_model_SC
    # do, instead of a single-column DataFrame.
    y_train = train['class_label'].values.ravel()
    y_test = test['class_label']

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    balanced_accuracy = balanced_accuracy_score(y_test, predictions)
    print('Fold',str(fold_no),'Balanced Accuracy:',
          '{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)
    return balanced_accuracy

In [10]:
# Cross-validate the decision tree: train_modelDT prints each fold's score and
# appends it to accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data.iloc[train_index]
    test = data.iloc[test_index]
    train_modelDT(train, test, fold_no)

Fold 1 Balanced Accuracy: 0.9289
Fold 2 Balanced Accuracy: 0.9450
Fold 3 Balanced Accuracy: 0.9454
Fold 4 Balanced Accuracy: 0.9510
Fold 5 Balanced Accuracy: 0.9541
Fold 6 Balanced Accuracy: 0.9610
Fold 7 Balanced Accuracy: 0.9690
Fold 8 Balanced Accuracy: 0.9367
Fold 9 Balanced Accuracy: 0.9585
Fold 10 Balanced Accuracy: 0.9808


In [11]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 98.08%

The Minimum Accuracy: 92.89%

The Overall Accuracy: 95.31%

The Standard Deviation is: 1.5244%


### Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression

# Rebind the notebook-level `model` (read by the train_* helpers) to a
# logistic regression using the lbfgs solver with max_iter=500.
model = LogisticRegression(solver='lbfgs', max_iter=500)

In [13]:
def train_model(train, test, fold_no):
    """Fit the notebook-level ``model`` on one fold and score it on the held-out part.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and held-out partitions of one fold. Each must contain the
        five feature columns listed below and the ``class_label`` column.
    fold_no : int
        Not used by this function; the caller prints the fold number itself.

    Returns
    -------
    float
        Balanced accuracy of ``model`` on ``test``. The value is also stored
        in the global ``balanced_accuracy``, which the evaluation loop reads
        in order to print and collect it.
    """
    # The evaluation cell reads the score from this global, so it is still
    # set even though the score is now returned as well.
    global balanced_accuracy

    feature_cols = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
    target_col = 'class_label'

    model.fit(train[feature_cols], train[target_col].values.ravel())
    predictions = model.predict(test[feature_cols])
    balanced_accuracy = balanced_accuracy_score(test[target_col], predictions)
    return balanced_accuracy

In [14]:
# Cross-validate the current `model` with train_model, collecting one
# balanced-accuracy score per fold in accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data.iloc[train_index]
    test = data.iloc[test_index]
    # train_model leaves the fold's score in the global `balanced_accuracy`.
    train_model(train, test, fold_no)
    print('Fold',str(fold_no),'Balanced Accuracy:','{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)

Fold 1 Balanced Accuracy: 0.9296
Fold 2 Balanced Accuracy: 0.8822
Fold 3 Balanced Accuracy: 0.8400
Fold 4 Balanced Accuracy: 0.9076
Fold 5 Balanced Accuracy: 0.9194
Fold 6 Balanced Accuracy: 0.9560
Fold 7 Balanced Accuracy: 0.9386
Fold 8 Balanced Accuracy: 0.8977
Fold 9 Balanced Accuracy: 0.9275
Fold 10 Balanced Accuracy: 0.9337


In [15]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 95.60%

The Minimum Accuracy: 84.00%

The Overall Accuracy: 91.32%

The Standard Deviation is: 3.3326%


### SVM with Standard Scaler

In [16]:
## Standard Scaler setup

from sklearn.preprocessing import StandardScaler

# Work on a copy so the frame loaded earlier is left untouched.
data_ready = data.copy()

# NOTE(review): the whole-dataset scaling below is left disabled, so data_ready
# is an unscaled copy of data. train_model_SC fits a StandardScaler on each
# training fold instead.
#scaler = StandardScaler()
#feature_names = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
#data_ready[feature_names] = scaler.fit_transform(data[feature_names])

# Preview the first rows.
data_ready.head()

Unnamed: 0,area,perimeter,eccentricity,mean_intensity,roundness_circularity,class,class_label
0,4923,278.208153,0.836405,130.994516,0.799282,Normal,0
1,4810,290.764502,0.641405,137.652807,0.714944,Normal,0
2,4276,244.793939,0.598323,140.647568,0.896698,Normal,0
3,7207,323.220346,0.573305,181.693492,0.866896,Normal,0
4,6457,371.019336,0.410081,137.248877,0.589451,Normal,0


In [17]:
from sklearn.svm import SVC

# create model
# SVC with scikit-learn's default settings; rebinds the notebook-level `model`
# that the train_* helpers read.
model = SVC()

In [18]:
def train_model_SC(train, test, fold_no):
    """Standardise one fold, fit the notebook-level ``model`` and score it.

    The scaler is fitted on the training partition only and then applied to
    both partitions, so the held-out rows do not influence the scaling.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and held-out partitions of one fold. Each must contain the
        five feature columns listed below and the ``class_label`` column.
    fold_no : int
        Not used by this function; the caller prints the fold number itself.

    Returns
    -------
    float
        Balanced accuracy of ``model`` on the scaled ``test`` features. The
        value is also stored in the global ``balanced_accuracy``, which the
        evaluation loops read in order to print and collect it.
    """
    # The evaluation cells read the score from this global, so it is still
    # set even though the score is now returned as well.
    global balanced_accuracy

    feature_cols = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
    target_col = 'class_label'
    X_train, X_test = train[feature_cols], test[feature_cols]

    scaler = StandardScaler().fit(X_train)
    train_sc = scaler.transform(X_train)
    test_sc = scaler.transform(X_test)

    model.fit(train_sc, train[target_col].values.ravel())
    predictions = model.predict(test_sc)
    balanced_accuracy = balanced_accuracy_score(test[target_col], predictions)
    return balanced_accuracy

In [19]:
# Cross-validate the current `model` with train_model_SC (per-fold scaling),
# collecting one balanced-accuracy score per fold in accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data_ready, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data_ready.iloc[train_index]
    test = data_ready.iloc[test_index]
    # train_model_SC leaves the fold's score in the global `balanced_accuracy`.
    train_model_SC(train, test, fold_no)
    print('Fold',str(fold_no),'Balanced Accuracy:','{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)

Fold 1 Balanced Accuracy: 0.9433
Fold 2 Balanced Accuracy: 0.9581
Fold 3 Balanced Accuracy: 0.9460
Fold 4 Balanced Accuracy: 0.9621
Fold 5 Balanced Accuracy: 0.9534
Fold 6 Balanced Accuracy: 0.9851
Fold 7 Balanced Accuracy: 0.9870
Fold 8 Balanced Accuracy: 0.9565
Fold 9 Balanced Accuracy: 0.9739
Fold 10 Balanced Accuracy: 0.9826


In [20]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 98.70%

The Minimum Accuracy: 94.33%

The Overall Accuracy: 96.48%

The Standard Deviation is: 1.6231%


### Logistic Regression with Standard Scaler

In [21]:
from sklearn.linear_model import LogisticRegression

# Rebind `model` to logistic regression (lbfgs, max_iter=500); the next cell
# runs it through train_model_SC, which standardises the features per fold.
model = LogisticRegression(solver='lbfgs', max_iter=500)

In [22]:
# Cross-validate the current `model` with train_model_SC (per-fold scaling),
# collecting one balanced-accuracy score per fold in accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data_ready, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data_ready.iloc[train_index]
    test = data_ready.iloc[test_index]
    # train_model_SC leaves the fold's score in the global `balanced_accuracy`.
    train_model_SC(train, test, fold_no)
    print('Fold',str(fold_no),'Balanced Accuracy:','{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)

Fold 1 Balanced Accuracy: 0.9390
Fold 2 Balanced Accuracy: 0.9100
Fold 3 Balanced Accuracy: 0.9038
Fold 4 Balanced Accuracy: 0.9206
Fold 5 Balanced Accuracy: 0.9268
Fold 6 Balanced Accuracy: 0.9585
Fold 7 Balanced Accuracy: 0.9355
Fold 8 Balanced Accuracy: 0.9280
Fold 9 Balanced Accuracy: 0.9424
Fold 10 Balanced Accuracy: 0.9424


In [23]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 95.85%

The Minimum Accuracy: 90.38%

The Overall Accuracy: 93.07%

The Standard Deviation is: 1.6370%


## Normal vs Abnormal Shape

In [24]:
# Load the Normal-vs-Abnormal-Shape table. This rebinds `data`, so every cell
# from here on works on the second dataset.
data = pd.read_csv("New NormalVSAbnormalShape.csv")

In [25]:
# Row count per class name, to see how balanced the two classes are.
data['class'].value_counts()

Abnormal_Shape    2762
Normal            1152
Name: class, dtype: int64

In [26]:
# Mean of the class_label column over the whole table, shown to two decimals.
class_ratio = sum(data['class_label']) / len(data)
print('Class Ratio:', '{:.2f}'.format(class_ratio))

Class Ratio: 0.71


### Stratified KFold

In [27]:
# Import Required Modules.
from statistics import mean, stdev
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold

# 10 shuffled, stratified folds with a fixed random_state, i.e. the same
# splitter configuration as used for the first dataset.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=30)

In [28]:
# Label column of the newly loaded table, handed to skf.split for stratification.
target = data.loc[:,'class_label']

In [29]:
# Sanity check: the mean of class_label in every test fold should match the
# ratio computed on the whole table.
for fold_no, (train_index, test_index) in enumerate(skf.split(data, target), start=1):
    # skf.split yields positional row numbers, so rows are picked with .iloc
    # rather than the label-based .loc. Only the test part is needed here.
    test = data.iloc[test_index]
    class_ratio = test['class_label'].mean()
    print('Fold', str(fold_no), 'Class Ratio:','{:.2f}'.format(class_ratio))

Fold 1 Class Ratio: 0.70
Fold 2 Class Ratio: 0.70
Fold 3 Class Ratio: 0.71
Fold 4 Class Ratio: 0.71
Fold 5 Class Ratio: 0.71
Fold 6 Class Ratio: 0.71
Fold 7 Class Ratio: 0.71
Fold 8 Class Ratio: 0.71
Fold 9 Class Ratio: 0.71
Fold 10 Class Ratio: 0.71


### Decision Tree

<b>Balanced Accuracy</b>

In [30]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import balanced_accuracy_score

# create model
# A fixed random_state makes the fitted tree, and therefore the per-fold
# scores, repeatable across runs; 30 matches the seed already used for skf.
model = DecisionTreeClassifier(random_state=30)

In [31]:
def train_modelDT(train, test, fold_no):
    """Fit the notebook-level ``model`` on one fold and record its balanced accuracy.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and held-out partitions of one fold. Each must contain the
        five feature columns listed below and the ``class_label`` column.
    fold_no : int
        Fold number, used only in the printed progress line.

    Returns
    -------
    float
        Balanced accuracy of ``model`` on ``test``. The same value is printed
        and appended to the notebook-level ``accu_stratified`` list.
    """
    feature_cols = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
    X_train, X_test = train[feature_cols], test[feature_cols]
    # Hand the estimator a 1-D target array, as train_model and train_model_SC
    # do, instead of a single-column DataFrame.
    y_train = train['class_label'].values.ravel()
    y_test = test['class_label']

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    balanced_accuracy = balanced_accuracy_score(y_test, predictions)
    print('Fold',str(fold_no),'Balanced Accuracy:',
          '{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)
    return balanced_accuracy

In [32]:
# Cross-validate the decision tree: train_modelDT prints each fold's score and
# appends it to accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data.iloc[train_index]
    test = data.iloc[test_index]
    train_modelDT(train, test, fold_no)

Fold 1 Balanced Accuracy: 0.9834
Fold 2 Balanced Accuracy: 0.9583
Fold 3 Balanced Accuracy: 0.9562
Fold 4 Balanced Accuracy: 0.9642
Fold 5 Balanced Accuracy: 0.9514
Fold 6 Balanced Accuracy: 0.9743
Fold 7 Balanced Accuracy: 0.9833
Fold 8 Balanced Accuracy: 0.9594
Fold 9 Balanced Accuracy: 0.9754
Fold 10 Balanced Accuracy: 0.9736


In [33]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 98.34%

The Minimum Accuracy: 95.14%

The Overall Accuracy: 96.79%

The Standard Deviation is: 1.1520%


### Logistic Regression

In [34]:
from sklearn.linear_model import LogisticRegression

# Rebind the notebook-level `model` (read by the train_* helpers) to a
# logistic regression using the lbfgs solver with max_iter=500.
model = LogisticRegression(solver='lbfgs', max_iter=500)

In [35]:
def train_model(train, test, fold_no):
    """Fit the notebook-level ``model`` on one fold and record its balanced accuracy.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and held-out partitions of one fold. Each must contain the
        five feature columns listed below and the ``class_label`` column.
    fold_no : int
        Fold number, used only in the printed progress line.

    Returns
    -------
    float
        Balanced accuracy of ``model`` on ``test``. The same value is printed
        and appended to the notebook-level ``accu_stratified`` list.
    """
    feature_cols = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
    target_col = 'class_label'

    model.fit(train[feature_cols], train[target_col].values.ravel())
    predictions = model.predict(test[feature_cols])
    balanced_accuracy = balanced_accuracy_score(test[target_col], predictions)
    print('Fold',str(fold_no),'Balanced Accuracy:',
          '{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)
    return balanced_accuracy

In [36]:
# Cross-validate the current `model`: train_model prints each fold's score and
# appends it to accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data.iloc[train_index]
    test = data.iloc[test_index]
    train_model(train, test, fold_no)

Fold 1 Balanced Accuracy: 0.9447
Fold 2 Balanced Accuracy: 0.8997
Fold 3 Balanced Accuracy: 0.9025
Fold 4 Balanced Accuracy: 0.9015
Fold 5 Balanced Accuracy: 0.9257
Fold 6 Balanced Accuracy: 0.9413
Fold 7 Balanced Accuracy: 0.9388
Fold 8 Balanced Accuracy: 0.9370
Fold 9 Balanced Accuracy: 0.9413
Fold 10 Balanced Accuracy: 0.9525


In [37]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 95.25%

The Minimum Accuracy: 89.97%

The Overall Accuracy: 92.85%

The Standard Deviation is: 1.9952%


### SVM with Standard Scaler

In [38]:
## Standard Scaler setup

from sklearn.preprocessing import StandardScaler

# Work on a copy so the frame loaded earlier is left untouched.
data_ready = data.copy()

# NOTE(review): the whole-dataset scaling below is left disabled, so data_ready
# is an unscaled copy of data. train_model_SC fits a StandardScaler on each
# training fold instead.
#scaler = StandardScaler()
#feature_names = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
#data_ready[feature_names] = scaler.fit_transform(data[feature_names])

# Preview the first rows.
data_ready.head()

Unnamed: 0,area,perimeter,eccentricity,mean_intensity,roundness_circularity,class,class_label
0,4923,278.208153,0.836405,130.994516,0.799282,Normal,0
1,4810,290.764502,0.641405,137.652807,0.714944,Normal,0
2,4276,244.793939,0.598323,140.647568,0.896698,Normal,0
3,7207,323.220346,0.573305,181.693492,0.866896,Normal,0
4,6457,371.019336,0.410081,137.248877,0.589451,Normal,0


In [39]:
from sklearn.svm import SVC

# create model
# SVC with scikit-learn's default settings; rebinds the notebook-level `model`
# that the train_* helpers read.
model = SVC()

In [40]:
def train_model_SC(train, test, fold_no):
    """Standardise one fold, fit the notebook-level ``model`` and score it.

    The scaler is fitted on the training partition only and then applied to
    both partitions, so the held-out rows do not influence the scaling.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and held-out partitions of one fold. Each must contain the
        five feature columns listed below and the ``class_label`` column.
    fold_no : int
        Not used by this function; the caller prints the fold number itself.

    Returns
    -------
    float
        Balanced accuracy of ``model`` on the scaled ``test`` features. The
        value is also stored in the global ``balanced_accuracy``, which the
        evaluation loops read in order to print and collect it.
    """
    # The evaluation cells read the score from this global, so it is still
    # set even though the score is now returned as well.
    global balanced_accuracy

    feature_cols = ['area','perimeter','eccentricity','mean_intensity','roundness_circularity']
    target_col = 'class_label'
    X_train, X_test = train[feature_cols], test[feature_cols]

    scaler = StandardScaler().fit(X_train)
    train_sc = scaler.transform(X_train)
    test_sc = scaler.transform(X_test)

    model.fit(train_sc, train[target_col].values.ravel())
    predictions = model.predict(test_sc)
    balanced_accuracy = balanced_accuracy_score(test[target_col], predictions)
    return balanced_accuracy

In [41]:
# Cross-validate the current `model` with train_model_SC (per-fold scaling),
# collecting one balanced-accuracy score per fold in accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data_ready, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data_ready.iloc[train_index]
    test = data_ready.iloc[test_index]
    # train_model_SC leaves the fold's score in the global `balanced_accuracy`.
    train_model_SC(train, test, fold_no)
    print('Fold',str(fold_no),'Balanced Accuracy:','{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)

Fold 1 Balanced Accuracy: 0.9655
Fold 2 Balanced Accuracy: 0.9551
Fold 3 Balanced Accuracy: 0.9522
Fold 4 Balanced Accuracy: 0.9703
Fold 5 Balanced Accuracy: 0.9721
Fold 6 Balanced Accuracy: 0.9877
Fold 7 Balanced Accuracy: 0.9851
Fold 8 Balanced Accuracy: 0.9634
Fold 9 Balanced Accuracy: 0.9746
Fold 10 Balanced Accuracy: 0.9746


In [42]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 98.77%

The Minimum Accuracy: 95.22%

The Overall Accuracy: 97.01%

The Standard Deviation is: 1.1515%


### Logistic Regression with Standard Scaler

In [43]:
from sklearn.linear_model import LogisticRegression

# Rebind `model` to logistic regression (lbfgs, max_iter=500); the next cell
# runs it through train_model_SC, which standardises the features per fold.
model = LogisticRegression(solver='lbfgs', max_iter=500)

In [44]:
# Cross-validate the current `model` with train_model_SC (per-fold scaling),
# collecting one balanced-accuracy score per fold in accu_stratified.
accu_stratified = []

for fold_no, (train_index, test_index) in enumerate(skf.split(data_ready, target), start=1):
    # skf.split yields positional row numbers, so select with .iloc rather
    # than the label-based .loc.
    train = data_ready.iloc[train_index]
    test = data_ready.iloc[test_index]
    # train_model_SC leaves the fold's score in the global `balanced_accuracy`.
    train_model_SC(train, test, fold_no)
    print('Fold',str(fold_no),'Balanced Accuracy:','{:.4f}'.format(balanced_accuracy))
    accu_stratified.append(balanced_accuracy)

Fold 1 Balanced Accuracy: 0.9447
Fold 2 Balanced Accuracy: 0.9109
Fold 3 Balanced Accuracy: 0.9138
Fold 4 Balanced Accuracy: 0.9301
Fold 5 Balanced Accuracy: 0.9337
Fold 6 Balanced Accuracy: 0.9703
Fold 7 Balanced Accuracy: 0.9388
Fold 8 Balanced Accuracy: 0.9388
Fold 9 Balanced Accuracy: 0.9500
Fold 10 Balanced Accuracy: 0.9457


In [45]:
# Best, worst, mean and sample standard deviation of the per-fold balanced
# accuracies in accu_stratified, reported as percentages.
print('The Maximum Accuracy That can be obtained from this model is:',
      '{:.2f}%'.format(100 * max(accu_stratified)))
print('\nThe Minimum Accuracy:',
      '{:.2f}%'.format(100 * min(accu_stratified)))
print('\nThe Overall Accuracy:',
      '{:.2f}%'.format(100 * mean(accu_stratified)))
print('\nThe Standard Deviation is:',
      '{:.4f}%'.format(100 * stdev(accu_stratified)))

The Maximum Accuracy That can be obtained from this model is: 97.03%

The Minimum Accuracy: 91.09%

The Overall Accuracy: 93.77%

The Standard Deviation is: 1.7300%
