In [1]:
!pip install --upgrade numpy
!pip install --upgrade scipy

!pip install --upgrade scikit-learn

!pip install --upgrade tensorflow

!pip install --upgrade opencv-python
!pip install --upgrade pandas
!pip install --upgrade tqdm

!pip install --upgrade keras

Collecting numpy
  Using cached numpy-2.2.3-cp311-cp311-macosx_14_0_arm64.whl.metadata (62 kB)
Using cached numpy-2.2.3-cp311-cp311-macosx_14_0_arm64.whl (5.4 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tables 3.8.0 requires blosc2~=2.0.0, which is not installed.
tables 3.8.0 requires cython>=0.29.21, which is not installed.
gensim 4.3.0 requires FuzzyTM>=0.4.0, which is not installed.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 2.2.3 which is incompatible.
numba 0.57.0 requires numpy<1.25,>=1.21, but you have numpy 2.2.3 which is incompatible.
tensorflow-macos 2.13.0 requires keras<2.14,>=2.13.1, but you have keras 3.8.0 which is incompatible

    Uninstalling numpy-2.2.3:
      Successfully uninstalled numpy-2.2.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tables 3.8.0 requires blosc2~=2.0.0, which is not installed.
tables 3.8.0 requires cython>=0.29.21, which is not installed.
gensim 4.3.0 requires FuzzyTM>=0.4.0, which is not installed.
numba 0.57.0 requires numpy<1.25,>=1.21, but you have numpy 2.0.2 which is incompatible.
tensorflow-macos 2.13.0 requires keras<2.14,>=2.13.1, but you have keras 3.8.0 which is incompatible.
tensorflow-macos 2.13.0 requires numpy<=1.24.3,>=1.22, but you have numpy 2.0.2 which is incompatible.
tensorflow-macos 2.13.0 requires tensorboard<2.14,>=2.13, but you have tensorboard 2.18.0 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-2.0.2


In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
import cv2
import os

print(f"TensorFlow: {tf.__version__}")
print(f"NumPy: {np.__version__}")
print(f"OpenCV: {cv2.__version__}")
print(f"Pandas: {pd.__version__}")

TensorFlow: 2.18.0
NumPy: 2.0.2
OpenCV: 4.11.0
Pandas: 2.2.3


In [4]:
# Loading Feature Extractors
def load_model(base_model):
    return tf.keras.applications.__dict__[base_model](
        include_top=False, weights='imagenet', input_shape=(224, 224, 3), pooling='avg'
    )
models = {
    "DenseNet201": load_model("DenseNet201"),
    "InceptionV3": load_model("InceptionV3"),
    "MobileNetV2": load_model("MobileNetV2"),
}

In [5]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def train_hybrid_model(fe_model, classifier_model, X_train, y_train, mask_train=None):
    # Check if mask_train is provided
    if mask_train is not None:
        X_train = X_train * mask_train  # Applying the mask (this is just an example, modify based on your needs)
    
    # Extract features using the feature extractor model (fe_model)
    features = fe_model.predict(X_train)
    
    # Train the classifier with the extracted features
    classifier_model.fit(features, y_train)
    
    # Predictions and evaluation
    y_pred = classifier_model.predict(features)
    
    # Calculate metrics
    accuracy = accuracy_score(y_train, y_pred)
    precision = precision_score(y_train, y_pred, average='weighted')
    recall = recall_score(y_train, y_pred, average='weighted')
    f1 = f1_score(y_train, y_pred, average='weighted')
    
    return accuracy, precision, recall, f1


In [6]:
# Details of Feature Extractors
for model_name, model in models.items():
    print(f"Basic Summary for {model_name}:")
    num_params = model.count_params()
    num_layers = len(model.layers)
    print(f"Total Parameters: {num_params}")
    print(f"Number of Layers: {num_layers}")
    print("\n")


Basic Summary for DenseNet201:
Total Parameters: 18321984
Number of Layers: 708


Basic Summary for InceptionV3:
Total Parameters: 21802784
Number of Layers: 312


Basic Summary for MobileNetV2:
Total Parameters: 2257984
Number of Layers: 155




In [7]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

# Function to load machine learning models
def load_ml_model(model_name):
    if model_name == 'SVC':
        return SVC()
    elif model_name == 'KNN':
        return KNeighborsClassifier()
    elif model_name == 'DecisionTree':
        return DecisionTreeClassifier()
    elif model_name == 'MLP':
        return MLPClassifier()
    else:
        raise ValueError(f"Model {model_name} is not defined!")

# Dictionary of ML models
ml_models = {
    "SVC": load_ml_model("SVC"),
    "KNN": load_ml_model("KNN"),
    "DecisionTree": load_ml_model("DecisionTree"),
    "MLP": load_ml_model("MLP")
}

In [8]:
# Details of Machine Learning Models
for model_name, model in ml_models.items():
    print(f"Parameters for {model_name}:")
    print(model.get_params())
    print("-" * 50)


Parameters for SVC:
{'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
--------------------------------------------------
Parameters for KNN:
{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
--------------------------------------------------
Parameters for DecisionTree:
{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'random_state': None, 'splitter': 'best'}
--------------------------------------------------
Parameters for MLP:
{'activation': 'relu', 'alp

In [9]:
# Hybrid Architecture Dictionary
hybrid_architectures = {}

# Pair Feature Extractors with ML Models
for fe_name, fe_model in models.items():
    for ml_name, ml_model in ml_models.items():
        hybrid_name = f"{fe_name}_{ml_name}"
        hybrid_architectures[hybrid_name] = {
            "Feature Extractor": fe_model,
            "Classifier": ml_model
        }

In [10]:
# Details Hybrid architectures dictionary
count = 0
for hybrid_name, hybrid_model in hybrid_architectures.items():
    count += 1
    print(f"Hybrid Architecture {count}: {hybrid_name}")
    print(f"Feature Extractor: {hybrid_model['Feature Extractor']}")
    print(f"Classifier: {hybrid_model['Classifier']}")
    print("\n")

print(f"Total Hybrid Architectures: {count}")


Hybrid Architecture 1: DenseNet201_SVC
Feature Extractor: <Functional name=densenet201, built=True>
Classifier: SVC()


Hybrid Architecture 2: DenseNet201_KNN
Feature Extractor: <Functional name=densenet201, built=True>
Classifier: KNeighborsClassifier()


Hybrid Architecture 3: DenseNet201_DecisionTree
Feature Extractor: <Functional name=densenet201, built=True>
Classifier: DecisionTreeClassifier()


Hybrid Architecture 4: DenseNet201_MLP
Feature Extractor: <Functional name=densenet201, built=True>
Classifier: MLPClassifier()


Hybrid Architecture 5: InceptionV3_SVC
Feature Extractor: <Functional name=inception_v3, built=True>
Classifier: SVC()


Hybrid Architecture 6: InceptionV3_KNN
Feature Extractor: <Functional name=inception_v3, built=True>
Classifier: KNeighborsClassifier()


Hybrid Architecture 7: InceptionV3_DecisionTree
Feature Extractor: <Functional name=inception_v3, built=True>
Classifier: DecisionTreeClassifier()


Hybrid Architecture 8: InceptionV3_MLP
Feature Extractor:

In [11]:
# Loading Training Dataset
metadata = pd.read_csv("../processed-dataset/metadata_split.csv")

In [12]:
# Viewing the dataset
metadata

Unnamed: 0,image_path,mask_path,label,split
0,benign/benign (1).png,benign/benign (1)_mask.png,Benign,train
1,benign/benign (10).png,benign/benign (10)_mask.png,Benign,val
2,benign/benign (100).png,benign/benign (100)_mask.png,Benign,train
3,benign/benign (101).png,benign/benign (101)_mask.png,Benign,train
4,benign/benign (102).png,benign/benign (102)_mask.png,Benign,val
...,...,...,...,...
775,normal/normal (95).png,normal/normal (95)_mask.png,Normal,train
776,normal/normal (96).png,normal/normal (96)_mask.png,Normal,train
777,normal/normal (97).png,normal/normal (97)_mask.png,Normal,train
778,normal/normal (98).png,normal/normal (98)_mask.png,Normal,train


In [13]:
metadata.describe()

Unnamed: 0,image_path,mask_path,label,split
count,780,780,780,780
unique,780,780,3,3
top,benign/benign (1).png,benign/benign (1)_mask.png,Benign,train
freq,1,1,437,546


In [14]:
split_label_counts = metadata.groupby(['split', 'label']).size().reset_index(name='count')

print(split_label_counts)

   split      label  count
0   test     Benign     66
1   test  Malignant     31
2   test     Normal     20
3  train     Benign    306
4  train  Malignant    147
5  train     Normal     93
6    val     Benign     65
7    val  Malignant     32
8    val     Normal     20


In [15]:
train_data = metadata[metadata['split'] == 'train']
train_data

Unnamed: 0,image_path,mask_path,label,split
0,benign/benign (1).png,benign/benign (1)_mask.png,Benign,train
2,benign/benign (100).png,benign/benign (100)_mask.png,Benign,train
3,benign/benign (101).png,benign/benign (101)_mask.png,Benign,train
5,benign/benign (103).png,benign/benign (103)_mask.png,Benign,train
6,benign/benign (104).png,benign/benign (104)_mask.png,Benign,train
...,...,...,...,...
775,normal/normal (95).png,normal/normal (95)_mask.png,Normal,train
776,normal/normal (96).png,normal/normal (96)_mask.png,Normal,train
777,normal/normal (97).png,normal/normal (97)_mask.png,Normal,train
778,normal/normal (98).png,normal/normal (98)_mask.png,Normal,train


In [24]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def load_image(image_path, target_size=(224, 224)):
    image = tf.io.read_file('../processed-dataset/'+image_path) 
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, target_size)
    image = image / 255.0 
    return image

def load_mask(mask_path, target_size=(224, 224)):
    mask = tf.io.read_file('../processed-dataset/'+mask_path)
    mask = tf.image.decode_png(mask, channels=1)
    mask = tf.image.resize(mask, target_size) 
    mask = mask / 255.0
    return mask

X_train = np.array([load_image(image_path) for image_path in train_data['image_path']])
mask_train = np.array([load_mask(mask_path) for mask_path in train_data['mask_path']])

y_train = train_data['label'].values

print(X_train.shape)
print(mask_train.shape)
print(y_train.shape)

(546, 224, 224, 3)
(546, 224, 224, 1)
(546,)


In [25]:
hybrid_results = {}

for hybrid_name, hybrid_model in hybrid_architectures.items():
    print(f"Training Hybrid Model: {hybrid_name}")

    fe_model = hybrid_model['Feature Extractor']
    ml_model = hybrid_model['Classifier']

    features = fe_model.predict(X_train)

    ml_model.fit(features, y_train)

    y_pred = ml_model.predict(features)

    accuracy = accuracy_score(y_train, y_pred)
    precision = precision_score(y_train, y_pred, average='weighted')
    recall = recall_score(y_train, y_pred, average='weighted')
    f1 = f1_score(y_train, y_pred, average='weighted')

    hybrid_results[hybrid_name] = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    }

    print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}\n")

Training Hybrid Model: DenseNet201_SVC
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 927ms/step
Accuracy: 0.7747252747252747, Precision: 0.7981180777073075, Recall: 0.7747252747252747, F1 Score: 0.7584019960546797

Training Hybrid Model: DenseNet201_KNN
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 858ms/step
Accuracy: 0.8131868131868132, Precision: 0.8127205438103864, Recall: 0.8131868131868132, F1 Score: 0.8123087326631311

Training Hybrid Model: DenseNet201_DecisionTree
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 888ms/step
Accuracy: 1.0, Precision: 1.0, Recall: 1.0, F1 Score: 1.0

Training Hybrid Model: DenseNet201_MLP
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 882ms/step
Accuracy: 1.0, Precision: 1.0, Recall: 1.0, F1 Score: 1.0

Training Hybrid Model: InceptionV3_SVC
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 416ms/step
Accuracy: 0.8882783882783882, Precision: 0.8961324215000687, 

In [29]:
for hybrid_name, metrics in hybrid_results.items():
    print(f"Results for {hybrid_name}:")
    print(f"Accuracy: {metrics['Accuracy'] * 100:.2f}%")
    print(f"Precision: {metrics['Precision'] * 100:.2f}%")
    print(f"Recall: {metrics['Recall'] * 100:.2f}%")
    print(f"F1 Score: {metrics['F1 Score'] * 100:.2f}%")
    print("\n")

Results for DenseNet201_SVC:
Accuracy: 77.47%
Precision: 79.81%
Recall: 77.47%
F1 Score: 75.84%


Results for DenseNet201_KNN:
Accuracy: 81.32%
Precision: 81.27%
Recall: 81.32%
F1 Score: 81.23%


Results for DenseNet201_DecisionTree:
Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1 Score: 100.00%


Results for DenseNet201_MLP:
Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1 Score: 100.00%


Results for InceptionV3_SVC:
Accuracy: 88.83%
Precision: 89.61%
Recall: 88.83%
F1 Score: 88.55%


Results for InceptionV3_KNN:
Accuracy: 79.49%
Precision: 79.40%
Recall: 79.49%
F1 Score: 79.09%


Results for InceptionV3_DecisionTree:
Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1 Score: 100.00%


Results for InceptionV3_MLP:
Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1 Score: 100.00%


Results for MobileNetV2_SVC:
Accuracy: 86.45%
Precision: 87.67%
Recall: 86.45%
F1 Score: 85.95%


Results for MobileNetV2_KNN:
Accuracy: 80.77%
Precision: 80.87%
Recall: 80.77%
F1 Sc