In [18]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, GlobalAveragePooling2D, Reshape
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split


In [19]:
# --- Load and preprocess hormone data ---
hormone_data = pd.read_csv('hormone_data.csv')
hormone_data.set_index('Patient File No.', inplace=True)

images, hormone_values, labels = [], [], []

base_path = 'images'

for folder in ['infected', 'not_infected']:
    folder_path = os.path.join(base_path, folder)
    label = 1 if folder == 'infected' else 0

    for img_file in os.listdir(folder_path):
        patient_id = int(img_file.replace('patient', '').replace('.jpg', ''))
        if patient_id in hormone_data.index:
            img_path = os.path.join(folder_path, img_file)
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img) / 255.0
            images.append(img_array)

            row = hormone_data.loc[patient_id].apply(pd.to_numeric, errors='coerce')
            row.fillna(row.median(), inplace=True)
            hormone_values.append(row.values)
            labels.append(label)

images = np.array(images)
hormone_values = np.array(hormone_values)
labels = np.array(labels)

X_train_img, X_test_img, X_train_tab, X_test_tab, y_train, y_test = train_test_split(
    images, hormone_values, labels, test_size=0.2, random_state=42
)

In [20]:
print("Infected:", np.sum(labels == 1))
print("Not Infected:", np.sum(labels == 0))

Infected: 336
Not Infected: 363


In [21]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input
from tensorflow.keras.applications import MobileNetV2
from sklearn.utils import class_weight


# --- Define image input and base model ---
image_input = Input(shape=(224, 224, 3))
mobilenet = MobileNetV2(include_top=False, input_tensor=image_input, weights='imagenet')
mobilenet.trainable = False  # Freeze base layers if using pretrained weights

# --- Image branch ---
x = mobilenet.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Reshape((1, 128))(x)

# --- Hormone (tabular) branch ---
hormone_input = Input(shape=(X_train_tab.shape[1],))
h = Dense(128, activation='relu')(hormone_input)
h = Reshape((1, 128))(h)

# --- Fusion ---
fused = Concatenate(axis=1)([x, h])
fused = Flatten()(fused)
fused = Dense(64, activation='relu')(fused)
output = Dense(1, activation='sigmoid')(fused)

# --- Model definition ---
model = Model(inputs=[image_input, hormone_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Class weight to handle imbalance ---
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = dict(enumerate(class_weights))

# --- Early stopping ---
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# --- Train ---
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_5[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                                           

In [22]:
preds = model.predict([X_test_img, X_test_tab])
print("Raw predictions:", preds[:10].flatten())
print("Rounded predictions:", (preds[:10].flatten() > 0.5).astype(int))
print("True labels:", y_test[:10])

Raw predictions: [1.         1.         0.96793604 1.         1.         1.
 1.         1.         1.         0.99999595]
Rounded predictions: [1 1 1 1 1 1 1 1 1 1]
True labels: [1 0 0 1 1 1 1 1 1 0]


In [23]:
# Evaluate model performance
from sklearn.metrics import classification_report, confusion_matrix

y_pred_probs = model.predict([X_test_img, X_test_tab]).flatten()
y_pred = (y_pred_probs > 0.5).astype(int)

print("Raw predictions:", y_pred_probs)
print("Rounded predictions:", y_pred)
print("True labels:", y_test)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


Raw predictions: [1.0000000e+00 1.0000000e+00 9.6793604e-01 1.0000000e+00 1.0000000e+00
 1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 9.9999595e-01
 2.3272331e-03 1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00
 1.0000000e+00 9.9999791e-01 1.0000000e+00 1.0000000e+00 1.0000000e+00
 6.1136699e-01 8.4205508e-01 4.2837492e-01 1.0000000e+00 1.0000000e+00
 1.0000000e+00 1.0000000e+00 9.9997699e-01 1.0000000e+00 1.0000000e+00
 5.9185556e-13 1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00
 1.0000000e+00 1.0000000e+00 9.9999952e-01 1.0000000e+00 0.0000000e+00
 1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00
 1.0000000e+00 3.7474108e-01 1.9026861e-03 1.0000000e+00 1.0000000e+00
 1.0000000e+00 1.0000000e+00 9.9898392e-01 1.0000000e+00 1.0000000e+00
 1.0000000e+00 9.9999595e-01 1.0000000e+00 1.0000000e+00 4.6801963e-03
 9.9994093e-01 9.9796104e-01 1.0000000e+00 1.0000000e+00 1.0000000e+00
 1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 9.9

In [24]:
model.fit(
    [X_train_img, X_train_tab],
    y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.2,
    class_weight=class_weights_dict,
    callbacks=[early_stop]
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30


<keras.callbacks.History at 0x20d128db3a0>

In [25]:
model.save('vgg_model.keras')
