In [3]:
import zipfile
import os

with zipfile.ZipFile("images.zip", "r") as zip_ref:
    zip_ref.extractall(".")


In [4]:
print(os.listdir("images"))

['infected', 'not_infected']


In [5]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Input, Concatenate
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf

# --- Data Loading and Preprocessing ---
# Load hormone data
hormone_data = pd.read_csv('hormone_data.csv')
hormone_data.set_index('Patient File No.', inplace=True)

# Initialize data lists
images, hormone_values, labels = [], [], []

# Define base path for images
base_path = 'images'

# Load and process images and hormone data
for folder in ['infected', 'not_infected']:
    folder_path = os.path.join(base_path, folder)
    label = 1 if folder == 'infected' else 0

    for img_file in os.listdir(folder_path):
        patient_id = int(img_file.replace('patient', '').replace('.jpg', ''))
        if patient_id in hormone_data.index:
            # Process image
            img_path = os.path.join(folder_path, img_file)
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img) / 255.0  # Normalize image
            images.append(img_array)

            # Process hormone data
            hormone_row = hormone_data.loc[patient_id].apply(pd.to_numeric, errors='coerce')
            hormone_row.fillna(hormone_row.median(), inplace=True)
            hormone_values.append(hormone_row.values)
            labels.append(label)

# Convert data lists to numpy arrays
images = np.array(images, dtype='float32')
hormone_values = np.array(hormone_values, dtype='float32')
labels = np.array(labels, dtype='float32')

# Split data into training, validation, and testing sets
X_train_images, X_test_images, X_train_hormones, X_test_hormones, y_train, y_test = train_test_split(
    images, hormone_values, labels, test_size=0.3, random_state=42, stratify=labels
)

X_train_images_final, X_val_images, X_train_hormones_final, X_val_hormones, y_train_final, y_val = train_test_split(
    X_train_images, X_train_hormones, y_train, test_size=0.1, random_state=42, stratify=y_train
)

# --- Data Generator Function ---
def create_dataset(image_data, hormone_data, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices(((image_data, hormone_data), labels))
    dataset = dataset.shuffle(buffer_size=len(labels)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

batch_size = 32
train_dataset = create_dataset(X_train_images_final, X_train_hormones_final, y_train_final, batch_size)
val_dataset = create_dataset(X_val_images, X_val_hormones, y_val, batch_size)
test_dataset = create_dataset(X_test_images, X_test_hormones, y_test, batch_size)

# --- Custom CNN Model for Image Encoding ---
image_input = Input(shape=(224, 224, 3))
cnn_layers = Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.01))(image_input)
cnn_layers = MaxPooling2D((2, 2))(cnn_layers)

cnn_layers = Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.01))(cnn_layers)
cnn_layers = MaxPooling2D((2, 2))(cnn_layers)

cnn_layers = Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.01))(cnn_layers)
cnn_layers = MaxPooling2D((2, 2))(cnn_layers)

cnn_layers = Flatten()(cnn_layers)
cnn_output = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(cnn_layers)

# --- Hormone Data Encoder ---
hormone_input = Input(shape=(X_train_hormones.shape[1],))
hormone_encoded = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(hormone_input)
hormone_encoded = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(hormone_encoded)

# --- Fusion Network ---
fused = Concatenate()([cnn_output, hormone_encoded])
fused = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(fused)

# --- Classifier ---
output = Dense(1, activation='sigmoid')(fused)

# --- Compile the Model ---
multi_modal_model = Model(inputs=[image_input, hormone_input], outputs=output)
multi_modal_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Callbacks ---
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

# --- Train the Model ---
history = multi_modal_model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=8,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

# --- Evaluate the Model ---
test_loss, test_accuracy = multi_modal_model.evaluate(test_dataset)
print(f'Test Accuracy: {test_accuracy:.4f}')

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Test Accuracy: 0.5667


In [6]:
import pandas as pd

# Fill in values for one patient — make sure to provide ALL 43 feature values in correct order
data = {
    'Sl. No':1,
    'Age (yrs)': 28,
    'Weight (Kg)': 44.6,
    'Height(Cm)': 152,
    'BMI': 19.3,
    'Blood Group': 'O+',              # If this was one-hot encoded earlier, use numeric columns instead
    'Pulse rate(bpm) ': 78,
    'RR (breaths/min)': 22,
    'Hb(g/dl)': 10.48,
    'Cycle(R/I)': 'R',                # Again, encode as used during training
    'Cycle length(days)': 5,
    'Marraige Status (Yrs)': 7,
    'Pregnant(Y/N)': 0,
    'No. of abortions': 0,
    '  I   beta-HCG(mIU/mL)': 1.99,
    'II    beta-HCG(mIU/mL)': 1.99,
    'FSH(mIU/mL)': 7.95,
    'LH(mIU/mL)': 3.68,
    'FSH/LH': 2.16,
    'Hip(inch)': 36,
    'Waist(inch)': 30,
    'Waist:Hip Ratio': 0.83,
    'TSH (mIU/L)': 0.68,
    'AMH(ng/mL)': 2.07,
    'PRL(ng/mL)': 45.16,
    'Vit D3 (ng/mL)': 17.1,
    'PRG(ng/mL)': 0.57,
    'RBS(mg/dl)': 92,
    'Weight gain(Y/N)': 0,
    'hair growth(Y/N)': 0,
    'Skin darkening (Y/N)': 0,
    'Hair loss(Y/N)': 0,
    'Pimples(Y/N)': 0,
    'Fast food (Y/N)': 1,
    'Reg.Exercise(Y/N)': 0,
    'BP _Systolic (mmHg)': 110,
    'BP _Diastolic (mmHg)': 80,
    'Follicle No. (L)': 3,
    'Follicle No. (R)': 3,
    'Avg. F size (L) (mm)': 18,
    'Avg. F size (R) (mm)': 18,
    'Endometrium (mm)': 8.5,
    'PCOS (Y/N)': 0
}

# Create DataFrame
df = pd.DataFrame([data])

# Save to CSV (ready to upload to Colab)
df.to_csv("hormone_sample.csv", index=False)


In [7]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import io

# Image preprocessing
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img) / 255.0
    return np.expand_dims(img_array, axis=0)

# Hormone data preprocessing
def preprocess_hormone_row(row, selected_columns):
    hormone_values = [row[col] for col in selected_columns]
    hormone_array = np.array(hormone_values, dtype=np.float32)
    return np.expand_dims(hormone_array, axis=0)


In [8]:
print("DataFrame shape:", hormone_data.shape)
print("DataFrame columns:", list(hormone_data.columns))


DataFrame shape: (700, 43)
DataFrame columns: ['Sl. No', ' Age (yrs)', 'Weight (Kg)', 'Height(Cm) ', 'BMI', 'Blood Group', 'Pulse rate(bpm) ', 'RR (breaths/min)', 'Hb(g/dl)', 'Cycle(R/I)', 'Cycle length(days)', 'Marraige Status (Yrs)', 'Pregnant(Y/N)', 'No. of abortions', '  I   beta-HCG(mIU/mL)', 'II    beta-HCG(mIU/mL)', 'FSH(mIU/mL)', 'LH(mIU/mL)', 'FSH/LH', 'Hip(inch)', 'Waist(inch)', 'Waist:Hip Ratio', 'TSH (mIU/L)', 'AMH(ng/mL)', 'PRL(ng/mL)', 'Vit D3 (ng/mL)', 'PRG(ng/mL)', 'RBS(mg/dl)', 'Weight gain(Y/N)', 'hair growth(Y/N)', 'Skin darkening (Y/N)', 'Hair loss(Y/N)', 'Pimples(Y/N)', 'Fast food (Y/N)', 'Reg.Exercise(Y/N)', 'BP _Systolic (mmHg)', 'BP _Diastolic (mmHg)', 'Follicle No. (L)', 'Follicle No. (R)', 'Avg. F size (L) (mm)', 'Avg. F size (R) (mm)', 'Endometrium (mm)', 'PCOS (Y/N)']


In [9]:
multi_modal_model.save("cnn_model.h5")
