<a href="https://colab.research.google.com/github/yassine-grati/rememberme/blob/main/vgg16v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install tensorflow



In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob
from keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, BatchNormalization
from tensorflow.keras.optimizers import Adam
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import class_weight
import zipfile
import os
from google.colab import drive

In [None]:
# Mount Google Drive so the dataset archive stored there can be read.
drive.mount('/content/drive')

# Unpack the archive into /tmp. The `with` block closes the zip handle even
# when extraction fails part-way through.
with zipfile.ZipFile('/content/drive/MyDrive/archive (2).zip', 'r') as zip_train:
    zip_train.extractall('/tmp')

Mounted at /content/drive


In [None]:
# Spatial size (in pixels) every image is resized to; a channel count of 3 is
# appended to it to form the VGG16 input shape.
IMAGE_SIZE = [224, 224]

# Dataset roots under /tmp, where the archive is extracted. Each is scanned
# for one sub-directory per class.
train_path = '/tmp/train'
valid_path = '/tmp/test'
def count_images_per_class(folder_path):
    """Count the image files in each class sub-directory of ``folder_path``.

    Every immediate sub-directory is treated as one class. A file counts as an
    image when its name ends in ``.jpg``, ``.jpeg`` or ``.png``; the comparison
    ignores case, so ``scan.JPG`` is counted as well.

    Args:
        folder_path: Directory whose immediate sub-directories are the classes.

    Returns:
        dict mapping each class (sub-directory) name to its number of images.
        Entries of ``folder_path`` that are not directories are skipped.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    class_counts = {}
    for class_name in os.listdir(folder_path):
        class_dir = os.path.join(folder_path, class_name)
        if not os.path.isdir(class_dir):
            continue
        # Lower-case the name first so upper-case extensions are not missed.
        class_counts[class_name] = sum(
            entry.lower().endswith(image_suffixes) for entry in os.listdir(class_dir)
        )
    return class_counts

# Tally the images per class in the full train and test splits.
train_counts, test_counts = map(count_images_per_class, (train_path, valid_path))

for split_title, split_counts in (
    ("Training set counts:", train_counts),
    ("Test set counts:", test_counts),
):
    print(split_title, split_counts)

Training set counts: {'Very mild Dementia': 10980, 'Moderate Dementia': 390, 'Non Demented': 53777, 'Mild Dementia': 4001}
Test set counts: {'Very mild Dementia': 2745, 'Moderate Dementia': 98, 'Non Demented': 13445, 'Mild Dementia': 1001}


In [None]:
import random
import shutil

def create_subset(original_dir, target_dir, subset_ratio=0.2, seed=None):
    """Copy a random fraction of every class folder into a new dataset root.

    For each immediate sub-directory (class) of ``original_dir``, a sample of
    ``int(n_images * subset_ratio)`` image files is copied, metadata included,
    into a same-named sub-directory of ``target_dir``. Files ending in
    ``.jpg``, ``.jpeg`` or ``.png`` (any letter case) are treated as images.

    Args:
        original_dir: Dataset root containing one sub-directory per class.
        target_dir: Destination root; created if missing, together with one
            sub-directory per class.
        subset_ratio: Fraction of each class to copy; must lie in [0, 1].
        seed: Optional seed for a private random generator, so the same files
            are picked on every call. When ``None``, the module-level
            ``random`` state is used.

    Raises:
        ValueError: If ``subset_ratio`` is outside [0, 1].

    Note:
        Files already present under ``target_dir`` are left untouched, so
        repeated calls that draw different samples accumulate files there.
    """
    if not 0 <= subset_ratio <= 1:
        raise ValueError(f"subset_ratio must be between 0 and 1, got {subset_ratio!r}")

    image_suffixes = ('.jpg', '.jpeg', '.png')
    rng = random if seed is None else random.Random(seed)

    os.makedirs(target_dir, exist_ok=True)
    # Directory listings come back in arbitrary order; sorting makes the
    # sample depend only on the random state, not on the filesystem.
    for class_name in sorted(os.listdir(original_dir)):
        class_path = os.path.join(original_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        files = sorted(
            f for f in os.listdir(class_path) if f.lower().endswith(image_suffixes)
        )
        subset_files = rng.sample(files, int(len(files) * subset_ratio))

        # Create target class directory
        target_class_path = os.path.join(target_dir, class_name)
        os.makedirs(target_class_path, exist_ok=True)

        # Copy selected files
        for file in subset_files:
            shutil.copy2(
                os.path.join(class_path, file),
                os.path.join(target_class_path, file)
            )

# Start from an empty output folder: create_subset only adds files, so
# re-running this cell would otherwise pile a second random sample on top of
# the first and silently grow the "20%" subset.
shutil.rmtree('/tmp/subset_dataset', ignore_errors=True)

# Seed the module-level generator used for sampling so repeated runs draw the
# same subset.
random.seed(42)

# Example: Create 20% subsets of original train/test
create_subset('/tmp/train', '/tmp/subset_dataset/train', subset_ratio=0.2)
create_subset('/tmp/test', '/tmp/subset_dataset/test', subset_ratio=0.2)

# From here on the notebook works with the sampled splits.
train_path='/tmp/subset_dataset/train'
valid_path='/tmp/subset_dataset/test'

train_counts = count_images_per_class(train_path)
test_counts = count_images_per_class(valid_path)

print("Training set counts:", train_counts)
print("Test set counts:", test_counts)

Training set counts: {'Very mild Dementia': 2196, 'Moderate Dementia': 78, 'Non Demented': 10755, 'Mild Dementia': 800}
Test set counts: {'Very mild Dementia': 549, 'Moderate Dementia': 19, 'Non Demented': 2689, 'Mild Dementia': 200}


In [None]:
#cross validation
def create_image_dataframe(directory):
    """Index a class-per-folder image directory as a DataFrame.

    Files ending in ``.jpg``, ``.jpeg`` or ``.png`` (any letter case) inside
    each immediate sub-directory of ``directory`` are listed.

    Args:
        directory: Root folder containing one sub-directory per class.

    Returns:
        pandas.DataFrame with one row per image and two columns: ``filename``
        (``directory`` joined with the class folder and the file name) and
        ``label`` (the class folder name). Rows are ordered by class name,
        then file name. Both columns exist even when no image is found.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    data = []
    # Sorted traversal gives a stable row order, so index-based splits made
    # with a fixed random_state select the same files on every run.
    for class_name in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_name)
        if not os.path.isdir(class_path):
            continue
        for img in sorted(os.listdir(class_path)):
            if img.lower().endswith(image_suffixes):
                data.append({
                    'filename': os.path.join(class_path, img),
                    'label': class_name
                })
    # Explicit columns keep df['label'] / df['filename'] valid for an empty result.
    return pd.DataFrame(data, columns=['filename', 'label'])


In [None]:
# Load the ImageNet-pretrained VGG16 base without its top (classifier) layers;
# the input shape is IMAGE_SIZE plus 3 channels.
vgg16 = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=IMAGE_SIZE + [3],
)

In [None]:
# Mark every layer of the pretrained base as non-trainable.
for pretrained_layer in vgg16.layers:
    pretrained_layer.trainable = False

In [None]:
# Paths of the entries directly under the full (non-subset) training folder.
# NOTE(review): `folders` is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
folders = glob('/tmp/train/*')

In [None]:
# List each layer of the base model: index, layer class name, trainable flag.
for layer_idx, base_layer in enumerate(vgg16.layers):
    print(f"{layer_idx} {base_layer.__class__.__name__}", base_layer.trainable)

0 InputLayer False
1 Conv2D False
2 Conv2D False
3 MaxPooling2D False
4 Conv2D False
5 Conv2D False
6 MaxPooling2D False
7 Conv2D False
8 Conv2D False
9 Conv2D False
10 MaxPooling2D False
11 Conv2D False
12 Conv2D False
13 Conv2D False
14 MaxPooling2D False
15 Conv2D False
16 Conv2D False
17 Conv2D False
18 MaxPooling2D False


In [None]:
# Function to build model head
def lw(bottom_model, num_classes):
    """Stack a classification head on top of ``bottom_model``'s output.

    The head is: global average pooling, then two stages of
    Dense(relu) -> BatchNormalization -> Dropout(0.5) with 256 and 128 units,
    then a softmax Dense layer with ``num_classes`` units.

    Args:
        bottom_model: Model whose ``output`` tensor feeds the head.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The output tensor of the final softmax layer (not a Model).
    """
    head = GlobalAveragePooling2D()(bottom_model.output)
    for hidden_units in (256, 128):
        head = Dense(hidden_units, activation='relu')(head)
        head = BatchNormalization()(head)
        head = Dropout(0.5)(head)
    return Dense(num_classes, activation='softmax')(head)

In [None]:
# The dataset has four class folders, hence four softmax outputs.
num_classes = 4
FC_Head = lw(vgg16, num_classes)
model = Model(inputs=vgg16.input, outputs=FC_Head)
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the cell output.
model.summary()

None


In [None]:
# Build the filename/label table for the sampled training split and print how
# many images each class contributes.
train_df = create_image_dataframe('/tmp/subset_dataset/train')
print(train_df['label'].value_counts())

label
Non Demented          10755
Very mild Dementia     2196
Mild Dementia           800
Moderate Dementia        78
Name: count, dtype: int64


In [None]:
from sklearn.utils.class_weight import compute_class_weight

# ImageDataGenerators: light geometric augmentation for training, rescaling
# only for validation.
# NOTE(review): the ImageNet VGG16 weights are documented to be used with
# `tensorflow.keras.applications.vgg16.preprocess_input` rather than a plain
# 1/255 rescale. Left unchanged because saved models depend on the
# preprocessing used here — confirm whether the rescale is intentional.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    zoom_range=0.1,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1./255)

# Cross-validation setup
X = train_df['filename'].values
y = train_df['label'].values
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Make sure the output folder exists before the first (long) training run ends.
model_dir = '/content/drive/MyDrive/models'
os.makedirs(model_dir, exist_ok=True)

fold_scores = []  # one (accuracy, precision, recall) triple per fold
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), start=1):
    print(f"\n🚀 Starting Fold {fold}")

    # A new model is built in every fold; release the state Keras keeps for the
    # previous one so memory does not grow from fold to fold.
    tf.keras.backend.clear_session()

    train_data = train_df.iloc[train_idx]
    val_data = train_df.iloc[val_idx]

    # Balanced weights computed from this fold's training labels only.
    fold_labels = np.unique(y[train_idx])
    class_weights = compute_class_weight(class_weight='balanced', classes=fold_labels, y=y[train_idx])
    class_weights_dict = dict(zip(fold_labels, class_weights))
    print(f"Class Weights for Fold {fold}: {class_weights_dict}")

    train_generator = train_datagen.flow_from_dataframe(
        train_data,
        x_col='filename',
        y_col='label',
        target_size=(224, 224),
        class_mode='categorical',
        batch_size=32,
        seed=42
    )
    val_generator = val_datagen.flow_from_dataframe(
        val_data,
        x_col='filename',
        y_col='label',
        target_size=(224, 224),
        class_mode='categorical',
        batch_size=32,
        shuffle=False  # keep batches aligned with val_data's row order
    )

    # Keras documents `class_weight` as a mapping from integer class index to
    # weight. The dict above is keyed by label name, so translate it through
    # the generator's label -> index mapping before passing it to fit().
    class_weight_by_index = {
        train_generator.class_indices[label]: float(weight)
        for label, weight in class_weights_dict.items()
    }

    # Fresh pretrained base and head for every fold so no weights carry over.
    vgg16 = VGG16(input_shape=(224,224,3), weights='imagenet', include_top=False)
    for layer in vgg16.layers:
        layer.trainable = False

    num_classes = 4
    FC_Head = lw(vgg16, num_classes)
    model = Model(inputs=vgg16.input, outputs=FC_Head)

    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy', 'Precision', 'Recall']
    )

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=50,
        class_weight=class_weight_by_index,
        callbacks=[early_stop, reduce_lr],
        verbose=1
    )

    # One file per fold: a fixed file name would be overwritten by every fold,
    # leaving only the last model on disk.
    model.save(os.path.join(model_dir, f"vgg16v4_fold{fold}.h5"))

    # evaluate() returns [loss, accuracy, precision, recall], in compile order.
    scores = model.evaluate(val_generator, verbose=0)
    print(f"Fold {fold} — Accuracy: {scores[1]:.4f}, Precision: {scores[2]:.4f}, Recall: {scores[3]:.4f}")
    fold_scores.append(scores[1:4])

# Cross-validation summary: average of the per-fold validation metrics.
if fold_scores:
    mean_accuracy, mean_precision, mean_recall = np.mean(fold_scores, axis=0)
    print(
        f"\nMean over {len(fold_scores)} folds — Accuracy: {mean_accuracy:.4f}, "
        f"Precision: {mean_precision:.4f}, Recall: {mean_recall:.4f}"
    )



🚀 Starting Fold 1
Class Weights for Fold 1: {'Mild Dementia': np.float64(4.324108818011257), 'Moderate Dementia': np.float64(44.32211538461539), 'Non Demented': np.float64(0.32144351464435145), 'Very mild Dementia': np.float64(1.5742827868852458)}
Found 9219 validated image filenames belonging to 4 classes.
Found 4610 validated image filenames belonging to 4 classes.


  self._warn_if_super_not_called()


Epoch 1/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 620ms/step - Precision: 0.3038 - Recall: 0.2336 - accuracy: 0.2861 - loss: 2.2137 - val_Precision: 0.9632 - val_Recall: 0.1475 - val_accuracy: 0.7241 - val_loss: 1.0254 - learning_rate: 1.0000e-04
Epoch 2/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 604ms/step - Precision: 0.4051 - Recall: 0.2998 - accuracy: 0.3830 - loss: 1.7569 - val_Precision: 0.8361 - val_Recall: 0.5102 - val_accuracy: 0.7087 - val_loss: 0.8643 - learning_rate: 1.0000e-04
Epoch 3/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 574ms/step - Precision: 0.5246 - Recall: 0.3892 - accuracy: 0.4722 - loss: 1.4534 - val_Precision: 0.8543 - val_Recall: 0.6052 - val_accuracy: 0.7453 - val_loss: 0.7650 - learning_rate: 1.0000e-04
Epoch 4/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 580ms/step - Precision: 0.5949 - Recall: 0.4341 - accuracy: 0.5220 - loss: 1.2616 - val_Preci



Fold 1 — Accuracy: 0.8401, Precision: 0.8710, Recall: 0.7985

🚀 Starting Fold 2
Class Weights for Fold 2: {'Mild Dementia': np.float64(4.324108818011257), 'Moderate Dementia': np.float64(44.32211538461539), 'Non Demented': np.float64(0.32144351464435145), 'Very mild Dementia': np.float64(1.5742827868852458)}
Found 9219 validated image filenames belonging to 4 classes.
Found 4610 validated image filenames belonging to 4 classes.


  self._warn_if_super_not_called()


Epoch 1/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 587ms/step - Precision: 0.3074 - Recall: 0.2358 - accuracy: 0.2966 - loss: 2.1594 - val_Precision: 0.7994 - val_Recall: 0.7538 - val_accuracy: 0.7777 - val_loss: 0.7902 - learning_rate: 1.0000e-04
Epoch 2/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 582ms/step - Precision: 0.4421 - Recall: 0.3284 - accuracy: 0.4044 - loss: 1.7295 - val_Precision: 0.8977 - val_Recall: 0.5575 - val_accuracy: 0.7473 - val_loss: 0.7948 - learning_rate: 1.0000e-04
Epoch 3/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 573ms/step - Precision: 0.5136 - Recall: 0.3783 - accuracy: 0.4659 - loss: 1.4531 - val_Precision: 0.8661 - val_Recall: 0.6748 - val_accuracy: 0.7766 - val_loss: 0.6599 - learning_rate: 1.0000e-04
Epoch 4/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 573ms/step - Precision: 0.6002 - Recall: 0.4493 - accuracy: 0.5384 - loss: 1.2452 - val_Preci



Fold 2 — Accuracy: 0.8230, Precision: 0.8467, Recall: 0.7970

🚀 Starting Fold 3
Class Weights for Fold 3: {'Mild Dementia': np.float64(4.3164794007490634), 'Moderate Dementia': np.float64(44.32692307692308), 'Non Demented': np.float64(0.3214783821478382), 'Very mild Dementia': np.float64(1.5744535519125684)}
Found 9220 validated image filenames belonging to 4 classes.
Found 4609 validated image filenames belonging to 4 classes.


  self._warn_if_super_not_called()


Epoch 1/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 603ms/step - Precision: 0.3225 - Recall: 0.2490 - accuracy: 0.3101 - loss: 2.1628 - val_Precision: 0.8566 - val_Recall: 0.6309 - val_accuracy: 0.7778 - val_loss: 0.8453 - learning_rate: 1.0000e-04
Epoch 2/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 579ms/step - Precision: 0.4394 - Recall: 0.3340 - accuracy: 0.4080 - loss: 1.7495 - val_Precision: 0.8747 - val_Recall: 0.4997 - val_accuracy: 0.7325 - val_loss: 0.8675 - learning_rate: 1.0000e-04
Epoch 3/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 582ms/step - Precision: 0.5288 - Recall: 0.3976 - accuracy: 0.4805 - loss: 1.4667 - val_Precision: 0.8656 - val_Recall: 0.6570 - val_accuracy: 0.7650 - val_loss: 0.7008 - learning_rate: 1.0000e-04
Epoch 4/50
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 632ms/step - Precision: 0.6048 - Recall: 0.4553 - accuracy: 0.5388 - loss: 1.2567 - val_Preci