In [15]:
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Model parameters
img_height = 224
img_width = 224
batch_size = 32
num_classes = 3  # mildly demented, moderately demented, nondemented


# Directory paths
data_dir = './notebooks/'

def filter_directories(directory):
    """Collect the patient folders that hold every required view.

    Walks ``directory/<class>/<patient>`` and keeps each patient folder whose
    direct entries include all of 'tra', 'cor' and 'sag'.

    Args:
        directory: Root folder whose immediate sub-folders are class folders.

    Returns:
        list: Paths of the qualifying patient folders, in ``os.listdir`` order.
    """
    required_views = {'tra', 'cor', 'sag'}
    matches = []
    for class_name in os.listdir(directory):
        class_root = os.path.join(directory, class_name)
        if not os.path.isdir(class_root):
            continue  # plain files at class level are ignored
        for patient_name in os.listdir(class_root):
            patient_root = os.path.join(class_root, patient_name)
            if not os.path.isdir(patient_root):
                continue  # plain files at patient level are ignored
            if required_views.issubset(os.listdir(patient_root)):
                matches.append(patient_root)
    return matches

# Generator that only rescales pixel values by 1/255. No 'validation_split' is
# used because the training and testing images are read from separate folders.
datagen = ImageDataGenerator(rescale=1./255)

# Patient folders that contain all of 'tra', 'cor' and 'sag', per split.
# NOTE(review): neither list is used again in the visible part of this cell —
# confirm whether they are still needed.
valid_training_dirs = filter_directories(data_dir + 'training')
valid_validation_dirs = filter_directories(data_dir + 'testing')

# Create a custom generator to handle the multi-directory structure
def custom_generator(generator, directories):
    """Endlessly yield ``(x, y)`` batches, cycling through several directories.

    Each directory is read with ``generator.flow_from_directory`` using the
    notebook-level ``img_height``, ``img_width`` and ``batch_size`` settings and
    one-hot ('categorical') labels.

    Args:
        generator: Object exposing ``flow_from_directory`` (an ``ImageDataGenerator``).
        directories: Iterable of directory paths to draw batches from.

    Yields:
        tuple: ``(x, y)`` batches; one full pass over a directory is emitted
        before moving on to the next directory.

    Raises:
        ValueError: If no directory yields any image (the previous version
            would spin forever in that case).

    Notes:
        NOTE(review): each directory is presumably a patient folder returned by
        ``filter_directories``; ``flow_from_directory`` treats the sub-folders of
        the path it is given as classes — confirm that this is the intended
        labelling.
    """
    # Build every directory iterator once instead of re-scanning the disk on
    # each pass of the endless loop.
    flows = [
        generator.flow_from_directory(
            directory,
            target_size=(img_height, img_width),
            batch_size=batch_size,
            class_mode='categorical'  # as we have multiple classes
        )
        for directory in directories
    ]
    # Iterators without batches would make the loop below spin without yielding.
    flows = [flow for flow in flows if len(flow) > 0]
    if not flows:
        raise ValueError("custom_generator found no images in the given directories")

    while True:
        for flow in flows:
            # A Keras directory iterator never stops by itself, so iterating it
            # with a plain `for` would never reach the next directory. Take
            # exactly one pass (len(flow) batches) and move on.
            for _ in range(len(flow)):
                yield next(flow)


# Directory paths of the training and validation images
train_dir = './notebooks/training/'  # Replace with the path to your training data
val_dir = './notebooks/testing/'  # Replace with the path to your validation data

# Rescale pixel values by 1/255; no other augmentation is applied.
datagen = ImageDataGenerator(rescale=1./255)

# Batches of (images, one-hot labels); 'categorical' matches the
# categorical_crossentropy loss used when compiling the model below.
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Same settings for the validation images.
validation_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)



# Define a simple CNN model: three Conv2D + MaxPooling2D stages, then a
# 512-unit dense layer with dropout and a softmax over `num_classes`.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

# Calculate steps per epoch and validation steps:
# len() of a directory iterator is its number of batches per pass.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)

# Train the model.
# NOTE(review): the History object returned by fit() is not stored here, so the
# training curves of this run cannot be plotted afterwards.
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=50,  # You can adjust the number of epochs
    validation_data=validation_generator,
    validation_steps=validation_steps
)


Found 558 images belonging to 3 classes.
Found 144 images belonging to 3 classes.




Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_20 (MaxPooli  (None, 111, 111, 32)      0         
 ng2D)                                                           
                                                                 
 conv2d_29 (Conv2D)          (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_21 (MaxPooli  (None, 54, 54, 64)        0         
 ng2D)                                                           
                                                                 
 conv2d_30 (Conv2D)          (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_22 (MaxPooli  (None, 26, 26, 128)     

<keras.src.callbacks.History at 0x2de594a90>

# Visualize with Grad-CAM

In [None]:
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Model parameters
img_height = 224
img_width = 224
batch_size = 32
num_classes = 3  # mildly demented, moderately demented, nondemented


# Directory paths
data_dir = './data_split/'

def filter_directories(directory):
    """Return the patient folders containing the 'tra', 'cor' and 'sag' entries.

    The layout scanned is ``directory/<class>/<patient>``; entries that are not
    directories are ignored at both levels.

    Args:
        directory: Root folder holding one sub-folder per class.

    Returns:
        list: Paths of patient folders whose listing includes all three views.
    """
    wanted = ('tra', 'cor', 'sag')

    def _subdirs(parent):
        # Full paths of the entries of `parent` that are directories.
        candidates = (os.path.join(parent, entry) for entry in os.listdir(parent))
        return [path for path in candidates if os.path.isdir(path)]

    return [
        patient_path
        for class_path in _subdirs(directory)
        for patient_path in _subdirs(class_path)
        if not set(wanted) - set(os.listdir(patient_path))
    ]

# Generator that only rescales pixel values by 1/255 (no 'validation_split').
datagen = ImageDataGenerator(rescale=1./255)

# Patient folders that contain all of 'tra', 'cor' and 'sag', per split.
# NOTE(review): these resolve to './data_split/training' and
# './data_split/validation', while train_dir / val_dir further down in this cell
# point to './data_split/data_train/training/' and
# './data_split/data_val/validation/' — confirm which layout is correct
# (os.listdir raises FileNotFoundError for a missing folder).
valid_training_dirs = filter_directories(data_dir + 'training')
valid_validation_dirs = filter_directories(data_dir + 'validation')

def custom_generator(directory, batch_size, target_size):
    """Endlessly yield shuffled ``(images, labels)`` batches read with OpenCV.

    ``directory`` must contain one sub-folder per class; files ending in
    .png/.jpg/.jpeg (case-insensitive) inside those folders are used.

    Args:
        directory: Root folder with one sub-folder per class.
        batch_size: Maximum number of images per yielded batch.
        target_size: Size handed to ``cv2.resize``, i.e. ``(width, height)``.

    Yields:
        tuple: ``(images_np, labels_np)``. ``images_np`` is a float32 array
        scaled by 1/255; ``labels_np`` is a 1-D array of integer class indices,
        where the index is the position of the class folder name in sorted
        order.

    Raises:
        ValueError: If no image file is found under ``directory``.

    Notes:
        * Labels are integer indices, not one-hot vectors: use a sparse
          categorical loss or one-hot encode them before a categorical loss.
        * Pixels come from ``cv2.imread`` (BGR channel order in OpenCV).
          NOTE(review): the Keras directory iterators used elsewhere in this
          notebook presumably deliver RGB — confirm before mixing pipelines.
    """
    # Imported here because this cell does not import numpy / OpenCV itself.
    import cv2
    import numpy as np

    # Stable class -> index mapping. The previous version looked labels up with
    # list.index() on the shuffled per-image list, which returned the position
    # of the first image of that class rather than a class index.
    class_labels = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
    class_to_index = {label: index for index, label in enumerate(class_labels)}

    # Scan the folders once; each sample is (image path, class index).
    samples = []
    for label in class_labels:
        class_path = os.path.join(directory, label)
        for img_name in os.listdir(class_path):
            if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                samples.append((os.path.join(class_path, img_name), class_to_index[label]))

    if not samples:
        raise ValueError(f"No image files found under: {directory}")

    while True:
        # New random order for every pass over the data.
        np.random.shuffle(samples)

        for start in range(0, len(samples), batch_size):
            images = []
            labels = []
            for path, class_index in samples[start:start + batch_size]:
                img = cv2.imread(path)
                if img is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print(f"Error processing file: {path}, Error: could not read image")
                    continue
                try:
                    img = cv2.resize(img, target_size)
                except Exception as e:
                    print(f"Error processing file: {path}, Error: {e}")
                    continue
                images.append(img)
                labels.append(class_index)

            if not images:
                # Every file of this batch failed to load; nothing to yield.
                continue

            images_np = np.array(images, dtype='float32') / 255.0
            labels_np = np.array(labels)

            yield images_np, labels_np

# Directory paths of the training and validation images
train_dir = './data_split/data_train/training/'
val_dir = './data_split/data_val/validation/'

# Image Data Generator: rescales pixel values by 1/255, no augmentation.
# NOTE(review): training below uses these Keras directory iterators; the
# custom_generator defined earlier in this cell is not called here.
datagen = ImageDataGenerator(rescale=1./255)

# Batches of (images, one-hot labels); 'categorical' matches the
# categorical_crossentropy loss used when compiling the model below.
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Same settings for the validation images.
validation_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)


# Define a simple CNN model: three Conv2D + MaxPooling2D stages, then a
# 512-unit dense layer with dropout and a softmax over `num_classes`.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

# Calculate steps per epoch and validation steps:
# len() of a directory iterator is its number of batches per pass.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)

# Train the model.
# NOTE(review): the History object returned by fit() is not stored here.
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=25,  # You can adjust the number of epochs
    validation_data=validation_generator,
    validation_steps=validation_steps
)

In [None]:
import os
import random

# Updated directory paths
train_dir = './notebooks/training/'  # Replace with the path to your training data
val_dir = './notebooks/testing/'  # Replace with the path to your validation data

# # Randomly choose between training and validation directories
# chosen_dir = random.choice([train_dir, val_dir])


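# TODO: Modify the generator to produce 3D inputs by stacking the 2D images of a scan,
#       so that a batch becomes a 4D input (first axis, then the 2D image, ...).
# TODO: Cut off / truncate each stack to a fixed length, e.g. keep the first, middle and last image.
# TODO: Use a batch of about 60 (depends on how far the images are compressed, e.g. to (224, 224)).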

In [None]:
# Pick the split to sample from. `chosen_dir` used to be assigned only in a
# commented-out line of the previous cell, so this cell raised NameError on a
# fresh kernel.
chosen_dir = random.choice([train_dir, val_dir])

# List the classes in the chosen directory. Only sub-directories count as
# classes, so stray files next to them cannot be picked by mistake.
classes = [d for d in os.listdir(chosen_dir) if os.path.isdir(os.path.join(chosen_dir, d))]
if not classes:
    raise FileNotFoundError(f"No class folders found in {chosen_dir}")
random_class = random.choice(classes)
class_dir = os.path.join(chosen_dir, random_class)

# List the images in the chosen class directory (extension match is
# case-insensitive so that e.g. '.JPG' files are found as well).
image_files = [f for f in os.listdir(class_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
if not image_files:
    raise FileNotFoundError(f"No image files found in {class_dir}")
random_image_file = random.choice(image_files)
image_path = os.path.join(class_dir, random_image_file)

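# TODO: Modify the generator to produce 3D inputs by stacking the 2D images of a scan,
#       so that a batch becomes a 4D input (first axis, then the 2D image, ...).
# TODO: Cut off / truncate each stack to a fixed length, e.g. keep the first, middle and last image.
# TODO: Use a batch of about 60 (depends on how far the images are compressed, e.g. to (224, 224)).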

In [None]:
import os
import random
import cv2
import numpy as np

# Function to preprocess a single image
def preprocess_image(image_path, target_size=(224, 224)):
    """Load one image from disk and turn it into a single-image input batch.

    Args:
        image_path: Path of the image file to read.
        target_size: Size handed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns:
        numpy.ndarray: float32 array with a leading batch axis of length 1 and
        pixel values scaled by 1/255.

    Raises:
        OSError: If OpenCV cannot read or decode ``image_path``.

    Notes:
        The pixels are kept exactly as ``cv2.imread`` returns them (BGR channel
        order in OpenCV). NOTE(review): the Keras directory iterators used for
        training presumably deliver RGB — confirm whether a channel swap is
        needed before predicting on these arrays.
    """
    # Load the image; cv2.imread returns None instead of raising on failure.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"Could not read image file: {image_path}")

    # Resize the image
    img_resized = cv2.resize(img, target_size)

    # Normalize the image to the same 1/255 scale used by the data generators
    img_normalized = img_resized.astype('float32') / 255.0

    # Add the batch dimension expected by model.predict
    img_array = np.expand_dims(img_normalized, axis=0)

    return img_array

# Function to get 10 random images from the directory
def get_random_images(directory, num_images=10):
    """Sample up to ``num_images`` distinct images from the class folders.

    A class folder is chosen at random, then one of its not-yet-used images is
    chosen at random and preprocessed with ``preprocess_image``. Sampling stops
    early when every image has been used.

    Args:
        directory: Root folder with one sub-folder per class.
        num_images: Maximum number of images to return.

    Returns:
        numpy.ndarray: The preprocessed images stacked along the first axis.

    Raises:
        ValueError: If no image could be selected (no image files found, or
            ``num_images`` is not positive).
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # Index the images of every class folder once. The previous version rebuilt
    # the file list on each iteration, so removing the picked image from it had
    # no effect and the same image could be returned several times.
    remaining_by_class = {}
    for entry in os.listdir(directory):
        class_dir = os.path.join(directory, entry)
        if not os.path.isdir(class_dir):
            continue
        files = [
            os.path.join(class_dir, f)
            for f in os.listdir(class_dir)
            if f.lower().endswith(image_suffixes)
        ]
        if files:
            remaining_by_class[class_dir] = files

    all_images = []
    while len(all_images) < num_images and remaining_by_class:
        # Choose a random class directory that still has unused images
        class_dir = random.choice(list(remaining_by_class))
        candidates = remaining_by_class[class_dir]

        # Randomly pick an image and take it out of the pool (no repetition)
        image_path = candidates.pop(random.randrange(len(candidates)))
        if not candidates:
            del remaining_by_class[class_dir]

        all_images.append(preprocess_image(image_path))

    if not all_images:
        raise ValueError(f"No images could be selected from: {directory}")

    return np.vstack(all_images)

# Choose between training and validation directories
chosen_dir = random.choice([train_dir, val_dir])

# Get 10 random preprocessed images
images_array = get_random_images(chosen_dir, num_images=10)
print(f"Processed {len(images_array)} images.")


In [None]:
# Model outputs for every image in `images_array`.
predictions = model.predict(images_array)
# Index of the highest-scoring output along axis 1, i.e. one class index per image.
predicted_class = np.argmax(predictions, axis=1)


In [None]:
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Assuming 'images_array' contains the batch of preprocessed images

# Label encoding setup
class_labels = ['Mildly Demented', 'Moderately Demented', 'Nondemented']
label_encoder = LabelEncoder()
label_encoder.fit(class_labels)

# Define a function to compute Grad-CAM
def compute_grad_cam(img_tensor, model, last_conv_layer_name, classifier_layer_names):
    """Compute a Grad-CAM heatmap for the top-scoring class of one image.

    Args:
        img_tensor: Input batch fed to ``model``; the heatmap is computed for
            its first element.
        model: Keras model containing the named layers.
        last_conv_layer_name: Name of the convolutional layer whose activations
            are visualised.
        classifier_layer_names: Names of layers to expose as extra outputs; the
            LAST one is used as the prediction, so it should be the model's
            output layer.

    Returns:
        numpy.ndarray: 2-D heatmap over the spatial grid of the chosen conv
        layer, with negative values clipped to 0 and scaled so that its maximum
        is 1 (all zeros when no activation supports the class).
    """
    grad_model = tf.keras.models.Model(
        [model.inputs],
        [model.get_layer(last_conv_layer_name).output]
        + [model.get_layer(name).output for name in classifier_layer_names],
    )

    with tf.GradientTape() as tape:
        outputs = grad_model(img_tensor)
        # First output: conv activations; last output: predictions. Indexing
        # (instead of two-name unpacking) also works when more than one
        # classifier layer name is given.
        last_conv_layer_output, preds = outputs[0], outputs[-1]
        if len(preds.shape) == 1:
            preds = tf.expand_dims(preds, axis=0)
        class_idx = tf.argmax(preds[0])
        loss = preds[:, class_idx]

    # Gradient of the top-class score with respect to the conv activations
    grads = tape.gradient(loss, last_conv_layer_output)

    # Global Average Pooling of gradients: one importance weight per channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Channel-weighted sum of the activations of the first image
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. The maximum is taken
    # after the ReLU and divide_no_nan returns 0 for a zero maximum, so an
    # all-zero or all-negative map no longer turns into NaN or inverted values.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# Resolve the layer names from the model itself. Keras appends a running
# counter to auto-generated names (e.g. 'conv2d_28'), so hard-coded names such
# as 'conv2d_2' only exist in one particular kernel session. The prediction
# layer must be the model's output layer; the first Dense layer ('dense') is the
# hidden 512-unit layer, not the class scores.
conv_layer_names = [layer.name for layer in model.layers if isinstance(layer, tf.keras.layers.Conv2D)]
if not conv_layer_names:
    raise ValueError("`model` has no top-level Conv2D layer to visualise with Grad-CAM")
last_conv_name = conv_layer_names[-1]
output_layer_name = model.layers[-1].name

# Process each image and display Grad-CAM
for i in range(len(images_array)):
    img_tensor = images_array[i:i+1]  # Select the i-th image for processing

    # Compute Grad-CAM
    heatmap = compute_grad_cam(img_tensor, model, last_conv_name, [output_layer_name])

    # Use cv2 to apply the heatmap to the original image.
    # cv2.resize expects (width, height), i.e. (shape[2], shape[1]) of the batch.
    heatmap = cv2.resize(heatmap, (img_tensor.shape[2], img_tensor.shape[1]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

    # Superimpose the heatmap on the original image. The image was scaled to
    # [0, 1] during preprocessing, so bring it back to 0-255 first; otherwise it
    # is practically invisible under the 0-255 heatmap.
    superimposed_img = heatmap * 0.4 + img_tensor[0] * 255.0
    superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)
    # Image (cv2.imread) and colour map (cv2.applyColorMap) are both BGR;
    # matplotlib expects RGB.
    superimposed_img = cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB)

    # Display the image with Grad-CAM heatmap
    plt.figure(figsize=(6, 6))
    predicted_class_index = np.argmax(model.predict(img_tensor)[0])
    # NOTE(review): assumes the encoder's (alphabetically sorted) labels are in
    # the same order as the class indices used during training — confirm
    # against train_generator.class_indices.
    predicted_class_label = label_encoder.inverse_transform([predicted_class_index])[0]
    plt.imshow(superimposed_img)
    plt.title(f"Image {i+1} - Predicted Class: {predicted_class_label}")
    plt.axis('off')
    plt.show()



In [None]:
import os
import random
import cv2
import numpy as np

# Function to preprocess a single image
def preprocess_image(image_path, target_size=(224, 224)):
    """Read an image file and return it as a one-element batch for the model.

    Args:
        image_path: Path of the image file to read.
        target_size: Size handed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns:
        numpy.ndarray: float32 array with a leading batch axis of length 1 and
        pixel values scaled by 1/255.

    Raises:
        OSError: If OpenCV cannot read or decode ``image_path``.

    Notes:
        Channel order is left as delivered by ``cv2.imread`` (BGR in OpenCV).
        NOTE(review): confirm that this matches the channel order the model was
        trained with.
    """
    # cv2.imread reports failure by returning None rather than raising.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"Could not read image file: {image_path}")

    # Resize, then scale to the 1/255 range used by the data generators
    img_resized = cv2.resize(img, target_size)
    img_normalized = img_resized.astype('float32') / 255.0

    # Add the batch dimension expected by model.predict
    img_array = np.expand_dims(img_normalized, axis=0)

    return img_array

# Function to get 10 random images from the directory
def get_random_images(directory, num_images=10):
    """Sample up to ``num_images`` distinct images from the class folders.

    A class folder is drawn at random, then one of its unused images is drawn
    at random and preprocessed with ``preprocess_image``. Sampling ends early
    once every image has been used.

    Args:
        directory: Root folder with one sub-folder per class.
        num_images: Maximum number of images to return.

    Returns:
        numpy.ndarray: The preprocessed images stacked along the first axis.

    Raises:
        ValueError: If no image could be selected (no image files found, or
            ``num_images`` is not positive).
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # Build the pool of images per class once; rebuilding it inside the loop
    # (as before) made the "remove the chosen image" step a no-op and allowed
    # duplicates.
    pool = {}
    for entry in os.listdir(directory):
        class_dir = os.path.join(directory, entry)
        if not os.path.isdir(class_dir):
            continue
        files = [
            os.path.join(class_dir, f)
            for f in os.listdir(class_dir)
            if f.lower().endswith(image_suffixes)
        ]
        if files:
            pool[class_dir] = files

    all_images = []
    while len(all_images) < num_images and pool:
        # Random class first, then a random unused image of that class
        class_dir = random.choice(list(pool))
        candidates = pool[class_dir]
        image_path = candidates.pop(random.randrange(len(candidates)))
        if not candidates:
            # Class exhausted: stop offering it
            del pool[class_dir]

        all_images.append(preprocess_image(image_path))

    if not all_images:
        raise ValueError(f"No images could be selected from: {directory}")

    return np.vstack(all_images)

# Choose between training and validation directories
chosen_dir = random.choice([train_dir, val_dir])

# Get 10 random preprocessed images
images_array = get_random_images(chosen_dir, num_images=10)
print(f"Processed {len(images_array)} images.")


In [None]:
# Define a simple CNN model: three Conv2D + MaxPooling2D stages, then a
# 512-unit dense layer with dropout and a softmax over `num_classes`.
# NOTE(review): this builds a new model object with freshly initialised weights;
# no fit() call follows in this cell, so anything run on `model` afterwards
# uses an untrained network unless training is repeated — confirm intent.
# NOTE(review): Sequential, the layer classes, Adam and img_height / img_width /
# num_classes are not defined in this cell; it relies on earlier cells.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

In [None]:
import os
import random
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Function definitions remain the same...

# Label encoding setup
class_labels = ['Mildly Demented', 'Moderately Demented', 'Nondemented']
label_encoder = LabelEncoder()
label_encoder.fit(class_labels)

# Choose between training and validation directories
chosen_dir = random.choice([train_dir, val_dir])

# Get 10 random preprocessed images. get_random_images returns ONE array scaled
# to [0, 1]; unpacking it into two names fails, so the 0-255 images needed for
# the overlay are derived from that array here.
images_array = get_random_images(chosen_dir, num_images=10)
resized_images = np.clip(np.rint(images_array * 255.0), 0, 255).astype(np.uint8)
print(f"Processed {len(images_array)} images.")

# Use the last convolutional layer of the current model. Auto-generated Keras
# layer names carry a session-dependent counter (e.g. 'conv2d_46'), so a
# hard-coded name breaks after a kernel restart or a model rebuild.
conv_layers = [layer for layer in model.layers if isinstance(layer, tf.keras.layers.Conv2D)]
if not conv_layers:
    raise ValueError("`model` has no top-level Conv2D layer to visualise with Grad-CAM")
last_conv_layer = conv_layers[-1]
conv_layer_name = last_conv_layer.name

# Create a model with the same inputs as your original model and outputs of the last conv layer
grad_model = tf.keras.models.Model([model.inputs], [last_conv_layer.output, model.output])

# Generate Grad-CAM for each image
for i in range(len(images_array)):
    img_tensor = tf.convert_to_tensor(images_array[i:i+1])  # Convert to a TensorFlow tensor
    img_resized = resized_images[i]  # Corresponding 0-255 image

    with tf.GradientTape() as tape:
        tape.watch(img_tensor)  # Ensure the image tensor is being watched
        conv_outputs, predictions = grad_model(img_tensor)
        class_idx = tf.argmax(predictions[0])
        loss = predictions[:, class_idx]

    output = conv_outputs[0]
    grads = tape.gradient(loss, conv_outputs)[0]

    # Guided gradients - use positive gradients only for the chosen class
    cast_conv_outputs = tf.cast(output > 0, 'float32')
    cast_grads = tf.cast(grads > 0, 'float32')
    guided_grads = cast_conv_outputs * cast_grads * grads

    # Weighted feature map - spatial average of the gradients
    weights = tf.reduce_mean(guided_grads, axis=(0, 1))
    cam = np.dot(output, weights)

    # Normalize the heatmap; cv2.resize expects (width, height)
    cam = cv2.resize(cam, (img_resized.shape[1], img_resized.shape[0]))
    cam = np.maximum(cam, 0)  # ReLU
    heatmap = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
    heatmap = np.uint8(255 * heatmap)

    # Apply heatmap to original image
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    superimposed_img = heatmap * 0.4 + img_resized
    superimposed_img = np.clip(superimposed_img, 0, 255).astype(np.uint8)

    # Display the image with Grad-CAM heatmap
    plt.figure(figsize=(6, 6))
    predicted_class_index = class_idx.numpy()
    # NOTE(review): assumes the encoder's (alphabetically sorted) labels are in
    # the same order as the class indices used during training — confirm.
    predicted_class_label = label_encoder.inverse_transform([predicted_class_index])[0]
    # Reverse the channel axis: the overlay is BGR (OpenCV), imshow expects RGB
    plt.imshow(superimposed_img[:,:,::-1])
    plt.title(f"Image {i+1} - Predicted Class: {predicted_class_label}")
    plt.axis('off')
    plt.show()

    # TODO: Take 3 images from each class and see how the contour (heatmap) changes over time as training progresses.
    # TODO: Once the model is trained, show the original image and the heatmap image side by side to see the difference.



In [None]:
# Model summary
model.summary()

In [None]:
import os
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Model parameters
img_height = 224
img_width = 224
batch_size = 32
num_classes = 3  # mildly demented, moderately demented, nondemented

# Load VGG16 pre-trained on ImageNet data, without its classification head
# (include_top=False), for inputs of img_height x img_width x 3.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

# Freeze the layers of the base model so that only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Create a custom head for our dataset: flatten the VGG16 features, one
# 512-unit dense layer with dropout, then a softmax over `num_classes`.
# base_model is nested as a single layer of this Sequential model.
model = Sequential([
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Directory paths
data_dir = './data_split/'

# Function to filter directories
def filter_directories(directory):
    """List patient folders that contain all of 'tra', 'cor' and 'sag'.

    Scans ``directory/<class>/<patient>``; non-directory entries are skipped at
    both levels.

    Args:
        directory: Root folder holding one sub-folder per class.

    Returns:
        list: Paths of the patient folders whose listing includes all three
        names.
    """
    selected = []
    class_paths = [os.path.join(directory, name) for name in os.listdir(directory)]
    for class_path in filter(os.path.isdir, class_paths):
        patient_paths = [os.path.join(class_path, name) for name in os.listdir(class_path)]
        for patient_path in filter(os.path.isdir, patient_paths):
            contents = os.listdir(patient_path)
            has_all_views = 'tra' in contents and 'cor' in contents and 'sag' in contents
            if has_all_views:
                selected.append(patient_path)
    return selected

# Image Data Generators: pixel values are only rescaled by 1/255.
# NOTE(review): Keras documents a dedicated preprocess_input function for the
# VGG16 ImageNet weights; plain 1/255 rescaling differs from it. Confirm whether
# this is intentional, and keep inference-time preprocessing consistent with
# whatever is used here.
datagen = ImageDataGenerator(rescale=1./255)

# Training and Validation Directories
train_dir = './data_split/data_train/training/'
val_dir = './data_split/data_val/validation/'

# Training and Validation Generators: batches of (images, one-hot labels);
# 'categorical' matches the categorical_crossentropy loss of the model.
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

validation_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Model summary
model.summary()

# Calculate steps per epoch and validation steps:
# len() of a directory iterator is its number of batches per pass.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)

# Train the model and save the training history (used by the plotting cell
# that reads history.history)
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=25,  # Adjust the number of epochs
    validation_data=validation_generator,
    validation_steps=validation_steps
)


In [None]:
import matplotlib.pyplot as plt

# One figure with two side-by-side panels: accuracy on the left, loss on the
# right. Each panel shows the training curve first and the validation curve
# second, read from the History object returned by model.fit.
plt.figure(figsize=(12, 6))

panels = [
    # (train key, validation key, panel title, y-axis label)
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
]
for position, (train_key, val_key, panel_title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')

plt.show()


In [None]:
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder
import matplotlib.patches as mpatches
# Assume 'model' is your pre-trained model

# Number of images embedded with t-SNE. It is taken from the data itself: the
# previous hard-coded count made this cell raise whenever `images_array` had a
# different length.
subset_size = len(images_array)
if subset_size < 2:
    raise ValueError(f"t-SNE needs at least 2 images, but images_array has {subset_size}.")

# Print a compact summary instead of dumping the whole pixel array
print(images_array.shape)
print(subset_size)

# Modify the model to create a feature extractor that outputs the last Dense
# layer before the output layer. The layer is looked up by type rather than by
# an auto-generated name such as 'dense_8', which changes between sessions.
from tensorflow.keras.layers import Dense

dense_layers = [layer for layer in model.layers if isinstance(layer, Dense)]
if len(dense_layers) < 2:
    raise ValueError("Expected at least two Dense layers (hidden + output) in `model`.")
feature_extractor = Model(inputs=model.inputs, outputs=dense_layers[-2].output)

# Collect features for the subset
features = feature_extractor.predict(images_array)

# Apply t-SNE reduction with adjusted perplexity
tsne_perplexity = min(90, subset_size - 1)  # Perplexity must be less than the number of samples

# NOTE(review): three components are computed but only the first two are
# plotted below — confirm whether n_components=2 is what is wanted.
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` —
# check against the installed version.
tsne = TSNE(n_components=3, verbose=1, perplexity=tsne_perplexity, n_iter=300)
tsne_results = tsne.fit_transform(features)

# Adjust class counts based on the subset size
# NOTE(review): labels are assigned purely by position (first third, second
# third, remainder). Nothing visible here guarantees that `images_array` is
# ordered by class, so the colours may not reflect the true classes — replace
# with the real labels of the images.
num_mildly_demented = subset_size // 3
num_moderately_demented = subset_size // 3
num_nondemented = subset_size - (num_mildly_demented + num_moderately_demented)

# Create the class_names array for the subset
class_names = (['mildly demented'] * num_mildly_demented +
               ['moderately demented'] * num_moderately_demented +
               ['nondemented'] * num_nondemented)

# Ensure the class_names array length matches the subset size
assert len(class_names) == subset_size, "Number of class names must match number of images in the subset"

# Encode the class names into numeric labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(class_names)


# Plot the t-SNE results for the subset
plt.figure(figsize=(10, 8))
scatter = plt.scatter(tsne_results[:, 0], tsne_results[:, 1], c=labels, cmap='viridis')
plt.colorbar(scatter, ticks=range(len(np.unique(labels))))
plt.title('t-SNE of Feature Space')
plt.xlabel('Component 1')
plt.ylabel('Component 2')

# Create legend handles manually. Colours are taken from the scatter's own
# colormap and normalisation so that each patch matches the points it labels
# (i / len(unique_labels) did not match the min-max scaling used by scatter).
unique_labels = np.unique(labels)
legend_labels = label_encoder.inverse_transform(unique_labels)
legend_handles = [
    mpatches.Patch(color=scatter.cmap(scatter.norm(value)), label=label)
    for value, label in zip(unique_labels, legend_labels)
]

# Use only the handles for the legend
plt.legend(handles=legend_handles)
plt.show()

# The CNN performs a transformation of the input features; compare the image before and after that transformation.
# OK.

# Uses a custom generator

In [None]:
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Model parameters
img_height = 224
img_width = 224
batch_size = 32
num_classes = 3  # mildly demented, moderately demented, nondemented


# Directory paths
data_dir = './data_split/'

def filter_directories(directory):
    """Find the patient folders that provide the 'tra', 'cor' and 'sag' views.

    The expected layout is ``directory/<class>/<patient>``. Entries that are
    not directories are ignored at both levels.

    Args:
        directory: Root folder holding one sub-folder per class.

    Returns:
        list: Paths of the patient folders whose listing includes all three
        view names.
    """
    views = ['tra', 'cor', 'sag']
    kept = []
    for class_entry in os.listdir(directory):
        class_folder = os.path.join(directory, class_entry)
        # Non-directories contribute no patient entries
        patient_entries = os.listdir(class_folder) if os.path.isdir(class_folder) else []
        for patient_entry in patient_entries:
            patient_folder = os.path.join(class_folder, patient_entry)
            if os.path.isdir(patient_folder):
                present = set(os.listdir(patient_folder))
                if len(present.intersection(views)) == len(views):
                    kept.append(patient_folder)
    return kept

# Generator that only rescales pixel values by 1/255 (no 'validation_split').
datagen = ImageDataGenerator(rescale=1./255)

# Patient folders that contain all of 'tra', 'cor' and 'sag', per split.
# NOTE(review): these resolve to './data_split/training' and
# './data_split/validation', while train_dir / val_dir further down in this cell
# point to './data_split/data_train/training/' and
# './data_split/data_val/validation/' — confirm which layout is correct
# (os.listdir raises FileNotFoundError for a missing folder).
valid_training_dirs = filter_directories(data_dir + 'training')
valid_validation_dirs = filter_directories(data_dir + 'validation')

def custom_generator(directory, batch_size, target_size):
    """Endlessly yield shuffled ``(images, labels)`` batches read with OpenCV.

    ``directory`` must contain one sub-folder per class; files ending in
    .png/.jpg/.jpeg (case-insensitive) inside those folders are used.

    Args:
        directory: Root folder with one sub-folder per class.
        batch_size: Maximum number of images per yielded batch.
        target_size: Size handed to ``cv2.resize``, i.e. ``(width, height)``.

    Yields:
        tuple: ``(images_np, labels_np)``. ``images_np`` is a float32 array
        scaled by 1/255; ``labels_np`` is a 1-D array of integer class indices,
        where the index is the position of the class folder name in sorted
        order.

    Raises:
        ValueError: If no image file is found under ``directory``.

    Notes:
        * Labels are integer indices, not one-hot vectors: use a sparse
          categorical loss or one-hot encode them before a categorical loss.
        * Pixels come from ``cv2.imread`` (BGR channel order in OpenCV).
          NOTE(review): the Keras directory iterators used elsewhere in this
          notebook presumably deliver RGB — confirm before mixing pipelines.
    """
    # Imported here because this cell does not import numpy / OpenCV itself.
    import cv2
    import numpy as np

    # Stable class -> index mapping. Looking labels up with list.index() on the
    # shuffled per-image list (as before) returned the position of the first
    # image of that class instead of a class index.
    class_labels = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
    class_to_index = {label: index for index, label in enumerate(class_labels)}

    # Scan the folders once; each sample is (image path, class index).
    samples = []
    for label in class_labels:
        class_path = os.path.join(directory, label)
        for img_name in os.listdir(class_path):
            if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                samples.append((os.path.join(class_path, img_name), class_to_index[label]))

    if not samples:
        raise ValueError(f"No image files found under: {directory}")

    while True:
        # New random order for every pass over the data.
        np.random.shuffle(samples)

        for start in range(0, len(samples), batch_size):
            images = []
            labels = []
            for path, class_index in samples[start:start + batch_size]:
                img = cv2.imread(path)
                if img is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print(f"Error processing file: {path}, Error: could not read image")
                    continue
                try:
                    img = cv2.resize(img, target_size)
                except Exception as e:
                    print(f"Error processing file: {path}, Error: {e}")
                    continue
                images.append(img)
                labels.append(class_index)

            if not images:
                # Every file of this batch failed to load; nothing to yield.
                continue

            images_np = np.array(images, dtype='float32') / 255.0
            labels_np = np.array(labels)

            yield images_np, labels_np

# Directory paths of the training and validation images
train_dir = './data_split/data_train/training/'
val_dir = './data_split/data_val/validation/'

# Image Data Generator: rescales pixel values by 1/255, no augmentation.
# NOTE(review): training below uses these Keras directory iterators; the
# custom_generator defined earlier in this cell is not called here.
datagen = ImageDataGenerator(rescale=1./255)

# Batches of (images, one-hot labels); 'categorical' matches the
# categorical_crossentropy loss used when compiling the model below.
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Same settings for the validation images.
validation_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)


# Define a simple CNN model: three Conv2D + MaxPooling2D stages, then a
# 512-unit dense layer with dropout and a softmax over `num_classes`.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

# Calculate steps per epoch and validation steps:
# len() of a directory iterator is its number of batches per pass.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)

# Train the model.
# NOTE(review): the History object returned by fit() is not stored here.
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=25,  # You can adjust the number of epochs
    validation_data=validation_generator,
    validation_steps=validation_steps
)

In [None]:
%pip install opencv-python

In [None]:
import os
import numpy as np
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
from tensorflow.keras.models import Model

# Define the image and mask generator
def image_mask_generator(image_dir, mask_dir, batch_size, target_size, seed=42):
    """Yield ``(images, masks)`` batches read in lock-step from two directory trees.

    Both trees are read with ``flow_from_directory(class_mode=None)`` and rescaled
    by 1/255. Images are loaded with ``color_mode='rgb'``, masks with
    ``color_mode='grayscale'``.

    Args:
        image_dir: Root directory handed to ``flow_from_directory`` for the images.
        mask_dir: Root directory handed to ``flow_from_directory`` for the masks.
        batch_size: Number of samples requested per batch.
        target_size: ``(height, width)`` every image and mask is resized to.
        seed: Shuffle seed passed to *both* iterators so that they draw their
            samples in the same order. Without a common seed the two iterators
            shuffle independently and an image is paired with an unrelated mask.
            Pass ``None`` to get the previous, unseeded shuffling.

    Yields:
        ``(imgs_batch, masks_batch)`` with the same length along the first axis.

    Note:
        A shared seed only lines the two streams up when both trees hold the
        same number of files in the same order (presumably matching file names
        in matching sub-folders; verify against the data on disk). A warning is
        emitted when the sample counts differ.
    """
    import warnings

    image_datagen = ImageDataGenerator(rescale=1./255)
    mask_datagen = ImageDataGenerator(rescale=1./255)

    # Everything except the colour mode is identical for the two iterators;
    # sharing `seed` is what keeps the shuffled orders in sync.
    shared_flow_args = dict(
        target_size=target_size,
        batch_size=batch_size,
        class_mode=None,
        shuffle=True,
        seed=seed,
    )
    image_generator = image_datagen.flow_from_directory(
        image_dir, color_mode='rgb', **shared_flow_args
    )
    mask_generator = mask_datagen.flow_from_directory(
        mask_dir, color_mode='grayscale', **shared_flow_args
    )

    # Different sample counts mean the pairing cannot be trusted even with a seed.
    image_count = getattr(image_generator, 'samples', None)
    mask_count = getattr(mask_generator, 'samples', None)
    if image_count != mask_count:
        warnings.warn(
            f"image_mask_generator: {image_count} images in {image_dir!r} but "
            f"{mask_count} masks in {mask_dir!r}; batches will not be correctly paired."
        )

    while True:
        # Built-in next() works on any iterator, unlike the iterator-specific .next()
        imgs_batch = next(image_generator)
        masks_batch = next(mask_generator)

        # A short final batch on one side: truncate both to the common length
        if len(imgs_batch) != len(masks_batch):
            common = min(len(imgs_batch), len(masks_batch))
            imgs_batch = imgs_batch[:common]
            masks_batch = masks_batch[:common]

        yield imgs_batch, masks_batch

In [None]:
# Mask tree and image tree used for this sanity check.
# (Earlier alternative for the images: './data_split/data_train/training/';
#  a validation mask path, './notebooks/testing/', is intentionally not set here.)
train_mask_dir = './FSL_data_split/training/'
train_image_dir = './notebooks/training/'  # Replace with the path to your training data

# Batch geometry
batch_size, img_height, img_width = 32, 224, 224

# Paired image/mask generator over the training data
train_generator = image_mask_generator(
    train_image_dir,
    train_mask_dir,
    batch_size,
    (img_height, img_width),
)

# Pull a single batch and report what came back
imgs_batch, masks_batch = next(train_generator)
print("Shape of images batch:", imgs_batch.shape)
print("Shape of masks batch:", masks_batch.shape)

# Side-by-side view of the first image and its mask
plt.figure(figsize=(12, 6))

# Left panel: the image
plt.subplot(1, 2, 1)
plt.imshow(imgs_batch[0])
plt.title('Image')

# Right panel: the mask, drawn only when it has exactly one channel
plt.subplot(1, 2, 2)
mask_is_single_channel = len(masks_batch) > 0 and masks_batch[0].shape[-1] == 1
if not mask_is_single_channel:
    print("Invalid mask data")
else:
    plt.imshow(masks_batch[0].squeeze(), cmap='gray')
    plt.title('Mask')

plt.show()

In [None]:
# Draw one more batch from the training generator and report both shapes
test_images, test_masks = next(train_generator)
for _caption, _batch in (
    ("Test Image batch shape:", test_images),
    ("Test Mask batch shape:", test_masks),
):
    print(_caption, _batch.shape)

# List the entries directly under the mask root
print(os.listdir(train_mask_dir))


In [None]:
# Report the shapes of the first ten batches from the training generator
for i, (images, masks) in enumerate(train_generator):
    # The generator never ends by itself, so stop explicitly
    # (raise or lower the limit to inspect more or fewer batches)
    if i >= 10:
        break

    print(
        f"Batch {i}:",
        f"  Images shape: {images.shape}",
        f"  Masks shape: {masks.shape}",
        sep="\n",
    )


In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate, BatchNormalization, Dropout
from tensorflow.keras.models import Model

def conv_block(input_tensor, num_filters, dropout_rate=None, batch_norm=False):
    """Apply two 3x3 ReLU convolutions ('same' padding) to ``input_tensor``.

    Args:
        input_tensor: Tensor the first convolution is applied to.
        num_filters: Number of filters of both convolutions.
        dropout_rate: When truthy, a ``Dropout`` layer with this rate closes the block.
        batch_norm: When true, each convolution is followed by ``BatchNormalization``.

    Returns:
        The output tensor of the last layer in the block.
    """
    features = input_tensor

    # Two identical conv stages, each optionally followed by batch normalisation
    for _ in range(2):
        features = Conv2D(num_filters, (3, 3), activation='relu', padding='same')(features)
        if batch_norm:
            features = BatchNormalization()(features)

    if not dropout_rate:
        return features
    return Dropout(dropout_rate)(features)

def upsample_concat(block, bridge):
    """Upsample ``block`` with a stride-2 transposed convolution and join it with ``bridge``.

    The transposed convolution keeps the channel count of ``block`` (its last
    shape entry); the result is concatenated with ``bridge``, upsampled tensor first.
    """
    channels = block.shape[-1]
    upsampler = Conv2DTranspose(channels, (2, 2), strides=(2, 2), padding='same')
    return concatenate([upsampler(block), bridge])

def unet_model(input_size=(256, 256, 3), dropout_rate=0.3, batch_norm=True):
    """Build and compile a four-level U-Net with a single-channel sigmoid output.

    The encoder runs four convolution blocks (16, 32, 64, 128 filters), each
    followed by 2x2 max pooling; a 256-filter bottleneck follows; the decoder
    mirrors the encoder, upsampling and concatenating the matching encoder
    output before each convolution block.

    Args:
        input_size: ``(height, width, channels)`` of the network input. Height and
            width must be multiples of 16: each of the four poolings halves the
            size and each upsampling doubles it, so any other size makes a skip
            connection and its upsampled partner disagree in shape.
        dropout_rate: Dropout rate forwarded to *every* convolution block
            (previously only the first block received it). A falsy value
            disables dropout.
        batch_norm: Whether *every* convolution block applies batch normalisation
            (previously only the first block did).

    Returns:
        A ``Model`` compiled with the 'adam' optimizer, 'binary_crossentropy'
        loss and the 'accuracy' metric.

    Raises:
        ValueError: If a known height or width is not a multiple of 16.
    """
    encoder_filters = (16, 32, 64, 128)
    bottleneck_filters = 256

    # Fail early with a readable message instead of a shape error at concatenate
    required_multiple = 2 ** len(encoder_filters)
    for dim in input_size[:2]:
        if dim is not None and dim % required_multiple != 0:
            raise ValueError(
                f"unet_model: input height and width must be multiples of "
                f"{required_multiple}, got input_size={tuple(input_size)}"
            )

    inputs = Input(input_size)

    # Downsampling: remember each block's output for the skip connections
    skips = []
    x = inputs
    for filters in encoder_filters:
        x = conv_block(x, filters, dropout_rate, batch_norm)
        skips.append(x)
        x = MaxPooling2D((2, 2))(x)

    # Bottleneck
    x = conv_block(x, bottleneck_filters, dropout_rate, batch_norm)

    # Upsampling: deepest skip connection first
    for filters, skip in zip(reversed(encoder_filters), reversed(skips)):
        x = upsample_concat(x, skip)
        x = conv_block(x, filters, dropout_rate, batch_norm)

    # 1x1 convolution down to one sigmoid channel
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)

    model = Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Per-class training weights calculated from the dataset, indexed by class id:
# {0: 0.5244582730517705, 1: 1.8019145230885272, 2: 1.8576817573005246}
class_weights = [0.5244582730517705, 1.8019145230885272, 1.8576817573005246]  # Training weights


def weighted_cross_entropy(y_true, y_pred):
    """Binary cross-entropy scaled by a weight derived from ``class_weights``.

    The weight of each element is the sum over the last axis of
    ``class_weights * y_true``; the weighted losses are averaged to one scalar.

    NOTE(review): ``class_weights`` has three entries, so the product broadcasts
    against the last axis of ``y_true``. Presumably ``y_true`` is meant to be
    one-hot over three classes; confirm, because ``unet_model`` ends in a
    single-channel output.
    """
    per_element_loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    per_element_weight = tf.reduce_sum(tf.constant(class_weights) * y_true, axis=-1)
    return tf.reduce_mean(per_element_loss * per_element_weight)

# Build a U-Net and attach the class-weighted loss
model = unet_model()
model.compile(optimizer='adam', loss=weighted_cross_entropy, metrics=['accuracy'])

# NOTE(review): `model` is rebound to a fresh network on the next statement, so the
# weighted-loss model above is discarded and training below uses the loss that
# unet_model() compiles in. Confirm which of the two is intended.
model = unet_model()

# Image and mask roots for both splits.
# NOTE(review): images and masks come from different directory trees for both
# splits; verify that the files really pair up.
train_image_dir = './notebooks/training/'  # Replace with the path to your training data
train_mask_dir = './FSL_data_split/training'
val_image_dir = './data_split/data_test/testing'
val_mask_dir = './notebooks/testing/'  # Replace with the path to your validation data
# Alternatives kept for reference:
#   train_image_dir = './data_split/data_train/training'
#   val_mask_dir = './FSL_data_split/validation'

# Batch geometry (256x256 matches the default input size of unet_model)
batch_size, img_height, img_width = 32, 256, 256

# Paired image/mask generators
train_generator = image_mask_generator(
    train_image_dir, train_mask_dir, batch_size, (img_height, img_width)
)
val_generator = image_mask_generator(
    val_image_dir, val_mask_dir, batch_size, (img_height, img_width)
)

# Sample counts per split
train_image_count = 685  # Update with actual count
val_image_count = 231    # Update with actual count

# Ceiling division: a final partial batch still gets its own step
steps_per_epoch = -(-train_image_count // batch_size)
validation_steps = -(-val_image_count // batch_size)

# Training the model
epochs = 20  # Adjust this value
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
)


In [None]:
# import os
# import cv2
# import numpy as np

# def count_pixels_in_masks(directory):
#     class_counts = [0, 0, 0]  # Assuming three classes: 0, 1, 2

#     for foldername in os.listdir(directory):
#         folder_path = os.path.join(directory, foldername)
#         if os.path.isdir(folder_path):
#             for filename in os.listdir(folder_path):
#                 if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
#                     filepath = os.path.join(folder_path, filename)
#                     mask = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
#                     if mask is not None:
#                         for i in range(3):  # Assuming three classes
#                             class_counts[i] += np.sum(mask == i)

#     return class_counts

# # Directories
# train_dir = './data_split/data_train/training/'
# val_dir = './data_split/data_val/validation/'
# fsl_train_dir = './FSL_data_split/training/'
# fsl_val_dir = './FSL_data_split/validation/'

# # Count pixels in each class
# train_counts = count_pixels_in_masks(train_dir)
# val_counts = count_pixels_in_masks(val_dir)
# fsl_train_counts = count_pixels_in_masks(fsl_train_dir)
# fsl_val_counts = count_pixels_in_masks(fsl_val_dir)

# # Combine counts from training and validation sets
# combined_counts = [train_counts[i] + val_counts[i] + fsl_train_counts[i] + fsl_val_counts[i] for i in range(3)]

# print("Combined pixel counts for each class:", combined_counts)


In [None]:
import os
import cv2
import numpy as np

def count_pixels_in_class(directory):
    """Count pixels with value 0, 1 and 2 over the image files directly in ``directory``.

    Only regular files ending in .png/.jpg/.jpeg (case-insensitive) are read, as
    grayscale; files OpenCV cannot decode are skipped. Sub-directories are not
    descended into.

    Returns:
        A three-element list; entry ``i`` is the number of pixels equal to ``i``.
    """
    class_counts = [0, 0, 0]  # Adjust this if you have more classes
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if not os.path.isfile(full_path):
            continue
        if not entry.lower().endswith(image_suffixes):
            continue

        mask = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            continue  # unreadable image

        class_counts = [
            running + np.sum(mask == value)
            for value, running in enumerate(class_counts)
        ]

    return class_counts

# Directories for FSL masks
fsl_train_dir = './FSL_data_split/training/'  # Update with actual path
fsl_val_dir = './FSL_data_split/validation/'  # Update with actual path

# The regular (non-FSL) masks could be counted the same way; their directories
# would be './data_split/data_train/training/masks' and
# './data_split/data_val/validation/masks' (update with actual paths).

# Calculate pixel counts: training first, then validation
fsl_train_counts, fsl_val_counts = map(
    count_pixels_in_class, (fsl_train_dir, fsl_val_dir)
)

# Output results
for _caption, _counts in (
    ("FSL Training Counts:", fsl_train_counts),
    ("FSL Validation Counts:", fsl_val_counts),
):
    print(_caption, _counts)

In [None]:
def count_pixels_in_class(directory):
    """Count mask pixels per encoded class across the sub-folders of ``directory``.

    Every entry of ``directory`` gets a debug record; only entries that are
    directories are scanned. Inside them, regular files ending in
    .png/.jpg/.jpeg (case-insensitive) are read as grayscale and unreadable
    files are skipped.

    Returns:
        ``(class_counts, debug_info)`` where ``class_counts`` holds the number of
        pixels equal to 0, 127 and 191 (nondemented, mildly demented, moderately
        demented) and ``debug_info`` maps each entry name to its
        ``files_processed`` count and the set of ``unique_pixel_values`` seen.
    """
    class_counts = [0, 0, 0]  # [nondemented, mildly demented, moderately demented]
    debug_info = {}
    encoded_values = (0, 127, 191)  # grayscale value of each class, in class order
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for class_folder in os.listdir(directory):
        class_folder_path = os.path.join(directory, class_folder)
        folder_stats = {'files_processed': 0, 'unique_pixel_values': set()}
        debug_info[class_folder] = folder_stats

        if not os.path.isdir(class_folder_path):
            continue

        for filename in os.listdir(class_folder_path):
            filepath = os.path.join(class_folder_path, filename)
            if not (os.path.isfile(filepath) and filename.lower().endswith(image_suffixes)):
                continue

            mask = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                continue

            folder_stats['files_processed'] += 1
            folder_stats['unique_pixel_values'].update(np.unique(mask))

            # Update class counts based on the grayscale encoding
            for slot, pixel_value in enumerate(encoded_values):
                class_counts[slot] += np.sum(mask == pixel_value)

    return class_counts, debug_info


# Directories for FSL masks
fsl_train_dir = './FSL_data_split/training'  # Update with actual path
fsl_val_dir = './FSL_data_split/validation'  # Update with actual path

# Calculate pixel counts (training first, then validation)
(fsl_train_counts, fsl_train_debug), (fsl_val_counts, fsl_val_debug) = (
    count_pixels_in_class(mask_root) for mask_root in (fsl_train_dir, fsl_val_dir)
)

# Output results
for _caption, _value in (
    ("FSL Training Counts:", fsl_train_counts),
    ("FSL Training Debug Info:", fsl_train_debug),
    ("FSL Validation Counts:", fsl_val_counts),
    ("FSL Validation Debug Info:", fsl_val_debug),
):
    print(_caption, _value)

In [None]:
# Total number of counted pixels per split
total_pixels_train = sum(fsl_train_counts)
total_pixels_val = sum(fsl_val_counts)

# Weight of class i = total / (3 * count_i): the rarer the class, the larger its weight.
# NOTE(review): a class with a zero count makes this division fail (or yield inf,
# depending on the count's numeric type); verify that every class is present.
class_weights_train = dict(
    enumerate(total_pixels_train / (3.0 * count) for count in fsl_train_counts)
)
class_weights_val = dict(
    enumerate(total_pixels_val / (3.0 * count) for count in fsl_val_counts)
)

for _caption, _weights in (
    ("Class weights for Training:", class_weights_train),
    ("Class weights for Validation:", class_weights_val),
):
    print(_caption, _weights)


In [None]:
import os
import cv2
import numpy as np

def count_pixels_in_class(directory):
    """Count pixels with value 0, 1 and 2 across the sub-folders of ``directory``.

    Every entry of ``directory`` gets a debug record, but only entries that are
    directories are scanned. Regular files ending in .png/.jpg/.jpeg
    (case-insensitive) are read as grayscale; unreadable files are skipped.

    Returns:
        ``(class_counts, debug_info)``: ``class_counts[i]`` is the number of
        pixels equal to ``i``; ``debug_info`` maps each entry name to its
        ``files_processed`` count and the set of ``unique_pixel_values`` seen.
    """
    class_counts = [0, 0, 0]  # Adjust this if you have more classes
    debug_info = {}

    # Each sub-folder represents one class
    for class_folder in os.listdir(directory):
        class_folder_path = os.path.join(directory, class_folder)
        stats = debug_info[class_folder] = {'files_processed': 0, 'unique_pixel_values': set()}

        if not os.path.isdir(class_folder_path):
            continue

        for filename in os.listdir(class_folder_path):
            filepath = os.path.join(class_folder_path, filename)
            is_image_file = os.path.isfile(filepath) and filename.lower().endswith(('.png', '.jpg', '.jpeg'))
            if not is_image_file:
                continue

            mask = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                continue

            stats['files_processed'] += 1
            stats['unique_pixel_values'].update(np.unique(mask))

            for pixel_value, _ in enumerate(class_counts):
                class_counts[pixel_value] += np.sum(mask == pixel_value)

    return class_counts, debug_info

# Directories for FSL masks
fsl_train_dir = './FSL_data_split/training'  # Update with actual path
fsl_val_dir = './FSL_data_split/validation'  # Update with actual path

# Calculate pixel counts (training first, then validation)
_train_result = count_pixels_in_class(fsl_train_dir)
_val_result = count_pixels_in_class(fsl_val_dir)
fsl_train_counts, fsl_train_debug = _train_result
fsl_val_counts, fsl_val_debug = _val_result

# Output results
print("FSL Training Counts:", fsl_train_counts)
print("FSL Training Debug Info:", fsl_train_debug)
print("FSL Validation Counts:", fsl_val_counts)
print("FSL Validation Debug Info:", fsl_val_debug)


In [None]:
import os

# Dataset roots to inspect: the FSL splits.
# (Use './data_split/data_train/training/' and './data_split/data_val/validation/'
#  instead to count the other dataset.)
train_dir, val_dir = './FSL_data_split/training/', './FSL_data_split/validation/'
# Count the files held by each class directory
def count_images_in_directory(directory):
    """Map each sub-directory of ``directory`` to the number of regular files in it.

    Entries of ``directory`` that are not directories are ignored, and nested
    sub-directories are not counted. Every regular file counts, whatever its
    extension.
    """
    return {
        entry: sum(
            os.path.isfile(os.path.join(directory, entry, item))
            for item in os.listdir(os.path.join(directory, entry))
        )
        for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    }

# Count images in the training and validation directories separately
train_counts = count_images_in_directory(train_dir)
val_counts = count_images_in_directory(val_dir)

# Output the counts for each directory, training first
for _heading, _split_counts in (
    ("Training image counts by class:", train_counts),
    ("\nValidation image counts by class:", val_counts),
):
    print(_heading)
    for class_name, count in _split_counts.items():
        print(f"{class_name}: {count}")


In [None]:
# Show the kernel's current working directory, against which relative paths are resolved.
print(os.getcwd())
