# Star Trek Uniform Classifier - CNN

* Collect images of Star Trek uniforms
    * Red, Blue, Gold

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from keras.utils import to_categorical
from keras.layers import Conv2D # to add convolutional layers
from keras.layers import MaxPooling2D # to add pooling layers
from keras.layers import Flatten # to flatten data for fully connected layers
import os
from pathlib import Path
from PIL import Image

# Load data

In [None]:
# Root folder of the raw images; each category below is looked up as a
# sub-folder of this path.
data_dir = Path('rawData')
# Category sub-folder names handed to clean_and_verify_data.
categories = ['commandRed', 'scienceBlue', 'operationGold']

In [None]:
# function to handle images
def clean_and_verify_data(base_path, folder_list, target_size=(128, 128),
                          allowed_suffixes=('.jpg', '.jpeg', '.png')):
    """Clean the image folders in place.

    For every category folder under ``base_path``:

    * files whose suffix is not in ``allowed_suffixes`` are deleted,
    * files that cannot be opened/verified as images are deleted,
    * every remaining image is converted to RGB, resized to ``target_size``
      and written back over the original file.

    Sub-directories inside a category folder are left untouched, and a
    missing category folder is skipped instead of raising.

    WARNING: this is destructive -- originals are overwritten or removed.

    Args:
        base_path: Root folder containing one sub-folder per category.
        folder_list: Iterable of category sub-folder names.
        target_size: ``(width, height)`` every kept image is resized to.
        allowed_suffixes: File suffixes (case-insensitive) treated as images.
    """
    allowed = {suffix.lower() for suffix in allowed_suffixes}

    # iterate through categories
    for category in folder_list:
        path = Path(base_path) / category
        print(f"==== Checking for: {category} ====")

        if not path.is_dir():
            print(f"Skipping missing folder: {path}")
            continue

        # Snapshot the listing first: files are deleted while we iterate.
        for file_path in sorted(path.glob('*')):
            # unlink() only works on files; never try to delete a directory
            if not file_path.is_file():
                continue

            # remove everything except the allowed image types
            if file_path.suffix.lower() not in allowed:
                print(f"Revoming: {file_path.name}")
                file_path.unlink()
                continue

            try:
                # try to open the image to check for corruption
                with Image.open(file_path) as img:
                    img.verify()

                # Open again for the actual conversion (the original code
                # also used a fresh handle after verify()).
                with Image.open(file_path) as img:
                    cleaned = img.convert('RGB').resize(target_size)

                # The source handle is closed here, so overwriting is safe.
                cleaned.save(file_path)  # overwrite !!

            except (OSError, SyntaxError):
                print(f"Deleting file: {file_path.name}")
                file_path.unlink()


In [None]:
# Run the in-place cleanup on every category folder under data_dir.
# This modifies the files on disk (deletes or overwrites them).
clean_and_verify_data(data_dir, categories)

# Normalize data and split

In [None]:
from keras import layers
# Image geometry and batch size used by both splits.
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

# Loader options shared by the two calls below; only `subset` differs.
_split_options = dict(
    validation_split=0.2,
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical',
)

# Training part of the 80/20 split.
train_ds = keras.utils.image_dataset_from_directory(
    'rawData', subset="training", **_split_options
)

# Validation part of the same split (same seed and fraction).
val_ds = keras.utils.image_dataset_from_directory(
    'rawData', subset='validation', **_split_options
)

# Keep the class names for the plots and reports further down.
class_names = train_ds.class_names
print(f"Classes found: {class_names}")

# Multiply every pixel value by 1/255.
normalization_layer = layers.Rescaling(1./255)


def _rescale(batch_images, batch_labels):
    """Rescale the images of a batch; labels pass through unchanged."""
    return normalization_layer(batch_images), batch_labels


train_ds = train_ds.map(_rescale)
val_ds = val_ds.map(_rescale)

# Verifying that the data is good

## Visual verification

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Take a single batch from the training dataset and display its first image
# together with the class name decoded from its one-hot label.
for images, labels in train_ds.take(1):
    # images: expected shape (32, 128, 128, 3) for a full batch
    # labels: expected shape (32, 3), one-hot encoded
    
    first_image = images[0].numpy()
    first_label = labels[0].numpy()
    
    plt.imshow(first_image)
    # argmax turns the one-hot label back into an index into class_names
    plt.title(f"Label: {class_names[np.argmax(first_label)]}")
    plt.axis('off')
    plt.show()

    # Print the pixel range of the rescaled image: values should lie within
    # [0, 1] (a single image does not necessarily reach both ends).
    print(f"Max pixel value: {np.max(first_image)}")
    print(f"Min pixel value: {np.min(first_image)}")

## Shape verification

In [None]:
# Print the tensor shapes of one training batch.
for images, labels in train_ds.take(1):
    print(f"Image batch shape: {images.shape}") # expect (32, 128, 128, 3): batch size, height, width, RGB channels
    print(f"Label batch shape: {labels.shape}") # expect (32, 3): batch size, one-hot classes
    break  # take(1) already yields at most one batch; the break is only a safeguard

## Class balance verification - after cleaning

In [None]:
# Count the directory entries of every class folder to check class balance.
# os.listdir counts every entry, so the numbers are image counts only if the
# folders contain nothing but images.
for category in class_names:
    path = f'rawData/{category}'
    print(f"{category}: {len(os.listdir(path))} images")

* operationGold is a bit smaller. **NEEDS AUGMENTATION**

## Normalization check

In [None]:
# Inspect one training batch after rescaling: print the dtype and the value
# of the first channel of the top-left pixel of the first image.
for images, labels in train_ds.take(1):
    print(f"Data type: {images.dtype}") 
    print(f"First pixel value: {images[0, 0, 0, 0].numpy()}")
# expect float32, with the pixel value between 0 and 1

# Building Convolutional Neural Network

In [None]:
# building the model

# Augmentation pipeline: random horizontal flip, random rotation (factor 0.1)
# and random zoom (factor 0.1). The same object is placed at the front of
# every model in this notebook, so changing it affects all of them.
data_augmentation = Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1)
])

def star_trek_cnn_model(input_shape=(128, 128, 3), num_classes=3, learning_rate=0.0001):
    """Build and compile the from-scratch CNN classifier.

    Architecture:
        input -> shared ``data_augmentation`` pipeline
        -> 3 x (Conv2D 3x3 with ReLU + 2x2 max pooling) with 32, 64, 128 filters
        -> Flatten -> Dense(128, ReLU) -> Dropout(0.5)
        -> Dense(num_classes, softmax)

    Called without arguments it builds exactly the original 128x128 RGB,
    three-class model trained with Adam at a 1e-4 learning rate.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
        num_classes: Number of output classes (units of the softmax layer).
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model using categorical cross-entropy
        loss (one-hot labels) and an accuracy metric. Every call creates new
        layers, so the returned model starts from fresh weights.
    """
    model = keras.Sequential([
        # input
        layers.Input(shape=input_shape),
        data_augmentation,

        # first block, edges
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # second block, shapes
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # third block, complex patterns
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # classification head; dropout is the regularization step
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Build the from-scratch CNN and print its layer summary
# (no training happens in this cell).
custom_model = star_trek_cnn_model()
custom_model.summary()

# Training Model

In [None]:
# Train the from-scratch CNN for 25 epochs, evaluating on val_ds after each
# epoch; the returned History object is plotted later with plot_history.
history = custom_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    verbose=1
)

In [None]:
# Evaluate on val_ds. scores[0] is the loss and scores[1] the accuracy.
# NOTE(review): the printed labels say "Test", but the numbers come from the
# validation split that was also monitored during training -- there is no
# separate test set in this notebook.
scores = custom_model.evaluate(val_ds, verbose=0)
print(f"Test Accuracy: {scores[1]:.4f}")
print(f"Test Loss: {scores[0]:.4f}")

## Confusion matrix and loss curves

In [None]:
def plot_history(history):
    """Draw training vs. validation accuracy and loss in two side-by-side panels.

    Args:
        history: Object whose ``history`` dict provides the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series (one value per epoch).
            The x axis is the 0-based position within those series.
    """
    recorded = history.history
    # Read all four series up front so a missing key fails before any drawing.
    series = {
        key: recorded[key]
        for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    }
    x_axis = range(len(series['accuracy']))

    # (train key, train legend, val key, val legend, panel title, legend position)
    panels = (
        ('accuracy', 'Training Accuracy', 'val_accuracy', 'Validation Accuracy',
         'Training and Validation Accuracy', 'lower right'),
        ('loss', 'Training Loss', 'val_loss', 'Validation Loss',
         'Training and Validation Loss', 'upper right'),
    )

    plt.figure(figsize=(12, 5))

    for slot, panel in enumerate(panels, start=1):
        train_key, train_label, val_key, val_label, title, legend_loc = panel
        plt.subplot(1, 2, slot)
        plt.plot(x_axis, series[train_key], label=train_label)
        plt.plot(x_axis, series[val_key], label=val_label)
        plt.title(title)
        plt.legend(loc=legend_loc)
        plt.grid(True)

    plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

def plot_confusion_matrix(model, dataset, class_names):
    """Plot a confusion matrix and print a classification report.

    Args:
        model: Model whose ``predict`` returns per-class scores of shape
            ``(batch, num_classes)``.
        dataset: Iterable of ``(images, labels)`` batches. Labels may be
            one-hot encoded ``(batch, num_classes)`` or integer class ids
            of shape ``(batch,)`` or ``(batch, 1)``.
        class_names: Class names ordered by class index; used for the axis
            ticks and the report rows.

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    true_batches = []
    pred_batches = []

    # iterate through dataset, collecting class indices batch by batch
    for images, labels in dataset:
        preds = model.predict(images, verbose=0)

        # handle one-hot encoded and sparse labels
        labels = np.asarray(labels)
        if labels.ndim > 1 and labels.shape[1] > 1:
            labels = np.argmax(labels, axis=1)
        else:
            # flatten (batch, 1) labels so y_true stays one-dimensional
            labels = labels.reshape(-1)

        true_batches.append(labels)
        pred_batches.append(np.argmax(preds, axis=1))

    if not true_batches:
        raise ValueError("dataset yielded no batches; nothing to evaluate")

    y_true = np.concatenate(true_batches)
    y_pred = np.concatenate(pred_batches)

    # Pin the label set: without it a class that never occurs in this split
    # shrinks the matrix, which misaligns the tick labels and makes
    # classification_report reject target_names.
    label_ids = list(range(len(class_names)))

    # confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    # plot
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='magma',
                xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.title('Confusion Matrix: Model Performance Evaluation', fontsize=14)
    plt.show()

    # per-class precision / recall / f1
    print("\n--- CLASSIFICATION REPORT ---")
    print(classification_report(y_true, y_pred, labels=label_ids,
                                target_names=class_names))

In [None]:
# Confusion matrix and classification report of the from-scratch CNN on val_ds.
plot_confusion_matrix(custom_model, val_ds, class_names)

In [None]:
# Accuracy and loss curves of the from-scratch CNN training run.
plot_history(history)

# Using a pretrained Model

In [None]:
# Path to a local MobileNetV2 weights file, passed as `weights=` when the base
# model is created. The relative path is resolved against the working directory.
local_weights_path = 'mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5'

## Trying MobileNetV2

In [None]:
# transfer learning: MobileNetV2 feature extractor without its classifier head
base_model = keras.applications.MobileNetV2(
    weights=local_weights_path,
    input_shape=(128, 128, 3),
    include_top=False
)

# freeze the base model so only the new head is trained at first
base_model.trainable = False

mobilenet_model = keras.Sequential([
    layers.Input(shape=(128, 128, 3)),
    data_augmentation,
    # train_ds / val_ds were scaled by 1/255 (values in [0, 1]), but the
    # MobileNetV2 weights expect inputs in [-1, 1]: x * 2 - 1 bridges the two.
    layers.Rescaling(2.0, offset=-1.0),
    base_model,
    layers.GlobalAveragePooling2D(), # collapse each feature map to one value
    layers.Dropout(0.2),
    layers.Dense(3, activation='softmax')
])

mobilenet_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train only the new classification head (the base model is frozen) for
# 20 epochs. The History object is reused by the fine-tuning cell to decide
# where to resume the epoch count.
history_transfer = mobilenet_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    verbose=1
)

* worse than model built from scratch
    * need to unfreeze small bit of pretrained model

In [None]:
# unfreeze base model
base_model.trainable = True

# print(f"Number of layers in base model: {len(base_model.layers)}")
# base model has 154 layers

# Layers before this index stay frozen; with the 154 layers noted above,
# the last 24 layers (154 - 130) are candidates for fine-tuning.
fine_tune_at = 130

for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Keep BatchNormalization layers frozen inside the unfrozen tail as well:
# letting them update their statistics on this small dataset can undo what
# the pretrained network has learned.
for layer in base_model.layers[fine_tune_at:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# recompile so the trainable changes take effect // low learning rate
mobilenet_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [None]:
fine_tune_epochs = 15

# History.epoch lists the 0-based indices of the epochs that ran, so the next
# epoch to run is its length. Using the last entry (epoch[-1]) would start
# one epoch too early and run fine_tune_epochs + 1 epochs.
initial_epoch = len(history_transfer.epoch)
total_epochs = initial_epoch + fine_tune_epochs

history_fine_tune = mobilenet_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=total_epochs,
    initial_epoch=initial_epoch, # continue the epoch count where the first phase stopped
    verbose=1
)

* Accuracy 90%
* Validation accuracy 75%
* overfitting!

* The model is memorizing the training set (overfitting): training loss decreases while validation loss increases
* "commandRed" class precision is 0.53
* scienceBlue f1-score is 0.61
* the model predicted commandRed for 39 instances that were actually scienceBlue
-----
* increase dropout
* L2 regularization

In [None]:
from keras import regularizers
# Vary the images more to make memorizing harder. data_augmentation is shared
# by every model in this notebook, so add the contrast layer only once --
# otherwise each re-run of this cell would stack another RandomContrast.
if not any(isinstance(layer, layers.RandomContrast) for layer in data_augmentation.layers):
    data_augmentation.add(layers.RandomContrast(0.2))

# rebuild the model with a larger, L2-regularized head and stronger dropout
mobilenet_model = keras.Sequential([
    layers.Input(shape=(128, 128, 3)),
    data_augmentation,
    # train_ds / val_ds were scaled by 1/255 (values in [0, 1]), but the
    # MobileNetV2 weights expect inputs in [-1, 1]: x * 2 - 1 bridges the two.
    layers.Rescaling(2.0, offset=-1.0),
    base_model,
    layers.GlobalAveragePooling2D(),

    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax')
])

In [None]:
# Compile the rebuilt model, again with a low learning rate.
mobilenet_model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=0.00001),
    metrics=['accuracy'],
)

# Early stopping: give up once val_loss has not improved for 5 epochs in a
# row and restore the weights of the best epoch.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

# Images per class index -- 0: red (410), 1: gold (278), 2: blue (396).
class_counts = {0: 410, 1: 278, 2: 396}
total = sum(class_counts.values())

# Inverse-frequency weights: the smaller a class, the larger its weight.
class_weight = {
    class_index: (1 / count) * (total / 3.0)
    for class_index, count in class_counts.items()
}

# Train with class weighting and early stopping.
history_final = mobilenet_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=50,
    class_weight=class_weight,
    callbacks=[early_stop],
    verbose=1,
)

In [None]:
# Confusion matrix and classification report of the current mobilenet_model on val_ds.
plot_confusion_matrix(mobilenet_model, val_ds, class_names)

In [None]:
# Plot the curves of the fine-tuning run.
# NOTE(review): the most recent training run is stored in history_final, while
# this plots history_fine_tune from the earlier fine-tuning phase -- confirm
# which run is meant to be shown here.
plot_history(history_fine_tune)

## Trying EfficientNetB0

In [None]:
# Loader options shared by both raw splits; only `subset` differs. No
# rescaling is applied afterwards, so pixel values stay as loaded.
_raw_split_options = dict(
    validation_split=0.2,
    seed=42,
    image_size=(128, 128),
    batch_size=32,
    label_mode='categorical',
)

# Training part of the 80/20 split.
train_ds_raw = keras.utils.image_dataset_from_directory(
    'rawData', subset="training", **_raw_split_options
)

# Validation part of the same split (same seed and fraction).
val_ds_raw = keras.utils.image_dataset_from_directory(
    'rawData', subset="validation", **_raw_split_options
)

In [None]:
# EfficientNetB0 feature extractor with 'imagenet' weights and without its
# classifier head. This rebinds `base_model`, which referred to the
# MobileNetV2 base in the cells above.
base_model = keras.applications.EfficientNetB0(
    weights='imagenet',
    input_shape=(128,128,3),
    include_top=False
)

# freeze: only the new head is trained in the first phase
base_model.trainable = False

In [None]:

# Build the EfficientNetB0 classifier: shared augmentation, Gaussian noise,
# frozen base model, global average pooling and a small dense head.
efficientnet_model = keras.Sequential([
    layers.Input(shape=(128, 128, 3)),
    data_augmentation,
    # NOTE(review): this model is trained on train_ds_raw, which is not
    # rescaled. A noise stddev of 0.1 looks tuned for [0, 1] inputs and is
    # presumably negligible on raw pixel values -- confirm the intended scale.
    layers.GaussianNoise(0.1), # Add random static to the image
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'), # Smaller dense layer = less capacity to memorize
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax')
])

# freeze (repeats the setting made when base_model was created)
base_model.trainable = False

# Compile with a low learning rate; macro-averaged F1 is tracked next to accuracy.
efficientnet_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy', keras.metrics.F1Score(average='macro')]
)

In [None]:
# First phase: train the new head on the raw (un-rescaled) datasets with the
# base model frozen. Runs for at most 15 epochs; the early_stop callback may
# end the run sooner. class_weight and early_stop come from the MobileNetV2
# section above.
history_eff = efficientnet_model.fit(
    train_ds_raw,
    validation_data=val_ds_raw,
    epochs=15,
    class_weight=class_weight,
    callbacks=[early_stop], # early stopping from before
    verbose=1
)

In [None]:
# unfreeze base model
base_model.trainable = True

# Layers before this index stay frozen (per the original note this leaves
# the last 50 layers of the base model open for fine-tuning).
fine_tune_at = 188

for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Keep BatchNormalization layers frozen inside the unfrozen tail as well:
# letting them update their statistics on this small dataset can sharply
# reduce accuracy right after unfreezing.
for layer in base_model.layers[fine_tune_at:]:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# recompile so the trainable changes take effect // low learning rate
efficientnet_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Number of additional fine-tuning epochs.
history_eff_epochs = 15

# Resume counting after the epochs that actually ran in the first phase.
# That phase was capped at 15 epochs and may have stopped early, so its
# length is read from the History object instead of assuming a fixed 20.
# History.epoch holds 0-based indices, so the next epoch is its length.
initial_epoch = len(history_eff.epoch)
total_epochs = initial_epoch + history_eff_epochs

history_enb0 = efficientnet_model.fit(
    train_ds_raw,
    validation_data=val_ds_raw,
    epochs=total_epochs,
    class_weight=class_weight,
    callbacks=[early_stop],
    initial_epoch=initial_epoch, # continue the epoch count where the first phase stopped
    verbose=1
)

In [None]:
# Confusion matrix and classification report of the EfficientNetB0 model on
# the raw (un-rescaled) validation split.
plot_confusion_matrix(efficientnet_model, val_ds_raw, class_names)

In [None]:
# Accuracy and loss curves of the EfficientNetB0 fine-tuning phase.
plot_history(history_enb0)

# Validation

---
* Scratch Model
    * accuracy: 0.8921 || 89%
    * loss: 0.2779 || 27%
    * val_accuracy: 0.8981 || 89%
    * val_loss: 0.3372 || 33%
* MobileNetV2
    * accuracy: 0.9063 || 90%
    * loss: 0.2612 || 26%
    * val_accuracy: 0.7580 || 75%
    * val_loss: 0.6910 || 69%
* EfficientNetB0
    * accuracy: 0.7079 || 70%
    * loss: 0.6972 || 69%
    * val_accuracy: 0.7452 || 74%
    * val_loss: 0.6973 || 69%

In [None]:
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold

# --- CONFIGURATION ---
IMG_SIZE = (128, 128)
BATCH_SIZE = 32
DATA_DIR = 'rawData'

# --- DATA COLLECTION ---
# Only real sub-directories count as classes: a stray file in DATA_DIR would
# otherwise take a class index and shift the labels of the classes after it.
class_names = sorted(
    entry for entry in os.listdir(DATA_DIR)
    if not entry.startswith('.') and os.path.isdir(os.path.join(DATA_DIR, entry))
)

file_paths = []
labels = []

for i, class_name in enumerate(class_names):
    class_path = os.path.join(DATA_DIR, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the seeded
    # K-fold split reproducible. Hidden entries and sub-directories are skipped.
    images = sorted(
        os.path.join(class_path, f)
        for f in os.listdir(class_path)
        if not f.startswith('.') and os.path.isfile(os.path.join(class_path, f))
    )
    file_paths.extend(images)
    labels.extend([i] * len(images))  # integer label = position in class_names

# numpy arrays so the fold index arrays can be used for fancy indexing
file_paths = np.array(file_paths)
labels = np.array(labels)

# --- HELPER: DATASET GENERATOR ---
def get_dataset(paths, labels_int):
    """Build a batched, prefetched dataset from image paths and integer labels.

    Args:
        paths: Sequence of image file paths.
        labels_int: Integer class ids aligned with ``paths``; they are
            one-hot encoded over 3 classes.

    Returns:
        A ``tf.data.Dataset`` of ``(image, one_hot_label)`` batches of size
        ``BATCH_SIZE``. Images are decoded with 3 channels, resized to
        ``IMG_SIZE`` and divided by 255. Elements keep the order of ``paths``
        (no shuffling is applied here).
    """
    def _load_and_scale(image_path, one_hot_label):
        raw_bytes = tf.io.read_file(image_path)
        pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
        pixels = tf.image.resize(pixels, IMG_SIZE)
        # scale the pixel values
        return pixels / 255.0, one_hot_label

    # one-hot labels to match the softmax output / categorical cross-entropy
    one_hot = tf.keras.utils.to_categorical(labels_int, num_classes=3)

    dataset = tf.data.Dataset.from_tensor_slices((paths, one_hot))
    dataset = dataset.map(_load_and_scale)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset.prefetch(tf.data.AUTOTUNE)

# --- K-FOLD LOOP ---
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
fold_results = []

# Seeded generator so the sample order of every fold is reproducible.
shuffle_rng = np.random.default_rng(42)

for fold, (train_idx, val_idx) in enumerate(skf.split(file_paths, labels)):
    print(f"\n fold {fold+1}/{N_SPLITS}")

    # file_paths is built class by class and get_dataset does not shuffle, so
    # without mixing the indices a training batch can consist of one class
    # only. Validation order does not affect the metrics and is left as is.
    train_idx = shuffle_rng.permutation(train_idx)

    train_ds = get_dataset(file_paths[train_idx], labels[train_idx])
    val_ds = get_dataset(file_paths[val_idx], labels[val_idx])

    # re-create the model so every fold starts from fresh weights
    model = star_trek_cnn_model()

    # early stopping against overfitting; a new callback per fold
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(train_ds, validation_data=val_ds, epochs=30, verbose=0, callbacks=[early_stop])

    # evaluate returns [loss, accuracy]; only the accuracy is kept
    _, acc = model.evaluate(val_ds, verbose=0)
    fold_results.append(acc)
    print(f"Fold {fold+1} Accuracy: {acc*100:.2f}%")

# --- SUMMARY ---
print(f"\n--- SUMMARY ---")
print(f"Mean Accuracy: {np.mean(fold_results)*100:.2f}%")
print(f"Standard Deviation: {np.std(fold_results)*100:.2f}%")