In [None]:
# Code 1
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import datasets, layers, models
from tensorflow.keras.datasets import cifar100
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import MaxPooling2D, Conv2D, Flatten, Dense, Dropout, Activation
from sklearn.model_selection import train_test_split

#Load the CIFAR-100 train/test arrays
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

#Print the shape of every array as a quick sanity check
for _arr in (x_train, y_train, x_test, y_test):
    print(_arr.shape)

#Hold out 10% of the training data for validation (fixed seed so the split is repeatable)
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
)

#https://www.geeksforgeeks.org/image-classification-using-cifar-10-and-cifar-100-dataset-in-tensorflow/
def show_samples(data, labels):
    """Display a 3x4 grid of randomly chosen images, each titled with its label.

    Args:
        data: Image array indexed along axis 0; each entry is handed to
            ``plt.imshow``.
        labels: Label array aligned with ``data``; an entry may be a scalar
            or a one-element array.
    """
    # Start from an empty figure: plt.subplots() would also create a
    # full-size Axes that stays behind the 3x4 grid.
    plt.figure(figsize=(10, 10))
    for i in range(12):
        plt.subplot(3, 4, i + 1)
        k = np.random.randint(0, data.shape[0])
        # Flatten first so a one-element label array becomes a plain int
        # without relying on NumPy's deprecated array-to-scalar conversion.
        plt.title(int(np.ravel(labels[k])[0]))
        plt.imshow(data[k])
    plt.tight_layout()
    plt.show()

show_samples(x_train, y_train)


#Processing data
#Convert pixels to float32 and divide by 255
#https://github.com/LeoTungAnh/CNN-CIFAR-100/blob/main/CNN_models.ipynb
x_train = x_train.astype('float32') / 255.
x_val = x_val.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

#One-hot encode the target classes
#The class count lives in one place so the three encodings cannot drift apart
classes = 100
ytrain_categories = to_categorical(y_train, num_classes=classes)
yval_categories = to_categorical(y_val, num_classes=classes)
ytest_categories = to_categorical(y_test, num_classes=classes)

#Building CNN model
#Stacked convolution/pooling layers act as learned filters that pick up patterns from the training images
#https://github.com/uzairlol/CIFAR100-Image-Classification-CNN/blob/main/Item%20Image%20Model%20Training%20and%20Evaluation.ipynb
model = keras.models.Sequential()

#First convolution stage, fed with 32x32 RGB inputs
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(keras.layers.MaxPooling2D((2, 2)))

#Two further convolution stages with 64 filters each
for _ in range(2):
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(keras.layers.MaxPooling2D((2, 2)))

#Classifier head: flatten, one hidden layer with dropout, then one softmax unit per class
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(classes, activation='softmax'))

model.summary()


#Configure training: Adam optimiser, categorical cross-entropy on the one-hot targets, accuracy as the metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#main training section
#chatgpt helped to structure how model can undergo training
history = model.fit(
    x_train,
    ytrain_categories,
    batch_size=64,
    epochs=25,
    validation_data=(x_val, yval_categories),
)

#Score the trained model on the test split
test_loss, test_accuracy = model.evaluate(x_test, ytest_categories)
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

#visualisation of results through graphs using matplotlib
plt.figure(figsize=(10,5))

#Left panel: accuracy per epoch. The validation curve is drawn next to the
#training curve so over-fitting is visible, mirroring the loss panel.
plt.subplot(1,2,1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epoch Number')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

#Right panel: loss per epoch for the training and validation data
plt.subplot(1,2,2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epoch Number')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

#Stop the two panels' labels from overlapping, then render the figure
plt.tight_layout()
plt.show()
# Note: if an error occurs, try:
# - restarting the kernel
# - updating libraries and modules (e.g. NumPy and TensorFlow)

In [None]:
# Code 2
import os

# Silence TensorFlow's native (C++) log output. The variable has to be set
# before TensorFlow is imported, otherwise it has no effect.
# NOTE(review): if TensorFlow was already imported in this kernel (e.g. by an
# earlier cell), restart the kernel for the setting to apply.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.mixed_precision import Policy
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Make 'mixed_float16' the global Keras dtype policy
policy = Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

# Load the CIFAR-100 data from the Keras datasets module and count the distinct labels
(X_train, y_train), (X_test, y_test) = cifar100.load_data()
num_classes = np.unique(y_train).size
print(X_train.shape, X_test.shape)

# Look at a single image to see what the data looks like;
# index 12 is an arbitrary pick.
imageindex = 12
print(f"array pointer = {imageindex}")
print(f"image shape: {X_train[imageindex].shape}")
print(f"Class label: {y_train[imageindex][0]}") # [0] since the labels are a 2D array

# Render that image
plt.imshow(X_train[imageindex], cmap='viridis')
plt.show()

def check_pics(data, dataset_name):
    """
    Validate every image in ``data`` and print a summary for the set.

    Each image is tested, in this order, for:
    - Array type (must be a NumPy array)
    - Shape (32x32x3)
    - No NaNs (only floating-point arrays can hold them)
    - Pixel values (uint8, 0-255)

    Only the first failed check is reported for an image. The NaN test runs
    before the pixel test because the pixel test rejects every non-uint8
    array, which would otherwise make the NaN report unreachable.

    Args:
        data: Iterable of images to validate.
        dataset_name: Prefix used on every printed line.
    """
    bad_imgs = 0
    good_imgs = 0
    for i, img in enumerate(data):
        problem = None
        if not isinstance(img, np.ndarray):
            problem = "Not an array"
        elif img.shape != (32, 32, 3):
            problem = f"Shape {img.shape}, need (32, 32, 3)"
        elif np.issubdtype(img.dtype, np.floating) and np.isnan(img).any():
            problem = "NaN found"
        elif not (img.dtype == np.uint8 and img.min() >= 0 and img.max() <= 255):
            problem = f"Bad pixels, min={img.min()}, max={img.max()}"

        if problem is None:
            good_imgs += 1
        else:
            print(f"{dataset_name} img {i}: {problem}")
            bad_imgs += 1
    print(f"{dataset_name}: {good_imgs} good, {bad_imgs} bad")

print("Checking images...\n")
for _images, _label in ((X_train, "Train"), (X_test, "Test")):
    check_pics(_images, _label)

# Hold out 20% of the training data for validation (fixed seed so the split is repeatable)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=15)

# Confirm that images and labels line up in every split
print("Training set:", X_train.shape, y_train.shape)
print("Validating set:", X_val.shape, y_val.shape)
print("Test set:", X_test.shape, y_test.shape)

# Normalise pixels: cast to float32 and divide by 255
X_train, X_val, X_test = [arr.astype('float32') / 255 for arr in (X_train, X_val, X_test)]

# One-hot encode the labels of every split
y_train, y_val, y_test = [to_categorical(lbl, num_classes) for lbl in (y_train, y_val, y_test)]

counts = pd.DataFrame(columns=['Set', 'Class', 'Count'])
def count_cls(data, name):
    """Append the per-class sample counts of one split to the global ``counts`` table.

    Args:
        data: Labels, either one-hot rows of shape (N, C), a column of class
            ids of shape (N, 1), or a flat array of class ids of shape (N,).
        name: Split name stored in the 'Set' column.

    Side effects:
        Rebinds the module-level ``counts`` DataFrame with one extra row per
        class found in ``data`` (columns 'Set', 'Class', 'Count').
    """
    global counts
    data = np.asarray(data)
    # Only a 2-D array with more than one column is one-hot. Taking argmax
    # over an (N, 1) column of raw ids would report every sample as class 0.
    if data.ndim > 1 and data.shape[1] > 1:
        ids = np.argmax(data, axis=1)
    else:
        ids = data.ravel()
    cls, nums = np.unique(ids, return_counts=True)
    rows = pd.DataFrame({'Set': name, 'Class': [str(c) for c in cls], 'Count': nums})
    # One concat per call instead of one per class; the still-empty table is
    # replaced outright so no empty frame takes part in a concat.
    counts = rows if counts.empty else pd.concat([counts, rows], ignore_index=True)

# Tally the classes of every split into the shared counts table
for _labels, _split in ((y_train, "Train"), (y_val, "Val"), (y_test, "Test")):
    count_cls(_labels, _split)

# Bar chart of the per-class counts, grouped by split
plt.figure(figsize=(10, 6))
sns.barplot(x='Set', y='Count', hue='Class', data=counts)
plt.title("Class Counts")
plt.legend([], [], frameon=False)  # One legend entry per class is too many, so the legend is hidden

# Model 1: Very Lightweight CNN
# Super simple to train fast
def light_cnn(shape, classes):
    """Build and compile a small two-stage convolutional classifier.

    Args:
        shape: Input shape passed to the first convolution as ``input_shape``.
        classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimiser, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=shape, padding='same'))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(MaxPool2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    # This file sets a global 'mixed_float16' policy. The output layer is
    # pinned to float32 so the softmax probabilities (and the loss computed
    # from them) are not produced in half precision, as the TensorFlow
    # mixed-precision guide recommends.
    model.add(Dense(classes, activation='softmax', dtype='float32'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

cnn1 = light_cnn((32, 32, 3), num_classes)
cnn1.summary()

# Stop once val_loss has not improved for 5 epochs and roll back to the best weights
stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Feed the arrays through tf.data pipelines in batches of 128 (a bigger batch size);
# only the training data is shuffled
train_data = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_data = train_data.shuffle(10000).batch(128).prefetch(tf.data.AUTOTUNE)
val_data = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_data = val_data.batch(128).prefetch(tf.data.AUTOTUNE)

print("Training Light CNN...")
# Fewer epochs to keep the run quick
cnn1_hist = cnn1.fit(train_data, validation_data=val_data, epochs=20, callbacks=[stop])

# Plot training
def plot_training(hist, name):
    """Draw the loss and accuracy curves of a training run side by side.

    Args:
        hist: Object whose ``history`` attribute holds the 'loss', 'val_loss',
            'accuracy' and 'val_accuracy' series.
        name: Model name used as the prefix of both panel titles.
    """
    curves = pd.DataFrame(hist.history)
    plt.figure(figsize=(10, 4))
    # (training column, validation column, axis label) for each panel, left to right
    panels = (
        ('loss', 'val_loss', 'Loss'),
        ('accuracy', 'val_accuracy', 'Accuracy'),
    )
    for position, (train_col, val_col, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(curves[train_col], label='Train')
        plt.plot(curves[val_col], label='Val')
        plt.title(f'{name} {label}')
        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.legend()


plot_training(hist=cnn1_hist, name="light_cnn")

# Score the trained model on the test split
cnn1_loss, cnn1_acc = cnn1.evaluate(x=X_test, y=y_test)
print(f"Light CNN: Loss = {cnn1_loss:.4f}, Acc = {cnn1_acc:.4f}")

# Predict
# Display names for the 100 CIFAR-100 fine labels, indexed by class id.
# The code below looks names up by class id, so the list has to exist first.
# NOTE(review): the order follows the dataset's published fine-label list;
# confirm against the dataset's meta file if labels look wrong.
class_names = [
    'apple', 'aquarium_fish', 'baby', 'bear', 'beaver',
    'bed', 'bee', 'beetle', 'bicycle', 'bottle',
    'bowl', 'boy', 'bridge', 'bus', 'butterfly',
    'camel', 'can', 'castle', 'caterpillar', 'cattle',
    'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach',
    'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
    'dolphin', 'elephant', 'flatfish', 'forest', 'fox',
    'girl', 'hamster', 'house', 'kangaroo', 'keyboard',
    'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard',
    'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
    'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid',
    'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree',
    'plain', 'plate', 'poppy', 'porcupine', 'possum',
    'rabbit', 'raccoon', 'ray', 'road', 'rocket',
    'rose', 'sea', 'seal', 'shark', 'shrew',
    'skunk', 'skyscraper', 'snail', 'snake', 'spider',
    'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table',
    'tank', 'telephone', 'television', 'tiger', 'tractor',
    'train', 'trout', 'tulip', 'turtle', 'wardrobe',
    'whale', 'willow_tree', 'wolf', 'woman', 'worm',
]

idx = 100
img = X_test[idx:idx+1]  # slice (not index) so the batch axis is kept for predict()
true = np.argmax(y_test[idx])
probs = cnn1.predict(img)
pred = np.argmax(probs)

plt.figure(figsize=(3, 3))
plt.imshow(img[0])
plt.title(f"True: {class_names[true]}, Pred: {class_names[pred]}")
plt.axis('off')

# Probabilities (top 10)
# Cast to float64 so the ranking below does not depend on the model's output dtype.
prob_df = pd.DataFrame(np.asarray(probs[0], dtype='float64'), columns=['Prob'])
prob_df['Class'] = class_names
# Rank by probability; head(10) would only show the first ten class ids.
top_probs = prob_df.nlargest(10, 'Prob')
plt.figure(figsize=(12, 4))
sns.barplot(data=top_probs, x='Class', y='Prob')
plt.title('Top Probabilities')
plt.xticks(rotation=45)

# Results
def show_results(data, labels, model, names=None):
    """Plot and print a confusion matrix and classification report.

    Args:
        data: Inputs handed to ``model.predict``.
        labels: One-hot targets; the true class is the argmax of each row.
        model: Object whose ``predict`` returns one score per class.
        names: Optional display names, one per class id. When omitted, a
            module-level ``class_names`` is used if one exists; otherwise the
            class ids themselves ("0", "1", ...) are used.
    """
    preds = np.argmax(model.predict(data), axis=1)
    true = np.argmax(labels, axis=1)

    # Resolve display names without assuming a global has been defined
    if names is None:
        names = globals().get('class_names')
    if names is None:
        names = [str(i) for i in range(np.shape(labels)[1])]
    names = list(names)
    # Pin the label set so the matrix and report always have one row per
    # name, even when a class is missing from both truths and predictions.
    class_ids = np.arange(len(names))

    # Compute confusion matrix
    cm = confusion_matrix(true, preds, labels=class_ids)

    # Plot confusion matrix
    plt.figure(figsize=(20, 20))  # Big plot for 100 classes
    sns.heatmap(cm, annot=False, fmt='d', cmap='Blues', xticklabels=names, yticklabels=names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.xticks(rotation=90, fontsize=8)
    plt.yticks(rotation=0, fontsize=8)

    # Print confusion matrix
    print("Confusion Matrix (rows/cols are class names):")
    cm_df = pd.DataFrame(cm, index=names, columns=names)
    print(cm_df)

    # Classification report; zero_division=0 reports 0 for classes that are
    # never predicted, without emitting a warning per class.
    print("\nClassification Report:")
    print(classification_report(true, preds, labels=class_ids, target_names=names, zero_division=0))

# Confusion matrix and classification report for the light CNN on the test split
show_results(data=X_test, labels=y_test, model=cnn1)