In [2]:
# get tiny-imagenet from huggingface datasets
# https://huggingface.co/datasets/viewer/?dataset=tiny_imagenet

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from tensorflow.keras import backend as K
from tensorflow.keras import utils
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, ResNet50, MobileNetV2, DenseNet121, DenseNet169,EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from datasets import load_dataset
from tensorflow.keras.utils import to_categorical

# import early stopping and model checkpoint
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import pickle


2023-11-01 18:11:04.351814: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-01 18:11:04.351839: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-01 18:11:04.352426: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-01 18:11:04.428082: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [6]:

HISTORY_DIR = "training_histories"
if not os.path.exists(HISTORY_DIR):
    os.makedirs(HISTORY_DIR)
def save_history(history, model_name):
    with open(os.path.join(HISTORY_DIR, model_name + ".pkl"), 'wb') as file:
        pickle.dump(history.history, file)


def get_top_layers(img_shape, num_classes):
    """
    This function provides different top layers for fine-tuning.
    """
    # Flattening and dense layers
    top_flat_dense = [
        layers.Flatten(),
        layers.Dense(1024, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(256, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ]




    return [top_flat_dense]

parameters = {
    "imagenet": {
        "dataset_path": "Maysee/tiny-imagenet",
        "split": ["train", "valid"],
        "input_shape": (64, 64, 3),
        "num_classes": 200,
        "image_key": "image",
    },
    "cifar10": {
        "dataset_path": "cifar10",
        "split": ["train", "test"],
        "input_shape": (32, 32, 3),
        "num_classes": 10,
        "image_key": "img",
    },
    # "beans": {
    #     "dataset_path": "beans",
    #     "split": ["train+validation", "test"],
    #     "input_shape": (500, 500, 3),
    #     "num_classes": 3
    # }
}

# Define the base models
base_model_functions = [
    lambda shape: EfficientNetB0(include_top=False, input_shape=shape, weights=None),  # Replacing Xception
    lambda shape: ResNet50(include_top=False, input_shape=shape, weights=None),
    lambda shape: MobileNetV2(include_top=False, input_shape=shape, weights=None),
    lambda shape: DenseNet121(include_top=False, input_shape=shape, weights=None),
    lambda shape: DenseNet169(include_top=False, input_shape=shape, weights=None),
    lambda shape: VGG16(include_top=False, input_shape=shape, weights=None)
]
def get_data_set(Parameter, validation_split=0.1):
    train_dataset, test_dataset = load_dataset(Parameter["dataset_path"], split=Parameter["split"])
    
    # Processing images
    images = train_dataset[Parameter["image_key"]]
    images = [img.convert("RGB") if len(img.split()) == 1 else img for img in images]
    labels = train_dataset['label']
    X = np.array([np.array(img) for img in images])
    # Normalize
    y = np.array(labels)
    # one hot encode
    y = utils.to_categorical(y, num_classes=Parameter["num_classes"])

    # Process test images
    test_images = test_dataset[Parameter["image_key"]]
    test_images = [img.convert("RGB") if len(img.split()) == 1 else img for img in test_images]
    test_labels = test_dataset['label']
    X_test = np.array([np.array(img) for img in test_images])
    # Normalize
    y_test = np.array(test_labels)
    # one hot encode
    y_test = to_categorical(y_test, num_classes=Parameter["num_classes"])
    
    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=validation_split, random_state=42, stratify=y)
    
    # Image Data Generator for augmentation
    datagen = ImageDataGenerator(
    rotation_range=40,            # Random rotation in the range [0, 40)
    width_shift_range=0.2,        # Fraction of total width for random horizontal shifts
    height_shift_range=0.2,       # Fraction of total height for random vertical shifts
    shear_range=0.2,              # Shear Intensity (shear angle in counter-clockwise direction in degrees)
    zoom_range=0.2,               # Range for random zoom
    horizontal_flip=True,         # Randomly flip inputs horizontally
    vertical_flip=True,           # Randomly flip inputs vertically
    brightness_range=(0.8, 1.2),  # Range for picking a brightness shift value
    fill_mode='nearest',          # Points outside the boundaries are filled according to the given mode
    rescale=1./255                # Rescale factor (original_value * rescale)
)
    
    datagen.fit(X_train)
    
    train_gen = datagen.flow(X_train, y_train, batch_size=32)    
    return train_gen, X_val, y_val, X_test, y_test

final_models = {}
def train_model(model, model_name, train_gen, X_val,y_val, X_test, y_test, epochs=20, batch_size=32,):
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    # Create EarlyStopping callback
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True, verbose=1)
    
    # Create ModelCheckpoint callback to save best model weights
    checkpoint_path = f'best_weights_{model_name}.h5'
    checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_accuracy', save_best_only=True, verbose=1)
    
    history = model.fit(train_gen, 
                        epochs=epochs, 
                        validation_data=(X_val,y_val),
                        batch_size=batch_size,
                        verbose=1,
                        callbacks=[early_stopping, checkpoint])
    
    # Load the best weights
    model.load_weights(checkpoint_path)
    
    eval = model.evaluate(X_test, y_test, verbose=0)
    
    return history, eval


# Initialize CSV file with headers
# with open("model_training_results.csv", "w") as file:
    # file.write("dataset,base_model,top_layers_variant,model_name,train_accuracy,train_loss, test_accuracy\n")

# add a counter skip first models

skip = 0
counter = 0
for dataset, data_params in parameters.items():
    # Load data for each dataset
    train_gen, X_val, y_val, X_test, y_test = get_data_set(data_params)
    
    img_shape = data_params["input_shape"]
    num_classes = data_params["num_classes"]

    for base_func in base_model_functions:
        base = base_func(img_shape)
        top_layers_variants = get_top_layers(img_shape, num_classes)
        
        for idx, top_layers in enumerate(top_layers_variants):
            counter += 1
            if counter <= skip:
                continue
            
            top_layers_name = f"v2top_variant_{idx+1}"
            model_name = f"{base.name}_{top_layers_name}_on_{dataset}"
            model = models.Sequential([base] + top_layers)
            # print start training
            print(f"Training {model_name}")
            # Train the model
            history, eval = train_model(model, model_name,train_gen, X_val, y_val, X_test, y_test, epochs=50)
            save_history(history, model_name)
            # Save results immediately.
            with open("model_training_results.csv", "a") as file:
                file.write(f"{dataset},{base.name},{top_layers_name},{model_name},{history.history['accuracy'][-1]},{history.history['loss'][-1]},{eval[1]}\n")


Training densenet121_top_variant_1_on_imagenet
Epoch 1/50
Epoch 1: val_accuracy improved from -inf to 0.00550, saving model to best_weights_densenet121_top_variant_1_on_imagenet.h5


  saving_api.save_model(


Epoch 2/50

KeyboardInterrupt: 

In [27]:
train_gen, X_val, y_val, X_test, y_test = get_data_set(parameters["cifar10"])

In [31]:
# define VGG16 to train on CIFAR

# load cifar on tensorflow

cifar10 = tf.keras.datasets.cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

model = VGG16(include_top=False, input_shape=(32, 32, 3), weights=None)
top_layer = Sequential([
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

model = Sequential([model, top_layer])

model.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f11459ae9a0>

In [32]:
# define resnet50 for cifar

model = ResNet50(include_top=False, input_shape=(32, 32, 3), weights=None)

top_layer = Sequential([
    layers.GlobalAveragePooling2D(),
    layers.Dense(10, activation='softmax')
])

model = Sequential([model, top_layer])

model.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50


2023-10-29 03:19:54.616203: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.57GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-10-29 03:19:54.616236: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.57GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-10-29 03:19:57.506256: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.60GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-10-29 03:19:57.506291: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU



2023-10-29 03:20:46.390011: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.57GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-10-29 03:20:46.390044: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.57GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f1145e0f5b0>

In [17]:
d1,d2 = load_dataset("Maysee/tiny-imagenet", split=["train", "valid"])

In [18]:
a = d1['label']

In [4]:
import matplotlib.pyplot as plt

def plot_training_histories(model_names):
    for model_name in model_names:
        with open(os.path.join(HISTORY_DIR, model_name + ".pkl"), 'rb') as file:
            history = pickle.load(file)
        
        # Plotting training and validation accuracy
        plt.figure(figsize=(12, 5))
        plt.subplot(1, 2, 1)
        plt.plot(history['accuracy'], label='Training Accuracy')
        plt.plot(history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'Accuracy for {model_name}')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        
        # Plotting training and validation loss
        plt.subplot(1, 2, 2)
        plt.plot(history['loss'], label='Training Loss')
        plt.plot(history['val_loss'], label='Validation Loss')
        plt.title(f'Loss for {model_name}')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        
        plt.tight_layout()
        plt.show()

# After training all models
trained_model_names = [f"{base.name}_top_variant_{idx+1}_on_{dataset}" for base in base_model_functions for idx in range(3)]
plot_training_histories(trained_model_names)


AttributeError: 'function' object has no attribute 'name'

In [9]:
# define resnet 18 to train on cifar10
from keras.models import Model
class ResnetBlock(Model):
    def __init__(self, channels: int, down_sample=False):
        super(ResnetBlock, self).__init__()
        self.down_sample = down_sample
        self.channels = channels
        # Set stride to (2, 2) if downsampling is required
        self.conv1_stride = (2, 2) if self.down_sample else (1, 1)
        self.conv1 = layers.Conv2D(self.channels, (3, 3), padding='same', activation='relu', strides=self.conv1_stride)
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2D(self.channels, (3, 3), padding='same', activation='relu')
        self.bn2 = layers.BatchNormalization()
        if self.down_sample:
            self.downsample_conv = layers.Conv2D(self.channels, (1, 1), strides=(2, 2), padding='same', activation='relu')
            self.downsample_bn = layers.BatchNormalization()
        self.relu = layers.ReLU()

    def call(self, inputs):
        residual = inputs
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        
        if self.down_sample:
            residual = self.downsample_conv(inputs)
            residual = self.downsample_bn(residual)
        
        x = layers.add([x, residual])
        x = self.relu(x)
        return x
class ResNet18(Model):
    def __init__(self, num_classes):
        super(ResNet18, self).__init__()
        self.conv1 = layers.Conv2D(64, (7,7), strides=(2,2), padding='same', activation='relu')
        self.bn1 = layers.BatchNormalization()
        self.maxpool = layers.MaxPool2D((3,3), strides=(2,2), padding='same')
        self.layer1 = ResnetBlock(64)
        self.layer2 = ResnetBlock(128, down_sample=True)
        self.layer3 = ResnetBlock(256, down_sample=True)
        self.layer4 = ResnetBlock(512, down_sample=True)
        self.avgpool = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_classes, activation='softmax')
    def call(self,inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x) 
        x = self.avgpool(x)
        x = self.fc(x)
        return x
model = ResNet18(10)

In [12]:
from keras.datasets import cifar10
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

In [15]:
model = ResNet18(10)

train_gen, X_val, y_val, X_test, y_test = get_data_set(parameters["cifar10"])

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255.0
X_test /= 255.0
aug = ImageDataGenerator(horizontal_flip=True, width_shift_range=0.05,
                             height_shift_range=0.05)
aug.fit(X_train)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
STEPS = len(X_train) // 256
history = model.fit(aug.flow(X_train,Y_train,batch_size = 256), steps_per_epoch=STEPS, batch_size = 256, epochs=50, validation_data=(X_train, Y_train))

KeyboardInterrupt: 

In [7]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

# Load the CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

# Build a simple CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),  # Dropout layer to prevent overfitting
    Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(train_images, train_labels, epochs=10, 
                    validation_data=(test_images, test_labels))

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print("\nTest accuracy:", test_acc)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
313/313 - 0s - loss: 0.8749 - accuracy: 0.7027 - 360ms/epoch - 1ms/step

Test accuracy: 0.7027000188827515


In [14]:
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0
X_train, y_train = train_images, train_labels
X_test, y_test = test_images, test_labels

In [15]:
from tensorflow.keras.layers import BatchNormalization, MaxPool2D
model5 = Sequential()
model5.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
model5.add(BatchNormalization())
model5.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model5.add(BatchNormalization())
model5.add(MaxPool2D((2, 2)))
model5.add(Dropout(0.2))
model5.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model5.add(BatchNormalization())
model5.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model5.add(BatchNormalization())
model5.add(MaxPool2D((2, 2)))
model5.add(Dropout(0.3))
model5.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model5.add(BatchNormalization())
model5.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model5.add(BatchNormalization())
model5.add(MaxPool2D((2, 2)))
model5.add(Dropout(0.4))
model5.add(Flatten())
model5.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
model5.add(BatchNormalization())
model5.add(Dropout(0.5))
model5.add(Dense(10, activation='softmax'))
# compile model
# opt = SGD(lr=0.001, momentum=0.9)
model5.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model5.fit_generator(train_generator,epochs=200,steps_per_epoch=training_steps,validation_data=test_generator,validation_steps=validation_steps,callbacks=[board])
history5=model5.fit(X_train,y_train,epochs=50,validation_data=(X_test,y_test))


Epoch 1/50


ValueError: in user code:

    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1377, in train_function  *
        return step_function(self, iterator)
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1360, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1349, in run_step  **
        outputs = model.train_step(data)
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1127, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1185, in compute_loss
        return self.compiled_loss(
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/losses.py", line 143, in __call__
        losses = call_fn(y_true, y_pred)
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/losses.py", line 270, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/losses.py", line 2221, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/home/an/miniconda3/envs/myenv/lib/python3.9/site-packages/keras/src/backend.py", line 5575, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 10) are incompatible


In [5]:
train_gen, X_val, y_val, X_test, y_test = get_data_set(parameters["cifar10"])
model.fit(train_gen, 
                        epochs=50, 
                        validation_data=(X_val,y_val),
                        batch_size=256,
                        verbose=1,
                    ) 

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f102c726f40>

In [19]:
parameters = {
    # "imagenet": {
    #     "dataset_path": "Maysee/tiny-imagenet",
    #     "split": ["train", "valid"],
    #     "input_shape": (64, 64, 3),
    #     "num_classes": 200
    # },
    "cifar10": {
        "dataset_path": "cifar10",
        "split": ["train", "test"],
        "input_shape": (32, 32, 3),
        "num_classes": 10,
        "image_key": "img",
    },
    # "beans": {
    #     "dataset_path": "beans",
    #     "split": ["train+validation", "test"],
    #     "input_shape": (500, 500, 3),
    #     "num_classes": 3
    # }
}

train_gen, val_gen, X_test, y_test = get_data_set(parameters["cifar10"])

In [10]:
# create model to train cifar10, resnet50
train_gen, val_gen, X_test, y_test = get_data_set(parameters["imagenet"])
model = ResNet50(include_top=False, input_shape=parameters["imagenet"]["input_shape"], weights=None)
model.trainable = False

top_layers = [
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(10, activation='softmax')
]

model = models.Sequential([model] + top_layers)

model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

model.fit(train_gen, epochs=30, validation_data=val_gen, verbose=1)

Epoch 1/30
Epoch 2/30

KeyboardInterrupt: 

In [22]:
# create model resnet to train, with top layer using batch normalization

model = ResNet50(include_top=False, input_shape=(64, 64, 3), weights=None)

top_flat_dense = [
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(200, activation='softmax')
]

top_global_avg_pool = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(num_classes, activation='softmax')
    ]

    # A Convolutional Block
top_conv_block = [
        layers.Conv2D(128, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2,2)),
        layers.Flatten(),
        layers.Dense(num_classes, activation='softmax')
    ]

model = models.Sequential([model] + top_global_avg_pool)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_gen, 
                        epochs=10, 
                        validation_data=val_gen,
                        batch_size=32,
                        verbose=1,)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
train_gen, val_gen, X_test, y_test = get_data_set(parameters["imagenet"])

In [14]:
model = ResNet50(include_top=False, input_shape=(64, 64, 3), weights=None)
num_classes = 200
top_flat_dense = [
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(200, activation='softmax')
]

top_global_avg_pool = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(1024, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(num_classes, activation='softmax')
    ]

    # A Convolutional Block
top_conv_block = [
        layers.Conv2D(128, (3,3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2,2)),
        layers.Flatten(),
        layers.BatchNormalization(),
        layers.Dense(num_classes, activation='softmax')
    ]

model = models.Sequential([model] + top_global_avg_pool)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_gen, 
                        epochs=40, 
                        validation_data=val_gen,
                        batch_size=32,
                        verbose=1,)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [7]:
# training EfficentNetB0 on tiny-imagenet but with freeze base model

model = EfficientNetB0(include_top=False, input_shape=(64, 64, 3))
model.trainable = False
# Flattening and dense layers
top_flat_dense = [
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(200, activation='softmax')
]

model = models.Sequential([model] + top_flat_dense)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=10, batch_size=32)

Epoch 1/10


2023-10-24 10:10:37.521036: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_2/efficientnetb0/block2b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
