In [1]:
import keras
from keras.datasets import fashion_mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.optimizers import SGD
from keras.utils import to_categorical
import numpy as np

import tensorflow as tf

In [2]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Preprocess the data
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype('float32') / 255

# Convert labels to one-hot encoding
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

In [3]:
SHIFT_FACTOR = tf.exp(2.0)
SCALE_FACTOR = 6.5
TOP_SCALE = 2.0
def bottom_sig(x):
    return SCALE_FACTOR/(1+tf.exp(-(2*x+SHIFT_FACTOR))) - SCALE_FACTOR
def bottom_sig_relu(x):
    return -(x+SCALE_FACTOR)**TOP_SCALE-SCALE_FACTOR
def top_sig(x):
    return SCALE_FACTOR/(1+tf.exp(-(2*x-SHIFT_FACTOR)))
def top_sig_relu(x):
    return (x-SCALE_FACTOR)**TOP_SCALE+SCALE_FACTOR
def three_sigmoid_activation(x):
    return tf.where(x < -0, bottom_sig(x), tf.where(x >= 0, top_sig(x), x))

def three_sig_relu_activation(x):
    return tf.where(x < -SCALE_FACTOR, bottom_sig_relu(x), tf.where(x > SCALE_FACTOR, top_sig_relu(x), three_sigmoid_activation(x)))

2024-10-17 22:41:22.778624: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Ultra
2024-10-17 22:41:22.778643: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 128.00 GB
2024-10-17 22:41:22.778648: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 48.00 GB
2024-10-17 22:41:22.778662: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-10-17 22:41:22.778673: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## Functional API Experimentation

In [4]:
model = Sequential([
    Conv2D(16, (3, 3), activation='sigmoid', input_shape=(28, 28, 1)),  # 16 filters (reduced), 3x3 kernel
    MaxPooling2D(pool_size=(5, 5)),  # Max pooling with 2x2 pool size
    # Flatten the output before passing to Dense layers
    Flatten(),
    Dense(64, activation='softmax'),  # Reduced from 128 to 64 units
    Dense(10, activation='softmax')  # Output layer with 10 units for classification
])

learning_rate = 0.002
epochs = 1
batch_size = 48

# Compile the model
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)

# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print('Test accuracy:', accuracy)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-10-17 22:41:23.808142: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 13ms/step - accuracy: 0.2356 - loss: 1.9328
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.3374 - loss: 1.4441
Test accuracy: 0.33309999108314514


In [7]:
epochs = 50

image_inputs_source = keras.Input(shape=(28, 28, 1))

image_inputs = keras.layers.Dropout(0.25)(image_inputs_source)

conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(image_inputs)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv11 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
pool11 = MaxPooling2D(pool_size=(2, 2))(conv11)
flat1 = Flatten()(pool11)

conv2 = Conv2D(32, (4, 4), activation='relu', padding='same')(image_inputs)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv22 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
pool22 = MaxPooling2D(pool_size=(2, 2))(conv22)
flat2 = Flatten()(pool22)

conv3 = Conv2D(32, (5, 5), activation='relu', padding='same')(image_inputs)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv33 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool3)
pool33 = MaxPooling2D(pool_size=(2, 2))(conv33)
flat3 = Flatten()(pool33)

conv4 = Conv2D(32, (6, 6), activation='relu', padding='same')(image_inputs)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
conv44 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool4)
pool44 = MaxPooling2D(pool_size=(2, 2))(conv44)
flat4 = Flatten()(pool44)

conv5 = Conv2D(32, (7, 7), activation='relu', padding='same')(image_inputs)
pool5 = MaxPooling2D(pool_size=(2, 2))(conv5)
conv55 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool5)
pool55 = MaxPooling2D(pool_size=(2, 2))(conv55)
flat5 = Flatten()(pool55)

convhalf = Conv2D(32, (14, 14), activation='relu', padding='same')(image_inputs)
poolhalf = MaxPooling2D(pool_size=(2, 2))(convhalf)
convhalf2 = Conv2D(128, (3, 3), activation='relu', padding='same')(poolhalf)
poolhalf2 = MaxPooling2D(pool_size=(2, 2))(convhalf2)
flathalf = Flatten()(convhalf2)


concat_first_conv = keras.layers.concatenate([conv1, conv2, conv3, conv4, conv5, convhalf])
flatten_first_conv = Flatten()(concat_first_conv)

concat_input = keras.layers.concatenate([flat1, flat2, flat3, flat4, flat5, flathalf, flatten_first_conv])
dropout = keras.layers.Dropout(0.1)(concat_input)
dense2 = Dense(64, activation='sigmoid')(dropout)
dense3 = Dense(64, activation='relu')(dense2)
output2 = Dense(10, activation='softmax')(dense3)

model = keras.Model(inputs=image_inputs, outputs=output2)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=keras.optimizers.schedules.ExponentialDecay(learning_rate, decay_steps=10000, decay_rate=0.999, staircase=True)),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1, validation_data=(x_test, y_test))
loss, accuracy = model.evaluate(x_test, y_test)
print('Test accuracy:', accuracy)


Epoch 1/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 50ms/step - accuracy: 0.7954 - loss: 0.6097 - val_accuracy: 0.8850 - val_loss: 0.3148
Epoch 2/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 49ms/step - accuracy: 0.9114 - loss: 0.2418 - val_accuracy: 0.8968 - val_loss: 0.2921
Epoch 3/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 49ms/step - accuracy: 0.9296 - loss: 0.1904 - val_accuracy: 0.9023 - val_loss: 0.2738
Epoch 4/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 49ms/step - accuracy: 0.9451 - loss: 0.1519 - val_accuracy: 0.9062 - val_loss: 0.2755
Epoch 5/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 49ms/step - accuracy: 0.9545 - loss: 0.1251 - val_accuracy: 0.9139 - val_loss: 0.2648
Epoch 6/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 50ms/step - accuracy: 0.9638 - loss: 0.0994 - val_accuracy: 0.9067 - val_loss: 0.3041
Epoc