In [1]:
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf

import PyNetwork
import pyopencl as cl
import pyopencl.array as cl_array

In [2]:
platform = cl.get_platforms()
devices = platform[0].get_devices()
context = cl.Context(devices)
queue = cl.CommandQueue(context)

In [3]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

shape = (28, 28)
x_train = x_train.reshape(-1, *shape).astype(np.float32) / 255
x_test = x_test.reshape(-1, *shape).astype(np.float32) / 255

labels = np.eye(10)

y_train = labels[y_train.flatten()].astype(np.float32)
y_test = labels[y_test.flatten()].astype(np.float32)

# Transform x_train and y_train into C-contiguous
x_train = np.ascontiguousarray(x_train)
y_train = np.ascontiguousarray(y_train)
x_test = np.ascontiguousarray(x_test)
y_test = np.ascontiguousarray(y_test)

# Send data to device
x_train_gpu = cl_array.to_device(queue, x_train)
y_train_gpu = cl_array.to_device(queue, y_train)
x_test_gpu = cl_array.to_device(queue, x_test)
y_test_gpu = cl_array.to_device(queue, y_test)

# Large Learning Rate

In [4]:
model = PyNetwork.Sequential()

model.add(PyNetwork.layers.Input((28, 28)))
model.add(PyNetwork.layers.Flatten())
model.add(PyNetwork.layers.Dense(100, activation_function='relu', l2=0.00, l1=0.0))
# model.add(PyNetwork.layers.BatchNorm())
model.add(PyNetwork.layers.Dense(10, activation_function='softmax', l2=0.0, l1=0.0))

optimizer = PyNetwork.optimizers.RMSprop(learning_rate=0.0005)
model.build(context, queue, loss_function='cross_entropy', optimizer=optimizer, metrics='accuracy')

In [5]:
model.summary()

Input           :    Input Shape  (None, 28, 28)
Flatten         :    Output Shape (None, 784)
Dense (100,)    :    Output Shape (None, 100)
Dense (10,)     :    Output Shape (None, 10)


In [6]:
%%time
model.train(x_train_gpu, y_train_gpu, epochs=1, batch_size=128, verbose=True)

Training on 60000 samples
Epoch 1/1
cross_entropy: 0.5751 - accuracy: 0.8359
CPU times: total: 11.8 s
Wall time: 15.6 s


In [7]:
model.evaluate(x_test_gpu, y_test_gpu)

(array(0.8216), 'cross_entropy: 0.5048 - accuracy: 0.8216')

# Smaller Learning Rate

In [8]:
model = PyNetwork.Sequential()

model.add(PyNetwork.layers.Input((28, 28)))
model.add(PyNetwork.layers.Flatten())
model.add(PyNetwork.layers.Dense(100, activation_function='relu', l2=0.01, l1=0.0))
# model.add(PyNetwork.layers.BatchNorm())
model.add(PyNetwork.layers.Dense(10, activation_function='relu', l2=0.0, l1=0.0))

optimizer = PyNetwork.optimizers.RMSprop(learning_rate=0.0001)
model.build(context, queue, loss_function='cross_entropy', optimizer=optimizer, metrics='accuracy')

In [9]:
%%time
model.train(x_train_gpu, y_train_gpu, epochs=10, batch_size=128, verbose=True)

Training on 60000 samples
Epoch 1/10
cross_entropy: 1.2577 - accuracy: 0.6875
Training on 60000 samples
Epoch 2/10
cross_entropy: 0.8182 - accuracy: 0.7500
Training on 60000 samples
Epoch 3/10
cross_entropy: 1.1802 - accuracy: 0.7812
Training on 60000 samples
Epoch 4/10
cross_entropy: 0.7374 - accuracy: 0.7734
Training on 60000 samples
Epoch 5/10
cross_entropy: 0.7173 - accuracy: 0.8125
Training on 60000 samples
Epoch 6/10
cross_entropy: 0.9659 - accuracy: 0.7969
Training on 60000 samples
Epoch 7/10
cross_entropy: 0.9598 - accuracy: 0.8047
Training on 60000 samples
Epoch 8/10
cross_entropy: 1.0757 - accuracy: 0.7266
Training on 60000 samples
Epoch 9/10
cross_entropy: 0.8088 - accuracy: 0.7500
Training on 60000 samples
Epoch 10/10
cross_entropy: 0.6333 - accuracy: 0.8125
CPU times: total: 2min
Wall time: 3min 14s


In [10]:
model.evaluate(x_test_gpu, y_test_gpu)

(array(0.7807), 'cross_entropy: 0.8212 - accuracy: 0.7807')

In [11]:
model.evaluate(x_train_gpu, y_train_gpu)

(array(0.7905), 'cross_entropy: 0.7890 - accuracy: 0.7905')