In [None]:
# Small Benchmark on mnist dataset based on https://www.tensorflow.org/datasets/keras_example
import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
# Show info: TensorFlow version and the GPUs TensorFlow can see.
# `tf.test.is_gpu_available` is deprecated in TF 2.x; its documented
# replacement is `tf.config.list_physical_devices('GPU')`, so the device
# list is queried once here and every check below is derived from it.
gpu_devices = tf.config.list_physical_devices('GPU')

print("TensorFlow version:", tf.__version__)
print("Num GPUs Available: ", len(gpu_devices))
print("List GPU devices: ", gpu_devices)

# True when at least one GPU device is visible, without requiring CUDA
# (this is what the former `cuda_only=False` query asked for).
non_cuda_gpu = bool(gpu_devices)
print("Is Non CUDA GPU available: ", non_cuda_gpu)

# True only when a GPU is visible AND this TensorFlow build has CUDA support.
cuda_gpu = bool(gpu_devices) and tf.test.is_built_with_cuda()
print("Is CUDA GPU available: ", cuda_gpu)


In [None]:
# Load dataset
# Request the 'train' and 'test' splits of MNIST as (image, label) pairs
# (`as_supervised=True`) and also return the dataset metadata object
# (`with_info=True`), which a later cell reads for the training-set size.
(ds_train, ds_test), ds_info = tfds.load(
    name='mnist',
    split=['train', 'test'],
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
)

In [None]:
# Training pipeline
def normalize_img(image, label):
  """Casts the image to `float32` and divides every value by 255.

  Args:
    image: Image tensor (presumably `uint8` pixel data - confirm against
      the dataset); it is cast to `tf.float32` before scaling.
    label: Returned unchanged.

  Returns:
    A `(scaled_image, label)` tuple.
  """
  image_f32 = tf.cast(image, tf.float32)
  return image_f32 / 255., label

# Build the training input pipeline as one chain:
# normalize -> cache -> shuffle (buffer = whole training split) -> batch -> prefetch.
# NOTE: `ds_train` is rebound from itself, so re-running this cell without
# re-running the dataset-loading cell applies `normalize_img` a second time.
ds_train = (
    ds_train
    .map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(ds_info.splits['train'].num_examples)
    .batch(128)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Evaluation pipeline
# Same normalization as for training, but with no shuffling, and the cache
# is placed after batching so the already-batched elements are what is cached.
ds_test = (
    ds_test
    .map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(128)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Create model
# Flatten each image, pass it through one 128-unit ReLU hidden layer, then
# a 10-unit output layer with no activation (the loss below is configured
# with `from_logits=True`, so the model emits raw logits).
model = tf.keras.models.Sequential([
  # TFDS MNIST images carry an explicit channel axis, i.e. (28, 28, 1).
  # Declaring that shape keeps the model's input spec consistent with what
  # the input pipelines feed it; the flattened size is still 784.
  tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(10)
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),  # Adam with learning rate 1e-3
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
%%time

# Train model
# Fits `model` on `ds_train` for 12 epochs and uses `ds_test` as validation
# data. This is the step being benchmarked: the `%%time` cell magic at the
# top of this cell reports the CPU and wall time of the whole fit.
model.fit(
    ds_train,
    epochs=12,
    validation_data=ds_test,
)

In [None]:
# 17.10.2022 MacBook Air M1
# miniconda Python 3.10.6 w/ tensorflow-macos & tensorflow-metal - TensorFlow reports a GPU, and the GPU is busy while training
# CPU times: user 19.7 s, sys: 13.4 s, total: 33.1 s
# Wall time: 26.2 s

# 17.10.2022 Intel Core i9-11900KB + Arc A750 GPU
# miniconda Python 3.9 w/ TensorFlow from the Intel conda channel - CPU at 100%, GPU not used :/ yet faster (wall time) than the M1
# CPU times: total: 1min 35s
# Wall time: 9.31 s