## TF 2.0 initialization

Initialize Tensorflow 2.0 and import libraries

In [1]:
# Ask the runtime for TensorFlow 2.x before TensorFlow is imported below.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best effort: any exception raised while selecting the version is
  # deliberately ignored so the cell never stops the notebook.
  pass

TensorFlow 2.x selected.


In [0]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

## Datasets preview

Keras provides 7 classic datasets:
[Datasets](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification)
* CIFAR10 small image classification
* CIFAR100 small image classification
* IMDB Movie reviews sentiment classification
* Reuters newswire topics classification
* MNIST database of handwritten digits
* Fashion-MNIST database of fashion articles
* Boston housing price regression dataset




MNIST

In [3]:
(x, y),(x_test, y_test) = keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
type(x)

numpy.ndarray

In [5]:
x.shape, y.shape, x_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [6]:
x.min(), x.max(),x.mean()

(0, 255, 33.318421449829934)

In [7]:
y[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [0]:
y_onehot = tf.one_hot(y,10)

In [9]:
y_onehot[:5]

<tf.Tensor: id=8, shape=(5, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)>

CIFAR 10/100

In [10]:
(x, y),(x_test, y_test) = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [11]:
x.shape, y.shape, x_test.shape, y_test.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

In [12]:
x.min(), x.max(),x.mean()

(0, 255, 120.70756512369792)

### Tensorflow Dataset

Convert NumPy arrays to a TensorFlow dataset

In [0]:
ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))

In [71]:
type(ds_test)

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

A `TensorSliceDataset` supports iteration, which enables preprocessing and multi-threaded processing of its elements.

In [0]:
# Fetch the first (image, label) element of the test dataset.
# Two-step equivalent of the line below:
# it = iter(ds_test)
# next(it)
res = next(iter(ds_test))

In [73]:
res[0].shape, res[1].shape

(TensorShape([32, 32, 3]), TensorShape([1]))

### tfds

`tensorflow_datasets` (tfds) is an alternative, handy data-loading tool provided by TensorFlow.

In [0]:
import tensorflow_datasets as tfds

In [0]:
ds2 = tfds.load(name="cifar10")

In [51]:
# Display the dict returned by tfds.load: one dataset per split ('train'/'test').
# (type(ds2) would only print `dict`, which does not match the recorded output.)
ds2

{'test': <_OptionsDataset shapes: {image: (32, 32, 3), label: ()}, types: {image: tf.uint8, label: tf.int64}>,
 'train': <_OptionsDataset shapes: {image: (32, 32, 3), label: ()}, types: {image: tf.uint8, label: tf.int64}>}

In [81]:
len(list(tfds.as_numpy(ds2)['test']))

10000

In [0]:
ds2_train, ds2_test = ds2['train'], ds2['test']

In [56]:
type(ds2_train), type(ds2_test)

(tensorflow.python.data.ops.dataset_ops._OptionsDataset,
 tensorflow.python.data.ops.dataset_ops._OptionsDataset)

In [57]:
next(iter(ds2_train))['image'].shape

TensorShape([32, 32, 3])

In [67]:
type(next(iter(ds2_train))['image'])

tensorflow.python.framework.ops.EagerTensor

In [84]:
len(list(ds2_train)), len(list(ds2_test))

(50000, 10000)

### Preprocess

Shuffle

In [0]:
ds2_train= ds2_train.shuffle(1000)

Map a preprocessing function over every element of the dataset. `map` is a handy tool for data preprocessing.

In [0]:
def preprocess(x, y):
  """Scale an image to float32 and one-hot encode its label.

  Args:
    x: image tensor; cast to float32 and divided by 255.
    y: label tensor with a leading axis of size 1 (e.g. the CIFAR-10 labels
      of shape (1,) loaded above); that axis is squeezed away.

  Returns:
    A tuple (image, label): the scaled image and a depth-10 one-hot label.
  """
  image = tf.cast(x, tf.float32) / 255
  # Drop the size-1 leading axis first so one_hot yields (10,) rather than (1, 10).
  class_index = tf.cast(tf.squeeze(y, axis=0), tf.int32)
  return image, tf.one_hot(class_index, depth=10)

In [0]:
ds_test = ds_test.map(preprocess)

In [0]:
res = next(iter(ds_test))

In [92]:
res[0].shape, res[1].shape

(TensorShape([32, 32, 3]), TensorShape([10]))

Batch

In [0]:
ds_test_b = ds_test.batch(2000)

In [0]:
res = next(iter(ds_test_b))

In [95]:
res[0].shape, res[1].shape

(TensorShape([2000, 32, 32, 3]), TensorShape([2000, 10]))

In [99]:
# Manual-counter version, kept commented out for comparison:
# i = 0
# for x, y in ds_test_b:
#   print(i, x.shape, y.shape)
#   i += 1

# enumerate() supplies the batch index alongside each (x, y) batch.
for i, (x, y) in enumerate(ds_test_b):
  print(i, x.shape, y.shape)

0 (2000, 32, 32, 3) (2000, 10)
1 (2000, 32, 32, 3) (2000, 10)
2 (2000, 32, 32, 3) (2000, 10)
3 (2000, 32, 32, 3) (2000, 10)
4 (2000, 32, 32, 3) (2000, 10)


In [0]:
df4 = df3.repeat(2)

In [100]:
for i, (x, y) in enumerate(ds_test_b):
  print(i, x.shape, y.shape)

0 (2000, 32, 32, 3) (2000, 10)
1 (2000, 32, 32, 3) (2000, 10)
2 (2000, 32, 32, 3) (2000, 10)
3 (2000, 32, 32, 3) (2000, 10)
4 (2000, 32, 32, 3) (2000, 10)


## Full data loading example
Fashion-MNIST

In [0]:
def mnist_preprocess(x, y):
  """Scale an image to float32 and one-hot encode its label.

  Args:
    x: image tensor; cast to float32 and divided by 255.
    y: label tensor; cast to int64 and used directly as the class index.

  Returns:
    A tuple (image, label): the scaled image and a depth-10 one-hot label.
  """
  pixels = tf.cast(x, tf.float32) / 255
  # No squeeze here: y is passed to one_hot as-is after the integer cast.
  class_index = tf.cast(y, tf.int64)
  one_hot_label = tf.one_hot(class_index, depth=10)
  return pixels, one_hot_label

In [102]:
(x, y), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [0]:
# Training pipeline: slice the arrays, preprocess each element,
# shuffle with a 1000-element buffer, then group into batches of 100.
ds = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .map(mnist_preprocess)
    .shuffle(1000)
    .batch(100)
)

# Test pipeline: the same sequence of steps applied to the test arrays.
ds_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(mnist_preprocess)
    .shuffle(1000)
    .batch(100)
)

In [0]:
res = next(iter(ds))
res_test = next(iter(ds_test))

In [105]:
res[0].shape, res[1].shape, res_test[0].shape, res_test[1].shape

(TensorShape([100, 28, 28]),
 TensorShape([100, 10]),
 TensorShape([100, 28, 28]),
 TensorShape([100, 10]))

In [106]:
for step, (x,y) in enumerate(ds_test):
  print(step, x.shape, y.shape)

0 (100, 28, 28) (100, 10)
1 (100, 28, 28) (100, 10)
2 (100, 28, 28) (100, 10)
3 (100, 28, 28) (100, 10)
4 (100, 28, 28) (100, 10)
5 (100, 28, 28) (100, 10)
6 (100, 28, 28) (100, 10)
7 (100, 28, 28) (100, 10)
8 (100, 28, 28) (100, 10)
9 (100, 28, 28) (100, 10)
10 (100, 28, 28) (100, 10)
11 (100, 28, 28) (100, 10)
12 (100, 28, 28) (100, 10)
13 (100, 28, 28) (100, 10)
14 (100, 28, 28) (100, 10)
15 (100, 28, 28) (100, 10)
16 (100, 28, 28) (100, 10)
17 (100, 28, 28) (100, 10)
18 (100, 28, 28) (100, 10)
19 (100, 28, 28) (100, 10)
20 (100, 28, 28) (100, 10)
21 (100, 28, 28) (100, 10)
22 (100, 28, 28) (100, 10)
23 (100, 28, 28) (100, 10)
24 (100, 28, 28) (100, 10)
25 (100, 28, 28) (100, 10)
26 (100, 28, 28) (100, 10)
27 (100, 28, 28) (100, 10)
28 (100, 28, 28) (100, 10)
29 (100, 28, 28) (100, 10)
30 (100, 28, 28) (100, 10)
31 (100, 28, 28) (100, 10)
32 (100, 28, 28) (100, 10)
33 (100, 28, 28) (100, 10)
34 (100, 28, 28) (100, 10)
35 (100, 28, 28) (100, 10)
36 (100, 28, 28) (100, 10)
37 (100, 28