# Working with tf.data.Dataset

In [1]:
import tensorflow as tf
x = tf.range(5)
dataset = tf.data.Dataset.from_tensor_slices(x)

In [2]:
dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [4]:
for item in dataset:
  print(item.numpy())

0
1
2
3
4


In [5]:
list(dataset.as_numpy_iterator())

[0, 1, 2, 3, 4]

In [6]:
dataset.element_spec

TensorSpec(shape=(), dtype=tf.int32, name=None)

In [7]:
dataset1= tf.data.Dataset.from_tensor_slices(
    tf.random.uniform([3,5])
)

In [8]:
dataset1.element_spec

TensorSpec(shape=(5,), dtype=tf.float32, name=None)

In [11]:
data = [5,1,-3,-2,-4,7,-10]
tf_dataset = tf.data.Dataset.from_tensor_slices(data)

In [12]:
for item in tf_dataset:
  print(item.numpy())

5
1
-3
-2
-4
7
-10


In [13]:
for item in tf_dataset.take(3):
  print(item.numpy())

5
1
-3


In [16]:
for item in tf_dataset.filter(lambda x:x>0):
  print(item.numpy())

5
1
7


In [17]:
for item in tf_dataset.map(lambda x:x*2):
  print(item.numpy())

10
2
-6
-4
-8
14
-20


In [19]:
for item in tf_dataset.shuffle(buffer_size=3):
  print(item.numpy())

5
1
-2
-4
-10
-3
7


In [20]:
for item in tf_dataset.batch(2):
  print(item.numpy())

[5 1]
[-3 -2]
[-4  7]
[-10]


In [21]:
dataset=tf.data.Dataset.from_tensor_slices(data)
dataset=dataset.filter(lambda x:x>0).map(lambda y:y*2).shuffle(3).batch(2)
for item in dataset:
  print(item.numpy())

[10  2]
[14]


# How to use tf.data for the image datasets

In [22]:
flowers_root = tf.keras.utils.get_file(     
    'flower_photos',     
    'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz', 
    untar=True)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


In [23]:
flowers_root

'/root/.keras/datasets/flower_photos'

In [24]:
import pathlib
flowers_root = pathlib.Path(flowers_root)
flowers_root

PosixPath('/root/.keras/datasets/flower_photos')

In [26]:
ds = tf.data.Dataset.list_files(str(flowers_root/"*/*"))

In [27]:
image_count = len(ds)
image_count

3670

In [28]:
for file in ds.take(3):
  print(file.numpy())

b'/root/.keras/datasets/flower_photos/roses/15697872479_ed48e9dd73_n.jpg'
b'/root/.keras/datasets/flower_photos/tulips/8695372372_302135aeb2.jpg'
b'/root/.keras/datasets/flower_photos/daisy/2551708158_1f10e81e11.jpg'


In [29]:
ds = ds.shuffle(200)
for file in ds.take(3):
  print(file.numpy())

b'/root/.keras/datasets/flower_photos/tulips/17908793211_ff0f1f81d3_n.jpg'
b'/root/.keras/datasets/flower_photos/tulips/3430229687_32645b5738.jpg'
b'/root/.keras/datasets/flower_photos/roses/3697780051_83e50a6dd1_m.jpg'


In [30]:
train_size = int(image_count*0.8)
train_ds = ds.take(train_size)
test_ds = ds.skip(train_size)

In [31]:
len(train_ds)

2936

In [32]:
len(test_ds)

734

In [33]:
def get_label(file_path):
  import os
  parts= tf.strings.split(file_path, os.sep)
  return parts[-2]

In [35]:
x = b'/root/.keras/datasets/flower_photos/tulips/17908793211_ff0f1f81d3_n.jpg'
get_label(x).numpy()

b'tulips'

In [36]:
def process_image(file_path):
  label = get_label(file_path)
  img = tf.io.read_file(file_path)
  img = tf.image.decode_jpeg(img)
  img = tf.image.resize(img, [128,128])
  img = img / 255
  return img, label 

In [37]:
img, label = process_image(x)

In [38]:
img.shape

TensorShape([128, 128, 3])

In [39]:
img[:3, :3, :1]

<tf.Tensor: shape=(3, 3, 1), dtype=float32, numpy=
array([[[0.2936887 ],
        [0.23970588],
        [0.27518383]],

       [[0.2322304 ],
        [0.22095588],
        [0.24650735]],

       [[0.24056372],
        [0.27291667],
        [0.23762254]]], dtype=float32)>

In [40]:
train_ds = train_ds.map(process_image)
test_ds = test_ds.map(process_image)

In [41]:
for image, label in train_ds.take(1):
  print(label.numpy())
  print(image.numpy())

b'tulips'
[[[0.5300865  0.5380675  0.6119033 ]
  [0.8150934  0.7704695  0.84009194]
  [0.80323225 0.751196   0.78500307]
  ...
  [0.02125986 0.02518143 0.03302457]
  [0.02560987 0.02953144 0.04521772]
  [0.0238973  0.02308541 0.03336421]]

 [[0.7470792  0.6947646  0.733307  ]
  [0.84166837 0.79053694 0.83618426]
  [0.90115154 0.861079   0.88749546]
  ...
  [0.02284271 0.02676428 0.03669075]
  [0.01812409 0.01899941 0.02684254]
  [0.0213702  0.02677768 0.0301631 ]]

 [[0.85518825 0.8054793  0.80282795]
  [0.9096074  0.8550307  0.8518976 ]
  [0.9741821  0.95758945 0.97247314]
  ...
  [0.02234988 0.02627145 0.03619792]
  [0.02965686 0.02965686 0.03468137]
  [0.03474265 0.03501838 0.03488052]]

 ...

 [[0.5053402  0.5036882  0.39783528]
  [0.598442   0.56596655 0.45224106]
  [0.60740155 0.58883536 0.4771932 ]
  ...
  [0.10349528 0.12310312 0.13486783]
  [0.08990048 0.10558675 0.11735146]
  [0.09226529 0.10795157 0.11971627]]

 [[0.53394896 0.54215974 0.42221633]
  [0.5820267  0.55457574 0.