In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34, 31]

tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<TensorSliceDataset shapes: (), types: tf.int32>

### Iterate through tf dataset

In [3]:
for sales in tf_dataset:
    print(sales.numpy())

21
22
-108
31
-1
32
34
31


### Iterate through elements as numpy elements

In [4]:
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
-108
31
-1
32
34
31


### Iterate through first n elements in dataset

In [5]:
for sales in tf_dataset.take(3):
    print(sales.numpy())

21
22
-108


### Filter sales numbers that are < 0

In [6]:
tf_dataset = tf_dataset.filter(lambda x: x > 0)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


### Convert sales from USA dollars to PLN

In [7]:
tf_dataset = tf_dataset.map(lambda x: float(x) * 3.72)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

78.12
81.840004
115.32
119.04
126.48
115.32


### Shuffle

In [8]:
tf_dataset = tf_dataset.shuffle(2)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

78.12
81.840004
115.32
119.04
126.48
115.32


### Batching

In [9]:
for sales_batch in tf_dataset.batch(2):
    print(sales_batch.numpy())

[78.12     81.840004]
[115.32 119.04]
[115.32 126.48]


## All of the above in one shot

In [10]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

tf_dataset = tf_dataset.filter(lambda x: x > 0).map(lambda y: float(y) * 3.72).shuffle(2).batch(2)

In [11]:
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

[ 78.12 115.32]
[ 81.840004 126.48    ]
[115.32 119.04]


### Images

In [12]:
img_ds = tf.data.Dataset.list_files("train/*/*", shuffle=False)

In [13]:
img_count = len(img_ds)
img_count

25000

In [15]:
type(img_ds)

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

In [16]:
for file in img_ds.take(3):
    print(file.numpy())

b'train\\cat\\cat.0.jpg'
b'train\\cat\\cat.1.jpg'
b'train\\cat\\cat.10.jpg'


In [17]:
class_names = ["cat", "dog"]

In [19]:
train_size = int(img_count*0.8)
train_ds = img_ds.take(train_size)
test_ds = img_ds.skip(train_size)

In [20]:
len(train_ds)

20000

In [21]:
len(test_ds)

5000

In [22]:
def get_label(file_path):
    import os
    parts = tf.strings.split(file_path, os.path.sep)
    return parts[-2]

In [23]:
get_label("train\\dog\\dog.14.jpg")

<tf.Tensor: shape=(), dtype=string, numpy=b'dog'>

In [24]:
def process_img(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128, 128])
    return img, label

In [26]:
img, label = process_img("train\\dog\\dog.14.jpg")
img.numpy()[:2]

array([[[ 84.529785,  92.533325,  94.522705],
        [ 86.16284 ,  92.15723 ,  92.15442 ],
        [ 95.680664,  99.149414,  97.618164],
        [110.57495 , 112.39661 , 109.03992 ],
        [114.18213 , 114.18213 , 114.18213 ],
        [ 87.11804 ,  87.11804 ,  87.11804 ],
        [112.680786, 112.680786, 112.680786],
        [112.46716 , 112.46716 , 112.46716 ],
        [137.9303  , 137.9303  , 137.9303  ],
        [125.98877 , 125.98877 , 125.98877 ],
        [137.55872 , 137.55872 , 137.55872 ],
        [139.37048 , 139.37048 , 139.37048 ],
        [145.98499 , 146.31311 , 143.32874 ],
        [138.6576  , 139.6576  , 134.6576  ],
        [117.03027 , 118.03027 , 113.03027 ],
        [134.07068 , 135.07068 , 130.07068 ],
        [136.80383 , 136.85071 , 136.61633 ],
        [134.86047 , 134.86047 , 134.86047 ],
        [128.52527 , 128.52527 , 128.52527 ],
        [139.95557 , 139.95557 , 139.95557 ],
        [137.44568 , 136.86755 , 134.86755 ],
        [142.57031 , 141.57031 , 1

In [27]:
train_ds = train_ds.map(process_img)
test_ds = test_ds.map(process_img)

In [28]:
for image, label in train_ds.take(1):
    print("****",image)
    print("****",label)

**** tf.Tensor(
[[[203.45312  164.45312   87.453125]
  [208.35938  169.35938   92.359375]
  [209.26562  170.26562   93.265625]
  ...
  [245.26562  207.26562  124.265625]
  [243.97498  203.97498  118.974976]
  [239.45312  201.45312  120.453125]]

 [[203.45312  164.45312   87.453125]
  [208.35938  169.35938   92.359375]
  [209.26562  170.26562   93.265625]
  ...
  [245.35168  208.91394  125.796875]
  [243.39233  205.24219  122.00781 ]
  [240.45312  202.45312  121.453125]]

 [[203.45312  164.45312   87.453125]
  [208.35938  169.35938   92.359375]
  [209.26562  170.26562   93.265625]
  ...
  [246.       209.53125  129.07031 ]
  [243.92981  206.28918  124.007935]
  [241.45312  203.45312  122.453125]]

 ...

 [[156.45312  124.25781   51.648438]
  [158.28918  126.09387   53.484497]
  [159.21375  127.01843   54.409058]
  ...
  [  3.         4.         0.      ]
  [  3.         4.         0.      ]
  [  1.         2.         0.      ]]

 [[153.88281  122.88281   55.882812]
  [155.88281  124.882

In [29]:
def scale(image, label):
    return image/255, label

In [30]:
train_ds = train_ds.map(scale)

In [31]:
for image, label in train_ds.take(5):
    print("****Image: ",image.numpy()[0][0])
    print("****Label: ",label.numpy())

****Image:  [0.7978554  0.6449142  0.34295344]
****Label:  b'cat'
****Image:  [0.14849687 0.17594784 0.14849687]
****Label:  b'cat'
****Image:  [0.10955637 0.13700736 0.16485661]
****Label:  b'cat'
****Image:  [0.8671957 0.8711173 0.8475879]
****Label:  b'cat'
****Image:  [0.54707414 0.4373255  0.29222748]
****Label:  b'cat'
