<a href="https://colab.research.google.com/github/yiyichanmyae/time_series/blob/main/4_0_0_windows_based_datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab-specific line magic requesting TensorFlow 2.x. The recorded output of this cell
# reports that Colab only includes TensorFlow 2.x and that the magic has no effect.
try:
  %tensorflow_version 2.x
except Exception:
  # Best-effort: any exception raised by the magic is deliberately ignored.
  # NOTE(review): presumably meant for runtimes where the magic is unavailable - confirm.
  pass

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Show which TensorFlow version the kernel loaded (the recorded run printed 2.12.0).
print(tf.__version__)

2.12.0


In [50]:
# Length of the toy integer series produced by tf.data.Dataset.range(...) in later cells.
dataset_size = 10

# Windowing parameters handed to Dataset.window(...): window length and step between windows.
window_size, shift = 5, 1

# Not referenced by the cells in view, which pass a literal True/False to window(...).
drop_remainder = True

# Batch size used by the batching steps in later cells.
batch_size = 5

### window

In [51]:
# Slide a window over 0..dataset_size-1. Because the remainder is kept, the trailing
# windows are shorter than window_size (see the last rows of the output).
dataset = (
    tf.data.Dataset.range(dataset_size)
    .window(size=window_size, shift=shift, drop_remainder=False)
)

# Every element yielded here is itself a small dataset, so gather its items before printing.
for sub_dataset in dataset:
  values = [entry.numpy() for entry in sub_dataset]
  print(values)

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]
[6, 7, 8, 9]
[7, 8, 9]
[8, 9]
[9]


In [52]:
# Same sliding window as before, but incomplete trailing windows are discarded, so
# every window printed below has exactly window_size items.
dataset = (
    tf.data.Dataset.range(dataset_size)
    .window(window_size, shift=shift, drop_remainder=True)
)

# Every element yielded here is itself a small dataset, so gather its items before printing.
for sub_dataset in dataset:
  values = [entry.numpy() for entry in sub_dataset]
  print(values)

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]


### Flatten each window into a single batch (tensor)

In [53]:
# Window the series, keeping the shorter trailing windows.
dataset = tf.data.Dataset.range(dataset_size)
dataset = dataset.window(window_size, shift = shift, drop_remainder=False)

# Each window is a nested dataset; batching it by the window length collapses it into
# one tensor. window_size is used here (not the training batch_size) so that every
# window stays in one piece even if the two settings are changed independently.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
for window in dataset:
  # tolist() gives plain Python ints, so the printout does not depend on NumPy's scalar repr.
  print(window.numpy().tolist())

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]
[6, 7, 8, 9]
[7, 8, 9]
[8, 9]
[9]


In [54]:
# Window the series and discard incomplete trailing windows.
dataset = tf.data.Dataset.range(dataset_size)
dataset = dataset.window(size=window_size, shift=shift, drop_remainder=True)

# Each window is a nested dataset; batching it by the window length collapses it into
# one tensor. window_size is used here (not the training batch_size) so that every
# window stays in one piece even if the two settings are changed independently.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
for window in dataset:
  # tolist() gives plain Python ints, so the printout does not depend on NumPy's scalar repr.
  print(window.numpy().tolist())

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]


### Split each window into features and label

In [65]:
# Build fixed-length windows and flatten each one into a single tensor.
dataset = tf.data.Dataset.range(dataset_size)
dataset = dataset.window(size=window_size, shift=shift, drop_remainder=True)
# Flatten by window_size rather than batch_size: a smaller batch_size would cut each
# window into several pieces, and the split below would then pair the wrong values.
dataset = dataset.flat_map(lambda window: window.batch(window_size))

# All values except the last are the features; the last value is the label.
dataset = dataset.map(lambda window: (window[:-1], window[-1]))

for window_x, y in dataset:
  print(window_x.numpy())
  print(y.numpy())

[0 1 2 3]
4
[1 2 3 4]
5
[2 3 4 5]
6
[3 4 5 6]
7
[4 5 6 7]
8
[5 6 7 8]
9


### shuffle

In [69]:
# Build fixed-length windows and flatten each one into a single tensor.
dataset = tf.data.Dataset.range(dataset_size)
dataset = dataset.window(size=window_size, shift=shift, drop_remainder=True)
# Flatten by window_size rather than batch_size: a smaller batch_size would cut each
# window into several pieces, and the split below would then pair the wrong values.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
# All values except the last are the features; the last value is the label.
dataset = dataset.map(lambda window: (window[:-1], window[-1]))

# With the settings above there are dataset_size - window_size + 1 = 6 windows, so a
# buffer of dataset_size + 5 = 15 holds all of them and the shuffle covers the whole
# dataset. No seed is passed, so the printed order can differ between runs.
dataset = dataset.shuffle(buffer_size=dataset_size+5)

for window_x, y in dataset:
  print(window_x.numpy())
  print(y.numpy())

[0 1 2 3]
4
[4 5 6 7]
8
[1 2 3 4]
5
[3 4 5 6]
7
[5 6 7 8]
9
[2 3 4 5]
6


### training batch

In [72]:
# Build fixed-length windows and flatten each one into a single tensor.
dataset = tf.data.Dataset.range(dataset_size)
dataset = dataset.window(size=window_size, shift=shift, drop_remainder=True)
# Flatten by window_size rather than batch_size: a smaller batch_size would cut each
# window into several pieces, and the split below would then pair the wrong values.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
# All values except the last are the features; the last value is the label.
dataset = dataset.map(lambda window: (window[:-1], window[-1]))
# The buffer is larger than the number of windows, so the whole dataset is shuffled.
dataset = dataset.shuffle(buffer_size=dataset_size+5)

# Group the (features, label) pairs into training batches of 2 and prefetch one batch.
dataset = dataset.batch(2).prefetch(1)

for window_x, y in dataset:
  print(window_x.numpy())
  print(y.numpy())

[[0 1 2 3]
 [2 3 4 5]]
[4 6]
[[5 6 7 8]
 [3 4 5 6]]
[9 7]
[[1 2 3 4]
 [4 5 6 7]]
[5 8]


In [73]:
# Build fixed-length windows and flatten each one into a single tensor.
dataset = tf.data.Dataset.range(dataset_size)
dataset = dataset.window(size=window_size, shift=shift, drop_remainder=True)
# Flatten by window_size rather than batch_size: a smaller batch_size would cut each
# window into several pieces, and the split below would then pair the wrong values.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
# All values except the last are the features; the last value is the label.
dataset = dataset.map(lambda window: (window[:-1], window[-1]))
# The buffer is larger than the number of windows, so the whole dataset is shuffled.
dataset = dataset.shuffle(buffer_size=dataset_size+5)

# Group the (features, label) pairs into training batches of 3 and prefetch one batch.
dataset = dataset.batch(3).prefetch(1)

for window_x, y in dataset:
  print(window_x.numpy())
  print(y.numpy())

[[5 6 7 8]
 [0 1 2 3]
 [3 4 5 6]]
[9 4 7]
[[4 5 6 7]
 [2 3 4 5]
 [1 2 3 4]]
[8 6 5]


In [74]:
# Build fixed-length windows and flatten each one into a single tensor.
dataset = tf.data.Dataset.range(dataset_size)
dataset = dataset.window(size=window_size, shift=shift, drop_remainder=True)
# Flatten by window_size rather than batch_size: a smaller batch_size would cut each
# window into several pieces, and the split below would then pair the wrong values.
dataset = dataset.flat_map(lambda window: window.batch(window_size))
# All values except the last are the features; the last value is the label.
dataset = dataset.map(lambda window: (window[:-1], window[-1]))
# The buffer is larger than the number of windows, so the whole dataset is shuffled.
dataset = dataset.shuffle(buffer_size=dataset_size+5)

# Group the (features, label) pairs into training batches of batch_size (5) and
# prefetch one batch. The 6 windows do not divide evenly, so the last batch holds
# a single example.
dataset = dataset.batch(batch_size).prefetch(1)

for window_x, y in dataset:
  print(window_x.numpy())
  print(y.numpy())

[[3 4 5 6]
 [5 6 7 8]
 [1 2 3 4]
 [0 1 2 3]
 [4 5 6 7]]
[7 9 5 4 8]
[[2 3 4 5]]
[6]
