# 13강
- Dataset Preprocessing

In [4]:
import tensorflow as tf
import tensorflow_datasets as tfds

train_ds = tfds.load(name = 'mnist',
                     shuffle_files=True,
                     as_supervised=True,
                     split='train',
                     batch_size=4)

for images, labels in train_ds:
  print(images.shape)
  print(images.dtype)

  print(tf.reduce_max(images)) # 반드시 scaling해주어야함, weight의 변동이 너무커지기 때문.
  break

(4, 28, 28, 1)
<dtype: 'uint8'>
tf.Tensor(255, shape=(), dtype=uint8)


In [5]:
t1 = tf.constant([1, 2, 3, 4, 5])
print(t1.dtype)
t2 = tf.cast(t1, tf.float32) # casting
print(t2.dtype)

<dtype: 'int32'>
<dtype: 'float32'>


In [7]:
# 0부터 1까지의 값을 갖도록
# float32형식을 갖도록
# 함수를 이용해 한번에 해줄 수 있었다!
def standardization(images, labels):
  images = tf.cast(images, tf.float32) / 255.
  return [images, labels]

train_ds, test_ds = tfds.load(name = 'mnist',
                     shuffle_files=True,
                     as_supervised=True,
                     split=['train','test'],
                     batch_size=4)

train_ds_iter = iter(train_ds)
images, labels = next(train_ds_iter)
print(images.dtype, tf.reduce_max(images))

# 텐서에도 map 가능
train_ds = train_ds.map(standardization)
test_ds = test_ds.map(standardization)

train_ds_iter = iter(train_ds)
images, labels = next(train_ds_iter)
print(images.dtype, tf.reduce_max(images))

<dtype: 'uint8'> tf.Tensor(255, shape=(), dtype=uint8)
<dtype: 'float32'> tf.Tensor(1.0, shape=(), dtype=float32)


In [8]:
def mnist_data_loader():
  def standardization(images, labels):
    images = tf.cast(images, tf.float32) / 255.
    return [images, labels]

  train_ds, test_ds = tfds.load(name = 'mnist',
                       shuffle_files=True,
                       as_supervised=True,
                       split=['train','test'],
                       batch_size=4)
  
  train_ds = train_ds.map(standardization)
  test_ds = test_ds.map(standardization)
  return train_ds, test_ds

train_ds, test_ds = mnist_data_loader()
print(train_ds)

<MapDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>


# 14강
- Losses and Metrics

In [10]:
import tensorflow as tf
import numpy as np

from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy, SparseCategoricalAccuracy

loss_object = BinaryCrossentropy()

predictions = np.array([0.3]).reshape(-1, 1)
labels = np.array([1])

loss = loss_object(labels, predictions)
loss_manual = -1*((labels*np.log(predictions)) + \
                  (1-labels)*np.log(1-predictions))
print(loss.numpy())
print(loss_manual)

1.2039724588394165
[[1.2039728]]


In [12]:
# label 이 vector형식([0, 1])인 경우
predictions = np.array([0.3, 0.6]).reshape(-1, 1)
labels = np.array([1, 0]).reshape(-1, 1)

loss = loss_object(labels, predictions)
loss_manual = -1*((labels*np.log(predictions)) + \
                  (1-labels)*np.log(1-predictions))
loss_manual = np.mean(loss_manual)

print(loss.numpy())
print(loss_manual)

1.0601314306259155
1.0601317681000455


In [19]:
predictions = np.array([[0.3, 0.7], [0.4, 0.6], [0.1, 0.9]])
labels = np.array([[0,1], [1,0], [1,0]])

loss = loss_object(labels, predictions)
loss_manual = -1*labels*np.log(predictions)
loss_manual = np.sum(loss_manual, axis=1)
loss_manual = np.mean(loss_manual)

print(loss.numpy())
print(loss_manual)

1.1918498277664185
1.1918502562689777


In [28]:
loss_object = CategoricalCrossentropy()

predictions = np.array([[0.2, 0.1, 0.7], [0.4, 0.3, 0.3], [0.1, 0.8, 0.1]])
labels = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]])

print(predictions)
print(labels)

loss = loss_object(labels, predictions)
loss_manual = -1*labels*np.log(predictions)
loss_manual = np.sum(loss_manual, axis=1)
loss_manual = np.mean(loss_manual)

print(loss.numpy())
print(loss_manual)

[[0.2 0.1 0.7]
 [0.4 0.3 0.3]
 [0.1 0.8 0.1]]
[[0 0 1]
 [0 1 0]
 [1 0 0]]
1.2877442836761475
1.2877442804195713


In [32]:
loss_object = SparseCategoricalCrossentropy()

predictions = np.array([[0.2, 0.1, 0.7], [0.4, 0.3, 0.3], [0.1, 0.8, 0.1]])
labels = np.array([2, 1, 0])

loss = loss_object(tf.constant(labels), tf.constant(predictions))

ce_loss =0
for data_idx in range(len(labels)):
  prediction = predictions[data_idx]
  label = labels[data_idx]

  t_prediction = prediction[label]
  ce_loss += -1*np.log(t_prediction)
ce_loss = ce_loss / len(labels)

print(loss.numpy())
print(ce_loss)

1.2877442836761475
1.2877442804195713


In [35]:
import tensorflow_datasets as tfds

train_ds = tfds.load(name = 'mnist',
                     shuffle_files=True,
                     as_supervised=True,
                     split='train')

train_ds = train_ds.batch(8)

train_ds_iter = iter(train_ds)
images, labels = next(train_ds_iter) 
print(labels) # Sparse를 써야겠구나!

tf.Tensor([4 1 0 7 1 8 2 7], shape=(8,), dtype=int64)


In [37]:
metric = CategoricalAccuracy()

predictions = np.array([[0.2, 0.1, 0.7], [0.4, 0.3, 0.3], [0.1, 0.8, 0.1]])
labels = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]])

acc = metric(labels, predictions)

print(acc*100)

tf.Tensor(33.333336, shape=(), dtype=float32)


In [39]:
metric = SparseCategoricalAccuracy

predictions = np.array([[0.2, 0.1, 0.7], [0.4, 0.3, 0.3], [0.1, 0.8, 0.1]])
labels = np.array([2, 1, 0])

acc = metric(labels, predictions)
print(acc*100)

ValueError: ignored