<a href="https://colab.research.google.com/github/xcellentbird/STUDY/blob/main/AI/Deep_Learning_Tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## TensorFlow: Neural Net

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
N = 64    # number of samples (rows of x and y)
D = 1000  # feature dimension of both the input x and the target y
H = 100   # width of the hidden layer

In [3]:
# Seed both random number generators so that "Restart & Run All" produces
# the same data and the same initial weights on every run.
np.random.seed(0)
tf.random.set_seed(0)

# input tensor: N samples with D features each
x = tf.convert_to_tensor(np.random.randn(N, D), np.float32)

# target tensor: same shape as the network output, (N, D)
y = tf.convert_to_tensor(np.random.randn(N, D), np.float32)

# trainable weights of the two-layer network:
#   x (N, D) --w1--> h (N, H) --w2--> y_pred (N, D)
w1 = tf.Variable(tf.random.uniform((D, H)))
w2 = tf.Variable(tf.random.uniform((H, D)))

In [4]:
lr = 1e-6  # step size of the hand-written gradient-descent update
for epoch in range(20):
  # Record the forward pass so the tape can differentiate it afterwards.
  with tf.GradientTape() as tape:
    h = tf.maximum(x @ w1, 0)  # first layer followed by ReLU
    y_pred = h @ w2            # second (output) layer
    diff = y_pred - y          # element-wise prediction error
    # squared error summed over features, then averaged over the samples
    loss = tf.reduce_mean(tf.reduce_sum(diff ** 2, axis=1))

  # d(loss)/d(w1) and d(loss)/d(w2), in that order
  gradients = tape.gradient(loss, [w1, w2])

  # gradient-descent step: w <- w - lr * grad
  for weight, grad in zip((w1, w2), gradients):
    weight.assign_sub(lr * grad)

In [5]:
# Display the gradients left over from the last training step:
# a list [d(loss)/d(w1), d(loss)/d(w2)].
gradients

[<tf.Tensor: shape=(1000, 100), dtype=float32, numpy=
 array([[ 106.65317  , -229.56056  ,  244.22047  , ...,  387.68646  ,
          125.75412  ,  355.15585  ],
        [  18.040977 , -154.27551  ,  109.451965 , ..., -364.91553  ,
            5.654018 ,  804.6157   ],
        [  46.610996 ,  -93.416275 , -109.18049  , ..., -332.39496  ,
         -122.52287  , -844.1428   ],
        ...,
        [ 271.49423  ,  220.42593  ,  983.1101   , ...,  -87.242836 ,
          -91.432655 ,   34.775513 ],
        [ 592.61346  , -110.44813  ,  234.08894  , ...,  393.41888  ,
         -201.97852  ,  279.60574  ],
        [  -3.6907933,   59.504856 , -468.314    , ...,  996.1352   ,
          163.98288  ,  294.79688  ]], dtype=float32)>,
 <tf.Tensor: shape=(100, 1000), dtype=float32, numpy=
 array([[ 4.676632 ,  1.8077067,  4.0906067, ...,  4.3303995,  3.7657716,
          3.8067212],
        [ 1.2126491,  1.1211177,  1.8127842, ...,  1.4323751,  1.2389755,
          1.6853117],
        [ 8.310822 , 

## TensorFlow: Optimizer

In [6]:
# Plain SGD optimizer; the training loop hands it the gradients through
# `apply_gradients` instead of updating the weights by hand.
lr = 1e-6
optimizer = tf.optimizers.SGD(learning_rate=lr)

In [7]:
for epoch in range(20):
  # Record the forward pass for automatic differentiation.
  with tf.GradientTape() as tape:
    h = tf.maximum(x @ w1, 0)  # hidden layer with ReLU
    y_pred = h @ w2            # output layer
    diff = y_pred - y          # prediction error
    squared_error_per_sample = tf.reduce_sum(diff ** 2, axis=1)
    loss = tf.reduce_mean(squared_error_per_sample)

  # gradients of the loss with respect to both weight matrices
  weights = [w1, w2]
  gradients = tape.gradient(loss, weights)

  # let the optimizer perform the weight update
  optimizer.apply_gradients(zip(gradients, weights))

## TensorFlow: Loss

In [8]:
# Built-in mean-squared-error loss object (callable on targets and predictions).
loss_fn = tf.keras.losses.MeanSquaredError()

In [9]:
for epoch in range(20):
  with tf.GradientTape() as tape: # dynamic computation
    h = tf.maximum(tf.matmul(x, w1), 0) # layer, ReLU
    y_pred = tf.matmul(h, w2) # layer2

    # loss
    # Keras loss objects are called as loss_fn(y_true, y_pred): target first,
    # prediction second. MSE happens to be symmetric, but keeping the
    # documented order means the loop stays correct if the loss is swapped
    # for an asymmetric one. The manual `diff` is no longer needed because
    # the loss object computes the error itself.
    loss = loss_fn(y, y_pred)

  # compute grad
  gradients = tape.gradient(loss, [w1, w2])

  # update weights
  optimizer.apply_gradients(zip(gradients, [w1, w2]))

## Keras: High-Level Wrapper

In [10]:
N = 64    # number of samples (rows of x and y)
D = 1000  # feature dimension of the input and of the target
H = 100   # number of units in the hidden Dense layer

In [11]:
# Random data for the Keras examples.

# input tensor, shape (N, D)
x = tf.convert_to_tensor(np.random.randn(N, D), dtype=np.float32)

# target tensor, shape (N, D)
y = tf.convert_to_tensor(np.random.randn(N, D), dtype=np.float32)

### Method 1: custom training loop with `tf.GradientTape`

In [14]:
# Two-layer fully connected model; the weights now live inside the layers.
model = tf.keras.Sequential([
  tf.keras.layers.Dense(H, input_shape=(D,), activation=tf.nn.relu),  # hidden layer + ReLU
  tf.keras.layers.Dense(D),  # output layer, no activation
])

In [15]:
# Training configuration: MSE loss minimised with plain SGD.
lr = 1e-6
loss_fn = tf.keras.losses.MeanSquaredError()
optimizer = tf.optimizers.SGD(learning_rate=lr)

In [16]:
for epoch in range(20):
  with tf.GradientTape() as tape:
    y_pred = model(x)  # forward pass through the Sequential model
    # Keras loss objects are called as loss_fn(y_true, y_pred): target first,
    # prediction second. MSE is symmetric, but keeping the documented order
    # means the loop stays correct if the loss is swapped for an asymmetric one.
    loss = loss_fn(y, y_pred)

  # apply gradient to all trainable variables(weights) in model
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

### Method 2: built-in training with `model.compile` / `model.fit`

In [18]:
# Fresh, untrained copy of the same two-layer architecture.
model = tf.keras.Sequential([
  tf.keras.layers.Dense(H, input_shape=(D,), activation=tf.nn.relu),  # hidden layer + ReLU
  tf.keras.layers.Dense(D),  # output layer, no activation
])

In [19]:
# New optimizer and loss objects for the freshly built model.
lr = 1e-6
loss_fn = tf.keras.losses.MeanSquaredError()
optimizer = tf.optimizers.SGD(learning_rate=lr)

In [20]:
# Attach the optimizer and the loss so that `fit` can run the training loop.
model.compile(optimizer=optimizer, loss=loss_fn)

In [21]:
# Full-batch training: batch_size equals the number of samples N, so each
# epoch performs a single gradient step.
history = model.fit(x=x, y=y, batch_size=N, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [23]:
# @tf.function can be used to run the model as a static graph.
# Depending on the model's structure, performance may be better or worse.

@tf.function
def model_static(x, y):
  """Forward pass and loss, wrapped in `tf.function` (static graph).

  Args:
    x: input batch passed to the notebook-level `model`.
    y: target values for `x`.

  Returns:
    A `(y_pred, loss)` tuple: the model output and the value returned by
    the notebook-level `loss_fn`.
  """
  y_pred = model(x)
  # Keras loss objects expect (y_true, y_pred): target first, prediction second.
  loss = loss_fn(y, y_pred)
  return y_pred, loss

def model_dynamic(x, y):
  """Forward pass and loss, executed eagerly (no `tf.function` wrapper).

  Args:
    x: input batch passed to the notebook-level `model`.
    y: target values for `x`.

  Returns:
    A `(y_pred, loss)` tuple: the model output and the value returned by
    the notebook-level `loss_fn`.
  """
  y_pred = model(x)
  # Keras loss objects expect (y_true, y_pred): target first, prediction second.
  loss = loss_fn(y, y_pred)
  return y_pred, loss