<a href="https://colab.research.google.com/github/thanhnguyen2612/diveintocode-ml/blob/master/ML_sprint13_IntroTF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Tensorflow

In [1]:
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


## [Problem 1] Looking back on the scratch

*   Have to initialize the weights & bias
*   Need epoch loop
*   Determine the number of nodes at each layer
*   Determine activation function and optimizer for each layer
*   Determine mini-batch size
*   Compute loss function
*   Predict with validation data


In [2]:
df = pd.read_csv('Iris.csv')
data = df[df['Species'].isin(['Iris-versicolor', 'Iris-virginica'])]
data

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
50,51,7.0,3.2,4.7,1.4,Iris-versicolor
51,52,6.4,3.2,4.5,1.5,Iris-versicolor
52,53,6.9,3.1,4.9,1.5,Iris-versicolor
53,54,5.5,2.3,4.0,1.3,Iris-versicolor
54,55,6.5,2.8,4.6,1.5,Iris-versicolor
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
X = data.loc[:, ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
y = data['Species']
y = np.where(y == 'Iris-versicolor', 0, 1)
X = np.array(X)
y = np.array(y)
y = y.astype(np.int32)[:, np.newaxis]

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, train_size=0.8, random_state=0)

X_train.shape, X_val.shape, X_test.shape, y_train.shape, y_val.shape, y_test.shape

((64, 4), (16, 4), (20, 4), (64, 1), (16, 1), (20, 1))

## [Problem 2] Consider the correspondence between scratch and TensorFlow

*   Tensorflow initializes weight and bias variables with global_variables_initializer() (only need to specify shapes)
*   Built-in activation function (ReLU, Sigmoid, etc.)
*   No hard code operation formulas (matmul, add, etc.)
*   Built-in optimizer, loss function, etc.

=> No need to write code from scratch which can be really annoyed and hard to debug.

In [5]:
class GetMiniBatch:
    """
    Iterator to get a mini-batch
    Parameters
    ----------
    X : The following forms of ndarray, shape (n_samples, n_features)
      Training data
    y : The following form of ndarray, shape (n_samples, 1)
      Correct answer value
    batch_size : int
      Batch size
    seed : int
      NumPy random number seed
    """
    def __init__(self, X, y, batch_size=10, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_idx = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_idx]
        self.y = y[shuffle_idx]
        self._stop = np.ceil(X.shape[0] / self.batch_size).astype(np.int)
    
    def __len__(self):
        return self._stop
    
    def __getitem__(self, item):
        p0 = item * self.batch_size
        p1 = (item + 1) * self.batch_size
        return self.X[p0:p1], self.y[p0:p1]
    
    def __iter__(self):
        self._counter = 0
        return self
    
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter * self.batch_size
        p1 = (self._counter + 1) * self.batch_size
        self._counter += 1
        return self.X[p0:p1], self.y[p0:p1]

In [6]:
learning_rate = 0.001
batch_size = 10
num_epochs = 100
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1

#Determine the shape of the argument to be passed to the calculation graph
X_placeholder = tf.placeholder(tf.float32, [None, n_input])
Y_placeholder = tf.placeholder(tf.float32, [None, n_classes])

# train mini batch iterator
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

def example_net(x):
    """
    Simple 3-layer neural network
    """
    tf.random.set_random_seed(0)

    weights = {
        'w1': tf.Variable(tf.random_normal([n_input, n_hidden1])),
        'w2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2])),
        'w3': tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden1])),
        'b2': tf.Variable(tf.random_normal([n_hidden2])),
        'b3': tf.Variable(tf.random_normal([n_classes]))
    }
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_output = tf.matmul(layer_2, weights['w3']) + biases['b3']
    return layer_output

logits = example_net(X_placeholder)
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y_placeholder, logits=logits))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Estimate
correct_pred = tf.equal(tf.sign(Y_placeholder - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))

# Accuracy
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Init variable weight and bias
init = tf.global_variables_initializer()

In [7]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(num_epochs):
        total_batch = np.ceil(X_train.shape[0] / batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0

        for i, (mini_X, mini_y) in enumerate(get_mini_batch_train):
            sess.run(train_op, feed_dict={X_placeholder: mini_X, Y_placeholder: mini_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X_placeholder: mini_X, Y_placeholder: mini_y})
            total_loss += loss
        
        total_loss /= n_samples
        val_loss, acc = sess.run([loss_op, accuracy], feed_dict={X_placeholder: X_val, Y_placeholder: y_val})

        print(f"Epoch {epoch}, loss: {total_loss:.4f}, val_loss: {val_loss:.4f}, acc: {acc:.3f}")
    
    test_acc = sess.run(accuracy, feed_dict={X_placeholder: X_test, Y_placeholder: y_test})
    print(f"Test acc: {test_acc:.3f}")

Epoch 0, loss: 7.0241, val_loss: 67.6860, acc: 0.375
Epoch 1, loss: 3.4241, val_loss: 23.4026, acc: 0.312
Epoch 2, loss: 1.9387, val_loss: 11.6681, acc: 0.375
Epoch 3, loss: 2.0917, val_loss: 13.1400, acc: 0.312
Epoch 4, loss: 1.7685, val_loss: 17.7284, acc: 0.312
Epoch 5, loss: 1.6097, val_loss: 12.9607, acc: 0.312
Epoch 6, loss: 1.4402, val_loss: 10.0593, acc: 0.312
Epoch 7, loss: 1.3704, val_loss: 9.4797, acc: 0.312
Epoch 8, loss: 1.2536, val_loss: 9.8518, acc: 0.312
Epoch 9, loss: 1.1476, val_loss: 8.5670, acc: 0.375
Epoch 10, loss: 1.0930, val_loss: 8.0430, acc: 0.375
Epoch 11, loss: 1.0412, val_loss: 7.8791, acc: 0.375
Epoch 12, loss: 0.9804, val_loss: 7.1233, acc: 0.375
Epoch 13, loss: 0.9326, val_loss: 6.7908, acc: 0.375
Epoch 14, loss: 0.8792, val_loss: 6.2492, acc: 0.375
Epoch 15, loss: 0.8304, val_loss: 5.7681, acc: 0.375
Epoch 16, loss: 0.7835, val_loss: 5.2886, acc: 0.438
Epoch 17, loss: 0.7384, val_loss: 4.8037, acc: 0.438
Epoch 18, loss: 0.6961, val_loss: 4.3575, acc: 0.

## [Problem 3] Create a model of Iris using all three types of objective variables

In [8]:
from sklearn.preprocessing import OneHotEncoder

X, y = df.loc[:, ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']], df['Species']
X = np.array(X)
y = np.array(y).reshape(-1, 1)
y = OneHotEncoder(sparse=False).fit_transform(y)

In [9]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

X_train.shape, X_val.shape, y_train.shape, y_val.shape

((120, 4), (30, 4), (120, 3), (30, 3))

In [10]:
learning_rate = 0.01
batch_size = 10
num_epochs = 10
n_input = X_train.shape[1]
n_hidden1 = 50
n_hidden2 = 100
n_classes = y_train.shape[1]
n_samples = X_train.shape[0]

# Placeholder
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])

# Mini batch
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

# Network layers
logits = example_net(X)

# Objective function
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))

# Optimization method
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Prediction
correct_pred = tf.equal(tf.argmax(Y), tf.argmax(tf.nn.softmax(logits, axis=1)))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Init variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(num_epochs):
        total_batch = np.ceil(n_samples / batch_size).astype(np.int)
        total_loss = 0
        total_acc = 0

        for i, (mini_X, mini_y) in enumerate(get_mini_batch_train):
            sess.run(train_op, feed_dict={X: mini_X, Y: mini_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_X, Y: mini_y})
            total_loss += loss
            total_acc += acc
        
        total_loss /= n_samples
        total_acc /= n_samples

        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: mini_X, Y: mini_y})
        print(f"Epoch {epoch}, loss: {total_loss:.4f}, acc: {total_acc:.3f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.3f}")

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Epoch 0, loss: 4.7694, acc: 0.033, val_loss: 0.0016, val_acc: 0.333
Epoch 1, loss: 0.3614, acc: 0.056, val_loss: 1.5877, val_acc: 0.667
Epoch 2, loss: 0.2166, acc: 0.081, val_loss: 0.0226, val_acc: 0.667
Epoch 3, loss: 0.1867, acc: 0.089, val_loss: 0.0000, val_acc: 1.000
Epoch 4, loss: 0.1340, acc: 0.086, val_loss: 0.0009, val_acc: 1.000
Epoch 5, loss: 0.0450, acc: 0.094, val_loss: 0.0000, val_acc: 1.000
Epoch 6, loss: 0.0517, acc: 0.086, val_loss: 0.0000, val_acc: 0.667
Epoch 7, loss: 0.1532, acc: 0.083, val_loss: 0.0000, val_acc: 1.000
Epoch 8, loss: 0.1074, acc: 0.081, val_loss: 0.0000, val_acc: 0.667
Epoch 9, loss: 0.1797, acc: 0.089, val_loss: 0.0000, val_acc: 1.000


## [Problem 4] Creating a model of House Prices

In [11]:
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,...,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.0,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,...,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,...,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.0,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,...,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0.0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,...,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.0,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,...,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,,,,0,12,2008,WD,Normal,250000


In [12]:
X, y = df[["GrLivArea", "YearBuilt"]], df["SalePrice"]
X = np.array(np.log1p(X))
y = np.array(np.log1p(y))[:, np.newaxis]
X, y

(array([[7.44483327, 7.60290046],
        [7.14124512, 7.58933582],
        [7.48829352, 7.60190196],
        ...,
        [7.75833347, 7.57147365],
        [6.98378997, 7.57609734],
        [7.13648321, 7.5837563 ]]), array([[12.24769912],
        [12.10901644],
        [12.31717117],
        ...,
        [12.49313327],
        [11.86446927],
        [11.90159023]]))

In [13]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((1168, 2), (292, 2), (1168, 1), (292, 1))

In [14]:
def create_neural_network(X, y, layers=[]):
    n_input = X.shape[1]
    n_output = y.shape[1]

    variable_shapes = [n_input] + [layer["n_nodes"] for layer in layers] + [n_output]
    W, B = [], []
    for n0, n1 in zip(variable_shapes[:-1], variable_shapes[1:]):
        W.append(tf.Variable(tf.random_normal([n0, n1])))
        B.append(tf.Variable(tf.random_normal([n1])))
    
    Z = X
    for i, layer in enumerate(layers):
        l = layer["activation"](tf.matmul(Z, W[i]) + B[i])
        Z = l
    return tf.matmul(Z, W[-1]) + B[-1]

def train_regression(X_train, y_train, X_val, y_val,
                     layers=[], optimizer=tf.train.AdamOptimizer(0.01),
                     batch_size=1, epochs=10):
    _X = tf.placeholder(tf.float32, [None, X_train.shape[1]])
    _Y = tf.placeholder(tf.float32, [None, y_train.shape[1]])

    get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

    # Network
    net = create_neural_network(_X, _Y, layers)
    loss_op = tf.reduce_mean(tf.square(net - _Y))
    train_op = optimizer.minimize(loss_op)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(epochs):
            total_batch = np.ceil(X_train.shape[0] / batch_size).astype(np.int)
            total_loss = 0

            for i, (mini_X, mini_y) in enumerate(get_mini_batch_train):
                sess.run(train_op, feed_dict={_X: mini_X, _Y: mini_y})
                total_loss += sess.run(loss_op, feed_dict={_X: mini_X, _Y: mini_y})
            
            total_loss /= X_train.shape[0]
            val_loss = sess.run(loss_op, feed_dict={_X: X_val, _Y: y_val})
            print(f"Epoch {epoch}, loss: {total_loss:.4f}, val_loss: {val_loss:.4f}")

In [15]:
layer_1 = {
    "n_nodes": 50,
    "activation": tf.nn.relu
}
layer_2 = {
    "n_nodes": 100,
    "activation": tf.nn.relu
}

train_regression(X_train, y_train, X_val, y_val, layers=[layer_1, layer_2], batch_size=10, epochs=10)

Epoch 0, loss: 311.8773, val_loss: 15.1369
Epoch 1, loss: 0.9590, val_loss: 6.0154
Epoch 2, loss: 0.3866, val_loss: 3.6285
Epoch 3, loss: 0.1329, val_loss: 1.7315
Epoch 4, loss: 0.0443, val_loss: 0.3415
Epoch 5, loss: 0.0239, val_loss: 0.1625
Epoch 6, loss: 0.0199, val_loss: 0.1505
Epoch 7, loss: 0.0189, val_loss: 0.1383
Epoch 8, loss: 0.0185, val_loss: 0.1375
Epoch 9, loss: 0.0183, val_loss: 0.1178


## [Problem 5] Creating a MNIST model

In [16]:
from keras.datasets import mnist

(X, y), (X_test, y_test) = mnist.load_data()
X.shape, X_test.shape, X[0].dtype

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


((60000, 28, 28), (10000, 28, 28), dtype('uint8'))

In [17]:
X = X.reshape(-1, 28 * 28).astype(np.float)
X_test = X_test.reshape(-1, 28 * 28).astype(np.float)
X /= 255
X_test /= 255
X.shape, X_test.shape, X.min(), X.max()

((60000, 784), (10000, 784), 0.0, 1.0)

In [18]:
encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_oh = encoder.fit_transform(y[:, np.newaxis])
y_test_oh = encoder.transform(y_test[:, np.newaxis])
y.shape, y_oh.shape, y_test_oh.shape

X_train, X_val, y_train, y_val = train_test_split(X, y_oh, test_size=0.2)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((48000, 784), (12000, 784), (48000, 10), (12000, 10))

In [19]:
def lenet(x):
    weights = {
        'w1': tf.Variable(tf.random_normal([5, 5, 1, 6])),
        'w2': tf.Variable(tf.random_normal([5, 5, 6, 16])),
        'w3': tf.Variable(tf.random_normal([7 * 7 * 16, 120])),
        'w4': tf.Variable(tf.random_normal([120, 84])),
        'w5': tf.Variable(tf.random_normal([84, n_classes]))
    }

    biases = {
        'b1': tf.Variable(tf.random_normal([6])),
        'b2': tf.Variable(tf.random_normal([16])),
        'b3': tf.Variable(tf.random_normal([120])),
        'b4': tf.Variable(tf.random_normal([84])),
        'b5': tf.Variable(tf.random_normal([n_classes])),
    }

    x = tf.reshape(x, [-1, 28, 28, 1])
    conv_1 = tf.nn.conv2d(x, weights['w1'], strides=[1, 1, 1, 1], padding='SAME') + biases['b1']
    conv_1 = tf.nn.relu(conv_1)
    pool_1 = tf.nn.pool(conv_1, window_shape=[2, 2], strides=[2, 2],
                        pooling_type='MAX', padding='VALID')
    conv_2 = tf.nn.conv2d(pool_1, weights['w2'], strides=[1, 1, 1, 1], padding='SAME') + biases['b2']
    conv_2 = tf.nn.relu(conv_2)
    pool_2 = tf.nn.pool(conv_2, window_shape=[2, 2], strides=[2, 2],
                        pooling_type='MAX', padding='VALID')
    
    x_reshape = tf.reshape(pool_2, [-1, 7 * 7 * 16])
    layer_1 = tf.add(tf.matmul(x_reshape, weights['w3']), biases['b3'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w4']), biases['b4'])
    layer_2 = tf.nn.relu(layer_2)
    
    return tf.add(tf.matmul(layer_2, weights['w5']), biases['b5'])

In [20]:
lr = 0.01
batch_size = 200
epochs = 30

n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = y_train.shape[1]

X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])

get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

cnn = lenet(X)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=cnn))
optimizer = tf.train.AdamOptimizer(lr)
train_op = optimizer.minimize(loss_op)

correct_pred = tf.equal(tf.argmax(Y, axis=1), tf.argmax(tf.nn.softmax(cnn), axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(epochs):
        total_loss, total_acc = 0, 0

        for i, (mini_X, mini_y) in enumerate(get_mini_batch_train):
            sess.run(train_op, feed_dict={X: mini_X, Y: mini_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_X, Y: mini_y})
            total_loss += loss
            total_acc += acc
        
        total_loss /= n_samples
        total_acc /= n_samples

        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_train, Y: y_train})
        print(f"Epoch {epoch}, loss: {total_loss:.4f}, acc: {total_acc:.3f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.3f}")
    
    test_loss, test_acc = sess.run([loss_op, accuracy], feed_dict={X: X_test, Y: y_test_oh})
    print(f"Test loss: {test_loss:.4f}, Test acc: {test_acc:.3f}")

Epoch 0, loss: 4.6025, acc: 0.004, val_loss: 154.9071, val_acc: 0.897
Epoch 1, loss: 0.5076, acc: 0.005, val_loss: 69.3372, val_acc: 0.930
Epoch 2, loss: 0.2492, acc: 0.005, val_loss: 37.2842, val_acc: 0.949
Epoch 3, loss: 0.1522, acc: 0.005, val_loss: 26.0699, val_acc: 0.956
Epoch 4, loss: 0.0994, acc: 0.005, val_loss: 21.3223, val_acc: 0.959
Epoch 5, loss: 0.0666, acc: 0.005, val_loss: 12.5711, val_acc: 0.971
Epoch 6, loss: 0.0475, acc: 0.005, val_loss: 10.7327, val_acc: 0.972
Epoch 7, loss: 0.0350, acc: 0.005, val_loss: 10.2882, val_acc: 0.971
Epoch 8, loss: 0.0259, acc: 0.005, val_loss: 7.8881, val_acc: 0.977
Epoch 9, loss: 0.0203, acc: 0.005, val_loss: 6.2422, val_acc: 0.980
Epoch 10, loss: 0.0149, acc: 0.005, val_loss: 6.6677, val_acc: 0.979
Epoch 11, loss: 0.0113, acc: 0.005, val_loss: 5.0326, val_acc: 0.983
Epoch 12, loss: 0.0114, acc: 0.005, val_loss: 3.6447, val_acc: 0.987
Epoch 13, loss: 0.0089, acc: 0.005, val_loss: 4.6765, val_acc: 0.984
Epoch 14, loss: 0.0083, acc: 0.005,