**1. Tensorflow**

In [1]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
x1 = tf.constant(5)
x2 = tf.constant(6)

result = tf.multiply(x1, x2)

In [3]:
print(result)

Tensor("Mul:0", shape=(), dtype=int32)


In [4]:
with tf.Session() as sess:
  output = sess.run(result)
  print(output)

30


**2. Triển khai MLP**

In [5]:
import numpy as np
import random

In [6]:
tf.reset_default_graph()

**a. class MLP**

In [7]:
class MLP:
  def __init__(self, vocab_size, hidden_size):
    self._vocab_size = vocab_size
    self._hidden_size = hidden_size

  def build_graph(self):
    self._X = tf.placeholder(tf.float32, shape=[None, self._vocab_size])
    self._real_Y = tf.placeholder(tf.int32, shape=[None, ])

    weights_1 = tf.get_variable(
        name='weights_input_hidden',
        shape=(self._vocab_size, self._hidden_size),
        initializer=tf.random_normal_initializer(seed=2021)
    )
    biases_1 = tf.get_variable(
        name='biases_input_hidden',
        shape=(self._hidden_size),
        initializer=tf.random_normal_initializer(seed=2021)
    )
    weights_2 = tf.get_variable(
        name='weights_hidden_output',
        shape=(self._hidden_size, NUM_CLASSES),
        initializer=tf.random_normal_initializer(seed=2021)
    )
    biases_2 = tf.get_variable(
        name='biases_hidden_output',
        shape=(NUM_CLASSES),
        initializer=tf.random_normal_initializer(seed=2021)
    )

    hidden = tf.matmul(self._X, weights_1) + biases_1
    hidden = tf.sigmoid(hidden)
    logits = tf.matmul(hidden, weights_2) + biases_2

    labels_one_hot = tf.one_hot(indices=self._real_Y, depth=NUM_CLASSES, dtype=tf.float32)
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_one_hot, logits=logits)
    loss = tf.reduce_mean(loss)

    probs = tf.nn.softmax(logits)
    predicted_labels = tf.argmax(probs, axis=1)
    predicted_labels = tf.squeeze(predicted_labels)

    return predicted_labels, loss

  def trainer(self, loss, learning_rate):
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
    return train_op

**b. class DataReader**

In [8]:
class DataReader:
  def __init__(self, data_path, batch_size, vocab_size):
    self._batch_size = batch_size
    with open(data_path) as f:
      d_lines = f.read().splitlines()

    self._data = []
    self._labels = []
    for data_id, line in enumerate(d_lines):
      vector = [0.0 for _ in range(vocab_size)]
      features = line.split('<fff>')
      label, doc_id = int(features[0]), int(features[1])
      tokens = features[2].split()

      for token in tokens:
        index, value = int(token.split(':')[0]), float(token.split(':')[1])
        vector[index] = value
        
      self._data.append(vector)
      self._labels.append(label)

    self._data = np.array(self._data)
    self._labels = np.array(self._labels)
    self._num_epoch = 0
    self._batch_id = 0

  def next_batch(self):
    start = self._batch_id * self._batch_size
    end = start + self._batch_size
    self._batch_id += 1

    if end + self._batch_size > len(self._data):
      end = len(self._data)
      self._num_epoch += 1
      self._batch_id = 0
      indices = list(range(len(self._data)))
      random.seed(2021)
      random.shuffle(indices)
      self._data, self._labels = self._data[indices], self._labels[indices]

    return self._data[start:end], self._labels[start:end]

**c. function:**
+ load_dataset
+ save_parameters
+ restore_parameters

In [9]:
# load_dataset()

def load_dataset():
  train_data_reader = DataReader(
      data_path='/content/drive/MyDrive/Colab Notebooks/DSLAB TRAINING/SESSION 3/data/20news-train-tfidf.txt',
      batch_size=50,
      vocab_size=vocab_size
  )
  test_data_reader = DataReader(
      data_path='/content/drive/MyDrive/Colab Notebooks/DSLAB TRAINING/SESSION 3/data/20news-test-tfidf.txt',
      batch_size=50,
      vocab_size=vocab_size
  )
  return train_data_reader, test_data_reader

In [10]:
# save_parameters

def save_parameters(name, value, epoch):
  filename = name.replace(':', '-colon-') + '-epoch-{}.txt'.format(epoch)
  if len(value.shape) == 1: # is a list
    string_form = ','.join([str(number) for number in value])
  else:
    string_form = '\n'.join([','.join([str(number) for number in value[row]]) for row in range(value.shape[0])])
  with open('/content/drive/MyDrive/Colab Notebooks/DSLAB TRAINING/SESSION 3/data/saved-paras/' + filename, 'w') as f:
    f.write(string_form)

In [11]:
# restore_parameters

def restore_parameters(name, epoch):
  filename = name.replace(':', '-colon-') + '-epoch-{}.txt'.format(epoch)
  with open('/content/drive/MyDrive/Colab Notebooks/DSLAB TRAINING/SESSION 3/data/saved-paras/' + filename) as f:
    lines = f.read().splitlines()
  if len(lines) == 1: # is a vector
    value = [float(number) for number in lines[0].split(',')]
  else: # is a matrix
    value = [[float(number) for number in lines[row].split(',')] for row in range(len(lines))]
  return value

**d. main**

In [12]:
# create a computation graph

with open('/content/drive/MyDrive/Colab Notebooks/DSLAB TRAINING/SESSION 3/data/words_idfs.txt') as f:
  vocab_size = len(f.read().splitlines())
  
NUM_CLASSES = 20

mlp = MLP(
    vocab_size=vocab_size, 
    hidden_size=50
)

predicted_labels, loss = mlp.build_graph()
train_op = mlp.trainer(loss=loss, learning_rate=0.1)

train_data_reader, test_data_reader = load_dataset()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [13]:
# open a session to run

with tf.Session() as sess:
  step, MAX_STEP = 0, 1000

  sess.run(tf.global_variables_initializer())
  while step < MAX_STEP:
    train_data, train_labels = train_data_reader.next_batch()
    plabels_eval, loss_eval, _ = sess.run(
        [predicted_labels, loss, train_op],
        feed_dict = {
            mlp._X: train_data,
            mlp._real_Y: train_labels
        }
    )
    step += 1
    print('step: {}, loss: {}'.format(step, loss_eval))

    trainable_variables = tf.trainable_variables()
    for variable in trainable_variables:
      save_parameters(
          name=variable.name,
          value=variable.eval(),
          epoch=train_data_reader._num_epoch
      )

step: 1, loss: 12.036225318908691
step: 2, loss: 2.477519989013672
step: 3, loss: 0.0009005915489979088
step: 4, loss: 1.212670213135425e-05
step: 5, loss: 1.0490408186569766e-07
step: 6, loss: 0.0
step: 7, loss: 17.395544052124023
step: 8, loss: 25.100133895874023
step: 9, loss: 21.139331817626953
step: 10, loss: 15.55300521850586
step: 11, loss: 9.098770141601562
step: 12, loss: 3.6952903270721436
step: 13, loss: 0.9884459972381592
step: 14, loss: 0.060531944036483765
step: 15, loss: 21.10125732421875
step: 16, loss: 24.458280563354492
step: 17, loss: 22.112314224243164
step: 18, loss: 18.431842803955078
step: 19, loss: 14.820002555847168
step: 20, loss: 10.447005271911621
step: 21, loss: 6.201825141906738
step: 22, loss: 2.4968008995056152
step: 23, loss: 15.853484153747559
step: 24, loss: 16.919397354125977
step: 25, loss: 15.861337661743164
step: 26, loss: 13.748368263244629
step: 27, loss: 11.361886978149414
step: 28, loss: 9.32450008392334
step: 29, loss: 6.441883563995361
step:

In [14]:
# test

with tf.Session() as sess:
  epoch = train_data_reader._num_epoch

  trainable_variables = tf.trainable_variables()
  for variable in trainable_variables:
    saved_value = restore_parameters(variable.name, epoch)
    assign_op = variable.assign(saved_value)
    sess.run(assign_op)

  num_true_preds = 0
  while True:
    test_data, test_labels = test_data_reader.next_batch()
    test_plabels_eval = sess.run(
        predicted_labels,
        feed_dict={
            mlp._X: test_data,
            mlp._real_Y: test_labels
        }
    )
    matches = np.equal(test_plabels_eval, test_labels)
    num_true_preds += np.sum(matches.astype(float))

    if test_data_reader._batch_id == 0:
      break
      
  print('Epoch:{}'.format(epoch))
  print('Accuracy on test data:{}'.format(num_true_preds / len(test_data_reader._data)))

Epoch:6
Accuracy on test data:0.7761180837899947
