<a href="https://colab.research.google.com/github/yylonly/ServeNet/blob/stacked-bert/ServeNet/ALBERT_TF2_Keras_TPU_GoogleDrive(TF_Module).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Colab-only setup: pick the TensorFlow 2.x runtime. This has to run before
# TensorFlow is imported in the next cell.
# The disabled `kill` line below is a manual escape hatch for resetting the runtime.
#!kill -9 -1
%tensorflow_version 2.x
#%tensorflow_version 1.x

TensorFlow 2.x selected.


In [0]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import backend as K

# Record the library versions in the cell output so a run can be tied to a
# specific TensorFlow / TF-Hub release.
print("TF version: ", tf.__version__)
print("Hub version: ", hub.__version__)

TF version:  2.0.0
Hub version:  0.7.0


In [0]:
# Tag embedded in the name of every artifact this notebook writes
# (model checkpoint, PDF plots, pickled training history).
# NOTE(review): "Goolge" looks like a typo of "Google"; fixing it changes the
# output file names, so confirm nothing depends on them first.
SavePrefix='ALBERT-TF2-Keras-TPU-GoolgeDrive'

In [0]:
# Mount Google Drive at /content/drive; the project directory used below lives
# under "My Drive".
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import os

if os.path.isdir("/content/drive/My Drive/ServeNet"):
  os.chdir("/content/drive/My Drive/ServeNet")
else: 
  !git clone https://github.com/yylonly/ServeNet.git
  os.chdir("/content/drive/My Drive/ServeNet")

In [0]:
# Detect the available accelerator and choose a tf.distribute strategy for it.
# (Earlier TF 1.x variants of this cell used tf.contrib cluster resolvers and
# tf.compat.v1.disable_eager_execution(); they are not needed on TF 2.x.)

try:
  # Raises ValueError when no TPU can be resolved.
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
  tpu = None
  gpus = tf.config.experimental.list_logical_devices("GPU")

if tpu:
  # The TPU system must be initialised before a TPUStrategy is built on it.
  tf.tpu.experimental.initialize_tpu_system(tpu)
  strategy = tf.distribute.experimental.TPUStrategy(tpu)
  tpu_workers = tpu.cluster_spec().as_dict()['worker']
  print('Running on TPU ', tpu_workers)
else:
  gpu_names = [gpu.name for gpu in gpus]
  if len(gpu_names) > 1:
    # Several GPUs: mirror the model across all of them.
    strategy = tf.distribute.MirroredStrategy(gpu_names)
    print('Running on multiple GPUs ', gpu_names)
  else:
    # The default strategy covers both the single-GPU and the CPU-only case.
    strategy = tf.distribute.get_strategy()
    if gpu_names:
      print('Running on single GPU ', gpu_names[0])
    else:
      print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)

Running on single GPU  /job:localhost/replica:0/task:0/device:GPU:0
Number of accelerators:  1


In [0]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import csv
import h5py
import pandas as pd
import numpy as np

#from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras import metrics
from tensorflow.keras.layers import Dense, Input, Dropout, LSTM, Activation, Conv2D, Reshape, Average
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.initializers import Orthogonal

In [0]:
# Load the pre-tokenised inputs (ids, masks, segment ids) and one-hot labels.
# Only the first 8064 training and 2048 test rows are used; both counts are
# multiples of 64 — presumably so every batch is full (TODO confirm).
# The context manager closes the HDF5 file even if one of the reads fails.
with h5py.File('Data/BERT-ServiceDataset.h5', 'r') as h5f:
  X_train = h5f['train_input_ids'][:8064].astype(np.int32)
  X_mask_train = h5f['train_input_masks'][:8064].astype(np.int32)
  X_segment_train = h5f['train_segment_ids'][:8064].astype(np.int32)
  Y_train = h5f['Train_Y_one_hot'][:8064].astype(np.int32)

  X_test = h5f['test_input_ids'][:2048].astype(np.int32)
  X_mask_test = h5f['test_input_masks'][:2048].astype(np.int32)
  X_segment_test = h5f['test_segment_ids'][:2048].astype(np.int32)
  Y_test = h5f['Test_Y_one_hot'][:2048].astype(np.int32)

# Sanity check: (rows, sequence length) for inputs, (rows, classes) for labels.
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(8064, 110) (8064, 50)
(2048, 110) (2048, 50)


In [0]:
# Training hyper-parameters and tensor dimensions shared by the cells below.
# batch_size is exposed as a Colab form field through the #@param annotation.
batch_size = 64 #@param {type:"integer"}
# Number of full batches in the training set; passed to fit() as steps_per_epoch.
step = len(X_train) // batch_size
# Token sequence length; matches the second dimension of X_train printed above.
maxLen = 110
# Width of the ALBERT sequence output (the model summary shows (None, None, 768)).
hiddenSize = 768

In [0]:
# import tensorflow as tf
# import tensorflow_hub as hub
# from tensorflow.keras import backend as K

# #Initialize session
# sess = tf.compat.v1.Session()

# Params for bert model and tokenization
# H-768
# bert_path = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1"
# bert_path = "https://tfhub.dev/google/albert_base/2"
# H-1024
# bert_path = "https://tfhub.dev/google/albert_large/2"
# H-2048
#bert_path = "https://tfhub.dev/google/albert_xlarge/2"

# max_seq_length = 110

In [0]:
def ServeNet(input_shape):
    """
    Build the ServeNet classifier: frozen ALBERT encoder -> CNN -> BiLSTM -> dense head.

    The three inputs (token ids, attention mask, segment ids) are fed to the
    ALBERT-base TF-Hub module, whose per-token sequence output is treated as a
    single-channel image, passed through two convolutions, summarised by a
    bidirectional LSTM and classified by a 50-way softmax.

    Arguments:
    input_shape -- shape of each of the three inputs, (max_len,); its first
                   entry is used as the sequence length for the reshapes

    Returns:
    model -- an uncompiled Keras Model mapping
             [input_ids, input_mask, segment_ids] to class probabilities of shape (None, 50)

    Note: the encoder width is read from the notebook-level `hiddenSize`.
    """
    # Take the sequence length from the argument so the reshapes below always
    # agree with the declared inputs, instead of relying on the global maxLen.
    seq_len = input_shape[0]

    input_ids = tf.keras.layers.Input(shape=input_shape, dtype=tf.int32, name='input_ids')
    input_mask = tf.keras.layers.Input(shape=input_shape, dtype=tf.int32, name='input_mask')
    segment_ids = tf.keras.layers.Input(shape=input_shape, dtype=tf.int32, name="segment_ids")

    albert = hub.KerasLayer(
        "https://tfhub.dev/google/albert_base/2",
        trainable=False, #True flag is not working on tensorflow hub 0.7 
        signature="tokens",
        output_key="sequence_output",
    )

    bert_inputs = {
        "input_ids": input_ids,
        "input_mask": input_mask,
        "segment_ids": segment_ids,
    }

    # Per-token embeddings; add a trailing channel axis so Conv2D can consume them.
    sequence_output = albert(bert_inputs)
    embeddings = Reshape((seq_len, hiddenSize, 1))(sequence_output)

    # CNN: 64 local 3x3 feature maps, then a 1x1 convolution that collapses
    # them back to a single channel.
    features1 = Conv2D(64, kernel_size=(3, 3), padding='same')(embeddings)
    features1 = Dropout(0.4)(features1)
    features2 = Conv2D(1, kernel_size=(1, 1), padding='same')(features1)
    features2 = Dropout(0.4)(features2)

    # Drop the channel axis again: back to (seq_len, hiddenSize) for the LSTM.
    features = Reshape((seq_len, hiddenSize))(features2)

    # LSTM: return_sequences=False keeps only the final output of each direction.
    X = Bidirectional(LSTM(512, return_sequences=False))(features)
    X = Dropout(0.5)(X)

    # TASK: classification head.
    X = Dense(200, activation='tanh')(X)
    X = Dropout(0.2)(X)
    X = Dense(50, activation='softmax')(X)

    # Model from the three encoder inputs to the class probabilities.
    model = Model(inputs=[input_ids, input_mask, segment_ids], outputs=X)

    return model

In [0]:
# Checkpoint callback: monitors validation top-k accuracy and, because
# save_best_only=True, only overwrites the file when that metric improves.
checkpoint_path = 'Data/ServeNet-' + SavePrefix + '.hdf5'
checkpointer = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_top_k_categorical_accuracy',
    verbose=1,
    save_best_only=True,
)

In [0]:
# Adam optimizer with lr=0.003; beta_1, beta_2 and epsilon are spelled out
# explicitly and learning-rate decay is disabled (decay=0).
adam = tf.keras.optimizers.Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0)

In [0]:
# Build and compile inside the strategy scope selected during hardware detection.
with strategy.scope():
    model = ServeNet((maxLen, ))
    model.compile(
        loss='categorical_crossentropy',
        # Pass the configured optimizer instance. The string "adam" would make
        # Keras build a default Adam and silently ignore the custom settings
        # (lr=0.003) chosen for this notebook.
        optimizer=adam,
        # Order matters: evaluate() returns [loss, top-k accuracy, top-1 accuracy].
        metrics=[metrics.top_k_categorical_accuracy, metrics.categorical_accuracy],
    )

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [0]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
# (Alternative kept for debugging: dump only the output shape of each layer.)
# for layer in model.layers:
#     print(layer.output_shape)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 110)]        0                                            
__________________________________________________________________________________________________
input_mask (InputLayer)         [(None, 110)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 110)]        0                                            
__________________________________________________________________________________________________
keras_layer (KerasLayer)        (None, None, 768)    11812272    input_ids[0][0]                  
                                                                 input_mask[0][0]             

In [0]:
# Train for 20 epochs, validating on the test split after each epoch and
# checkpointing through `checkpointer`.
# NOTE(review): `batch_size` is not passed to fit(); `step` was computed from
# it, so confirm fit() ends up using the same batch size for this TF version.
train_inputs = [X_train, X_mask_train, X_segment_train]
val_data = ([X_test, X_mask_test, X_segment_test],  Y_test)

history = model.fit(
    train_inputs,
    Y_train,
    validation_data=val_data,
    epochs=20,
    steps_per_epoch=step,
    verbose=1,
    callbacks=[checkpointer],
)

Train on 8064 samples, validate on 2048 samples
Epoch 1/20




LookupError: ignored

In [0]:
# Top-1 accuracy per epoch, training vs. test.
fig = plt.figure(figsize=(8, 4), dpi=100)
plt.title("Top 1 Accuracy: Tranning Set vs Test Set")
plt.xlabel("Epoch")
plt.ylabel("Top 1 Accuracy")
plt.plot(history.history['val_categorical_accuracy'], label="Test Set")
plt.plot(history.history['categorical_accuracy'], label="Trainning Set")
plt.legend()
# Save BEFORE show(): with the inline backend show() finalises the current
# figure, so a savefig() issued afterwards writes an empty page.
# NOTE(review): this file name has no '-' after the prefix, unlike the Top5 and
# loss plots; kept as-is so existing output paths do not change.
fig.savefig(SavePrefix + 'ServeNetTop1.pdf', format='pdf', dpi=300)
plt.show()

In [0]:
# Top-5 accuracy per epoch, training vs. test.
fig = plt.figure(figsize=(8, 4), dpi=100)
plt.title("Top 5 Accuracy: Tranning Set vs Test Set")
plt.xlabel("Epoch")
plt.ylabel("Top 5 Accuracy")
plt.plot(history.history['top_k_categorical_accuracy'], label="Trainning Set")
plt.plot(history.history['val_top_k_categorical_accuracy'], label="Test Set")
plt.legend()
# Save BEFORE show(): with the inline backend show() finalises the current
# figure, so a savefig() issued afterwards writes an empty page.
fig.savefig(SavePrefix + '-ServeNetTop5.pdf', format='pdf', dpi=300)
plt.show()

In [0]:
# Loss per epoch, training vs. test.
fig = plt.figure(figsize=(8, 4), dpi=100)
plt.title("Tranning Loss vs Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(history.history['val_loss'], label="Test Set")
plt.plot(history.history['loss'], label="Trainning Set")
plt.legend()
# Save BEFORE show(): with the inline backend show() finalises the current
# figure, so a savefig() issued afterwards writes an empty page.
fig.savefig(SavePrefix + '-ServeNetLoss.pdf', format='pdf', dpi=300)
plt.show()

In [0]:
import pickle

# Persist the per-epoch metrics dict so the curves can be inspected later
# without retraining. The context manager closes the file even if dump() fails.
with open(SavePrefix + '-ServeNetHistory', 'wb') as f:
  pickle.dump(history.history, f)

In [0]:
import pickle

# Reload the metrics dict written by the previous cell.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load history files produced by this notebook, never ones from untrusted sources.
with open(SavePrefix + '-ServeNetHistory', 'rb') as f:
  his = pickle.load(f)

In [0]:
def _report_best(label, values, pick, locate):
    """Print the best value of a per-epoch series and the epoch index where it occurs."""
    print(label, pick(values))
    print(locate(values))


# Best validation figures over all epochs (indices are 0-based epochs).
val_top_k = his['val_top_k_categorical_accuracy']
_report_best("top5: ", val_top_k, max, np.argmax)

val_loss = his['val_loss']
_report_best("loss: ", val_loss, min, np.argmin)

val_ca = his['val_categorical_accuracy']
_report_best("top1: ", val_ca, max, np.argmax)

In [0]:
# Restore the model from the checkpoint file written by the ModelCheckpoint
# callback. hub.KerasLayer is passed as a custom object so Keras can
# rebuild the TF-Hub encoder layer.
model = load_model(
    'Data/ServeNet-' + SavePrefix + '.hdf5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)

In [0]:
# Evaluate the restored model on both splits. evaluate() returns the loss
# followed by the compiled metrics in order: top-k accuracy, then top-1
# (categorical) accuracy. Despite the "error" in the variable names, the
# values are accuracies.
print("Training set:")
loss_train, top5error_train, top1error_train = model.evaluate([X_train, X_mask_train, X_segment_train], Y_train)
print("Top5 Training accuracy = ", top5error_train)
print("Top1 Training accuracy = ", top1error_train)
print('Test set:')
loss_test, top5error_test, top1error_test = model.evaluate([X_test, X_mask_test, X_segment_test], Y_test)
# These are test-set figures; label them as such.
print("Top5 Test accuracy = ", top5error_test)
print("Top1 Test accuracy = ", top1error_test)