In [None]:
import os
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from sklearn.model_selection import train_test_split

from utilize import gen_dataframe

In [None]:
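# Optional GPU setup (disabled): uncomment on a machine with a GPU to let
# TensorFlow grow its GPU memory usage on demand instead of pre-allocating it.
# physical_devices = tf.config.list_physical_devices("GPU")
# print(physical_devices)

# tf.config.experimental.set_memory_growth(physical_devices[0], True)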

In [None]:
# Load the dataset through the project helper and preview the first rows.
# NOTE(review): later cells expect a 'cleaned_tweet' column in this frame.
data_dir = 'data/'
df = gen_dataframe(data_dir)
df.head()

In [None]:
# TEMPORARY: assign a random binary label (0 or 1) to every row of df as a
# placeholder until real labels are available.
# The generator is seeded so that the placeholder labels (and therefore the
# class counts printed below) are identical after every kernel restart.
label_rng = np.random.default_rng(42)
rand_label = label_rng.integers(0, 2, size=len(df))  # upper bound is exclusive
df['label'] = rand_label

# Shape of the subset of rows for each label, as a quick class-balance check.
print(df.loc[df['label']==0].shape)
print(df.loc[df['label']==1].shape)

df.head()

In [None]:
# Split the dataset into train and test parts: the tweet text is the feature,
# the binary label is the target, and 33% of the rows are held out.
X = df['cleaned_tweet']
y = df['label']

# A fixed random_state makes the shuffle deterministic, so the same rows land
# in train/test on every run and results stay comparable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.33,
    random_state=42,
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# TF-Hub handles for the uncased English BERT preprocessing model and the
# matching L-12 / H-768 / A-12 encoder.
bert_preprocess_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
bert_encoder_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4'

# Wrap both hub models as Keras layers so they can be used in a functional model.
# NOTE(review): the encoder is created without trainable=True; confirm that a
# frozen encoder is intended given the small (1e-5) learning rate used later.
bert_preprocess = hub.KerasLayer(bert_preprocess_url)
bert_encoder = hub.KerasLayer(bert_encoder_url)

In [None]:
# Functional model: raw string -> BERT preprocessing -> BERT encoder
# -> dropout on the pooled output -> single sigmoid unit.
text_inputs = tf.keras.layers.Input(
    shape=(),
    dtype=tf.string,
    name='text',
)

preprocessed_text = bert_preprocess(text_inputs)
encoded_text = bert_encoder(preprocessed_text)

# Classification head applied to the encoder's 'pooled_output' entry.
pooled_output = encoded_text['pooled_output']
layer = tf.keras.layers.Dropout(0.1, name='dropout')(pooled_output)
layer = tf.keras.layers.Dense(
    units=1,
    activation='sigmoid',
    name='output',
)(layer)

model = tf.keras.Model(inputs=[text_inputs], outputs=[layer])
model.summary()

In [None]:
# Output locations for TensorBoard logs and the best-weights checkpoint.
log_dir = 'model/log'
checkpoint_filepath = 'model/checkpoint'

# Keep only the weights of the epoch with the lowest validation loss.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Write training logs for TensorBoard under log_dir.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

callbacks = [checkpoint_callback, tensorboard_callback]

In [None]:
# Training configuration. The output layer already applies a sigmoid, so the
# loss receives probabilities rather than logits (from_logits=False).
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
metrics = ['accuracy']

model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)

In [None]:
# Train for 5 epochs in batches of 32, with one log line per epoch (verbose=2).
# NOTE(review): the held-out test split is also passed as validation data, and
# the checkpoint callback selects the best epoch on val_loss — so the saved
# weights are chosen using the test set. Consider a separate validation split.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
    callbacks=callbacks,
    verbose=2,
)