In [None]:
import tensorflow as tf

In [None]:
from tensorflow.keras.layers import Dense, Input, Bidirectional, LSTM, dot, concatenate, Activation, Conv1D, GRU
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, Dropout, LeakyReLU, GlobalAveragePooling2D
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model

In [None]:
class TwoStreamBiLSTM:
    """Builders for Keras classifiers over text and video embedding sequences.

    Args:
        input_shape: Pair ``(text_shape, video_shape)``. Each entry is the
            per-sample shape handed to ``Input`` for that stream.
        hidden_states: Number of units of every LSTM (per direction).
        no_classes: Number of output units of the ``build_convnet`` model.
    """

    def __init__(self, input_shape=((15, 768), (15, 1280)), hidden_states=256, no_classes=1):
        self.input_shape = input_shape
        self.hidden_states = hidden_states
        self.no_classes = no_classes

    def _bilstm(self, return_sequences=True):
        """Return a new, not yet connected bidirectional LSTM layer."""
        return Bidirectional(
            LSTM(
                self.hidden_states,
                return_sequences=return_sequences,
                kernel_initializer='glorot_uniform',
            )
        )

    def _residual_encoder(self, inp):
        """Encode one input sequence with the stacked BiLSTM used by ``build``.

        Two groups of two BiLSTM layers are applied; inside each group the
        activations of both layers are concatenated. A final BiLSTM with
        ``return_sequences=False`` collapses the time axis.
        """
        x1 = LeakyReLU(0.2)(self._bilstm()(inp))
        x = LeakyReLU(0.2)(self._bilstm()(x1))
        x = concatenate([x1, x])

        x1 = LeakyReLU(0.2)(self._bilstm()(x))
        x = LeakyReLU(0.2)(self._bilstm()(x1))
        x = concatenate([x1, x])

        x = self._bilstm(return_sequences=False)(x)
        return LeakyReLU(0.2)(x)

    def _pooled_encoder(self, inp):
        """Encode one input sequence with the two-layer BiLSTM used by ``build_bilstm``."""
        x = self._bilstm()(inp)
        x = self._bilstm()(x)
        x = GlobalMaxPooling1D()(x)
        x = Dense(512, kernel_initializer='glorot_uniform')(x)
        return LeakyReLU(0.2)(x)

    def build(self):
        """Build the two-stream model with concatenation skip connections.

        The previous implementation fed an undefined name ``inp`` into the
        network and raised ``NameError``. Each stream now gets its own
        encoder and the two encodings are concatenated before the dense
        head, mirroring the wiring of ``build_bilstm``.

        Returns:
            A ``Model`` taking ``[text, video]`` and producing one sigmoid unit.
        """
        inp_text = Input(shape=self.input_shape[0])
        inp_video = Input(shape=self.input_shape[1])

        x_text = self._residual_encoder(inp_text)
        x_video = self._residual_encoder(inp_video)
        x = concatenate([x_text, x_video])

        x = Dense(512, kernel_initializer='glorot_uniform')(x)
        x = LeakyReLU(0.2)(x)
        x = Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid')(x)

        return Model(inputs=[inp_text, inp_video], outputs=x)

    def build_bilstm(self):
        """Build the two-stream model with max-pooled BiLSTM encoders.

        Returns:
            A ``Model`` taking ``[text, video]`` and producing one sigmoid unit.
        """
        inp_text = Input(shape=self.input_shape[0])
        inp_video = Input(shape=self.input_shape[1])

        # The text stream is built before the video stream, as before.
        x_text = self._pooled_encoder(inp_text)
        x_video = self._pooled_encoder(inp_video)

        x = concatenate([x_text, x_video])

        # This Dense layer has no activation (it is linear), as in the original.
        x = Dense(512, kernel_initializer='glorot_uniform')(x)
        x = Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid')(x)

        return Model(inputs=[inp_text, inp_video], outputs=x)

    def build_convnet(self):
        """Build a 1-D convolutional model over the text stream only.

        Fixes references to the undefined names ``input_shape``,
        ``self.hidden_size`` and ``self.no_classes``: the text entry of
        ``self.input_shape``, ``self.hidden_states`` and the ``no_classes``
        constructor argument are used instead.

        Returns:
            A ``Model`` taking the text input and producing ``no_classes``
            sigmoid units.
        """
        input_text = Input(shape=self.input_shape[0])

        x = Conv1D(32, 3)(input_text)
        x = Conv1D(64, 3)(x)
        x = Conv1D(128, 3)(x)

        x = GlobalAveragePooling1D()(x)

        x = Dense(self.hidden_states*2, kernel_initializer='glorot_uniform')(x)
        x = LeakyReLU(0.2)(x)

        x = Dropout(rate=0.2)(x)

        output = Dense(self.no_classes, activation='sigmoid', kernel_initializer='glorot_uniform')(x)

        return Model(inputs=input_text, outputs=output)

In [None]:
import pandas as pd
# Load the post-wise table; later cells read its 'comment_embedding',
# 'video_embedding' and 'label' columns.
df = pd.read_csv(
    '../input/text-video-data-1/postwise_comment_video_embeds_v2.csv'
)
df

In [None]:
import numpy as np
def convert_str_to_array(array_str):
    """Parse a bracketed, comma-separated number string into a flat array.

    Every '[', ']' and space character is removed before parsing, so any
    nesting expressed through brackets is flattened away.

    Args:
        array_str: Text such as ``'[0.1, 0.2, 0.3]'``.

    Returns:
        A 1-D NumPy array holding the parsed numbers.
    """
    # Delete the three unwanted characters in a single pass.
    stripped = array_str.translate(str.maketrans('', '', '[] '))
    return np.fromstring(stripped, sep=', ')
# Chunk layout used to split each flattened embedding into a sequence.
SEQ_LEN = 15
TEXT_DIM = 768
VIDEO_DIM = 1280


def _embedding_to_sequence(embedding_str, n_steps, dim):
    """Turn one serialized embedding into an ``(n_steps, dim)`` float32 array.

    Only the first ``n_steps * dim`` parsed values are used. When the string
    holds fewer values than that, the (ragged) chunks are returned in a 1-D
    object array instead, so that a later ``np.array(..., dtype=np.float32)``
    conversion fails for that row rather than silently accepting it.
    """
    flat = convert_str_to_array(embedding_str)
    needed = n_steps * dim
    if flat.size >= needed:
        return flat[:needed].astype(np.float32).reshape(n_steps, dim)

    ragged = np.empty(n_steps, dtype=object)
    for j in range(n_steps):
        # Element-wise assignment keeps each chunk as a separate array object.
        ragged[j] = flat[j*dim: j*dim+dim].astype(np.float32)
    return ragged


# Replace whole columns instead of writing cell by cell through
# ``df[col][i] = ...``: that chained assignment is not guaranteed to modify
# ``df`` (it is a no-op under pandas Copy-on-Write) and ``df[col][i]`` is a
# label lookup that breaks when the index is not 0..n-1.
df['comment_embedding'] = df['comment_embedding'].map(
    lambda s: _embedding_to_sequence(s, SEQ_LEN, TEXT_DIM)
)
df['video_embedding'] = df['video_embedding'].map(
    lambda s: _embedding_to_sequence(s, SEQ_LEN, VIDEO_DIM)
)
df

In [None]:
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, precision_score, recall_score

# Not used by the evaluation loop below; kept in case other cells reference it.
kf = KFold(n_splits=4,random_state = 43, shuffle=True)

# Result accumulators. Only f1_vals is filled in this cell; the others are
# kept in case other cells reference them.
precision_vals = []
recall_vals = []
f1_vals = []
acc_vals = []

count = 0

# Candidate decision thresholds: 0.102, 0.104, ..., 0.898.
thresholds = [.002*i+.1 for i in range(1, 400)]


def _stack_valid_samples(text_seqs, video_seqs, labels):
    """Convert parallel per-sample sequences into aligned float32 arrays.

    A sample is kept only if its text sequence, video sequence and label can
    all be converted to float32. All three conversions happen before anything
    is stored, so a failure can never leave the outputs misaligned.

    Returns:
        ``(texts, videos, targets, n_skipped)`` where the first three are
        NumPy arrays with one entry per kept sample.
    """
    texts, videos, targets = [], [], []
    n_skipped = 0
    for text, video, label in zip(text_seqs, video_seqs, labels):
        try:
            sample = (
                np.array(text, dtype=np.float32),
                np.array(video, dtype=np.float32),
                np.array([label], dtype=np.float32),
            )
        except (ValueError, TypeError):
            # Malformed (e.g. ragged) embedding: drop the whole sample.
            n_skipped += 1
            continue
        texts.append(sample[0])
        videos.append(sample[1])
        targets.append(sample[2])
    return np.array(texts), np.array(videos), np.array(targets), n_skipped


def _best_f1_threshold(y_true, y_prob, candidate_thresholds):
    """Return ``(best_f1, best_threshold, confusion_matrix)`` over the candidates.

    Predictions are 1 where ``y_prob >= threshold``. If no candidate reaches
    an F1 above 0, all three returned values are 0.
    """
    y_true = np.ravel(y_true)
    y_prob = np.ravel(y_prob)
    best_f1, best_threshold, best_matrix = 0, 0, 0
    for threshold in candidate_thresholds:
        y_pred = (y_prob >= threshold).astype(np.float32)
        score = f1_score(y_true, y_pred)
        if score > best_f1:
            best_f1 = score
            best_threshold = threshold
            best_matrix = confusion_matrix(y_true, y_pred)
    return best_f1, best_threshold, best_matrix


for rst in range(0,10):
    # Drop graph/layer state left over from the previous run's model.
    tf.keras.backend.clear_session()

    # Seed the split with the run index so every run evaluates a different
    # split; the previous constant seed repeated the same split ten times.
    train, test = train_test_split(df, test_size=0.25, random_state=rst)
    print('train_size: ', train.shape)
    print('test_size: ', test.shape)

    X_text, X_video, y, skipped_train = _stack_valid_samples(
        train['comment_embedding'].values,
        train['video_embedding'].values,
        train['label'].values,
    )
    X_val_text, X_val_video, y_val, skipped_test = _stack_valid_samples(
        test['comment_embedding'].values,
        test['video_embedding'].values,
        test['label'].values,
    )
    if skipped_train or skipped_test:
        print('skipped malformed samples: train =', skipped_train, ' test =', skipped_test)

    # NOTE(review): the test split doubles as validation data for early
    # stopping and for choosing the threshold below, so the reported F1 is
    # optimistic. Consider carving out a separate validation split.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor= 'val_accuracy', patience=12, min_delta=0, restore_best_weights=True, mode='max'),
    ]

    model = TwoStreamBiLSTM()
    model = model.build_bilstm()
    model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

    model.fit(
        [X_text, X_video],
        y,
        epochs=50,
        validation_data=([X_val_text, X_val_video], y_val),
        callbacks=callbacks,
        verbose=0,
    )

    y_predicted = model.predict([X_val_text, X_val_video])
    print(y_predicted[:10])

    f1_max, th_max, matrix = _best_f1_threshold(y_val, y_predicted, thresholds)

    print('max_f1 = ', f1_max, ' th_max = ', th_max, ' rst = ', rst)
    print(matrix)
    f1_vals.append(f1_max)

f1_avg = float(np.mean(f1_vals))
print('Averaged f1_score: ', f1_avg)