In [1]:
import os,sys,tqdm
import numpy as np
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.datasets import *
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.text import *


import pandas as pd
import shutil
import pickle
import gc
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1"
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
def basic_statistics(all_length):
    '''
    input: length list of elements e.g.[1,1,1,3,5,9,4,2,1,3,54,78,5...]
    output1: mean、std、mode、min、q1、median(q2)、q3、max、iqr、outlier、far out
    output2: statistics graph、10%~90% form
    '''
    stat_dict = {}
    stat_dict['mean'] = np.mean(all_length)
    stat_dict['std'] = np.std(all_length)
    stat_dict['mode'] = np.argmax(np.bincount(all_length))
    stat_dict['min'] = np.min(all_length)
    stat_dict['q1'] = np.quantile(all_length,0.25)
    stat_dict['median'] = np.quantile(all_length,0.5)
    stat_dict['q3'] = np.quantile(all_length,0.75)
    stat_dict['max'] = np.max(all_length)
    stat_dict['iqr'] = stat_dict['q3'] - stat_dict['q1']
    stat_dict['outlier'] = stat_dict['q3'] + 1.5*stat_dict['iqr']
    stat_dict['far_out'] = stat_dict['q3'] + 3*stat_dict['iqr']
    for i in [10,20,30,40,50,60,70,80,90,100]:
        stat_dict[str(i)+'%'] = np.percentile(all_length,i)
    return pd.DataFrame.from_dict(stat_dict,orient='index',columns=['length'])

In [3]:
max_words = 2000 #Top most frequent words to consider. Any less frequent word will appear as oov_char value in the sequence data.
max_length = 358

In [4]:
word_index = reuters.get_word_index()
print('all_words#:',len(word_index))
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,maxlen=max_length,
                                                         test_split=0.2)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

all_words#: 30979
8248 train sequences
2063 test sequences


In [None]:
x_train

In [65]:
train_len = [len(x) for x in x_train]
basic_statistics(train_len)

Unnamed: 0,length
mean,110.996363
std,76.006433
mode,17.0
min,13.0
q1,57.0
median,90.0
q3,148.0
max,357.0
iqr,91.0
outlier,284.5


In [5]:
trainX = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_length,padding='post',value=0)
testX = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_length,padding='post',value=0)
print(trainX.shape, testX.shape)

(8248, 358) (2063, 358)


In [6]:
hidden_dim = 128
do = 0.1

## Graph execution
### Embedder

In [42]:
int_id = Input(shape=(max_length,), dtype='int32', name='int_ids') # 輸入的api funvtion name ID
int_ids = Masking(mask_value=0)(int_id)
sent_emb = Embedding(max_words, hidden_dim,input_length=max_length
                    ,trainable=True,name='glove_emb')(int_ids) 

### Encoder

In [43]:
rnn = GRU(int(hidden_dim/2),return_sequences=True,return_state=False,name='common_extract'
                      ,trainable=True)(sent_emb)
rnn = BatchNormalization(name='bn')(rnn)


### Filter

In [44]:
fil = TimeDistributed(Dense(1,activation='sigmoid',
                             name='filter_out'),name='TD2')(rnn)

### Classfier

In [47]:
mul = Multiply()([fil,sent_emb])
clf = LSTM(int(hidden_dim/2),dropout=do,recurrent_dropout=do,name='lstm')(mul)
clf = BatchNormalization(name='bn3')(clf)
clf = Dense(max(y_train)+1,activation='softmax',name='clf')(clf)

## Compile

In [48]:
model = Model(inputs=int_id, outputs = clf)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
int_ids (InputLayer)            [(None, 358)]        0                                            
__________________________________________________________________________________________________
masking_1 (Masking)             (None, 358)          0           int_ids[0][0]                    
__________________________________________________________________________________________________
glove_emb (Embedding)           (None, 358, 128)     1280000     masking_1[0][0]                  
__________________________________________________________________________________________________
common_extract (GRU)            (None, 358, 64)      37248       glove_emb[0][0]                  
______________________________________________________________________________________________

In [53]:
# loss
import keras.backend as K
def custom_objective(layer):
    return K.sum(layer.output)
#     return K.sum(layer.output)
# kk = tf.keras.backend.ea
model.compile(loss=custom_objective(model.get_layer(name='TD2')),optimizer='adam')

Using TensorFlow backend.


OperatorNotAllowedInGraphError: using a `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.

## Eager Execution

In [163]:
do = 0
class base_model(Model):
    def __init__(self):
        super(base_model, self).__init__()
        self.mask = Masking(mask_value=0)
        self.emb = Embedding(max_words, hidden_dim,input_length=max_length
                    ,trainable=True,name='glove_emb')
        self.rnn1 = GRU(int(hidden_dim/2),return_sequences=True,return_state=False,name='common_extract'
                      ,trainable=True)
        self.bn1 = BatchNormalization(name='bn1')
        self.fil = TimeDistributed(Dense(1,activation='sigmoid',
                             name='filter_out'),name='TD2')
        self.mul = Multiply()
        self.rnn2 = Bidirectional(GRU(int(hidden_dim/2),dropout=do,recurrent_dropout=do,name='lstm'))
        self.rnn3 = LSTM(int(hidden_dim/2))
        self.bn2 = BatchNormalization(name='bn2')
        self.out = Dense(max(y_train)+1,activation='softmax',name='clf')
    def transform(self,x):
        return tf.math.round(x)        
    def call(self,x):
        x = self.mask(x)
        x1 = self.emb(x)
        x = self.rnn1(x1)
        #x = self.bn1(x)
        y = self.fil(x)
        y1 = self.transform(y)
        x2 = self.mul([y1,x1])
        x = self.rnn2(x2) #x
        x = self.bn2(x)
        y2 = self.out(x)
        return y,y2
        #return y,y1,y2,x2
        
model = base_model()

In [162]:
# TEST
x = tf.random.uniform((1, max_length))
out1,out2,out3,out4 = model(x)
out1

<tf.Tensor: id=264354, shape=(1, 358, 1), dtype=float32, numpy=
array([[[0.50008374],
        [0.50039524],
        [0.50061977],
        [0.50072163],
        [0.5007394 ],
        [0.50071514],
        [0.5006769 ],
        [0.50063914],
        [0.50060797],
        [0.5005844 ],
        [0.50056773],
        [0.50055635],
        [0.5005488 ],
        [0.5005439 ],
        [0.50054073],
        [0.50053877],
        [0.5005376 ],
        [0.5005368 ],
        [0.5005363 ],
        [0.500536  ],
        [0.50053585],
        [0.5005358 ],
        [0.5005357 ],
        [0.50053567],
        [0.50053567],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5005356 ],
        [0.5

In [141]:
out4

<tf.Tensor: id=259061, shape=(1, 358, 128), dtype=float32, numpy=
array([[[ 0.        ,  0.        ,  0.        , ..., -0.        ,
          0.        ,  0.        ],
        [ 0.02912379,  0.03589214,  0.0456765 , ..., -0.01467606,
          0.01763055,  0.0358587 ],
        [ 0.02912379,  0.03589214,  0.0456765 , ..., -0.01467606,
          0.01763055,  0.0358587 ],
        ...,
        [ 0.02912379,  0.03589214,  0.0456765 , ..., -0.01467606,
          0.01763055,  0.0358587 ],
        [ 0.02912379,  0.03589214,  0.0456765 , ..., -0.01467606,
          0.01763055,  0.0358587 ],
        [ 0.02912379,  0.03589214,  0.0456765 , ..., -0.01467606,
          0.01763055,  0.0358587 ]]], dtype=float32)>

In [164]:
batch_size = 128 #,reshuffle_each_iteration=True
train_ds = tf.data.Dataset.from_tensor_slices((trainX,y_train)).shuffle(trainX.shape[0]).batch(batch_size)
valid_ds = tf.data.Dataset.from_tensor_slices((testX,y_test)).batch(batch_size)

In [165]:
def loss_object1(predictions):
    mask = tf.math.logical_not(tf.math.equal(predictions, 0))
    loss_ = tf.reduce_mean(predictions)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)
loss_object2 = tf.keras.losses.SparseCategoricalCrossentropy()

optimizer1 = tf.keras.optimizers.Nadam()
optimizer2 = tf.keras.optimizers.RMSprop()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [166]:
alpha = 0.1
@tf.function
def train_step(x,yc):
    with tf.GradientTape(persistent=True) as tape:
        pred_imp , pred_cat = model(x)
#         pred_cat = model(x)
#         loss = alpha*loss_object1(pred_imp) + loss_object2(yc,pred_cat)
        loss1 = alpha*loss_object1(pred_imp)
        loss2 = loss_object2(yc,pred_cat)
        loss = loss_object2(yc,pred_cat)
#     gradients = tape.gradient(loss, model.trainable_variables)
    grad1 = tape.gradient(loss1, model.trainable_variables)
    grad2 = tape.gradient(loss2, model.trainable_variables)
#     optimizer1.apply_gradients(zip(gradients, model.trainable_variables))
    optimizer1.apply_gradients(zip(grad1, model.trainable_variables))
    optimizer2.apply_gradients(zip(grad2, model.trainable_variables))
#     with tf.GradientTape() as tape:
#         pred_imp , pred_cat = model(x)
#         loss2 = loss_object2(yc,pred_cat)
#         loss = alpha*loss_object1(pred_imp) + loss_object2(yc,pred_cat)
#     grad2 = tape.gradient(loss2, model.trainable_variables)
#     optimizer2.apply_gradients(zip(grad2, model.trainable_variables))

    train_loss(loss)
    train_accuracy(yc, pred_cat)
    
@tf.function
def test_step(x,yc):
    pred_imp, pred_cat = model(x)
#     pred_cat = model(x)
    t_loss = alpha*loss_object1(pred_imp) + loss_object2(yc,pred_cat) 
#     t_loss = loss_object2(yc,pred_cat)
    
    test_loss(t_loss)
    test_accuracy(yc, pred_cat)

In [167]:
EPOCHS = 20
gc.collect()
for epoch in range(EPOCHS):
    for text, labels in train_ds:
        train_step(text, labels)

    for test_text, test_labels in valid_ds:
        test_step(test_text, test_labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                        train_loss.result(),
                        train_accuracy.result()*100,
                        test_loss.result(),
                        test_accuracy.result()*100))

    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

Epoch 1, Loss: 2.556689739227295, Accuracy: 36.821044921875, Test Loss: 2.3505802154541016, Test Accuracy: 38.730003356933594
Epoch 2, Loss: 2.3517208099365234, Accuracy: 37.3908805847168, Test Loss: 2.3402976989746094, Test Accuracy: 38.730003356933594
Epoch 3, Loss: 2.343385696411133, Accuracy: 37.3908805847168, Test Loss: 2.360708713531494, Test Accuracy: 38.730003356933594
Epoch 4, Loss: 2.3441457748413086, Accuracy: 37.3908805847168, Test Loss: 2.3469791412353516, Test Accuracy: 38.730003356933594
Epoch 5, Loss: 2.3457727432250977, Accuracy: 37.3908805847168, Test Loss: 2.3407504558563232, Test Accuracy: 38.730003356933594
Epoch 6, Loss: 2.3440752029418945, Accuracy: 37.3908805847168, Test Loss: 2.348440170288086, Test Accuracy: 38.730003356933594
Epoch 7, Loss: 2.34462308883667, Accuracy: 37.3908805847168, Test Loss: 2.3414554595947266, Test Accuracy: 38.730003356933594
Epoch 8, Loss: 2.3419158458709717, Accuracy: 37.3908805847168, Test Loss: 2.344940662384033, Test Accuracy: 38.

* 同一個opt若加入transform就會train不起來
* 兩個不同的opt加入transform也會train不起來 (persistent、non-persis都不行)