In [1]:
import os,sys,tqdm
import numpy as np
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.datasets import *
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.text import *

from collections import Counter
import pandas as pd
import shutil
import pickle
import gc
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def basic_statistics(all_length):
    '''
    input: length list of elements e.g.[1,1,1,3,5,9,4,2,1,3,54,78,5...]
    output1: mean、std、mode、min、q1、median(q2)、q3、max、iqr、outlier、far out
    output2: statistics graph、10%~90% form
    '''
    stat_dict = {}
    stat_dict['mean'] = np.mean(all_length)
    stat_dict['std'] = np.std(all_length)
    stat_dict['mode'] = np.argmax(np.bincount(all_length))
    stat_dict['min'] = np.min(all_length)
    stat_dict['q1'] = np.quantile(all_length,0.25)
    stat_dict['median'] = np.quantile(all_length,0.5)
    stat_dict['q3'] = np.quantile(all_length,0.75)
    stat_dict['max'] = np.max(all_length)
    stat_dict['iqr'] = stat_dict['q3'] - stat_dict['q1']
    stat_dict['outlier'] = stat_dict['q3'] + 1.5*stat_dict['iqr']
    stat_dict['far_out'] = stat_dict['q3'] + 3*stat_dict['iqr']
    for i in [10,20,30,40,50,60,70,80,90,100]:
        stat_dict[str(i)+'%'] = np.percentile(all_length,i)
    return pd.DataFrame.from_dict(stat_dict,orient='index',columns=['length'])

In [3]:
max_words = 8352#8352 #Top most frequent words to consider. Any less frequent word will appear as oov_char value in the sequence data.
max_length = 360#360

In [4]:
word_index = reuters.get_word_index()
print('all_words#:',len(word_index))
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,maxlen=max_length,
                                                         test_split=0.2,seed=830913)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

all_words#: 30979
8260 train sequences
2066 test sequences


In [13]:
# train_len = [len(x) for x in x_train]
# test_len = [len(x) for x in x_test]
# all_len = train_len
# all_len.extend(test_len)
# basic_statistics(all_len)

Unnamed: 0,length
mean,145.964197
std,145.878476
mode,17.0
min,2.0
q1,60.0
median,95.0
q3,180.0
max,2376.0
iqr,120.0
outlier,360.0


In [15]:
# df = pd.DataFrame(all_len)
# df.to_excel('./results/length_dist.xlsx', header=False, index=False)
# df

In [21]:
# train_words = []
# for x in x_train:
#     train_words.extend(x)
# test_words = []
# for x in x_test:
#     test_words.extend(x)
# all_words = train_words
# all_words.extend(test_words)
# all_statistcs = Counter(all_words)
# all_statistcs

Counter({1: 11228,
         53: 4213,
         352: 647,
         26: 8451,
         14: 15015,
         279: 801,
         39: 5818,
         72: 3091,
         4497: 26,
         18: 11039,
         83: 2597,
         5291: 21,
         88: 2381,
         5397: 20,
         11: 20141,
         3412: 37,
         19: 10755,
         151: 1363,
         230: 962,
         831: 253,
         15: 13329,
         165: 1232,
         318: 707,
         3780: 33,
         124: 1676,
         1527: 117,
         1424: 128,
         35: 6588,
         5302: 20,
         12: 16668,
         17: 11191,
         486: 459,
         341: 663,
         142: 1466,
         255: 870,
         219: 997,
         429: 528,
         68: 3363,
         146: 1402,
         252: 882,
         191: 1098,
         15448: 3,
         3631: 35,
         2283: 65,
         71: 3120,
         10: 29581,
         342: 660,
         49: 4565,
         1977: 80,
         324: 695,
         27: 8311,
         9222: 

In [28]:
# df = pd.DataFrame.from_dict(dict(all_statistcs), orient = 'index')
# df.to_excel('./results/words_dist2.xlsx', header=False, index=True)
# df

Unnamed: 0,0
1,11228
53,4213
352,647
26,8451
14,15015
...,...
24452,1
18567,2
27222,1
26864,1


In [5]:
trainX = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_length,padding='post',value=0)
testX = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_length,padding='post',value=0)
print(trainX.shape, testX.shape)

(8260, 360) (2066, 360)


In [563]:
hidden_dim = 128
# do = 0.1

## Graph execution
### Embedder

In [42]:
int_id = Input(shape=(max_length,), dtype='int32', name='int_ids') # 輸入的api funvtion name ID
int_ids = Masking(mask_value=0)(int_id)
sent_emb = Embedding(max_words, hidden_dim,input_length=max_length
                    ,trainable=True,name='glove_emb')(int_ids) 

### Encoder

In [43]:
rnn = GRU(int(hidden_dim/2),return_sequences=True,return_state=False,name='common_extract'
                      ,trainable=True)(sent_emb)
rnn = BatchNormalization(name='bn')(rnn)


### Filter

In [44]:
fil = TimeDistributed(Dense(1,activation='sigmoid',
                             name='filter_out'),name='TD2')(rnn)

### Classfier

In [47]:
mul = Multiply()([fil,sent_emb])
clf = LSTM(int(hidden_dim/2),dropout=do,recurrent_dropout=do,name='lstm')(mul)
clf = BatchNormalization(name='bn3')(clf)
clf = Dense(max(y_train)+1,activation='softmax',name='clf')(clf)

## Compile

In [48]:
model = Model(inputs=int_id, outputs = clf)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
int_ids (InputLayer)            [(None, 358)]        0                                            
__________________________________________________________________________________________________
masking_1 (Masking)             (None, 358)          0           int_ids[0][0]                    
__________________________________________________________________________________________________
glove_emb (Embedding)           (None, 358, 128)     1280000     masking_1[0][0]                  
__________________________________________________________________________________________________
common_extract (GRU)            (None, 358, 64)      37248       glove_emb[0][0]                  
______________________________________________________________________________________________

In [53]:
# loss
import keras.backend as K
def custom_objective(layer):
    return K.sum(layer.output)
#     return K.sum(layer.output)
# kk = tf.keras.backend.ea
model.compile(loss=custom_objective(model.get_layer(name='TD2')),optimizer='adam')

Using TensorFlow backend.


OperatorNotAllowedInGraphError: using a `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.

## Eager Execution

In [52]:
# whole model
do = 0
init = tensorflow.keras.initializers.Ones()
class base_model(Model):
    def __init__(self):
        super(base_model, self).__init__()
        self.mask = Masking(mask_value=0)
        self.emb = Embedding(max_words, hidden_dim,input_length=max_length
                    ,trainable=True,name='glove_emb')
        self.rnn1 = GRU(int(hidden_dim/2),return_sequences=True,return_state=False,name='common_extract'
                      ,trainable=True)
        self.bn1 = BatchNormalization(name='bn1')
        self.fil = Dense(1,activation='hard_sigmoid',kernel_initializer=init,bias_initializer=init,name='filter_out')
        #self.fil = TimeDistributed(Dense(1,activation='sigmoid', name='filter_out'),name='TD2')
        self.mul = Multiply()
        self.rnn2 = Bidirectional(GRU(int(hidden_dim/2),dropout=do,recurrent_dropout=do,name='lstm'))
        self.rnn3 = LSTM(int(hidden_dim/2))
        self.bn2 = BatchNormalization(name='bn2')
        self.out = Dense(max(y_train)+1,activation='softmax',name='clf')
    def transform(self,x):
        return tf.math.round(x)
    def call(self,x):
        x = self.mask(x)
        x1 = self.emb(x)
        x = self.rnn1(x1)
        x = self.bn1(x)
        y = self.fil(x)
        y1 = self.transform(y)
        x2 = self.mul([y1,x1])
        x = self.rnn2(x2) #x
        x = self.bn2(x)
        y2 = self.out(x)
        return y,y1,y2
        #return y,y1,y2,x2
        
model = base_model()

In [958]:
# partial1 model
init_w = tensorflow.keras.initializers.Constant(value=2.0) #portyion=0.6, w=0.9, b = 0.8-0.85 (0.83從0開始)
init_b = tensorflow.keras.initializers.Constant(value=2.0) #w=1 ; b=0.499, portion=1
def onezero(x):
    portion = 1.0#0.6#0.6 #0.6~1
    z = tf.where(x>=1.0, x - x + 1.0, x)
    y = tf.where(z<=0.0, z - z + 0.0, portion*z)
    return y

class base_model_1(Model):
    def __init__(self):
        super(base_model_1, self).__init__()
        self.mask = Masking(mask_value=0)
        self.emb = Embedding(max_words, hidden_dim,input_length=max_length
                    ,trainable=True,name='glove_emb')
        self.rnn1 = GRU(int(hidden_dim/4),return_sequences=True,return_state=False,name='common_extract'
                      ,trainable=True)
        #self.att = Attention(name='selfatt')
        self.bn1 = BatchNormalization(name='bn1')
        #self.fil = Dense(1,activation=onezero,name='filter_out')
        self.fil = TimeDistributed(Dense(1,activation=onezero,kernel_initializer=init_w,bias_initializer=init_b, name='filter_out'),name='TD2') #relu/linear/step function

    def call(self,x):
        x = self.mask(x)
        x1 = self.emb(x)
        #x = self.att([x1,x1])
        x = self.rnn1(x1)
        x = self.bn1(x)
        y = self.fil(x)
        return x1,y

model1 = base_model_1()
#phase2
# model3 = load_model(saveP)

model3 = base_model_1()
model3.load_weights('./model/2019110501/model1')#,by_name=True)
model1.emb.set_weights(model3.emb.get_weights())



In [959]:
# partial2 model
class base_model_2(Model):
    def __init__(self):
        super(base_model_2, self).__init__()
        self.mul = Multiply()
        self.rnn2 = Bidirectional(GRU(int(hidden_dim/2),dropout=do,recurrent_dropout=do,name='lstm'))
        self.rnn3 = GRU(int(hidden_dim/2))
        self.bn2 = BatchNormalization(name='bn2')
        self.out = Dense(max(y_train)+1,activation='softmax',name='clf')

    def call(self,x1,y1):
        x2 = self.mul([y1,x1])
        x = self.rnn3(x2) #x2 #y1=weight|binary
        x = self.bn2(x)
        y2 = self.out(x)
        return y2
    
model2 = base_model_2()

In [448]:
'''
# TEST
x = tf.random.uniform((32, 6))
out1,out2 = model1(x)
out2 = tf.squeeze(out2,axis=-1)
# out2 = out2.astype('float32')
# out2 = tf.dtypes.cast(out2, tf.float8)
# print(out2.shape)
yy = tf.matmul(out2,kk)
print(yy[0].shape)
tf.where(yy[0]==0,1,0)

gg = tf.range(-5,5)
gg = tf.expand_dims(gg,axis=0)
gg = tf.keras.backend.repeat_elements(gg,rep=32,axis=0)
tf.where(gg==1,gg,0)

kk = tf.Variable(np.array([[1.0,1.0,1.0,0.0,0.0,0.0],[0.0,1.0,1.0,1.0,0.0,0.0],[0.0,0.0,1.0,1.0,1.0,0.0],
                           [0.0,0.0,0.0,1.0,1.0,1.0]]).T,dtype='float32')
kk = tf.expand_dims(kk,axis=0)
kk = tf.keras.backend.repeat_elements(kk,rep=1,axis=0)
kk.shape
'''

(32, 4)


<tf.Tensor: id=5770753, shape=(32, 4), dtype=int32, numpy=
array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int32)>

In [896]:
記得要跑到一個好的data
# batch_size = 128 #,reshuffle_each_iteration=True
# train_ds = tf.data.Dataset.from_tensor_slices((trainX,y_train)).shuffle(trainX.shape[0]).batch(batch_size)
# valid_ds = tf.data.Dataset.from_tensor_slices((testX,y_test)).batch(batch_size)

In [960]:
seq_num = 2 #連續幾個才叫做連續，要改modify_idx看有幾個

arr_len = max_length - seq_num + 1
seq_arr = []
for i in range(arr_len):
    ori_np = np.array([0]*max_length)
    modify_idx = [i,i+1] #要跟著seq_num改
    ori_np[modify_idx]=1
    seq_arr.append(ori_np)
seq_arr = np.array(seq_arr)
seq_mask = tf.Variable(seq_arr.T,dtype='float32')
seq_mask = tf.expand_dims(seq_mask,axis=0)
seq_mask

<tf.Tensor: id=12571214, shape=(1, 360, 359), dtype=float32, numpy=
array([[[1., 0., 0., ..., 0., 0., 0.],
        [1., 1., 0., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 1., 1., 0.],
        [0., 0., 0., ..., 0., 1., 1.],
        [0., 0., 0., ..., 0., 0., 1.]]], dtype=float32)>

In [961]:
def loss_object1(predictions): #filter loss
    mask = tf.math.logical_not(tf.math.equal(predictions, 0))
    loss_ = tf.reduce_mean(predictions)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)
def one_percentage(predictions): #1 num
    mask = tf.math.logical_not(tf.math.equal(predictions, 0))
    loss_ = tf.reduce_mean(predictions)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)
def seq_loss(predictions):
    mask = tf.math.logical_not(tf.math.equal(predictions, 0))
    predictions = tf.squeeze(predictions,axis=-1)
    results = tf.matmul(predictions,seq_mask)
    results = tf.where(results==seq_num,1.0,0.0)
    loss_ = tf.reduce_mean(results)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)
loss_object2 = tf.keras.losses.SparseCategoricalCrossentropy() #clf loss

optimizer1 = tf.keras.optimizers.RMSprop()
optimizer2 = tf.keras.optimizers.RMSprop()

train_loss = tf.keras.metrics.Mean(name='train_loss') #total_loss
train_accloss = tf.keras.metrics.Mean(name='train_accloss')#loss_acc
train_filloss = tf.keras.metrics.Mean(name='train_filloss') #loss_filter
train_seqloss = tf.keras.metrics.Mean(name='train_seqloss') #loss_seq
train_ones = tf.keras.metrics.Mean(name='train_ones') #ones_num
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy') #acc_rate

test_loss = tf.keras.metrics.Mean(name='test_loss') #total_loss
test_accloss = tf.keras.metrics.Mean(name='test_accloss')#loss_acc
test_filloss = tf.keras.metrics.Mean(name='test_filloss') #loss_filter
test_seqloss = tf.keras.metrics.Mean(name='test_seqloss')
test_ones = tf.keras.metrics.Mean(name='test_ones') #ones_num
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [962]:
# seperate partial model
alpha = 0.00001 #pahse1: -0.1 / 0.0 ; phase2: 0.01~0.05~0.1 有1-就是希望1越多，沒1-就是希望0越多1越少
beta = 1.0 #clf loss 越大越要求分好
gamma = 0.0 #seqloss 越大越要求連續
#-0.001 / 1.0 / 1.0

@tf.function
def train_step(x,yc):
    with tf.GradientTape(persistent=False) as tape:
        emb, pred_imp = model1(x)
        #loss1 = alpha*loss_object1(pred_imp) #phase1
        #pred_imp2 = tf.math.round(pred_imp)
        #pred_imp3 = tf.clip_by_value(pred_imp,clip_value_max=1,clip_value_min=0)
        pred_imp2 = tf.math.round(pred_imp)
        loss1 = loss_object1(pred_imp) #有1-就是希望1越多，沒1-就是希望0越多1越少 #pahse2:alpha*loss_object1(pred_imp) ; phase1: alpha*(1-loss_object1(pred_imp))
        pred_cat = model2(emb,pred_imp2) #pahse1: pred_imp; phase2; pred_imp2
        loss2 = loss_object2(yc, pred_cat)
        loss3 = 1-seq_loss(pred_imp2)
        loss = alpha*loss1 + beta*loss2 + gamma*loss3
    trainable_variable = model1.trainable_variables
    trainable_variable.extend(model2.trainable_variables)
    gradients = tape.gradient(loss,trainable_variable)
    optimizer1.apply_gradients(zip(gradients,trainable_variable))
    
    train_loss(loss) #total_loss
    train_filloss(loss1)
    train_accloss(loss2)
    train_seqloss(loss3) #loss_seq
    train_accuracy(yc, pred_cat) #acc_rate
    ones = one_percentage(pred_imp2) #pred_imp2
    train_ones(ones) #ones_num
    
    
@tf.function
def test_step(x,yc):
    emb, pred_imp = model1(x)
    #loss1 = alpha*loss_object1(pred_imp) #phase1
    #pred_imp2 = tf.math.round(pred_imp)
    #pred_imp3 = tf.clip_by_value(pred_imp,clip_value_max=1,clip_value_min=0)
    pred_imp2 = tf.math.round(pred_imp)
    loss1 = loss_object1(pred_imp) #phase2
    pred_cat = model2(emb,pred_imp2) #phase1: pred_imp ; phase2:pred_imp2
    loss2 = loss_object2(yc, pred_cat)
    loss3 = 1-seq_loss(pred_imp2)
    #t_loss = loss1 + loss2
    t_loss = alpha*loss1 + beta*loss2 + gamma*loss3
    
    test_loss(t_loss)
    test_filloss(loss1)
    test_accloss(loss2)
    test_seqloss(loss3)
    test_accuracy(yc, pred_cat)
    t_ones = one_percentage(pred_imp2) #pred_imp2
    test_ones(t_ones)
    

In [331]:
'''
#AIO
alpha = 0.1
beta = 1
gamma = 0
@tf.function
def train_step(x,yc):
    with tf.GradientTape(persistent=True) as tape: #persistent=True
        pred_imp,pred_round , pred_cat = model(x)
#         pred_cat = model(x)
#         loss = alpha*loss_object1(pred_imp) + loss_object2(yc,pred_cat)
        loss1 = alpha*loss_object1(pred_imp)
        loss2 = beta*loss_object2(yc,pred_cat)
        loss = loss_object2(yc,pred_cat)
#     gradients = tape.gradient(loss, model.trainable_variables)
    grad1 = tape.gradient(loss1, model.trainable_variables)
    grad2 = tape.gradient(loss2, model.trainable_variables)
#     optimizer1.apply_gradients(zip(gradients, model.trainable_variables))
    optimizer1.apply_gradients(zip(grad1, model.trainable_variables))
    optimizer2.apply_gradients(zip(grad2, model.trainable_variables))
#     with tf.GradientTape() as tape:
#         pred_imp , pred_cat = model(x)
#         loss2 = loss_object2(yc,pred_cat)
#         loss = alpha*loss_object1(pred_imp) + loss_object2(yc,pred_cat)
#     grad2 = tape.gradient(loss2, model.trainable_variables)
#     optimizer2.apply_gradients(zip(grad2, model.trainable_variables))

    train_loss(loss)
    train_accuracy(yc, pred_cat)
    ones = one_percentage(pred_round)
    train_ones(ones)
    
@tf.function
def test_step(x,yc):
    pred_imp,pred_round, pred_cat = model(x)
#     pred_cat = model(x)
    t_loss = alpha*loss_object1(pred_imp) + loss_object2(yc,pred_cat) 
#     t_loss = loss_object2(yc,pred_cat)
    
    test_loss(t_loss)
    test_accuracy(yc, pred_cat)
    t_ones = one_percentage(pred_round)
    test_ones(t_ones)
'''    

In [None]:
EPOCHS = 2000
DateID = '2019110801'

saveP1 = './model/'+DateID+'/model1'
saveP2 = './model/'+DateID+'/model2'
train_loss_acc = []
train_loss_filter = []
train_loss_seq = []
train_weighted_loss = []
train_acc_rate = []
train_ones_num = []

test_loss_acc = []
test_loss_filter = []
test_loss_seq = []
test_weighted_loss = []
test_acc_rate = []
test_ones_num = []

# signature_dict = {'att':model1.att}

gc.collect()
best_clf = 0.0
for epoch in range(EPOCHS):
    for text, labels in train_ds:
        train_step(text, labels)

    for test_text, test_labels in valid_ds:
        test_step(test_text, test_labels)

    template = 'Epoch {}, Total Loss: {}, Clf Loss: {}, Filter Loss: {}, Seq Loss: {}, Accuracy Rate: {:5.2f}%, Ones Portion: {}, \
            Test_Total_Loss: {}, Test_Clf_Loss: {}, Test_Filter_Loss: {}, TEST_Seq_Loss: {}, Test_Accuracy_Rate: {:5.2f}%, Test_Ones_Portion: {}'
    print(template.format(epoch+1,train_loss.result(),
                          train_accloss.result(),train_filloss.result(),train_seqloss.result(),
                        train_accuracy.result()*100,train_ones.result(),
                        test_loss.result(),
                        test_accloss.result(),test_filloss.result(),test_seqloss.result(),
                        test_accuracy.result()*100,test_ones.result(),
                        ))

    train_loss_acc.append( train_accloss.result().numpy())
    train_loss_filter.append( train_filloss.result().numpy())
    train_loss_seq.append( train_seqloss.result().numpy())
    train_weighted_loss.append( train_loss.result().numpy())
    train_acc_rate.append( train_accuracy.result().numpy())
    train_ones_num.append( train_ones.result().numpy())
    
    test_loss_acc.append( test_accloss.result().numpy())
    test_loss_filter.append( test_filloss.result().numpy())
    test_loss_seq.append( test_seqloss.result().numpy())
    test_weighted_loss.append( test_loss.result().numpy())
    test_acc_rate.append( test_accuracy.result().numpy())
    test_ones_num.append( test_ones.result().numpy())
    if best_clf<=test_accuracy.result()*100:
        #tf.saved_model.save(model1,saveP1+'_all')
        #model1.save(saveP1,save_format='h5')
        #model2.save(saveP2,save_format='h5')
        #tf.saved_model.save(model2,saveP2+'_all')
        model1.save_weights(saveP1,save_format='tf')
        model2.save_weights(saveP2,save_format='tf')
        best_clf = test_accuracy.result()*100
        print('===MODEL WEIGHTS SAVED===',saveP1,saveP2)
    # Reset the metrics for the next epoch
    train_loss.reset_states()
    train_accloss.reset_states()
    train_filloss.reset_states()
    train_seqloss.reset_states()
    train_ones.reset_states()
    train_accuracy.reset_states()
    
    test_loss.reset_states()
    test_accloss.reset_states()
    test_filloss.reset_states()
    test_seqloss.reset_states()
    test_ones.reset_states()
    test_accuracy.reset_states()

Epoch 1, Total Loss: 2.5168564319610596, Clf Loss: 2.516845703125, Filter Loss: 1.0, Seq Loss: 0.0, Accuracy Rate: 36.89%, Ones Portion: 1.0,             Test_Total_Loss: 2.407064437866211, Test_Clf_Loss: 2.4070541858673096, Test_Filter_Loss: 1.0, TEST_Seq_Loss: 0.0, Test_Accuracy_Rate: 22.41%, Test_Ones_Portion: 1.0
===MODEL WEIGHTS SAVED=== ./model/2019110801/model1 ./model/2019110801/model2
Epoch 2, Total Loss: 2.330303907394409, Clf Loss: 2.3302934169769287, Filter Loss: 1.0, Seq Loss: 0.0, Accuracy Rate: 37.45%, Ones Portion: 1.0,             Test_Total_Loss: 2.2936646938323975, Test_Clf_Loss: 2.2936549186706543, Test_Filter_Loss: 1.0, TEST_Seq_Loss: 0.0, Test_Accuracy_Rate: 48.84%, Test_Ones_Portion: 1.0
===MODEL WEIGHTS SAVED=== ./model/2019110801/model1 ./model/2019110801/model2
Epoch 3, Total Loss: 2.0387141704559326, Clf Loss: 2.0387041568756104, Filter Loss: 1.0, Seq Loss: 0.0, Accuracy Rate: 49.93%, Ones Portion: 1.0,             Test_Total_Loss: 1.9128453731536865, Test_Cl

Epoch 22, Total Loss: 2.068540096282959, Clf Loss: 2.068540096282959, Filter Loss: 2.0950203179381788e-05, Seq Loss: 0.9999971985816956, Accuracy Rate: 41.38%, Ones Portion: 8.210616215365008e-06,             Test_Total_Loss: 2.130150318145752, Test_Clf_Loss: 2.130150318145752, Test_Filter_Loss: 2.0399251297931187e-05, TEST_Seq_Loss: 0.999997079372406, Test_Accuracy_Rate: 39.98%, Test_Ones_Portion: 8.517554306308739e-06
Epoch 23, Total Loss: 2.0676822662353516, Clf Loss: 2.0676822662353516, Filter Loss: 1.7593471056898125e-05, Seq Loss: 0.9999977946281433, Accuracy Rate: 41.50%, Ones Portion: 6.9800662458874285e-06,             Test_Total_Loss: 2.0957794189453125, Test_Clf_Loss: 2.0957794189453125, Test_Filter_Loss: 1.6593950931564905e-05, TEST_Seq_Loss: 0.999997615814209, Test_Accuracy_Rate: 40.66%, Test_Ones_Portion: 7.110312708391575e-06
Epoch 24, Total Loss: 2.063262701034546, Clf Loss: 2.063262701034546, Filter Loss: 1.3776911146123894e-05, Seq Loss: 0.9999983310699463, Accuracy R

Epoch 42, Total Loss: 2.007746696472168, Clf Loss: 2.007746696472168, Filter Loss: 3.358519279572647e-06, Seq Loss: 0.9999997615814209, Accuracy Rate: 44.41%, Ones Portion: 1.5327132132370025e-06,             Test_Total_Loss: 2.0109758377075195, Test_Clf_Loss: 2.0109758377075195, Test_Filter_Loss: 3.986023784818826e-06, TEST_Seq_Loss: 0.999999463558197, Test_Accuracy_Rate: 43.61%, Test_Ones_Portion: 1.863438683358254e-06
Epoch 43, Total Loss: 2.010176420211792, Clf Loss: 2.010176420211792, Filter Loss: 3.278037866039085e-06, Seq Loss: 0.9999997615814209, Accuracy Rate: 44.26%, Ones Portion: 1.4772714393984643e-06,             Test_Total_Loss: 2.019054651260376, Test_Clf_Loss: 2.019054651260376, Test_Filter_Loss: 3.949987785745179e-06, TEST_Seq_Loss: 0.999999463558197, Test_Accuracy_Rate: 43.42%, Test_Ones_Portion: 1.90824380297272e-06
Epoch 44, Total Loss: 2.0085437297821045, Clf Loss: 2.0085437297821045, Filter Loss: 3.201583695044974e-06, Seq Loss: 0.9999997615814209, Accuracy Rate: 

Epoch 62, Total Loss: 2.006505012512207, Clf Loss: 2.006505012512207, Filter Loss: 3.4911086004285607e-06, Seq Loss: 0.9999996423721313, Accuracy Rate: 44.62%, Ones Portion: 1.638032131268119e-06,             Test_Total_Loss: 2.0177066326141357, Test_Clf_Loss: 2.0177066326141357, Test_Filter_Loss: 3.965902124036802e-06, TEST_Seq_Loss: 0.9999993443489075, Test_Accuracy_Rate: 43.76%, Test_Ones_Portion: 1.8577040918899002e-06
Epoch 63, Total Loss: 2.0104706287384033, Clf Loss: 2.0104706287384033, Filter Loss: 3.3922665352292825e-06, Seq Loss: 0.9999996423721313, Accuracy Rate: 44.59%, Ones Portion: 1.5511805031565018e-06,             Test_Total_Loss: 2.0037965774536133, Test_Clf_Loss: 2.0037965774536133, Test_Filter_Loss: 4.0845557123248e-06, TEST_Seq_Loss: 0.9999993443489075, Test_Accuracy_Rate: 43.90%, Test_Ones_Portion: 1.9319759303471074e-06
Epoch 64, Total Loss: 2.0101494789123535, Clf Loss: 2.0101494789123535, Filter Loss: 3.579975100365118e-06, Seq Loss: 0.9999996423721313, Accurac

Epoch 82, Total Loss: 2.0140247344970703, Clf Loss: 2.0140247344970703, Filter Loss: 3.3084870665334165e-06, Seq Loss: 0.9999998807907104, Accuracy Rate: 44.59%, Ones Portion: 1.177769149762753e-06,             Test_Total_Loss: 2.018075942993164, Test_Clf_Loss: 2.018075942993164, Test_Filter_Loss: 3.695504574352526e-06, TEST_Seq_Loss: 0.9999996423721313, Test_Accuracy_Rate: 43.66%, Test_Ones_Portion: 1.4441267239817535e-06
Epoch 83, Total Loss: 2.0162880420684814, Clf Loss: 2.0162880420684814, Filter Loss: 3.1749009394843597e-06, Seq Loss: 1.0, Accuracy Rate: 44.54%, Ones Portion: 1.1390500276320381e-06,             Test_Total_Loss: 2.017179250717163, Test_Clf_Loss: 2.017179250717163, Test_Filter_Loss: 3.20597541758616e-06, TEST_Seq_Loss: 0.9999996423721313, Test_Accuracy_Rate: 43.66%, Test_Ones_Portion: 1.2570712897286285e-06
Epoch 84, Total Loss: 2.0178990364074707, Clf Loss: 2.0178990364074707, Filter Loss: 2.947851726275985e-06, Seq Loss: 0.9999998807907104, Accuracy Rate: 44.53%, 

Epoch 102, Total Loss: 2.051609992980957, Clf Loss: 2.051609992980957, Filter Loss: 1.9516214706527535e-06, Seq Loss: 0.9999998807907104, Accuracy Rate: 43.93%, Ones Portion: 6.60308614897076e-07,             Test_Total_Loss: 2.108915328979492, Test_Clf_Loss: 2.108915328979492, Test_Filter_Loss: 2.083849494738388e-06, TEST_Seq_Loss: 0.9999998807907104, Test_Accuracy_Rate: 42.64%, Test_Ones_Portion: 6.694542662444292e-07
Epoch 103, Total Loss: 2.052340030670166, Clf Loss: 2.052340030670166, Filter Loss: 1.855986056398251e-06, Seq Loss: 1.0, Accuracy Rate: 43.96%, Ones Portion: 6.100966061239887e-07,             Test_Total_Loss: 2.1129770278930664, Test_Clf_Loss: 2.1129770278930664, Test_Filter_Loss: 2.0273168956919108e-06, TEST_Seq_Loss: 0.9999998807907104, Test_Accuracy_Rate: 42.88%, Test_Ones_Portion: 6.727892696289928e-07
Epoch 104, Total Loss: 2.0594606399536133, Clf Loss: 2.0594606399536133, Filter Loss: 1.8338238305659615e-06, Seq Loss: 1.0, Accuracy Rate: 43.75%, Ones Portion: 6.

Epoch 122, Total Loss: 2.093470573425293, Clf Loss: 2.093470573425293, Filter Loss: 1.2849777704104781e-06, Seq Loss: 1.0, Accuracy Rate: 42.99%, Ones Portion: 4.0229073761111067e-07,             Test_Total_Loss: 2.126749277114868, Test_Clf_Loss: 2.126749277114868, Test_Filter_Loss: 1.3538400480683777e-06, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 42.16%, Test_Ones_Portion: 4.477475386011065e-07
Epoch 123, Total Loss: 2.087232828140259, Clf Loss: 2.087232828140259, Filter Loss: 1.1926988463528687e-06, Seq Loss: 0.9999998807907104, Accuracy Rate: 43.08%, Ones Portion: 3.9582377553415427e-07,             Test_Total_Loss: 2.1323699951171875, Test_Clf_Loss: 2.1323699951171875, Test_Filter_Loss: 1.3809101346851094e-06, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 42.35%, Test_Ones_Portion: 4.4278866084823676e-07
Epoch 124, Total Loss: 2.091646909713745, Clf Loss: 2.091646909713745, Filter Loss: 1.2223380281284335e-06, Seq Loss: 1.0, Accuracy Rate: 43.01%, Ones Portion: 3.873401226428541e-07,      

Epoch 143, Total Loss: 2.096904993057251, Clf Loss: 2.096904993057251, Filter Loss: 9.959245517165982e-07, Seq Loss: 1.0, Accuracy Rate: 42.89%, Ones Portion: 3.0359464631146693e-07,             Test_Total_Loss: 2.147115707397461, Test_Clf_Loss: 2.147115707397461, Test_Filter_Loss: 1.0698213372961618e-06, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 41.92%, Test_Ones_Portion: 3.366241401181469e-07
Epoch 144, Total Loss: 2.0985567569732666, Clf Loss: 2.0985567569732666, Filter Loss: 9.798266091820551e-07, Seq Loss: 1.0, Accuracy Rate: 42.97%, Ones Portion: 2.9102997700647393e-07,             Test_Total_Loss: 2.1463871002197266, Test_Clf_Loss: 2.1463871002197266, Test_Filter_Loss: 1.0701241990318522e-06, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 42.06%, Test_Ones_Portion: 3.3922819397957937e-07
Epoch 145, Total Loss: 2.10115909576416, Clf Loss: 2.10115909576416, Filter Loss: 1.0205225180470734e-06, Seq Loss: 1.0, Accuracy Rate: 42.86%, Ones Portion: 3.022158807652886e-07,             Test_Total

Epoch 164, Total Loss: 2.119631767272949, Clf Loss: 2.119631767272949, Filter Loss: 9.30080091166019e-07, Seq Loss: 1.0, Accuracy Rate: 42.43%, Ones Portion: 2.3683966787757527e-07,             Test_Total_Loss: 2.17671275138855, Test_Clf_Loss: 2.17671275138855, Test_Filter_Loss: 1.0025116807810264e-06, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 41.05%, Test_Ones_Portion: 2.634951101754268e-07
Epoch 165, Total Loss: 2.1228525638580322, Clf Loss: 2.1228525638580322, Filter Loss: 8.749279913899954e-07, Seq Loss: 1.0, Accuracy Rate: 42.31%, Ones Portion: 2.2205536254205072e-07,             Test_Total_Loss: 2.1869521141052246, Test_Clf_Loss: 2.1869521141052246, Test_Filter_Loss: 9.8160273864778e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 40.71%, Test_Ones_Portion: 2.5615386789468175e-07
Epoch 166, Total Loss: 2.1202759742736816, Clf Loss: 2.1202759742736816, Filter Loss: 9.857163831838989e-07, Seq Loss: 1.0, Accuracy Rate: 42.49%, Ones Portion: 2.32034153668792e-07,             Test_Total_Los

Epoch 185, Total Loss: 2.1564526557922363, Clf Loss: 2.1564526557922363, Filter Loss: 6.498304401247879e-07, Seq Loss: 1.0, Accuracy Rate: 41.51%, Ones Portion: 1.3352931205190544e-07,             Test_Total_Loss: 2.204991340637207, Test_Clf_Loss: 2.204991340637207, Test_Filter_Loss: 7.02462557455874e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 40.22%, Test_Ones_Portion: 1.4943522330668202e-07
Epoch 186, Total Loss: 2.1571624279022217, Clf Loss: 2.1571624279022217, Filter Loss: 6.778421379749489e-07, Seq Loss: 1.0, Accuracy Rate: 41.54%, Ones Portion: 1.301673648868018e-07,             Test_Total_Loss: 2.212212562561035, Test_Clf_Loss: 2.212212562561035, Test_Filter_Loss: 6.873644338156737e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 40.22%, Test_Ones_Portion: 1.4774533951822377e-07
Epoch 187, Total Loss: 2.1596877574920654, Clf Loss: 2.1596877574920654, Filter Loss: 6.256728966036462e-07, Seq Loss: 1.0, Accuracy Rate: 41.44%, Ones Portion: 1.239151288245921e-07,             Test_Total

Epoch 206, Total Loss: 2.160193920135498, Clf Loss: 2.160193920135498, Filter Loss: 6.811404773543472e-07, Seq Loss: 1.0, Accuracy Rate: 41.50%, Ones Portion: 1.3104596519042389e-07,             Test_Total_Loss: 2.232536554336548, Test_Clf_Loss: 2.232536554336548, Test_Filter_Loss: 6.581620937140542e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 40.27%, Test_Ones_Portion: 1.0740675548959189e-07
Epoch 207, Total Loss: 2.1606738567352295, Clf Loss: 2.1606738567352295, Filter Loss: 7.281432772288099e-07, Seq Loss: 1.0, Accuracy Rate: 41.57%, Ones Portion: 1.3271800014535984e-07,             Test_Total_Loss: 2.235502243041992, Test_Clf_Loss: 2.235502243041992, Test_Filter_Loss: 7.068653644637379e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 40.03%, Test_Ones_Portion: 1.0915204029515735e-07
Epoch 208, Total Loss: 2.15777850151062, Clf Loss: 2.15777850151062, Filter Loss: 7.019247050266131e-07, Seq Loss: 1.0, Accuracy Rate: 41.60%, Ones Portion: 1.3588407909992384e-07,             Test_Total_Lo

Epoch 227, Total Loss: 2.1741445064544678, Clf Loss: 2.1741445064544678, Filter Loss: 6.034656507836189e-07, Seq Loss: 1.0, Accuracy Rate: 41.34%, Ones Portion: 1.1153228740568011e-07,             Test_Total_Loss: 2.251171588897705, Test_Clf_Loss: 2.251171588897705, Test_Filter_Loss: 5.312607527230284e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.79%, Test_Ones_Portion: 9.158837599443359e-08
Epoch 228, Total Loss: 2.1734964847564697, Clf Loss: 2.1734964847564697, Filter Loss: 5.80153141527262e-07, Seq Loss: 1.0, Accuracy Rate: 41.25%, Ones Portion: 1.0562730068386372e-07,             Test_Total_Loss: 2.25058913230896, Test_Clf_Loss: 2.25058913230896, Test_Filter_Loss: 5.402675355981046e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.93%, Test_Ones_Portion: 9.380460852526085e-08
Epoch 229, Total Loss: 2.176816463470459, Clf Loss: 2.176816463470459, Filter Loss: 5.829512588206853e-07, Seq Loss: 1.0, Accuracy Rate: 41.23%, Ones Portion: 1.0524376392595514e-07,             Test_Total_Los

Epoch 248, Total Loss: 2.1819446086883545, Clf Loss: 2.1819446086883545, Filter Loss: 3.611830550198647e-07, Seq Loss: 1.0, Accuracy Rate: 41.08%, Ones Portion: 8.714674493148777e-08,             Test_Total_Loss: 2.2494006156921387, Test_Clf_Loss: 2.2494006156921387, Test_Filter_Loss: 3.3913786978700955e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 40.08%, Test_Ones_Portion: 7.075644248288881e-08
Epoch 249, Total Loss: 2.1812307834625244, Clf Loss: 2.1812307834625244, Filter Loss: 3.4775206358972355e-07, Seq Loss: 1.0, Accuracy Rate: 41.15%, Ones Portion: 8.880353163931431e-08,             Test_Total_Loss: 2.2538790702819824, Test_Clf_Loss: 2.2538790702819824, Test_Filter_Loss: 3.651251176961523e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 40.08%, Test_Ones_Portion: 7.360984000115423e-08
Epoch 250, Total Loss: 2.1829030513763428, Clf Loss: 2.1829030513763428, Filter Loss: 3.5041034607274923e-07, Seq Loss: 1.0, Accuracy Rate: 41.11%, Ones Portion: 8.83185435895939e-08,             Test_T

Epoch 269, Total Loss: 2.1916615962982178, Clf Loss: 2.1916615962982178, Filter Loss: 3.167941997617163e-07, Seq Loss: 1.0, Accuracy Rate: 40.88%, Ones Portion: 6.616991043983944e-08,             Test_Total_Loss: 2.264676094055176, Test_Clf_Loss: 2.264676094055176, Test_Filter_Loss: 3.168507589634828e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.84%, Test_Ones_Portion: 7.372065624622337e-08
Epoch 270, Total Loss: 2.1944332122802734, Clf Loss: 2.1944332122802734, Filter Loss: 3.1433020808435685e-07, Seq Loss: 1.0, Accuracy Rate: 40.81%, Ones Portion: 6.753592884933823e-08,             Test_Total_Loss: 2.263044834136963, Test_Clf_Loss: 2.263044834136963, Test_Filter_Loss: 2.985368610097794e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.74%, Test_Ones_Portion: 6.967601962060144e-08
Epoch 271, Total Loss: 2.1915769577026367, Clf Loss: 2.1915769577026367, Filter Loss: 3.0815539275863557e-07, Seq Loss: 1.0, Accuracy Rate: 40.84%, Ones Portion: 7.01248339396443e-08,             Test_Total_

Epoch 290, Total Loss: 2.201082229614258, Clf Loss: 2.201082229614258, Filter Loss: 3.232491962990025e-07, Seq Loss: 1.0, Accuracy Rate: 40.71%, Ones Portion: 6.538633812169792e-08,             Test_Total_Loss: 2.2733099460601807, Test_Clf_Loss: 2.2733099460601807, Test_Filter_Loss: 3.0339265322254505e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.50%, Test_Ones_Portion: 6.842938660156506e-08
Epoch 291, Total Loss: 2.197120428085327, Clf Loss: 2.197120428085327, Filter Loss: 2.9366574949563073e-07, Seq Loss: 1.0, Accuracy Rate: 40.70%, Ones Portion: 5.990167295522042e-08,             Test_Total_Loss: 2.2733757495880127, Test_Clf_Loss: 2.2733757495880127, Test_Filter_Loss: 2.668760146207205e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.55%, Test_Ones_Portion: 6.136513519550135e-08
Epoch 292, Total Loss: 2.200676679611206, Clf Loss: 2.200676679611206, Filter Loss: 2.5538321324347635e-07, Seq Loss: 1.0, Accuracy Rate: 40.63%, Ones Portion: 5.533804525725827e-08,             Test_Total_

Epoch 311, Total Loss: 2.209826707839966, Clf Loss: 2.209826707839966, Filter Loss: 1.7674828711733426e-07, Seq Loss: 1.0, Accuracy Rate: 40.50%, Ones Portion: 4.746875958971941e-08,             Test_Total_Loss: 2.271760940551758, Test_Clf_Loss: 2.271760940551758, Test_Filter_Loss: 1.8528444911680708e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.50%, Test_Ones_Portion: 5.3945182543202463e-08
Epoch 312, Total Loss: 2.2080936431884766, Clf Loss: 2.2080936431884766, Filter Loss: 1.7305019639479724e-07, Seq Loss: 1.0, Accuracy Rate: 40.40%, Ones Portion: 4.8359666493524855e-08,             Test_Total_Loss: 2.278505325317383, Test_Clf_Loss: 2.278505325317383, Test_Filter_Loss: 1.8056272210742463e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.50%, Test_Ones_Portion: 5.123029467313245e-08
Epoch 313, Total Loss: 2.2075884342193604, Clf Loss: 2.2075884342193604, Filter Loss: 1.8731029172158742e-07, Seq Loss: 1.0, Accuracy Rate: 40.51%, Ones Portion: 5.393888002913627e-08,             Test_To

Epoch 332, Total Loss: 2.2065682411193848, Clf Loss: 2.2065682411193848, Filter Loss: 1.7565608345648798e-07, Seq Loss: 1.0, Accuracy Rate: 40.50%, Ones Portion: 5.033872696458275e-08,             Test_Total_Loss: 2.291361093521118, Test_Clf_Loss: 2.291361093521118, Test_Filter_Loss: 1.936349462994258e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.01%, Test_Ones_Portion: 5.0925564210047014e-08
Epoch 333, Total Loss: 2.207064151763916, Clf Loss: 2.207064151763916, Filter Loss: 1.853891689052034e-07, Seq Loss: 1.0, Accuracy Rate: 40.46%, Ones Portion: 5.2967834562878124e-08,             Test_Total_Loss: 2.2839744091033936, Test_Clf_Loss: 2.2839744091033936, Test_Filter_Loss: 1.912545712912106e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.16%, Test_Ones_Portion: 5.461005869733526e-08
Epoch 334, Total Loss: 2.207014322280884, Clf Loss: 2.207014322280884, Filter Loss: 1.76403005980319e-07, Seq Loss: 1.0, Accuracy Rate: 40.50%, Ones Portion: 5.1207379669904185e-08,             Test_Total_

Epoch 353, Total Loss: 2.2008121013641357, Clf Loss: 2.2008121013641357, Filter Loss: 1.967657823342961e-07, Seq Loss: 1.0, Accuracy Rate: 40.63%, Ones Portion: 6.515457329214769e-08,             Test_Total_Loss: 2.274864435195923, Test_Clf_Loss: 2.274864435195923, Test_Filter_Loss: 2.012991160427191e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.45%, Test_Ones_Portion: 6.358581572385447e-08
Epoch 354, Total Loss: 2.2003228664398193, Clf Loss: 2.2003228664398193, Filter Loss: 2.0109688136926707e-07, Seq Loss: 1.0, Accuracy Rate: 40.65%, Ones Portion: 6.838117627694373e-08,             Test_Total_Loss: 2.275681257247925, Test_Clf_Loss: 2.275681257247925, Test_Filter_Loss: 2.002127672540155e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.35%, Test_Ones_Portion: 6.167431365611264e-08
Epoch 355, Total Loss: 2.196406126022339, Clf Loss: 2.196406126022339, Filter Loss: 1.9307186960304534e-07, Seq Loss: 1.0, Accuracy Rate: 40.59%, Ones Portion: 6.282175490923692e-08,             Test_Total_L

Epoch 374, Total Loss: 2.187715768814087, Clf Loss: 2.187715768814087, Filter Loss: 2.1298451713391842e-07, Seq Loss: 1.0, Accuracy Rate: 41.02%, Ones Portion: 8.644474291941151e-08,             Test_Total_Loss: 2.247349977493286, Test_Clf_Loss: 2.247349977493286, Test_Filter_Loss: 2.084578198946474e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.69%, Test_Ones_Portion: 8.989849220597534e-08
Epoch 375, Total Loss: 2.185805082321167, Clf Loss: 2.185805082321167, Filter Loss: 2.0079424700725212e-07, Seq Loss: 1.0, Accuracy Rate: 40.93%, Ones Portion: 7.836533200134e-08,             Test_Total_Loss: 2.2579946517944336, Test_Clf_Loss: 2.2579946517944336, Test_Filter_Loss: 2.1097602598274534e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.64%, Test_Ones_Portion: 7.881800456743804e-08
Epoch 376, Total Loss: 2.185260772705078, Clf Loss: 2.185260772705078, Filter Loss: 2.081661563124726e-07, Seq Loss: 1.0, Accuracy Rate: 41.08%, Ones Portion: 8.353209324241107e-08,             Test_Total_Loss:

Epoch 395, Total Loss: 2.183882713317871, Clf Loss: 2.183882713317871, Filter Loss: 2.0444824144760787e-07, Seq Loss: 1.0, Accuracy Rate: 41.03%, Ones Portion: 8.122474781657729e-08,             Test_Total_Loss: 2.2695000171661377, Test_Clf_Loss: 2.2695000171661377, Test_Filter_Loss: 2.136863486157381e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.45%, Test_Ones_Portion: 7.7017311639338e-08
Epoch 396, Total Loss: 2.1811654567718506, Clf Loss: 2.1811654567718506, Filter Loss: 2.046058682481089e-07, Seq Loss: 1.0, Accuracy Rate: 41.00%, Ones Portion: 8.160716902239074e-08,             Test_Total_Loss: 2.2678606510162354, Test_Clf_Loss: 2.2678606510162354, Test_Filter_Loss: 2.0884830576051172e-07, TEST_Seq_Loss: 1.0, Test_Accuracy_Rate: 39.55%, Test_Ones_Portion: 7.665716594829064e-08
Epoch 397, Total Loss: 2.1816279888153076, Clf Loss: 2.1816279888153076, Filter Loss: 2.0418946178324404e-07, Seq Loss: 1.0, Accuracy Rate: 41.02%, Ones Portion: 8.355885228183979e-08,             Test_Tota

### Store process

In [914]:
save_dir = './results/'+DateID+'/'
if not os.path.isdir(save_dir):
    os.makedirs(save_dir, exist_ok=True)
saveR = save_dir + 'losses_metrics.xlsx'
data = {'train total loss':train_weighted_loss, 'train acc loss':train_loss_acc,
        'train filter loss':train_loss_filter,'train seq loss':train_loss_seq,
        'train acc rate':train_acc_rate, 'train ones num':train_ones_num,
        'test total loss':test_weighted_loss, 'test acc loss':test_loss_acc,
        'test filter loss':test_loss_filter, 'test seq loss': test_loss_seq,
        'test acc rate':test_acc_rate, 'test ones num':test_ones_num
       }
df = pd.DataFrame(data)
df.to_excel(saveR)
df

Unnamed: 0,train total loss,train acc loss,train filter loss,train seq loss,train acc rate,train ones num,test total loss,test acc loss,test filter loss,test seq loss,test acc rate,test ones num
0,2.659511,2.659450,0.600000,0.000000,0.369855,1.000000,2.362077,2.362016,0.600000,0.000000,0.371733,1.000000
1,2.329190,2.329129,0.600000,0.000000,0.377240,1.000000,2.251624,2.251564,0.600000,0.000000,0.372701,1.000000
2,1.995442,1.995381,0.600000,0.000000,0.496731,1.000000,1.899837,1.899777,0.600000,0.000000,0.518877,1.000000
3,1.836639,1.836579,0.600000,0.000000,0.535956,1.000000,1.799838,1.799778,0.600000,0.000000,0.543562,1.000000
4,1.789347,1.789287,0.600000,0.000000,0.552542,1.000000,1.904030,1.903970,0.600000,0.000000,0.518877,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.964969,0.964969,0.000033,0.999999,0.707264,0.000048,2.291040,2.291040,0.000035,0.999998,0.564860,0.000051
1996,0.960884,0.960884,0.000033,0.999998,0.709201,0.000048,2.273444,2.273444,0.000035,0.999998,0.567280,0.000051
1997,1.058729,1.058729,0.000033,0.999999,0.681840,0.000048,2.222906,2.222906,0.000035,0.999998,0.563892,0.000050
1998,1.028507,1.028507,0.000034,0.999999,0.691889,0.000048,2.163985,2.163985,0.000035,0.999998,0.570668,0.000051


In [843]:
DateID

'2019110601'

In [714]:
np.array(train_weighted_loss[0])

array(None, dtype=object)

In [732]:
kk = np.array(test_accloss.result())
kk

array(2.3583207, dtype=float32)

In [735]:
test_accloss.result().numpy()

2.3583207

In [523]:
# model1.save('./model/emb_layer') #tf.keras.models.load_model
tf.saved_model.save(model1,'./model/emb_layer')
model3 = tf.keras.models.load_model('./model/emb_layer')

TypeError: unsupported callable

In [524]:
model3 = model1.load_weights('./model/emb_layer_weight')

AttributeError: 'CheckpointLoadStatus' object has no attribute 'summary'

In [527]:
model1.emb.set_weights(model3.emb.get_weights())

In [526]:
model3 = model1
model3.load_weights('./model/emb_layer_weight')
model3.summary()

Model: "base_model_1_58"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_58 (Masking)         multiple                  0         
_________________________________________________________________
glove_emb (Embedding)        multiple                  2138112   
_________________________________________________________________


ValueError: You tried to call `count_params` on common_extract, but the layer isn't built. You can build it manually via: `common_extract.build(batch_input_shape)`.

* 在某些參數的情況下0 1是train不起來的但是weight可以
    * alpha = 0.0, beta=0.6
    * init_w = tensorflow.keras.initializers.Constant(value=0.9), init_b = tensorflow.keras.initializers.Constant(value=0.7)
* 0/1 with emb比較容易overfit。如果是weight的比較沒那麼嚴重

* 不用sigmoid或是hard_sigmoid。改良relu + linear，並拆成兩個model，把round前面多加上clip
    * sigmoid中間的變化太快(一瞬間就會掉到0或是1)，改成relu在>0~無限大(linear為了還在0~1)再去clip再round，可以看到每個epoch的變化
* 1st phase的beta一定要>=0.6否則不會動，ones#都會是0
    * beta=1 (放0 1進去): weight init設成1也沒用，但把bias設成1就會一開始都是ones#=1了。weight=1 bias=0.2都匯市0，bias=0.3會是0.87。0.6/0.3都是0。0.8/0.3差不多是0.5(但是train很慢acc進步很慢就是了)。0.5/0.5是從0.01開始往上升 (0 1放進去會比較難train是因為它的變化量太大，一下就是有或沒有，所以clf可能學不好，但如果是weight每次gradient進步的都是一小點就會比較容易上升)
    * beta=0.6 (放0 1進去): 0.8/0.5都是0。0.9/0.8 從0.9一直到0。0.9/0.6差不多是從0.5但又有時候到0.7都是0.0(很難train，Nadam換個opt有時候沒用。EX變成adam 0.9/0.8才有0.96開始但如果0.9/0.75變成0開始。Rmsprop 0.9/0.8又是從0.95開始往下)。但如果都改成傳入weight就都沒問題。ones一開始大概0.5 weight平均，也不會卡住
* 建議: 先訓練embedding weight matrix，但是要看goal是要怎樣的matrix
* 其實他不管幾%都會train得很好，除非固定embedding，或是用更弱的clf
* 若設定alpha，就像是regularizer term (penalty)，設越大drop越多
* 一開始先很多ones，再越來越少個

* 同一個opt若加入transform就會train不起來
* 兩個不同的opt加入transform也會train不起來 (persistent、non-persis都不行)，且與BN無關