In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers.wrappers import TimeDistributed
from keras.layers import Convolution1D, MaxPooling1D
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional, GRU
from keras.preprocessing.sequence import pad_sequences
from Metrics import *
from datetime import datetime
import pickle

In [None]:
# Load data
# NOTE(review): unpickling can execute arbitrary code; only load 'atis.pkl'
# from a source you trust.
with open('atis.pkl', 'rb') as atis_file:  # context manager closes the handle
    train_set, valid_set, dicts = pickle.load(atis_file, encoding='latin1')

# Create index -> word/label dicts by inverting the token -> index dicts
w2idx, ne2idx, labels2idx = dicts['words2idx'], dicts['tables2idx'], dicts['labels2idx']
idx2w  = {w2idx[k]:k for k in w2idx}
idx2ne = {ne2idx[k]:k for k in ne2idx}
idx2la = {labels2idx[k]:k for k in labels2idx}
# Append one extra entry at the next free index to stand for padding.
# NOTE(review): assumes existing indices are contiguous 0..len-1 — confirm.
idx2w[len(idx2w)]='X' # for padding
idx2la[len(idx2la)]='X' # for padding

# Create training/validation data: each set is (word ids, entity ids, label ids)
train_x, train_ne, train_label = train_set
val_x, val_ne, val_label = valid_set

# Transfer the integer coding back to words/labels for visual checks
words_val = [[idx2w[x] for x in w] for w in val_x]
groundtruth_val = [[idx2la[x] for x in y] for y in val_label]
words_train = [[idx2w[x] for x in w] for w in train_x]
groundtruth_train = [[idx2la[x] for x in y] for y in train_label]

In [None]:
# Check training data: render a few randomly chosen sentences with their labels
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
import tabulate

N_PREVIEW = 12  # number of random samples to show
print("Training sentences: {}".format(len(words_train)))
for _draw in range(N_PREVIEW):
    # Draw a sentence index at random (with replacement, so repeats can occur).
    # A separate loop counter is used so the sampled index does not shadow it.
    i = np.random.randint(0, len(words_train))
    print("Sample no.{}".format(i))
    # Row 1: the words, row 2: the ground-truth label of each word
    table = [words_train[i], groundtruth_train[i]]
    display(HTML(tabulate.tabulate(table, tablefmt='html')))

In [None]:
# Sizes of the label set and the vocabulary.
# The highest index of each (n_classes-1 / n_vocab-1) is used as the pad value below.
n_classes = len(idx2la)
n_vocab = len(idx2w)

# Longest sentence across BOTH the training and validation data
maxlen_train_x = np.max([len(sentence) for sentence in train_x])
maxlen_val_x = np.max([len(sentence) for sentence in val_x])
maxlen = max(maxlen_train_x, maxlen_val_x)

# Pad every sentence (and its label sequence) to the common length `maxlen`
train_x = pad_sequences(train_x, maxlen=maxlen, value=n_vocab-1)              # words pad with n_vocab-1
train_label = pad_sequences(train_label, maxlen=maxlen, value=n_classes-1)    # labels pad with n_classes-1 (127 per the original note)
val_x = pad_sequences(val_x, maxlen=maxlen, value=n_vocab-1)
val_label = pad_sequences(val_label, maxlen=maxlen, value=n_classes-1)

# One-hot encode the labels by indexing rows of an identity matrix
one_hot_rows = np.eye(n_classes)
train_label = one_hot_rows[train_label]  # original note: (4978, 46, 128)
val_label = one_hot_rows[val_label]      # original note: (893, 46, 128)

# Model: embedding -> Conv1D -> dropout -> GRU -> per-timestep softmax
model = Sequential([
    Embedding(n_vocab, 100),
    Convolution1D(64, 5, padding='same', activation='relu'),
    Dropout(0.25),
    GRU(100, return_sequences=True),
    TimeDistributed(Dense(n_classes, activation='softmax')),
])
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy', precision, recall, fbeta_score],
)
model.summary()

In [None]:
# Build the bidirectional-LSTM tagger with the Keras functional API.
# Note: this rebinds `model`, replacing whatever was assigned to it before.
from keras.layers import Input
from keras.models import Model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

# Input is one padded sentence of word ids (length = padded sequence length)
main_input = Input(shape=(train_x.shape[1],))
embedded = Embedding(n_vocab, 100)(main_input)
encoded = Bidirectional(LSTM(128, return_sequences=True))(embedded)
regularized = Dropout(0.25)(encoded)
# Per-timestep softmax over the label set
lstm_out = TimeDistributed(Dense(n_classes, activation='softmax'))(regularized)
model = Model(main_input, lstm_out)

# Optional multi-GPU wrapping (left disabled):
#from keras.utils import multi_gpu_model
#try:
#    model = multi_gpu_model(model)
#except:
#    pass

model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy', precision, recall, fbeta_score],
)
model.summary()

# Render the model graph (with tensor shapes) as inline SVG; kept as the
# last expression so the notebook displays it.
SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

In [None]:
# Model training, timed with wall-clock stamps truncated to whole seconds
BATCH_SIZE = 2000
train_started = datetime.now().replace(microsecond=0)
model.fit(
    train_x,
    train_label,
    batch_size=BATCH_SIZE,
    epochs=140,
    validation_split=0.1,  # hold out 10% of the training data for validation
)
train_finished = datetime.now().replace(microsecond=0)
print(train_finished - train_started)

In [None]:
# Evaluate on the validation set
BATCH_SIZE = 2048
score = model.evaluate(val_x, val_label, batch_size=BATCH_SIZE, verbose=1)
# The first five entries of `score` are printed in order: loss, then the
# metrics in the order they were passed to model.compile.
# (The first label previously read "Lose"; corrected to "Loss".)
print("Loss:%1.4f \naccuracy:%1.4f \nprecision:%1.4f \nrecall:%1.4f \nfbeta_score:%1.4f"
      % tuple(score[:5]))

In [None]:
# Load/Save trained weights
from keras.models import load_model
from keras.models import model_from_json

# Save the model architecture as JSON (weights are handled separately below).
# The context manager guarantees the file is flushed and closed.
json_string = model.to_json()
with open('model_architecture.json', 'w') as arch_file:
    arch_file.write(json_string)

# Read the model architecture back
#with open('model_architecture.json', 'r') as arch_file:
#    model = model_from_json(arch_file.read())

# Save weights
#model.save_weights('model_weights_API_epochs140.h5',overwrite=True)

# Load weights
#model.load_weights('model_weights_API_epochs140.h5') #


In [None]:
# Model performance on the validation set, with padded positions removed.
# The error count used to include padded timesteps while the word total excluded
# them, so "accuracy by word" mixed two different populations. Both numerator
# and denominator are now restricted to real (non-padding) tokens.
pad_label = n_classes - 1  # label index used as the pad value for label sequences

# val_x.shape=(sample,maxlen)
pred = model.predict(val_x)            # pred.shape=(sample, maxlen, n_classes)
pred_arr = np.argmax(pred, axis=-1)    # (sample, maxlen): predicted label index per timestep
# val_label.shape=(sample,maxlen,n_classes) one-hot -> (sample, maxlen) label indices
truth_arr = np.argmax(val_label, axis=-1)

real_token = truth_arr != pad_label                  # True where the timestep is a real word
wrong_token = (pred_arr != truth_arr) & real_token   # mis-tagged real words only

Total_sum = int(np.sum(real_token))                      # all real words across all samples
False_sum = int(np.sum(wrong_token))                     # errors counted by word
False_count = int(np.sum(np.any(wrong_token, axis=-1)))  # sentences with at least one wrong word

# Guard the divisions so an empty validation set reports NaN instead of raising
n_sentences = len(val_x)
word_accuracy = 100 - False_sum / Total_sum * 100 if Total_sum else float('nan')
sentence_accuracy = 100 - False_count / n_sentences * 100 if n_sentences else float('nan')

print("No.%4d \nError:%4d/%4d \nAccuracy by word:%3.2f%% \nAccuracy by sentences:%3.2f%%" %(
    False_count,False_sum,Total_sum,
    word_accuracy,
    sentence_accuracy
))

In [None]:
# Model performance visual check: render every validation sentence that has at
# least one mis-tagged real word, as a table of
#   words / predicted labels / true labels / per-word correctness.
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
import tabulate

pad_word = n_vocab - 1     # word index used as the pad value for sentences
pad_label = n_classes - 1  # label index used as the pad value for label sequences

#val_x.shape=(sample,maxlen)
pred = model.predict(val_x)                 # pred.shape=(sample, maxlen, n_classes)
pred_arr = np.argmax(pred, axis=-1)         # (sample, maxlen): predicted label index per timestep
truth_arr = np.argmax(val_label, axis=-1)   # (sample, maxlen): true label index per timestep

False_count=0  # sentences containing at least one wrong word
False_sum=0    # wrong words
Total_sum=0    # real (non-padding) words

for i in range(len(val_x)):
    # Select real-word positions by POSITION (word != pad), then apply the same
    # mask to predictions and truth so all table rows stay aligned. Filtering
    # predictions by value would drop a predicted pad label on a real word
    # (or keep a real label predicted on padding) and shift the columns.
    # Assumes labels were padded identically to the words — TODO confirm.
    is_word = val_x[i] != pad_word
    sent_pred = pred_arr[i][is_word]
    sent_truth = truth_arr[i][is_word]
    truth_ind = sent_pred == sent_truth  # per-word correctness flags
    Total_sum += int(np.sum(is_word))
    if truth_ind.all():
        continue  # every real word tagged correctly; nothing to show
    False_count += 1
    False_sum += int(np.sum(~truth_ind))
    val_input = [idx2w[x] for x in val_x[i][is_word]]
    pred_label = [idx2la[x] for x in sent_pred]
    truth_label = [idx2la[x] for x in sent_truth]
    table = [val_input, pred_label, truth_label, truth_ind]
    display(HTML(tabulate.tabulate(table, tablefmt='html')))