# Deep Learning for Natural Language Processing Using word2vec-keras
## A deep learning approach for NLP by combining Word2vec with LSTM

Refer to the article [Deep Learning for Natural Language Processing Using word2vec-keras](https://medium.com/@zhangyuefeng1/deep-learning-for-natural-language-processing-using-word2vec-keras-d9a240c7bb9d)

In [1]:
from sklearn.datasets import fetch_20newsgroups
from word2vec_keras import Word2VecKeras
from pprint import pprint
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import numpy as np
import nltk
import string
import re
import ast # abstract syntax tree: https://docs.python.org/3/library/ast.html
from sklearn.model_selection import train_test_split
import mlflow
import mlflow.sklearn

%matplotlib inline

Using TensorFlow backend.


## Preprocessing

In [2]:
class Preprocessing(object):
    '''
    Text-cleaning pipeline for a DataFrame that has a 'body_text' column.

    preprocessing() strips punctuation, lower-cases and tokenizes the text,
    removes English stopwords, stems and lemmatizes the tokens, and finally
    stores the lemmatized tokens, joined by single spaces, in the column
    named by target_column_name.
    '''

    # Deletes every character of string.punctuation:
    # '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    _PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

    # One or more non-word characters. Written as a raw string: the plain
    # literal '\W+' is an invalid escape sequence and triggers a warning.
    _NON_WORD = re.compile(r'\W+')

    # Helper columns created by preprocessing() and removed before it returns.
    _INTERMEDIATE_COLUMNS = ['body_text_nopunc', 'body_text_tokenized',
                             'body_text_nostop', 'body_text_stemmed',
                             'body_text_lemmatized']

    def __init__(self, data, target_column_name='body_text_clean'):
        '''
        data: DataFrame to clean; preprocessing() reads its 'body_text' column.
        target_column_name: name of the column that receives the cleaned text.
        '''
        self.data = data
        self.feature_name = target_column_name
        # English stopword set; loaded lazily by remove_stopwords().
        self._stopwords = None

    def remove_punctuation(self, text):
        '''
        Return text with every string.punctuation character removed.
        '''
        return text.translate(self._PUNCTUATION_TABLE)

    def tokenize(self, text):
        '''
        Split text on runs of non-word characters and return the token list.

        Text that starts or ends with a non-word character yields an empty
        string as the first or last token (re.split semantics).
        '''
        return self._NON_WORD.split(text)

    def remove_stopwords(self, tokenized_list):
        '''
        Return the tokens that are not English stopwords.

        Requires the NLTK 'stopwords' corpus:
        download the stopwords zip file at: http://www.nltk.org/nltk_data/
        and move the unzipped directory to: <nltk_data>/corpora
        '''
        if self._stopwords is None:
            # Load once and keep as a set: this method runs once per row, so
            # re-reading the corpus and scanning a list each time is wasteful.
            self._stopwords = frozenset(nltk.corpus.stopwords.words('english'))
        stopwords = self._stopwords
        return [word for word in tokenized_list if word not in stopwords]

    def stemming(self, tokenized_text):
        '''
        Return the Porter stem of every token.
        '''
        ps = nltk.PorterStemmer()
        return [ps.stem(word) for word in tokenized_text]

    def lemmatizing(self, tokenized_text):
        '''
        Return the WordNet lemma of every token.

        Requires the NLTK 'wordnet' corpus:
        download wordnet at: http://www.nltk.org/nltk_data/
        and move the wordnet.zip file to: <nltk_data>/corpora
        '''
        wn = nltk.WordNetLemmatizer()
        return [wn.lemmatize(word) for word in tokenized_text]

    def tokens_to_string(self, tokens_string):
        '''
        Convert the string form of a token list, e.g. "['a', 'b']", to "a b".

        Returns None when tokens_string cannot be parsed as a Python literal
        or the parsed value is not an iterable of strings (for example a NaN
        cell); dropna() later removes such rows.
        '''
        try:
            list_obj = ast.literal_eval(tokens_string)
            return " ".join(list_obj)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # The failures ast.literal_eval documents for malformed input,
            # plus TypeError from join() on a non-string sequence.
            return None

    def dropna(self):
        '''
        Drop the rows whose cleaned-text column is null.

        Returns self.data in every case (the frame is left untouched when it
        has no null values in that column).
        '''
        feature_name = self.feature_name
        if self.data[feature_name].isnull().any():
            self.data = self.data.dropna(subset=[feature_name])
        return self.data

    def preprocessing(self):
        '''
        Run the full cleaning pipeline and return the resulting DataFrame.

        Side effects: adds helper columns to the DataFrame passed to the
        constructor, writes the intermediate result to the default CSV path
        of save(), and rebinds self.data to the frame read back from it.
        '''
        data = self.data
        data['body_text_nopunc'] = data['body_text'].apply(self.remove_punctuation)
        data['body_text_tokenized'] = data['body_text_nopunc'].apply(
            lambda text: self.tokenize(text.lower()))
        data['body_text_nostop'] = data['body_text_tokenized'].apply(self.remove_stopwords)
        data['body_text_stemmed'] = data['body_text_nostop'].apply(self.stemming)
        data['body_text_lemmatized'] = data['body_text_nostop'].apply(self.lemmatizing)

        # Save the cleaned dataset into a csv file and load it back. After the
        # round trip the token-list columns hold the string form of each list,
        # which is what tokens_to_string() parses below.
        self.save()
        self.load()

        self.data[self.feature_name] = self.data['body_text_lemmatized'].apply(
            self.tokens_to_string)

        # Remove rows for which no cleaned text could be produced.
        self.dropna()

        # Non-in-place drop: self.data may be the filtered frame returned by
        # DataFrame.dropna, so rebind instead of mutating it.
        self.data = self.data.drop(columns=self._INTERMEDIATE_COLUMNS)

        return self.data

    def save(self, filepath="./data/spam_cleaned.csv"):
        '''
        Write self.data to filepath as comma-separated values, without index.
        '''
        self.data.to_csv(filepath, index=False, sep=',')

    def load(self, filepath="./data/spam_cleaned.csv"):
        '''
        Replace self.data with the CSV read from filepath and return it.
        '''
        self.data = pd.read_csv(filepath)
        return self.data

  tokens = re.split('\W+', text)


## Classification

In [3]:
class SpamClassifier(object):
    '''
    SMS spam/ham classifier that trains a Word2VecKeras model and records
    the run (parameters, accuracy, model) with MLflow.

    Typical use is a single call to mlFlow(), which loads and cleans the
    data, trains, evaluates, prints one sample prediction and logs the run.
    '''

    def __init__(self):
        self.model = Word2VecKeras()

    def load_data(self):
        '''
        Read ./data/spam.csv, keep the label and text columns, shuffle the
        rows, print summary counts and return the shuffled DataFrame.

        data source:
        https://www.kaggle.com/uciml/sms-spam-collection-dataset
        text encoding must use ISO-8859-1, not the default utf-8
        '''
        column_names = ['label', 'body_text', 'missing_1', 'missing_2', 'missing_3']
        data = pd.read_csv('./data/spam.csv', encoding="ISO-8859-1")
        data.columns = column_names
        data = data.drop(['missing_1', 'missing_2', 'missing_3'], axis=1)
        # frac=1.0 returns every row in random order, i.e. a shuffle.
        self.raw_data = data.sample(frac=1.0)

        labels = self.raw_data['label']
        print('Rows: {}, Columns: {}'.format(self.raw_data.shape[0], self.raw_data.shape[1]))
        print("Total rows: {}, spam: {}, ham: {}".format(len(self.raw_data),
                                                       (labels == 'spam').sum(),
                                                       (labels == 'ham').sum()))

        print("Total number of missing labels: {}".format(labels.isnull().sum()))
        print("Total number of missging text: {}".format(self.raw_data['body_text'].isnull().sum()))

        return self.raw_data

    def split_data(self):
        '''
        Shuffle and split self.x / self.y into training (75%) and testing
        (25%) subsets, using a fixed random_state.
        '''
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            self.x, self.y, test_size=0.25, random_state=42)

    def numpy_to_list(self):
        '''
        convert Numpy ndarray to Python list for word2vec-keras API
        '''
        self.x_train = self.x_train.tolist()
        self.y_train = self.y_train.tolist()
        self.x_test = self.x_test.tolist()
        self.y_test = self.y_test.tolist()

    def prepare_data(self, feature, label='label'):
        '''
        Load and clean the data, then build the train/test lists.

        feature: name of the text column used as model input.
        label: name of the column holding the class labels.
        Returns the preprocessed DataFrame. The Preprocessing pipeline runs
        regardless of which feature column is selected.
        '''
        self.load_data()
        pp = Preprocessing(self.raw_data)
        self.data = pp.preprocessing()

        # The label now names the object whose type is actually printed.
        print('self.data type: ', type(self.data))

        self.x = self.data[feature].values
        self.y = self.data[label].values
        self.split_data()
        self.numpy_to_list()

        return self.data

    def train_model(self):
        '''
        Set the hyper-parameters as attributes and train self.model on the
        training split.

        w2v_min_count is 1 on purpose: with a higher value (e.g. 50) there
        may be no word in the vocabulary that is frequent enough, the
        vocabulary ends up empty and gensim fails with
        "RuntimeError: you must first build vocabulary before training the
        model."
        '''
        self.w2v_size = 300
        self.w2v_min_count = 1 # 5
        self.w2v_epochs = 100
        self.k_epochs = 5 # 32
        self.k_lstm_neurons = 512
        self.k_max_sequence_len = 1000

        self.model.train(self.x_train, self.y_train,
            w2v_size=self.w2v_size,
            w2v_min_count=self.w2v_min_count,
            w2v_epochs=self.w2v_epochs,
            k_epochs=self.k_epochs,
            k_lstm_neurons=self.k_lstm_neurons,
            k_max_sequence_len=self.k_max_sequence_len,
            k_hidden_layer_neurons=[])

    def evaluate(self):
        '''
        Evaluate self.model on the test split.

        Stores the accuracy, the classification report (as a DataFrame) and
        the confusion matrix on the instance, prints the confusion matrix
        and returns the raw result dictionary.
        '''
        self.result = self.model.evaluate(self.x_test, self.y_test)
        self.accuracy = self.result["ACCURACY"]
        self.clf_report_df = pd.DataFrame(self.result["CLASSIFICATION_REPORT"])
        self.cnf_matrix = self.result["CONFUSION_MATRIX"]
        print('Confusion Matrix: ', self.cnf_matrix)
        return self.result

    def predict(self, idx=1):
        '''
        Print the label, the text and the model prediction of test sample idx.
        '''
        sample = self.x_test[idx]
        print("LABEL:", self.y_test[idx])
        print("TEXT :", sample)
        # "\n" (not "/n") so the separator is preceded by a blank line.
        print("\n============================================")
        print("PREDICTION:", self.model.predict(sample))

    def mlFlow(self, feature='body_text_clean'):
        '''
        Run the whole experiment inside one MLflow run: prepare the data,
        train, evaluate, print a sample prediction, then log the
        hyper-parameters, the accuracy metric and the model.

        feature: text column used as model input.
        '''
        np.random.seed(40)
        with mlflow.start_run():
            self.prepare_data(feature=feature) # feature should be 'body_text' if no need to preprocessing
            self.train_model()
            self.evaluate()
            self.predict()

            # k_epochs is logged as well so that every hyper-parameter set in
            # train_model() is recorded with the run.
            params = {
                "feature": feature,
                "w2v_size": self.w2v_size,
                "w2v_min_count": self.w2v_min_count,
                "w2v_epochs": self.w2v_epochs,
                "k_epochs": self.k_epochs,
                "k_lstm_neurons": self.k_lstm_neurons,
                "k_max_sequence_len": self.k_max_sequence_len,
            }
            for name, value in params.items():
                mlflow.log_param(name, value)
            mlflow.log_metric("accuracy", self.accuracy)
            # NOTE(review): self.model is a Word2VecKeras instance rather than
            # a scikit-learn estimator - confirm the sklearn flavor can
            # serialize and reload it.
            mlflow.sklearn.log_model(self.model, "Word2Vec-Keras")
        

In [4]:
# Create the classifier; this only instantiates the underlying Word2VecKeras model.
spam_clf = SpamClassifier()

In [5]:
# Run the full experiment using the raw 'body_text' column as model input
# (the default would be the cleaned 'body_text_clean' column).
spam_clf.mlFlow(feature='body_text')

  from collections import (


Rows: 5572, Columns: 2
Total rows: 5572, spam: 747, ham: 4825
Total number of missing labels: 0
Total number of missging text: 0


2019-11-03 19:30:43,615 : INFO : Build & train Word2Vec model
2019-11-03 19:30:43,616 : INFO : collecting all words and their counts
2019-11-03 19:30:43,617 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-11-03 19:30:43,626 : INFO : collected 6702 word types from a corpus of 58956 raw words and 4177 sentences
2019-11-03 19:30:43,627 : INFO : Loading a fresh vocabulary
2019-11-03 19:30:43,638 : INFO : effective_min_count=1 retains 6702 unique words (100% of original 6702, drops 0)
2019-11-03 19:30:43,638 : INFO : effective_min_count=1 leaves 58956 word corpus (100% of original 58956, drops 0)
2019-11-03 19:30:43,655 : INFO : deleting the raw counts dictionary of 6702 items
2019-11-03 19:30:43,656 : INFO : sample=0.001 downsamples 63 most-common words
2019-11-03 19:30:43,656 : INFO : downsampling leaves estimated 48579 word corpus (82.4% of prior 58956)
2019-11-03 19:30:43,668 : INFO : estimated required memory for 6702 words and 300 dimensions: 19435800 b

self.data[feture] type:  <class 'pandas.core.frame.DataFrame'>


2019-11-03 19:30:43,771 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:43,772 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:43,776 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:43,777 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:43,783 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:43,783 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:43,784 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:43,785 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:43,788 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:43,791 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:43,796 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-11-03 19:30:44,056 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:44,062 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:44,063 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:44,064 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:44,065 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:44,069 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:44,070 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:44,076 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:44,080 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:44,082 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:44,082 : INFO : EPOCH - 6 : training on 58956 raw words (48522 effective 

2019-11-03 19:30:44,315 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:44,316 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:44,320 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:44,328 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:44,329 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:44,332 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:44,333 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:44,333 : INFO : EPOCH - 11 : training on 58956 raw words (48596 effective words) took 0.0s, 1152313 effective words/s
2019-11-03 19:30:44,345 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:44,347 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:44,347 : INFO : worker thre

2019-11-03 19:30:44,554 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:44,557 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:44,558 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:44,561 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:44,561 : INFO : EPOCH - 16 : training on 58956 raw words (48624 effective words) took 0.0s, 1266519 effective words/s
2019-11-03 19:30:44,570 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:44,573 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:44,579 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:44,582 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:44,585 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:44,587 : INFO : worker t

2019-11-03 19:30:44,796 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:44,796 : INFO : EPOCH - 21 : training on 58956 raw words (48612 effective words) took 0.0s, 1276626 effective words/s
2019-11-03 19:30:44,805 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:44,806 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:44,811 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:44,813 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:44,813 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:44,814 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:44,815 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:44,818 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:44,825 : INFO : worker 

2019-11-03 19:30:45,033 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:45,037 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:45,040 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:45,043 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:45,044 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:45,047 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:45,049 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:45,053 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:45,055 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:45,056 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:45,056 : INFO : worker thread finished; awaiting finish of 5 more t

2019-11-03 19:30:45,275 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:45,277 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:45,277 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:45,279 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:45,283 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:45,284 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:45,285 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:45,285 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:45,292 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:45,293 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:45,295 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-11-03 19:30:45,503 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:45,503 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:45,506 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:45,506 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:45,508 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:45,508 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:45,518 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:45,520 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:45,521 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:45,522 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:45,522 : INFO : EPOCH - 37 : training on 58956 raw words (48549 effective

2019-11-03 19:30:45,732 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:45,733 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:45,744 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:45,746 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:45,748 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:45,749 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:45,750 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:45,751 : INFO : EPOCH - 42 : training on 58956 raw words (48665 effective words) took 0.0s, 1187084 effective words/s
2019-11-03 19:30:45,760 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:45,767 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:45,769 : INFO : worker thre

2019-11-03 19:30:45,977 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:45,978 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:45,983 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:45,985 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:45,986 : INFO : EPOCH - 47 : training on 58956 raw words (48615 effective words) took 0.0s, 1221402 effective words/s
2019-11-03 19:30:45,994 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:46,000 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:46,002 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:46,005 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:46,009 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:46,009 : INFO : worker t

2019-11-03 19:30:46,214 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:46,215 : INFO : EPOCH - 52 : training on 58956 raw words (48509 effective words) took 0.0s, 1290830 effective words/s
2019-11-03 19:30:46,224 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:46,229 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:46,231 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:46,234 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:46,235 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:46,237 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:46,238 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:46,240 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:46,245 : INFO : worker 

2019-11-03 19:30:46,459 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:46,461 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:46,466 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:46,468 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:46,471 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:46,475 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:46,477 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:46,479 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:46,480 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:46,481 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:46,481 : INFO : worker thread finished; awaiting finish of 5 more t

2019-11-03 19:30:46,703 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:46,706 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:46,707 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:46,708 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:46,710 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:46,712 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:46,715 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:46,715 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:46,718 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:46,727 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:46,728 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-11-03 19:30:46,949 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:46,950 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:46,952 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:46,953 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:46,954 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:46,959 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:46,962 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:46,967 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:46,968 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:46,969 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:46,970 : INFO : EPOCH - 68 : training on 58956 raw words (48671 effective

2019-11-03 19:30:47,191 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:47,193 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:47,199 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:47,200 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:47,206 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:47,207 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:47,209 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:47,210 : INFO : EPOCH - 73 : training on 58956 raw words (48603 effective words) took 0.0s, 1077325 effective words/s
2019-11-03 19:30:47,220 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:47,223 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:47,229 : INFO : worker thre

2019-11-03 19:30:47,459 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:47,461 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:47,463 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:47,464 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:47,464 : INFO : EPOCH - 78 : training on 58956 raw words (48553 effective words) took 0.0s, 1217125 effective words/s
2019-11-03 19:30:47,473 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:47,478 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:47,478 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:47,481 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:47,482 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:47,483 : INFO : worker t

2019-11-03 19:30:47,697 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:47,698 : INFO : EPOCH - 83 : training on 58956 raw words (48538 effective words) took 0.0s, 1184966 effective words/s
2019-11-03 19:30:47,707 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:47,710 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:47,712 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:47,713 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:47,718 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:47,718 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:47,720 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:47,722 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:47,729 : INFO : worker 

2019-11-03 19:30:47,940 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-03 19:30:47,943 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-03 19:30:47,948 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-03 19:30:47,952 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:47,953 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:47,954 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:47,955 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:47,959 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:47,961 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:47,962 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:47,963 : INFO : worker thread finished; awaiting finish of 5 more t

2019-11-03 19:30:48,177 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-03 19:30:48,180 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-11-03 19:30:48,183 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-11-03 19:30:48,186 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:48,187 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:48,190 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:48,191 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:48,192 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:48,194 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:48,195 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:48,204 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-11-03 19:30:48,429 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-11-03 19:30:48,430 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-11-03 19:30:48,431 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-11-03 19:30:48,431 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-11-03 19:30:48,432 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-11-03 19:30:48,439 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-11-03 19:30:48,441 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-11-03 19:30:48,442 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-11-03 19:30:48,445 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-11-03 19:30:48,448 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-11-03 19:30:48,448 : INFO : EPOCH - 99 : training on 58956 raw words (48559 effective

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1000, 300)         2010900   
_________________________________________________________________
lstm_1 (LSTM)                (None, 512)               1665024   
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 1026      
Total params: 3,676,950
Trainable params: 1,666,050
Non-trainable params: 2,010,900
_________________________________________________________________
Epoch 1/5
Epoch 2/5




Epoch 3/5
Epoch 4/5
Epoch 5/5


2019-11-03 20:04:32,109 : INFO : Done


Confusion Matrix:  [[1207    5]
 [  21  160]]
LABEL: ham
TEXT : U r too much close to my heart. If u go away i will be shattered. Plz stay with me.
PREDICTION: {'label': 'ham', 'confidence': 0.9996090531349182, 'elapsed_time': 0.5015852451324463}
