# W266 Term Project: Event Temporal State Identification

## CNN Model

### John Chiang, Vincent Chu

In [1]:
#! /usr/bin/env python

import numpy as np
import os
import time
import datetime

# Scikit-learn imports
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Tensorflow imports
import tensorflow as tf
from tensorflow.contrib import learn

# Custom library for CNN model
from nlp_cnn import NLPCNN

# Custom library for data processing
import societal_data_processor as sdp

### Parameters for Data Loading, CNN Model Ops and CNN Model Training

In [2]:
#===========================================================================================================
# Parameters
#
# Adapted and modified from train.py of Denny Britz's cnn-text-classification-tf GitHub page
# <https://github.com/dennybritz/cnn-text-classification-tf>
#===========================================================================================================

# The flag definitions are wrapped in a try block so that this cell can be executed more than once
# in the same kernel (presumably a repeated DEFINE_* of an existing flag raises -- TODO confirm the
# exact exception type for the installed Tensorflow version and narrow the handler further).
try:
    # Data loading params
    # NOTE(review): the data_dir default is an absolute path on the author's machine; override it when
    # running elsewhere.
    tf.flags.DEFINE_float("dev_sample_percentage", .1, "Percentage of the training data to use for validation")
    tf.flags.DEFINE_string("data_dir", '/home/vslchu/w266/project/data/eventstatus_eng/', "Directory for Annotated Societal Events Data")

    # Model Hyperparameters
    tf.flags.DEFINE_integer("embedding_dim", 50, "Dimensionality of character embedding")
    tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes")
    tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size")
    tf.flags.DEFINE_float("dropout_keep_prob", 1, "Dropout keep probability")
    tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda")

    # Training parameters
    tf.flags.DEFINE_integer("batch_size", 50, "Batch Size")
    tf.flags.DEFINE_integer("num_epochs", 5, "Number of training epochs")
    tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps")
    tf.flags.DEFINE_integer("checkpoint_every", 500, "Save model after this many steps")
    tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store")

    # Misc Parameters
    tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
    tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
except Exception:
    # Catch Exception rather than using a bare except so that KeyboardInterrupt and SystemExit
    # are not swallowed by this handler.
    print("Tf Flags already defined")

# Inspecting the parameters

params = tf.flags.FLAGS
# _parse_flags() and __flags are private members of the FLAGS object, so this listing is tied to
# the Tensorflow version in use.
params._parse_flags()
print("\nParameters:")
for attr, value in sorted(params.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")


Parameters:
ALLOW_SOFT_PLACEMENT=True
BATCH_SIZE=50
CHECKPOINT_EVERY=500
DATA_DIR=/home/vslchu/w266/project/data/eventstatus_eng/
DEV_SAMPLE_PERCENTAGE=0.1
DROPOUT_KEEP_PROB=1
EMBEDDING_DIM=50
EVALUATE_EVERY=100
FILTER_SIZES=3,4,5
L2_REG_LAMBDA=0.0
LOG_DEVICE_PLACEMENT=False
NUM_CHECKPOINTS=5
NUM_EPOCHS=5
NUM_FILTERS=128



### Function Definitions

In [3]:
#===========================================================================================================
# Functions
#
# Adapted and modified from train.py of Denny Britz's cnn-text-classification-tf GitHub page
# <https://github.com/dennybritz/cnn-text-classification-tf>
#===========================================================================================================

############################################################################################################
# Function Name: load_text_data
# Description  : Use functions from the societal_data_processor library to load and prepare annotated 
#                data from the EventStatus corpus. The annotations (i.e., the y's) are transformed to 
#                lists of binaries, the text is mapped to fixed-length lists of vocabulary IDs, and 
#                everything is split into training and test sets using params.dev_sample_percentage.
# Parameters         :
#   data_dir         : path of the directory with the annotated files
#   processor_ver    : version of the data processor to use: 
#                      1 - Chunk to multiple annotations
#                      2 - Accumulated phrase to single annotation
#                      3 - Phrase to single annotation
#   remove_stopwords : Whether to remove the stopwords
#   replace_num      : Whether to replace numbers with <NUM>
#   remove_non_alpha : Whether to remove non-alphabetical strings
#   to_lower         : Whether to convert all words to lowercase
#   to_subwords      : Whether to further break down the words into subwords
# Return Values      :
#   x_train          : Training data set (numpy array of vocabulary IDs per chunk or phrase)
#   x_test           : Test data set (numpy array of vocabulary IDs per chunk or phrase)
#   y_train          : Numpy array of binaries representing annotations (i.e., labels) corresponding 
#                      to each item in the training data set
#   y_test           : Numpy array of binaries representing annotations (i.e., labels) corresponding 
#                      to each item in the test data set
#   y_orig_train     : Original annotations (i.e., labels as strings) corresponding to each 
#                      item in the training data set
#   y_orig_test      : Original annotations (i.e., labels as strings) corresponding to each 
#                      item in the test data set
#   vocab_processor  : Vocabulary processor that maps the words chunks or phrases to a list of IDs 
# Raises             :
#   ValueError       : If no chunks were loaded from data_dir
############################################################################################################
def load_text_data(data_dir, 
                   processor_ver, 
                   remove_stopwords = False, 
                   replace_num = False, 
                   remove_non_alpha = False, 
                   to_lower = False, 
                   to_subwords = False):

    # Load data from the annotated files. Only the cleaned chunks and their annotations are used
    # here; the other three returned values are discarded.
    print("Loading data...")
    (_, clean_chunks, _, temporal_states, _) = \
    sdp.get_chunks_n_annotations(data_dir, 
                                 processor_ver, 
                                 remove_stopwords, 
                                 replace_num, 
                                 remove_non_alpha, 
                                 to_lower, 
                                 to_subwords)

    # Fail with a descriptive message instead of the opaque "max() arg is an empty sequence"
    if len(clean_chunks) == 0:
        raise ValueError("No chunks were loaded from {}".format(data_dir))

    # Transform annotations into lists of binaries
    y = sdp.transform_annotations_to_binary(temporal_states)

    # Build vocabulary; every chunk is mapped to max_chunk_length IDs, where the length is the
    # largest number of space-separated tokens found in any chunk
    max_chunk_length = max(len(chunk.split(" ")) for chunk in clean_chunks)
    print("max_chunk_length =  {}".format(max_chunk_length))

    vocab_processor = learn.preprocessing.VocabularyProcessor(max_chunk_length)
    x = np.array(list(vocab_processor.fit_transform(clean_chunks)))

    # NOTE(review): x, y and the original labels are split by three independent calls. This keeps
    # rows aligned only if split_train_test_data is deterministic (e.g. a positional split with
    # no shuffling) -- confirm against societal_data_processor.
    (x_train, x_test) = sdp.split_train_test_data(x, params.dev_sample_percentage)
    (y_train, y_test) = sdp.split_train_test_data(y, params.dev_sample_percentage)
    (y_orig_train, y_orig_test) = sdp.split_train_test_data(temporal_states, params.dev_sample_percentage)

    x_train = np.array(x_train)
    x_test = np.array(x_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("Train/Dev split on data (x): {:d}/{:d}".format(len(x_train), len(x_test)))
    print("Train/Dev split on labels (y): {:d}/{:d}".format(len(y_train), len(y_test)))

    return (x_train, x_test, y_train, y_test, y_orig_train, y_orig_test, vocab_processor)

############################################################################################################
# Function Name: train_step
# Description  : Run one training step of the CNN on a single batch of training data and log the 
#                resulting step number, loss and accuracy
# Parameters   :
#   sess       : Tensorflow session
#   cnn        : Object of class NLPCNN, which exposes the placeholders and ops fetched below
#   x_batch    : A batch of word chunks or phrases from the training data set
#   y_batch    : List of binaries representing annotations (i.e., labels) corresponding to the x_batch 
#                from the training data set
# Return Values:
#   loss       : Value fetched from cnn.loss for this batch
#   accuracy   : Value fetched from cnn.accuracy for this batch
#   predictions: Value fetched from cnn.predictions for this batch
############################################################################################################
def train_step(sess, cnn, x_batch, y_batch):

    # Placeholder -> value mapping; the dropout keep probability comes from the parsed flags
    feeds = {}
    feeds[cnn.input_x] = x_batch
    feeds[cnn.input_y] = y_batch
    feeds[cnn.dropout_keep_prob] = params.dropout_keep_prob

    # cnn.train_op is fetched along with the metrics; its own result is not needed
    fetches = [cnn.train_op, cnn.global_step, cnn.loss, cnn.accuracy, cnn.predictions]
    _, step, loss, accuracy, predictions = sess.run(fetches, feeds)

    timestamp = datetime.datetime.now().isoformat()
    log_line = "{}: step {}, loss {:g}, acc {:g}".format(timestamp, step, loss, accuracy)
    print(log_line)

    return (loss, accuracy, predictions)

############################################################################################################
# Function Name: test_step
# Description  : Evaluate the trained CNN model on the test data set
# Parameters   :
#   sess       : Tensorflow session
#   cnn        : Object of class NLPCNN, which has all training and test ops defined 
#   x_batch    : A batch of word chunks or phrases from the test data set
#   y_batch    : List of binaries representing annotations (i.e., labals) corresponding to the x_batch 
#                from the test data set
# Return Values:
#   loss       : Mean cross-entropy loss
#   accuracy   : Accuracy of predictions
#   predictions: Predicted annotations (i.e., labels)
############################################################################################################
def test_step(sess, cnn, x_batch, y_batch):

    feed_dict = {
      cnn.input_x: x_batch,
      cnn.input_y: y_batch,
      cnn.dropout_keep_prob: 1.0
    }
    

    step, loss, accuracy, predictions = \
        sess.run([cnn.global_step, 
                  cnn.loss, 
                  cnn.accuracy, 
                  cnn.predictions],
                 feed_dict)
        
    time_str = datetime.datetime.now().isoformat()
    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
    
    return (loss, accuracy, predictions) 

############################################################################################################
# Function Name: run_cnn
# Description  : Instantiate and run a CNN model, train the model on the training data set and evaluate 
#                its performance using the test data set. The model is built in a fresh graph and its 
#                session is closed before the function returns. Checkpoints are written under 
#                <cnn.out_dir>/checkpoints.
# Parameters       :
#   x_train        : Training data set with word chunks or phrases
#   y_train        : Annotations (i.e., labels) corresponding to each item in the training data set
#   x_test         : Test data set with word chunks or phrases
#   y_test         : Annotations (i.e., labels) corresponding to each item in the test data set
#   vocab_processor: Vocabulary processor that maps the words chunks or phrases to a list of IDs 
# Return Values    :
#   test_preds     : List of lists of predicted labels from each evaluation (i.e., testing) step
############################################################################################################
def run_cnn(x_train, y_train, x_test, y_test, vocab_processor):

    test_preds = []

    with tf.Graph().as_default():

        session_conf = tf.ConfigProto(
          allow_soft_placement=params.allow_soft_placement,
          log_device_placement=params.log_device_placement)

        # Using the session itself as the context manager installs it as the default session and
        # closes it on exit. sess.as_default() alone would leave the session (and the resources
        # it holds) open after every call of this function.
        with tf.Session(config=session_conf) as sess:
            cnn = NLPCNN(sequence_length = x_train.shape[1],
                         num_classes = y_train.shape[1],
                         vocab_size = len(vocab_processor.vocabulary_),
                         embedding_size = params.embedding_dim,
                         filter_sizes = list(map(int, params.filter_sizes.split(","))),
                         num_filters = params.num_filters,
                         l2_reg_lambda = params.l2_reg_lambda)

            cnn.build_core_graph()
            cnn.build_train_test_graph()

            print("cnn.out_dir =  {}".format(cnn.out_dir))

            checkpoint_dir = os.path.abspath(os.path.join(cnn.out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            # NOTE(review): max_to_keep is read from the model object while a num_checkpoints flag
            # is also defined in params -- confirm which of the two is meant to be authoritative.
            saver = tf.train.Saver(tf.global_variables(), max_to_keep = cnn.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Generate batches
            batches = sdp.batch_iter(
                list(zip(x_train, y_train)), params.batch_size, params.num_epochs)

            batch_count = 0
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                # The per-batch training metrics are logged inside train_step; its return value
                # is not needed here
                train_step(sess, cnn, x_batch, y_batch)

                current_step = tf.train.global_step(sess, cnn.global_step)
                print("current_step:  {}".format(current_step))

                # Periodically score the whole test set and keep that round of predictions
                if current_step % params.evaluate_every == 0:
                    print("\nPredicting annotation for test data:")
                    _, _, predictions = test_step(sess, cnn, x_test, y_test)
                    test_preds.append(list(predictions))
                    print("")

                if current_step % params.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step = current_step)
                    print("Saved model checkpoint to {}\n".format(path))

                batch_count += 1

        print("\nRan %d batches during training and created %d rounds of predictions" % (batch_count, len(test_preds)))

    return test_preds

############################################################################################################
# Function Name: eval_preds
# Description  : Evaluate predictions from the test steps against the real annotations. Each round of 
#                predictions is converted back to annotations and scored with weighted-average F1, 
#                precision and recall; the scores are printed and collected.
# Parameters       :
#   test_preds_list: list of predictions from various evaluation checkpoints during the training cycle
#   test_labels    : Labels (annotations) for the test data set
# Return Values    :
#   test_pred_eval : List of tuples of metrics (i.e., F1 Score, Precision and Recall) from each evaluation 
#                    (i.e., testing) step
############################################################################################################
def eval_preds(test_preds_list, test_labels):

    test_pred_eval = []

    for i, test_preds in enumerate(test_preds_list):
        # Map the numeric predictions back to annotations so they are comparable with test_labels
        pred_annotations = sdp.transform_digits_to_annotations(test_preds)

        ### Evaluate Performance of model
        f1 = f1_score(test_labels, pred_annotations, average='weighted')
        precision = precision_score(test_labels, pred_annotations, average='weighted')
        recall = recall_score(test_labels, pred_annotations, average='weighted')

        print("\nPerformance Evaluation of CNN Model (i = %d):" % i)
        print("F1 Score = %f" % f1)
        print("Precision Score = %f" % precision)
        print("Recall Score = %f" % recall)

        test_pred_eval.append((f1, precision, recall))

    return test_pred_eval

### CNN Model scenarios

In [4]:
############################################################################################################
# Subword-level Data Processor v1 with stopwords but without non-alpha words
############################################################################################################

# Load the corpus with data processor version 1, dropping non-alphabetical strings and
# splitting words into subwords
(x_train, x_test,
 y_train, y_test,
 y_orig_train, y_orig_test,
 vocab_processor) = load_text_data(params.data_dir,
                                   1,
                                   remove_non_alpha = True,
                                   to_subwords = True)

# Train the CNN, then score every round of test predictions against the original labels
test_preds = run_cnn(x_train, y_train, x_test, y_test, vocab_processor)
test_eval = eval_preds(test_preds, y_orig_test)

# Drop the references to this scenario's data before the next scenario runs
x_train = x_test = y_train = y_test = None
y_orig_train = y_orig_test = vocab_processor = None

Loading data...
max_chunk_length =  643
Vocabulary Size: 8522
Train/Dev split on data (x): 5059/562
Train/Dev split on labels (y): 5059/562
Writing to /home/vslchu/w266/project/code/runs/20170823_0303_UTC

cnn.out_dir =  /home/vslchu/w266/project/code/runs/20170823_0303_UTC
2017-08-23T03:03:23.909296: step 1, loss 4.29361, acc 0.02
current_step:  1
2017-08-23T03:03:24.088741: step 2, loss 3.21933, acc 0.02
current_step:  2
2017-08-23T03:03:24.270868: step 3, loss 2.18547, acc 0.3
current_step:  3
2017-08-23T03:03:24.454777: step 4, loss 1.7048, acc 0.3
current_step:  4
2017-08-23T03:03:24.638798: step 5, loss 1.51942, acc 0.38
current_step:  5
2017-08-23T03:03:24.819940: step 6, loss 1.40831, acc 0.4
current_step:  6
2017-08-23T03:03:25.008410: step 7, loss 1.54705, acc 0.52
current_step:  7
2017-08-23T03:03:25.184522: step 8, loss 1.28973, acc 0.54
current_step:  8
2017-08-23T03:03:25.372190: step 9, loss 1.91805, acc 0.42
current_step:  9
2017-08-23T03:03:25.546834: step 10, loss 1.6

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [5]:
############################################################################################################
# Subword-level Data Processor v2 with stopwords but without non-alpha words
############################################################################################################

# Load the corpus with data processor version 2, dropping non-alphabetical strings and
# splitting words into subwords
(x_train, x_test,
 y_train, y_test,
 y_orig_train, y_orig_test,
 vocab_processor) = load_text_data(params.data_dir,
                                   2,
                                   remove_non_alpha = True,
                                   to_subwords = True)

# Train the CNN, then score every round of test predictions against the original labels
test_preds = run_cnn(x_train, y_train, x_test, y_test, vocab_processor)
test_eval = eval_preds(test_preds, y_orig_test)

# Drop the references to this scenario's data before the next scenario runs
x_train = x_test = y_train = y_test = None
y_orig_train = y_orig_test = vocab_processor = None

Loading data...
max_chunk_length =  411
Vocabulary Size: 6361
Train/Dev split on data (x): 5059/562
Train/Dev split on labels (y): 5059/562
Writing to /home/vslchu/w266/project/code/runs/20170823_0306_UTC

cnn.out_dir =  /home/vslchu/w266/project/code/runs/20170823_0306_UTC
2017-08-23T03:06:08.864738: step 1, loss 2.72359, acc 0.02
current_step:  1
2017-08-23T03:06:08.985534: step 2, loss 2.10008, acc 0.12
current_step:  2
2017-08-23T03:06:09.095633: step 3, loss 1.67343, acc 0.34
current_step:  3
2017-08-23T03:06:09.211802: step 4, loss 1.36412, acc 0.44
current_step:  4
2017-08-23T03:06:09.326955: step 5, loss 1.47887, acc 0.44
current_step:  5
2017-08-23T03:06:09.438638: step 6, loss 1.59233, acc 0.38
current_step:  6
2017-08-23T03:06:09.547550: step 7, loss 1.22439, acc 0.56
current_step:  7
2017-08-23T03:06:09.658616: step 8, loss 1.47965, acc 0.54
current_step:  8
2017-08-23T03:06:09.777671: step 9, loss 1.37254, acc 0.46
current_step:  9
2017-08-23T03:06:09.884492: step 10, loss

In [6]:
############################################################################################################
# Subword-level Data Processor v3 with stopwords but without non-alpha words
############################################################################################################

# Load the corpus with data processor version 3, dropping non-alphabetical strings and
# splitting words into subwords
(x_train, x_test,
 y_train, y_test,
 y_orig_train, y_orig_test,
 vocab_processor) = load_text_data(params.data_dir,
                                   3,
                                   remove_non_alpha = True,
                                   to_subwords = True)

# Train the CNN, then score every round of test predictions against the original labels
test_preds = run_cnn(x_train, y_train, x_test, y_test, vocab_processor)
test_eval = eval_preds(test_preds, y_orig_test)

# Drop the references to this scenario's data before the next scenario runs
x_train = x_test = y_train = y_test = None
y_orig_train = y_orig_test = vocab_processor = None

Loading data...
max_chunk_length =  264
Vocabulary Size: 6361
Train/Dev split on data (x): 5059/562
Train/Dev split on labels (y): 5059/562
Writing to /home/vslchu/w266/project/code/runs/20170823_0308_UTC

cnn.out_dir =  /home/vslchu/w266/project/code/runs/20170823_0308_UTC
2017-08-23T03:08:00.826807: step 1, loss 3.4203, acc 0.08
current_step:  1
2017-08-23T03:08:00.914325: step 2, loss 2.46566, acc 0.1
current_step:  2
2017-08-23T03:08:00.989778: step 3, loss 1.66181, acc 0.28
current_step:  3
2017-08-23T03:08:01.061591: step 4, loss 1.3752, acc 0.38
current_step:  4
2017-08-23T03:08:01.134203: step 5, loss 1.45375, acc 0.52
current_step:  5
2017-08-23T03:08:01.212795: step 6, loss 1.27894, acc 0.56
current_step:  6
2017-08-23T03:08:01.291391: step 7, loss 1.71936, acc 0.5
current_step:  7
2017-08-23T03:08:01.367442: step 8, loss 2.21025, acc 0.42
current_step:  8
2017-08-23T03:08:01.442047: step 9, loss 1.63909, acc 0.44
current_step:  9
2017-08-23T03:08:01.521980: step 10, loss 1.5

In [7]:
############################################################################################################
# Word-level Data Processor v1 with stopwords but without non-alpha words
############################################################################################################

# Load the corpus with data processor version 1, dropping non-alphabetical strings and
# keeping whole words (no subword splitting)
(x_train, x_test,
 y_train, y_test,
 y_orig_train, y_orig_test,
 vocab_processor) = load_text_data(params.data_dir,
                                   1,
                                   remove_non_alpha = True)

# Train the CNN, then score every round of test predictions against the original labels
test_preds = run_cnn(x_train, y_train, x_test, y_test, vocab_processor)
test_eval = eval_preds(test_preds, y_orig_test)

# Drop the references to this scenario's data before the next scenario runs
x_train = x_test = y_train = y_test = None
y_orig_train = y_orig_test = vocab_processor = None

Loading data...
max_chunk_length =  320
Vocabulary Size: 20222
Train/Dev split on data (x): 5059/562
Train/Dev split on labels (y): 5059/562
Writing to /home/vslchu/w266/project/code/runs/20170823_0309_UTC

cnn.out_dir =  /home/vslchu/w266/project/code/runs/20170823_0309_UTC
2017-08-23T03:09:18.702140: step 1, loss 3.01912, acc 0.04
current_step:  1
2017-08-23T03:09:18.794672: step 2, loss 1.93971, acc 0.14
current_step:  2
2017-08-23T03:09:18.894374: step 3, loss 1.79899, acc 0.48
current_step:  3
2017-08-23T03:09:18.989998: step 4, loss 1.19963, acc 0.5
current_step:  4
2017-08-23T03:09:19.085034: step 5, loss 0.890985, acc 0.62
current_step:  5
2017-08-23T03:09:19.181276: step 6, loss 1.61626, acc 0.42
current_step:  6
2017-08-23T03:09:19.277671: step 7, loss 1.56966, acc 0.46
current_step:  7
2017-08-23T03:09:19.375756: step 8, loss 1.57618, acc 0.5
current_step:  8
2017-08-23T03:09:19.465699: step 9, loss 1.55041, acc 0.42
current_step:  9
2017-08-23T03:09:19.560353: step 10, loss

In [8]:
############################################################################################################
# Word-level Data Processor v2 with stopwords but without non-alpha words
############################################################################################################

# Load the corpus with data processor version 2, dropping non-alphabetical strings and
# keeping whole words (no subword splitting)
(x_train, x_test,
 y_train, y_test,
 y_orig_train, y_orig_test,
 vocab_processor) = load_text_data(params.data_dir,
                                   2,
                                   remove_non_alpha = True)

# Train the CNN, then score every round of test predictions against the original labels
test_preds = run_cnn(x_train, y_train, x_test, y_test, vocab_processor)
test_eval = eval_preds(test_preds, y_orig_test)

# Drop the references to this scenario's data before the next scenario runs
x_train = x_test = y_train = y_test = None
y_orig_train = y_orig_test = vocab_processor = None

Loading data...
max_chunk_length =  205
Vocabulary Size: 13764
Train/Dev split on data (x): 5059/562
Train/Dev split on labels (y): 5059/562
Writing to /home/vslchu/w266/project/code/runs/20170823_0310_UTC

cnn.out_dir =  /home/vslchu/w266/project/code/runs/20170823_0310_UTC
2017-08-23T03:10:47.272967: step 1, loss 7.2555, acc 0.04
current_step:  1
2017-08-23T03:10:47.340437: step 2, loss 4.41457, acc 0.24
current_step:  2
2017-08-23T03:10:47.405412: step 3, loss 4.04104, acc 0.26
current_step:  3
2017-08-23T03:10:47.479342: step 4, loss 3.18448, acc 0.26
current_step:  4
2017-08-23T03:10:47.548936: step 5, loss 3.05283, acc 0.32
current_step:  5
2017-08-23T03:10:47.616171: step 6, loss 2.64419, acc 0.24
current_step:  6
2017-08-23T03:10:47.683953: step 7, loss 1.98713, acc 0.24
current_step:  7
2017-08-23T03:10:47.753197: step 8, loss 1.6498, acc 0.38
current_step:  8
2017-08-23T03:10:47.818565: step 9, loss 1.66788, acc 0.5
current_step:  9
2017-08-23T03:10:47.886322: step 10, loss 2

In [9]:
############################################################################################################
# Word-level Data Processor v3 with stopwords but without non-alpha words
############################################################################################################

# Load the corpus with data processor version 3, dropping non-alphabetical strings and
# keeping whole words (no subword splitting)
(x_train, x_test,
 y_train, y_test,
 y_orig_train, y_orig_test,
 vocab_processor) = load_text_data(params.data_dir,
                                   3,
                                   remove_non_alpha = True)

# Train the CNN, then score every round of test predictions against the original labels
test_preds = run_cnn(x_train, y_train, x_test, y_test, vocab_processor)
test_eval = eval_preds(test_preds, y_orig_test)

# Drop the references to this scenario's data before the next scenario runs
x_train = x_test = y_train = y_test = None
y_orig_train = y_orig_test = vocab_processor = None

Loading data...
max_chunk_length =  149
Vocabulary Size: 13762
Train/Dev split on data (x): 5059/562
Train/Dev split on labels (y): 5059/562
Writing to /home/vslchu/w266/project/code/runs/20170823_0311_UTC

cnn.out_dir =  /home/vslchu/w266/project/code/runs/20170823_0311_UTC
2017-08-23T03:11:51.906305: step 1, loss 3.01969, acc 0.34
current_step:  1
2017-08-23T03:11:51.955529: step 2, loss 2.41527, acc 0.24
current_step:  2
2017-08-23T03:11:52.005605: step 3, loss 2.38905, acc 0.18
current_step:  3
2017-08-23T03:11:52.056179: step 4, loss 1.6626, acc 0.36
current_step:  4
2017-08-23T03:11:52.102722: step 5, loss 1.37136, acc 0.46
current_step:  5
2017-08-23T03:11:52.146711: step 6, loss 1.69469, acc 0.5
current_step:  6
2017-08-23T03:11:52.193618: step 7, loss 2.20259, acc 0.44
current_step:  7
2017-08-23T03:11:52.241457: step 8, loss 2.10866, acc 0.42
current_step:  8
2017-08-23T03:11:52.288928: step 9, loss 1.979, acc 0.46
current_step:  9
2017-08-23T03:11:52.335989: step 10, loss 1.