In [1]:
import itertools
import pandas as pd
import utils
from collections import defaultdict, OrderedDict
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt
from scipy.sparse import hstack, vstack
import tensorflow as tf
from cnn_model import TextCNN
import data_helpers
import os
import time
import datetime

In [2]:
# Report section headings; the parsing cell places these columns first, in this order.
ordered_names = [
    u'study',
    u'history',
    u'comparison',
    u'technique',
    u'findings',
    u'impression',
    u'signed by',
]

--------
# Read data

In [3]:
# Spreadsheet holding the raw reports, relative to the notebook's working directory.
filename = 'Data/upto1528.xlsx'
# Hand the path to pandas so it opens and closes the workbook itself; the previous
# open(filename, 'rb') handle was never closed.
df_raw = pd.read_excel(filename)

In [4]:
# Data is stored in df
# Parse the raw sheet with utils.Parser; the parsed frame is exposed as ps.df.
ps = utils.Parser()
ps.parse(df_raw)
df = ps.df

# Split every 'findings' entry into its text plus (column name, value) pairs.
# This stays best-effort: rows that utils.parse_findings cannot handle are left as they
# were, but the failing indices are now recorded instead of being silently discarded, and
# only Exception is caught so Ctrl-C still interrupts the loop.
unparsed_idx = []
for idx, row in df['findings'].items():
    try:
        text, velos = utils.parse_findings(row)
        df.at[idx, 'findings'] = text
        for n, v in velos:
            # Write each value onto the report it was parsed from. This used to be
            # hard-coded to row 0, so every report overwrote the same single row.
            df.at[idx, n] = v
    except Exception:
        unparsed_idx.append(idx)
if unparsed_idx:
    print('parse_findings failed on {} row(s); left unchanged'.format(len(unparsed_idx)))

# Column order: the known report sections first, then every remaining column
# (except the discarded ones) in sorted order.
discardField = ['Report Text']
excluded = set(ordered_names + discardField)
foo = sorted(col for col in df.columns if col not in excluded)
CORE_COL = ordered_names + foo

# Prepend the columns taken straight from the raw sheet ('Past' is the target predicted
# in the Data Prep section) to the parsed, re-ordered columns.
df = pd.concat([df_raw[['Past', 'Present', 'Left', 'Right', 'Count']], df[CORE_COL]], axis=1)
# turn null to []
df = utils.null2empty(df, ['history', 'impression', 'comparison'])
print(df.shape)

(1527, 52)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


---
# Data Prep

In [5]:
# To predict PAST
to_predict = ['Past']
# Text fields fed to the models; the commented entries are optional extra fields.
fields = [
#     'history', 
    'findings',
#     'comparison', 
    'impression'
]

# Keep only rows whose 'Past' label is present and non-zero.
# The comparison has to be parenthesised: `&` binds tighter than `!=`, so the previous
# `~isnull & df['Past'] != 0` evaluated `(~isnull & df['Past']) != 0` and never applied
# the intended filter.
has_label = df['Past'].notnull() & (df['Past'] != 0)
# Shuffle reproducibly (fixed random_state) before the positional split below.
df_filtered = df[has_label].sample(frac=1, random_state=1)
df_filtered = df_filtered[to_predict + fields]

# The first N_TRAIN shuffled rows are used for training; the remainder is held out.
N_TRAIN = 1220
df_train = df_filtered.iloc[:N_TRAIN]
y_train = np.array(df_train['Past'].astype(int))

df_test = df_filtered.iloc[N_TRAIN:]
y_test = np.array(df_test['Past'].astype(int))

print(df_train.shape)
print(df_test.shape)

(1220, 3)
(307, 3)


---
# Traditional ML

### TFIDF features

In [None]:
# Settings forwarded to utils.Df2TFIDF.fit.
ngram = 5
min_count = 5

# Fit on the training frame only, then transform both splits with the fitted object.
obj = utils.Df2TFIDF()
obj.fit(df_train, ngram=ngram, min_count=min_count)
output_train = obj.transform(df_train)
output_test = obj.transform(df_test)

# Each transform result maps to per-field outputs; take the 'bow_tfidf' sparse matrix of
# every field and stack them column-wise into a single feature matrix per split.
train_blocks = [field_out['bow_tfidf'] for field_out in output_train.values()]
test_blocks = [field_out['bow_tfidf'] for field_out in output_test.values()]
x_train = hstack(train_blocks)
x_test = hstack(test_blocks)

print(x_train.shape)
print(x_test.shape)

### Run classifier

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC

# Linear SVM on the stacked TF-IDF features. Alternatives kept for quick swapping:
#   clf = SVC()
#   clf = LogisticRegression(C=3)
clf = LinearSVC(loss='squared_hinge', C=1)
clf.fit(x_train, y_train)

# Drop the singleton dimensions from the learned weights so they can be indexed
# and plotted as a flat vector in the cells below.
weights = clf.coef_
coef = weights.squeeze()

In [None]:
# Predict on both splits with the fitted classifier.
y_train_pred = clf.predict(x_train)
y_test_pred = clf.predict(x_test)

# One report per split, placed side by side and then flipped so each split is a row.
report_train = utils.my_classification_report(y_train, y_train_pred)
report_test = utils.my_classification_report(y_test, y_test_pred)
results = pd.concat([report_train, report_test], axis=1).T
results.index = ['training', 'testing']
display(results)

In [None]:
# features with the big weights
idx = np.absolute(coef).argsort()[::-1]
plt.plot(coef[idx])
plt.show()
idx2word_agg = pd.Series(obj.idx2word_concat)
display(idx2word_agg[idx[:20]])

In [None]:
# output failed cases.
pd.set_option('display.max_colwidth', -1)
false_positive = (y_test_pred - y_test) > 0
display(df_test.loc[false_positive,:].applymap(utils.list2str))

---
---
# Neural Nets

### Data Prep

In [14]:
## Training data
x_train_text = utils.df2texts(df_train, df_train.columns[1:])
word2idx, idx2word = utils.ngram_vocab_processor(x_train_text, ngram=1, min_count=2)
x_train = np.array(utils.encode_texts(x_train_text, word2idx, maxlen=200))

y_train = df_train['Past'].values[:, None]
y_train = np.concatenate([(y_train + 1) / 2, (1 - y_train) / 2], axis=1).astype(np.int)

x_dev_text = utils.df2texts(df_test, df_train.columns[1:])
x_dev = np.array(utils.encode_texts(x_dev_text, word2idx, maxlen=x_train.shape[1]))

y_dev = df_test['Past'].values[:, None]
y_dev = np.concatenate([(y_dev + 1) / 2, (1 - y_dev) / 2], axis=1).astype(np.int)

### Run CNN

In [15]:
# Data loading params
tf.flags.DEFINE_float("dev_sample_percentage", .1, "Percentage of the training data to use for validation")
# tf.flags.DEFINE_string("positive_data_file", "./data/rt-polaritydata/rt-polarity.pos", "Data source for the positive data.")
# tf.flags.DEFINE_string("negative_data_file", "./data/rt-polaritydata/rt-polarity.neg", "Data source for the negative data.")

# Model Hyperparameters
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda (default: 0.0)")

# Training parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 200, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

['-f',
 '/run/user/1000/jupyter/kernel-1f172ae7-ce5b-4024-88c3-8297b644efe2.json']

In [16]:
# Build the TextCNN graph, train it on (x_train, y_train) with Adam, evaluate on
# (x_dev, y_dev) every FLAGS.evaluate_every steps and checkpoint every
# FLAGS.checkpoint_every steps. Summaries and checkpoints go under ./runs/<timestamp>/.
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    # The session is not closed by this cell, so `sess` and `cnn` remain usable from
    # later cells; call sess.close() when finished with them.
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # NOTE(review): vocab_size assumes every id produced by utils.encode_texts is
        # below len(word2idx) -- confirm how padding / unknown tokens are numbered.
        cnn = TextCNN(
            sequence_length=x_train.shape[1],
            num_classes=y_train.shape[1],
            vocab_size=len(word2idx),
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
            l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", cnn.loss)
        acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

        # Write vocabulary
        # vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(x_batch, y_batch):
            """
            Run one optimisation step on a batch.

            Feeds the batch with dropout set to FLAGS.dropout_keep_prob, applies the
            gradient update, prints the step's loss and accuracy, and records the
            training summaries for that step.
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluate the model on a dev set without updating it.

            Dropout is disabled (keep probability 1.0) and train_op is not run. Prints
            the loss and accuracy, and records the dev summaries when `writer` is given.
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: 1.0
            }
            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = data_helpers.batch_iter(
            list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
        try:
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
        finally:
            # Close both event writers so pending summaries reach disk even when
            # training is interrupted; they were previously never closed.
            train_summary_writer.close()
            dev_summary_writer.close()

INFO:tensorflow:Summary name embedding/W:0/grad/hist is illegal; using embedding/W_0/grad/hist instead.
INFO:tensorflow:Summary name embedding/W:0/grad/sparsity is illegal; using embedding/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-3/W:0/grad/hist is illegal; using conv-maxpool-3/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-3/W:0/grad/sparsity is illegal; using conv-maxpool-3/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-3/b:0/grad/hist is illegal; using conv-maxpool-3/b_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-3/b:0/grad/sparsity is illegal; using conv-maxpool-3/b_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-4/W:0/grad/hist is illegal; using conv-maxpool-4/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-4/W:0/grad/sparsity is illegal; using conv-maxpool-4/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-4/b:0/grad/hist is illegal; using 

Saved model checkpoint to /home/vzhao/Documents/Projects/Med-NLP/runs/1526175836/checkpoints/model-100

2018-05-12T21:44:00.313935: step 101, loss 0.413966, acc 0.859375
2018-05-12T21:44:00.338014: step 102, loss 0.45764, acc 0.828125
2018-05-12T21:44:00.368755: step 103, loss 1.23241, acc 0.734375
2018-05-12T21:44:00.398825: step 104, loss 0.725405, acc 0.8125
2018-05-12T21:44:00.420794: step 105, loss 0.782949, acc 0.734375
2018-05-12T21:44:00.445136: step 106, loss 0.793675, acc 0.75
2018-05-12T21:44:00.470234: step 107, loss 0.748904, acc 0.828125
2018-05-12T21:44:00.496400: step 108, loss 0.749364, acc 0.828125
2018-05-12T21:44:00.517343: step 109, loss 0.54769, acc 0.859375
2018-05-12T21:44:00.540517: step 110, loss 0.764875, acc 0.828125
2018-05-12T21:44:00.565067: step 111, loss 0.449173, acc 0.890625
2018-05-12T21:44:00.587564: step 112, loss 0.612851, acc 0.859375
2018-05-12T21:44:00.613487: step 113, loss 1.3654, acc 0.734375
2018-05-12T21:44:00.636520: step 114, loss 0.9622

2018-05-12T21:44:03.488269: step 227, loss 0.606824, acc 0.875
2018-05-12T21:44:03.513530: step 228, loss 0.460801, acc 0.859375
2018-05-12T21:44:03.534931: step 229, loss 0.319363, acc 0.890625
2018-05-12T21:44:03.558811: step 230, loss 0.332548, acc 0.890625
2018-05-12T21:44:03.582129: step 231, loss 0.60181, acc 0.828125
2018-05-12T21:44:03.610420: step 232, loss 0.586118, acc 0.8125
2018-05-12T21:44:03.634873: step 233, loss 0.574079, acc 0.828125
2018-05-12T21:44:03.661360: step 234, loss 0.340829, acc 0.890625
2018-05-12T21:44:03.686961: step 235, loss 0.24124, acc 0.90625
2018-05-12T21:44:03.710407: step 236, loss 0.325364, acc 0.90625
2018-05-12T21:44:03.738843: step 237, loss 0.322953, acc 0.875
2018-05-12T21:44:03.765637: step 238, loss 0.280945, acc 0.875
2018-05-12T21:44:03.789784: step 239, loss 0.395096, acc 0.828125
2018-05-12T21:44:03.803809: step 240, loss 0.0454161, acc 1
2018-05-12T21:44:03.835768: step 241, loss 0.385476, acc 0.875
2018-05-12T21:44:03.863793: step 2

2018-05-12T21:44:06.658443: step 355, loss 0.279127, acc 0.921875
2018-05-12T21:44:06.682989: step 356, loss 0.247541, acc 0.890625
2018-05-12T21:44:06.706514: step 357, loss 0.281829, acc 0.90625
2018-05-12T21:44:06.730094: step 358, loss 0.284573, acc 0.875
2018-05-12T21:44:06.755446: step 359, loss 0.506629, acc 0.859375
2018-05-12T21:44:06.764876: step 360, loss 0.00191314, acc 1
2018-05-12T21:44:06.788333: step 361, loss 0.410053, acc 0.875
2018-05-12T21:44:06.811051: step 362, loss 0.177228, acc 0.9375
2018-05-12T21:44:06.835335: step 363, loss 0.294089, acc 0.921875
2018-05-12T21:44:06.860240: step 364, loss 0.464902, acc 0.859375
2018-05-12T21:44:06.882962: step 365, loss 0.146344, acc 0.921875
2018-05-12T21:44:06.904413: step 366, loss 0.118707, acc 0.953125
2018-05-12T21:44:06.929204: step 367, loss 0.180679, acc 0.953125
2018-05-12T21:44:06.954441: step 368, loss 0.479068, acc 0.921875
2018-05-12T21:44:06.980608: step 369, loss 0.139969, acc 0.953125
2018-05-12T21:44:07.0052

2018-05-12T21:44:09.883103: step 486, loss 0.252708, acc 0.90625
2018-05-12T21:44:09.906878: step 487, loss 0.0754436, acc 0.984375
2018-05-12T21:44:09.935201: step 488, loss 0.246364, acc 0.90625
2018-05-12T21:44:09.961882: step 489, loss 0.0980451, acc 0.96875
2018-05-12T21:44:09.991868: step 490, loss 0.0718986, acc 0.984375
2018-05-12T21:44:10.017951: step 491, loss 0.106736, acc 0.953125
2018-05-12T21:44:10.041802: step 492, loss 0.247999, acc 0.953125
2018-05-12T21:44:10.067979: step 493, loss 0.111875, acc 0.953125
2018-05-12T21:44:10.093015: step 494, loss 0.206649, acc 0.953125
2018-05-12T21:44:10.115255: step 495, loss 0.239024, acc 0.90625
2018-05-12T21:44:10.146822: step 496, loss 0.16768, acc 0.9375
2018-05-12T21:44:10.172106: step 497, loss 0.13872, acc 0.984375
2018-05-12T21:44:10.202809: step 498, loss 0.264166, acc 0.875
2018-05-12T21:44:10.224062: step 499, loss 0.0938736, acc 0.953125
2018-05-12T21:44:10.238766: step 500, loss 0.0116485, acc 1

Evaluation:
2018-05-12

2018-05-12T21:44:13.060804: step 612, loss 0.191959, acc 0.9375
2018-05-12T21:44:13.087252: step 613, loss 0.281061, acc 0.890625
2018-05-12T21:44:13.112386: step 614, loss 0.135136, acc 0.9375
2018-05-12T21:44:13.135749: step 615, loss 0.123385, acc 0.96875
2018-05-12T21:44:13.160157: step 616, loss 0.270836, acc 0.921875
2018-05-12T21:44:13.186411: step 617, loss 0.113512, acc 0.953125
2018-05-12T21:44:13.210710: step 618, loss 0.1563, acc 0.90625
2018-05-12T21:44:13.231554: step 619, loss 0.143381, acc 0.953125
2018-05-12T21:44:13.241434: step 620, loss 0.00361325, acc 1
2018-05-12T21:44:13.265175: step 621, loss 0.203652, acc 0.9375
2018-05-12T21:44:13.288824: step 622, loss 0.125896, acc 0.953125
2018-05-12T21:44:13.314360: step 623, loss 0.223312, acc 0.9375
2018-05-12T21:44:13.337122: step 624, loss 0.290474, acc 0.921875
2018-05-12T21:44:13.361748: step 625, loss 0.0467169, acc 1
2018-05-12T21:44:13.385209: step 626, loss 0.128363, acc 0.96875
2018-05-12T21:44:13.408559: step 6

2018-05-12T21:44:16.132549: step 736, loss 0.086183, acc 0.96875
2018-05-12T21:44:16.154673: step 737, loss 0.0246753, acc 1
2018-05-12T21:44:16.180141: step 738, loss 0.0953947, acc 0.953125
2018-05-12T21:44:16.203629: step 739, loss 0.244775, acc 0.921875
2018-05-12T21:44:16.217192: step 740, loss 0.00932045, acc 1
2018-05-12T21:44:16.239343: step 741, loss 0.17888, acc 0.921875
2018-05-12T21:44:16.261576: step 742, loss 0.123233, acc 0.953125
2018-05-12T21:44:16.287186: step 743, loss 0.0438823, acc 0.984375
2018-05-12T21:44:16.309029: step 744, loss 0.0603734, acc 0.96875
2018-05-12T21:44:16.337811: step 745, loss 0.0373688, acc 0.984375
2018-05-12T21:44:16.366496: step 746, loss 0.110124, acc 0.9375
2018-05-12T21:44:16.388810: step 747, loss 0.0567277, acc 0.953125
2018-05-12T21:44:16.414152: step 748, loss 0.111721, acc 0.96875
2018-05-12T21:44:16.436063: step 749, loss 0.0618769, acc 0.96875
2018-05-12T21:44:16.461390: step 750, loss 0.0621955, acc 0.984375
2018-05-12T21:44:16.4

2018-05-12T21:44:19.163694: step 860, loss 0.0417022, acc 1
2018-05-12T21:44:19.186439: step 861, loss 0.033371, acc 0.984375
2018-05-12T21:44:19.210030: step 862, loss 0.115528, acc 0.96875
2018-05-12T21:44:19.234719: step 863, loss 0.0288731, acc 0.984375
2018-05-12T21:44:19.258548: step 864, loss 0.0759234, acc 0.96875
2018-05-12T21:44:19.286090: step 865, loss 0.100842, acc 0.9375
2018-05-12T21:44:19.310770: step 866, loss 0.130127, acc 0.9375
2018-05-12T21:44:19.336010: step 867, loss 0.23139, acc 0.953125
2018-05-12T21:44:19.362672: step 868, loss 0.127553, acc 0.921875
2018-05-12T21:44:19.390328: step 869, loss 0.0420997, acc 0.984375
2018-05-12T21:44:19.417562: step 870, loss 0.0233716, acc 1
2018-05-12T21:44:19.440082: step 871, loss 0.0743835, acc 0.96875
2018-05-12T21:44:19.464161: step 872, loss 0.133664, acc 0.984375
2018-05-12T21:44:19.486095: step 873, loss 0.0516089, acc 0.984375
2018-05-12T21:44:19.508455: step 874, loss 0.0849497, acc 0.96875
2018-05-12T21:44:19.53294

2018-05-12T21:44:22.274243: step 987, loss 0.0882267, acc 0.96875
2018-05-12T21:44:22.300824: step 988, loss 0.0563504, acc 0.96875
2018-05-12T21:44:22.324480: step 989, loss 0.0942078, acc 0.953125
2018-05-12T21:44:22.348696: step 990, loss 0.0333176, acc 1
2018-05-12T21:44:22.371249: step 991, loss 0.111819, acc 0.953125
2018-05-12T21:44:22.397015: step 992, loss 0.118663, acc 0.953125
2018-05-12T21:44:22.417950: step 993, loss 0.10108, acc 0.9375
2018-05-12T21:44:22.443443: step 994, loss 0.0503656, acc 0.96875
2018-05-12T21:44:22.468067: step 995, loss 0.198016, acc 0.953125
2018-05-12T21:44:22.492272: step 996, loss 0.166933, acc 0.96875
2018-05-12T21:44:22.515681: step 997, loss 0.0450896, acc 0.984375
2018-05-12T21:44:22.541264: step 998, loss 0.0170118, acc 1
2018-05-12T21:44:22.567280: step 999, loss 0.169352, acc 0.953125
2018-05-12T21:44:22.578523: step 1000, loss 0.00651833, acc 1

Evaluation:
2018-05-12T21:44:22.596985: step 1000, loss 0.360502, acc 0.86645

Saved model ch

2018-05-12T21:44:25.294441: step 1110, loss 0.0306782, acc 0.984375
2018-05-12T21:44:25.320777: step 1111, loss 0.014453, acc 1
2018-05-12T21:44:25.343440: step 1112, loss 0.137178, acc 0.96875
2018-05-12T21:44:25.367776: step 1113, loss 0.0588965, acc 0.96875
2018-05-12T21:44:25.390748: step 1114, loss 0.0368084, acc 0.984375
2018-05-12T21:44:25.414988: step 1115, loss 0.0971906, acc 0.96875
2018-05-12T21:44:25.437936: step 1116, loss 0.0797758, acc 0.9375
2018-05-12T21:44:25.461752: step 1117, loss 0.0973202, acc 0.9375
2018-05-12T21:44:25.484580: step 1118, loss 0.0382777, acc 0.96875
2018-05-12T21:44:25.511466: step 1119, loss 0.121987, acc 0.953125
2018-05-12T21:44:25.520122: step 1120, loss 0.0182641, acc 1
2018-05-12T21:44:25.540680: step 1121, loss 0.244371, acc 0.953125
2018-05-12T21:44:25.565850: step 1122, loss 0.080152, acc 0.984375
2018-05-12T21:44:25.587220: step 1123, loss 0.0749025, acc 0.984375
2018-05-12T21:44:25.610376: step 1124, loss 0.200258, acc 0.9375
2018-05-12

2018-05-12T21:44:28.361703: step 1236, loss 0.0938409, acc 0.984375
2018-05-12T21:44:28.386336: step 1237, loss 0.132832, acc 0.953125
2018-05-12T21:44:28.408595: step 1238, loss 0.0946034, acc 0.953125
2018-05-12T21:44:28.437800: step 1239, loss 0.0998084, acc 0.96875
2018-05-12T21:44:28.450320: step 1240, loss 0.017802, acc 1
2018-05-12T21:44:28.473356: step 1241, loss 0.0732311, acc 0.984375
2018-05-12T21:44:28.496003: step 1242, loss 0.0256538, acc 0.984375
2018-05-12T21:44:28.519308: step 1243, loss 0.0683302, acc 0.953125
2018-05-12T21:44:28.547434: step 1244, loss 0.0873276, acc 0.9375
2018-05-12T21:44:28.569334: step 1245, loss 0.0325074, acc 0.984375
2018-05-12T21:44:28.592699: step 1246, loss 0.0476805, acc 0.984375
2018-05-12T21:44:28.617670: step 1247, loss 0.0576531, acc 0.984375
2018-05-12T21:44:28.643854: step 1248, loss 0.068797, acc 0.984375
2018-05-12T21:44:28.672917: step 1249, loss 0.0196162, acc 0.984375
2018-05-12T21:44:28.702743: step 1250, loss 0.0312698, acc 1


2018-05-12T21:44:31.386909: step 1359, loss 0.0461334, acc 0.984375
2018-05-12T21:44:31.399344: step 1360, loss 0.00126847, acc 1
2018-05-12T21:44:31.423849: step 1361, loss 0.0132233, acc 1
2018-05-12T21:44:31.450442: step 1362, loss 0.00644936, acc 1
2018-05-12T21:44:31.474456: step 1363, loss 0.0210975, acc 0.984375
2018-05-12T21:44:31.498721: step 1364, loss 0.0339791, acc 0.984375
2018-05-12T21:44:31.523518: step 1365, loss 0.0528494, acc 0.984375
2018-05-12T21:44:31.548546: step 1366, loss 0.0291235, acc 0.984375
2018-05-12T21:44:31.572450: step 1367, loss 0.0157405, acc 1
2018-05-12T21:44:31.615328: step 1368, loss 0.104801, acc 0.953125
2018-05-12T21:44:31.637360: step 1369, loss 0.0142313, acc 1
2018-05-12T21:44:31.666784: step 1370, loss 0.0105714, acc 1
2018-05-12T21:44:31.693656: step 1371, loss 0.0669161, acc 0.984375
2018-05-12T21:44:31.715652: step 1372, loss 0.0599628, acc 0.984375
2018-05-12T21:44:31.736412: step 1373, loss 0.112405, acc 0.96875
2018-05-12T21:44:31.758

2018-05-12T21:44:34.385606: step 1483, loss 0.0480307, acc 0.984375
2018-05-12T21:44:34.423166: step 1484, loss 0.0436038, acc 0.984375
2018-05-12T21:44:34.447581: step 1485, loss 0.149525, acc 0.96875
2018-05-12T21:44:34.468525: step 1486, loss 0.0205313, acc 1
2018-05-12T21:44:34.490446: step 1487, loss 0.0748529, acc 0.984375
2018-05-12T21:44:34.514865: step 1488, loss 0.0636753, acc 0.953125
2018-05-12T21:44:34.538422: step 1489, loss 0.0308207, acc 0.984375
2018-05-12T21:44:34.563084: step 1490, loss 0.01441, acc 1
2018-05-12T21:44:34.585642: step 1491, loss 0.125997, acc 0.953125
2018-05-12T21:44:34.616193: step 1492, loss 0.0424857, acc 0.984375
2018-05-12T21:44:34.639408: step 1493, loss 0.0270429, acc 1
2018-05-12T21:44:34.668409: step 1494, loss 0.0163652, acc 1
2018-05-12T21:44:34.693477: step 1495, loss 0.0064079, acc 1
2018-05-12T21:44:34.716446: step 1496, loss 0.0110624, acc 1
2018-05-12T21:44:34.738833: step 1497, loss 0.0776156, acc 0.984375
2018-05-12T21:44:34.764959:

2018-05-12T21:44:37.596939: step 1611, loss 0.103351, acc 0.96875
2018-05-12T21:44:37.619094: step 1612, loss 0.00582531, acc 1
2018-05-12T21:44:37.642582: step 1613, loss 0.035935, acc 0.984375
2018-05-12T21:44:37.669486: step 1614, loss 0.0155442, acc 1
2018-05-12T21:44:37.692481: step 1615, loss 0.14773, acc 0.953125
2018-05-12T21:44:37.715595: step 1616, loss 0.00590388, acc 1
2018-05-12T21:44:37.737972: step 1617, loss 0.086204, acc 0.96875
2018-05-12T21:44:37.771844: step 1618, loss 0.0619979, acc 0.96875
2018-05-12T21:44:37.796320: step 1619, loss 0.0289792, acc 0.984375
2018-05-12T21:44:37.809171: step 1620, loss 0.00293607, acc 1
2018-05-12T21:44:37.834529: step 1621, loss 0.0816576, acc 0.96875
2018-05-12T21:44:37.858034: step 1622, loss 0.0538722, acc 0.984375
2018-05-12T21:44:37.885655: step 1623, loss 0.0335691, acc 0.984375
2018-05-12T21:44:37.908273: step 1624, loss 0.0152718, acc 1
2018-05-12T21:44:37.934506: step 1625, loss 0.0217328, acc 1
2018-05-12T21:44:37.958912: 

2018-05-12T21:44:40.644283: step 1735, loss 0.00920112, acc 1
2018-05-12T21:44:40.669607: step 1736, loss 0.0665928, acc 0.984375
2018-05-12T21:44:40.693352: step 1737, loss 0.0233149, acc 0.984375
2018-05-12T21:44:40.718320: step 1738, loss 0.0281079, acc 0.984375
2018-05-12T21:44:40.741510: step 1739, loss 0.0104764, acc 1
2018-05-12T21:44:40.751446: step 1740, loss 2.78644e-05, acc 1
2018-05-12T21:44:40.773948: step 1741, loss 0.101262, acc 0.9375
2018-05-12T21:44:40.804089: step 1742, loss 0.0143673, acc 1
2018-05-12T21:44:40.829393: step 1743, loss 0.00736646, acc 1
2018-05-12T21:44:40.859035: step 1744, loss 0.0735707, acc 0.96875
2018-05-12T21:44:40.884869: step 1745, loss 0.0742248, acc 0.984375
2018-05-12T21:44:40.911610: step 1746, loss 0.00721149, acc 1
2018-05-12T21:44:40.936799: step 1747, loss 0.00495531, acc 1
2018-05-12T21:44:40.961468: step 1748, loss 0.0132907, acc 1
2018-05-12T21:44:40.984926: step 1749, loss 0.00917984, acc 1
2018-05-12T21:44:41.008812: step 1750, l

2018-05-12T21:44:43.695850: step 1861, loss 0.0259168, acc 0.984375
2018-05-12T21:44:43.718267: step 1862, loss 0.0216232, acc 0.984375
2018-05-12T21:44:43.742781: step 1863, loss 0.0105755, acc 1
2018-05-12T21:44:43.767947: step 1864, loss 0.00533403, acc 1
2018-05-12T21:44:43.793558: step 1865, loss 0.0198059, acc 1
2018-05-12T21:44:43.816310: step 1866, loss 0.0177915, acc 0.984375
2018-05-12T21:44:43.841923: step 1867, loss 0.00586919, acc 1
2018-05-12T21:44:43.868654: step 1868, loss 0.0107484, acc 1
2018-05-12T21:44:43.891519: step 1869, loss 0.00726624, acc 1
2018-05-12T21:44:43.916783: step 1870, loss 0.0234571, acc 1
2018-05-12T21:44:43.939791: step 1871, loss 0.00655464, acc 1
2018-05-12T21:44:43.965086: step 1872, loss 0.0158988, acc 1
2018-05-12T21:44:43.989265: step 1873, loss 0.0157137, acc 1
2018-05-12T21:44:44.013587: step 1874, loss 0.00199002, acc 1
2018-05-12T21:44:44.037510: step 1875, loss 0.0777177, acc 0.984375
2018-05-12T21:44:44.060850: step 1876, loss 0.009625

2018-05-12T21:44:46.728886: step 1986, loss 0.0248993, acc 0.984375
2018-05-12T21:44:46.752701: step 1987, loss 0.0242305, acc 0.984375
2018-05-12T21:44:46.775363: step 1988, loss 0.0980668, acc 0.984375
2018-05-12T21:44:46.802025: step 1989, loss 0.13404, acc 0.96875
2018-05-12T21:44:46.825244: step 1990, loss 0.0252152, acc 0.984375
2018-05-12T21:44:46.849347: step 1991, loss 0.0158152, acc 1
2018-05-12T21:44:46.873823: step 1992, loss 0.0361113, acc 0.984375
2018-05-12T21:44:46.899096: step 1993, loss 0.0512495, acc 0.96875
2018-05-12T21:44:46.924556: step 1994, loss 0.0232232, acc 1
2018-05-12T21:44:46.948604: step 1995, loss 0.0455502, acc 0.984375
2018-05-12T21:44:46.970544: step 1996, loss 0.0836146, acc 0.96875
2018-05-12T21:44:46.997121: step 1997, loss 0.0243148, acc 0.984375
2018-05-12T21:44:47.019056: step 1998, loss 0.0133073, acc 1
2018-05-12T21:44:47.042660: step 1999, loss 0.0114552, acc 1
2018-05-12T21:44:47.052153: step 2000, loss 0.00722685, acc 1

Evaluation:
2018-0

2018-05-12T21:44:49.915958: step 2115, loss 0.00612775, acc 1
2018-05-12T21:44:49.937888: step 2116, loss 0.0531756, acc 0.984375
2018-05-12T21:44:49.962838: step 2117, loss 0.0265743, acc 0.984375
2018-05-12T21:44:49.989061: step 2118, loss 0.0254492, acc 0.984375
2018-05-12T21:44:50.013212: step 2119, loss 0.00390289, acc 1
2018-05-12T21:44:50.024633: step 2120, loss 2.59279e-06, acc 1
2018-05-12T21:44:50.047966: step 2121, loss 0.108746, acc 0.984375
2018-05-12T21:44:50.070468: step 2122, loss 0.00230882, acc 1
2018-05-12T21:44:50.096054: step 2123, loss 0.00337667, acc 1
2018-05-12T21:44:50.119410: step 2124, loss 0.0540632, acc 0.984375
2018-05-12T21:44:50.141592: step 2125, loss 0.00350124, acc 1
2018-05-12T21:44:50.165214: step 2126, loss 0.00419722, acc 1
2018-05-12T21:44:50.187549: step 2127, loss 0.00405724, acc 1
2018-05-12T21:44:50.212892: step 2128, loss 0.00928848, acc 1
2018-05-12T21:44:50.235933: step 2129, loss 0.10212, acc 0.96875
2018-05-12T21:44:50.259536: step 2130

2018-05-12T21:44:53.120916: step 2246, loss 0.0225018, acc 0.984375
2018-05-12T21:44:53.146451: step 2247, loss 0.010811, acc 1
2018-05-12T21:44:53.169263: step 2248, loss 0.0743312, acc 0.953125
2018-05-12T21:44:53.191220: step 2249, loss 0.11726, acc 0.984375
2018-05-12T21:44:53.215903: step 2250, loss 0.0919064, acc 0.984375
2018-05-12T21:44:53.239530: step 2251, loss 0.0267472, acc 0.984375
2018-05-12T21:44:53.269435: step 2252, loss 0.0613927, acc 0.984375
2018-05-12T21:44:53.300767: step 2253, loss 0.0178044, acc 1
2018-05-12T21:44:53.323516: step 2254, loss 0.0489872, acc 0.984375
2018-05-12T21:44:53.349859: step 2255, loss 0.0285019, acc 1
2018-05-12T21:44:53.372814: step 2256, loss 0.034885, acc 0.984375
2018-05-12T21:44:53.402443: step 2257, loss 0.0246852, acc 0.984375
2018-05-12T21:44:53.430278: step 2258, loss 0.0220877, acc 1
2018-05-12T21:44:53.454345: step 2259, loss 0.00900917, acc 1
2018-05-12T21:44:53.467990: step 2260, loss 0.000178218, acc 1
2018-05-12T21:44:53.499

2018-05-12T21:44:56.382418: step 2375, loss 0.00197864, acc 1
2018-05-12T21:44:56.405288: step 2376, loss 0.0782886, acc 0.96875
2018-05-12T21:44:56.428582: step 2377, loss 0.0451678, acc 0.984375
2018-05-12T21:44:56.452203: step 2378, loss 0.00871447, acc 1
2018-05-12T21:44:56.476022: step 2379, loss 0.0235607, acc 0.984375
2018-05-12T21:44:56.486002: step 2380, loss 0.00147416, acc 1
2018-05-12T21:44:56.507696: step 2381, loss 0.0175764, acc 1
2018-05-12T21:44:56.532843: step 2382, loss 0.01072, acc 1
2018-05-12T21:44:56.557867: step 2383, loss 0.0479463, acc 0.984375
2018-05-12T21:44:56.583437: step 2384, loss 0.0202594, acc 1
2018-05-12T21:44:56.607309: step 2385, loss 0.00939191, acc 1
2018-05-12T21:44:56.634739: step 2386, loss 0.00865544, acc 1
2018-05-12T21:44:56.659726: step 2387, loss 0.0656041, acc 0.984375
2018-05-12T21:44:56.683834: step 2388, loss 0.011477, acc 1
2018-05-12T21:44:56.707769: step 2389, loss 0.0600965, acc 0.96875
2018-05-12T21:44:56.730597: step 2390, loss

Saved model checkpoint to /home/vzhao/Documents/Projects/Med-NLP/runs/1526175836/checkpoints/model-2500

2018-05-12T21:44:59.476371: step 2501, loss 0.0261992, acc 0.96875
2018-05-12T21:44:59.501427: step 2502, loss 0.00699005, acc 1
2018-05-12T21:44:59.526763: step 2503, loss 0.00453207, acc 1
2018-05-12T21:44:59.549879: step 2504, loss 0.00616743, acc 1
2018-05-12T21:44:59.576550: step 2505, loss 0.0103149, acc 1
2018-05-12T21:44:59.598720: step 2506, loss 0.0052263, acc 1
2018-05-12T21:44:59.623448: step 2507, loss 0.0104683, acc 1
2018-05-12T21:44:59.648003: step 2508, loss 0.0228065, acc 0.984375
2018-05-12T21:44:59.677821: step 2509, loss 0.0105525, acc 1
2018-05-12T21:44:59.700894: step 2510, loss 0.00512335, acc 1
2018-05-12T21:44:59.725410: step 2511, loss 0.0949972, acc 0.96875
2018-05-12T21:44:59.751632: step 2512, loss 0.0256141, acc 0.984375
2018-05-12T21:44:59.785881: step 2513, loss 0.0110239, acc 1
2018-05-12T21:44:59.810455: step 2514, loss 0.0579792, acc 0.96875
2018-

2018-05-12T21:45:02.705910: step 2629, loss 0.00693777, acc 1
2018-05-12T21:45:02.729164: step 2630, loss 0.0199335, acc 0.984375
2018-05-12T21:45:02.754596: step 2631, loss 0.0110308, acc 1
2018-05-12T21:45:02.778278: step 2632, loss 0.00286606, acc 1
2018-05-12T21:45:02.802276: step 2633, loss 0.107995, acc 0.984375
2018-05-12T21:45:02.825102: step 2634, loss 0.0125754, acc 1
2018-05-12T21:45:02.853762: step 2635, loss 0.0974245, acc 0.984375
2018-05-12T21:45:02.879427: step 2636, loss 0.153177, acc 0.984375
2018-05-12T21:45:02.902966: step 2637, loss 0.0030249, acc 1
2018-05-12T21:45:02.927879: step 2638, loss 0.0357583, acc 0.984375
2018-05-12T21:45:02.949307: step 2639, loss 0.00683629, acc 1
2018-05-12T21:45:02.960393: step 2640, loss 0.000361531, acc 1
2018-05-12T21:45:02.986428: step 2641, loss 0.00210378, acc 1
2018-05-12T21:45:03.008095: step 2642, loss 0.00482874, acc 1
2018-05-12T21:45:03.032952: step 2643, loss 0.0653004, acc 0.96875
2018-05-12T21:45:03.063851: step 2644, 

2018-05-12T21:45:05.780059: step 2755, loss 0.043618, acc 0.984375
2018-05-12T21:45:05.802161: step 2756, loss 0.00573272, acc 1
2018-05-12T21:45:05.824739: step 2757, loss 0.00421892, acc 1
2018-05-12T21:45:05.848566: step 2758, loss 0.0107869, acc 1
2018-05-12T21:45:05.871854: step 2759, loss 0.0134799, acc 1
2018-05-12T21:45:05.883702: step 2760, loss 3.40619e-05, acc 1
2018-05-12T21:45:05.907888: step 2761, loss 0.0080287, acc 1
2018-05-12T21:45:05.934301: step 2762, loss 0.00975431, acc 1
2018-05-12T21:45:05.959008: step 2763, loss 0.0104002, acc 1
2018-05-12T21:45:05.985751: step 2764, loss 0.0066229, acc 1
2018-05-12T21:45:06.009691: step 2765, loss 0.00980922, acc 1
2018-05-12T21:45:06.034260: step 2766, loss 0.147156, acc 0.96875
2018-05-12T21:45:06.058792: step 2767, loss 0.0508318, acc 0.984375
2018-05-12T21:45:06.082556: step 2768, loss 0.00682656, acc 1
2018-05-12T21:45:06.105920: step 2769, loss 0.0732484, acc 0.984375
2018-05-12T21:45:06.133035: step 2770, loss 0.0076504

2018-05-12T21:45:08.971498: step 2885, loss 0.0156224, acc 1
2018-05-12T21:45:08.994232: step 2886, loss 0.081824, acc 0.984375
2018-05-12T21:45:09.018317: step 2887, loss 0.0080823, acc 1
2018-05-12T21:45:09.039709: step 2888, loss 0.00730962, acc 1
2018-05-12T21:45:09.066154: step 2889, loss 0.0094255, acc 1
2018-05-12T21:45:09.089786: step 2890, loss 0.00295648, acc 1
2018-05-12T21:45:09.113122: step 2891, loss 0.0755144, acc 0.984375
2018-05-12T21:45:09.135448: step 2892, loss 0.0166742, acc 1
2018-05-12T21:45:09.158546: step 2893, loss 0.00517906, acc 1
2018-05-12T21:45:09.187536: step 2894, loss 0.0590228, acc 0.984375
2018-05-12T21:45:09.213214: step 2895, loss 0.0173806, acc 0.984375
2018-05-12T21:45:09.236349: step 2896, loss 0.0093393, acc 1
2018-05-12T21:45:09.258183: step 2897, loss 0.00425011, acc 1
2018-05-12T21:45:09.283515: step 2898, loss 0.00403602, acc 1
2018-05-12T21:45:09.305108: step 2899, loss 0.0107682, acc 1
2018-05-12T21:45:09.320296: step 2900, loss 4.28544e-

2018-05-12T21:45:12.198977: step 3015, loss 0.00469681, acc 1
2018-05-12T21:45:12.221760: step 3016, loss 0.0462791, acc 0.984375
2018-05-12T21:45:12.250135: step 3017, loss 0.0026475, acc 1
2018-05-12T21:45:12.272944: step 3018, loss 0.00272951, acc 1
2018-05-12T21:45:12.298185: step 3019, loss 0.00194225, acc 1
2018-05-12T21:45:12.308180: step 3020, loss 0.000361083, acc 1
2018-05-12T21:45:12.333043: step 3021, loss 0.0907243, acc 0.984375
2018-05-12T21:45:12.358269: step 3022, loss 0.0128559, acc 1
2018-05-12T21:45:12.388058: step 3023, loss 0.0146155, acc 1
2018-05-12T21:45:12.413746: step 3024, loss 0.00477456, acc 1
2018-05-12T21:45:12.436983: step 3025, loss 0.00216429, acc 1
2018-05-12T21:45:12.459955: step 3026, loss 0.00775545, acc 1
2018-05-12T21:45:12.485112: step 3027, loss 0.013521, acc 1
2018-05-12T21:45:12.509269: step 3028, loss 0.000658846, acc 1
2018-05-12T21:45:12.532156: step 3029, loss 0.0107503, acc 1
2018-05-12T21:45:12.554668: step 3030, loss 0.00935649, acc 1


2018-05-12T21:45:15.231185: step 3142, loss 0.00577883, acc 1
2018-05-12T21:45:15.253356: step 3143, loss 0.00796342, acc 1
2018-05-12T21:45:15.276621: step 3144, loss 0.00208089, acc 1
2018-05-12T21:45:15.300041: step 3145, loss 0.0162511, acc 1
2018-05-12T21:45:15.332685: step 3146, loss 0.0117384, acc 1
2018-05-12T21:45:15.376052: step 3147, loss 0.00249919, acc 1
2018-05-12T21:45:15.400137: step 3148, loss 0.053398, acc 0.984375
2018-05-12T21:45:15.425770: step 3149, loss 0.0199235, acc 1
2018-05-12T21:45:15.449763: step 3150, loss 0.00808468, acc 1
2018-05-12T21:45:15.472843: step 3151, loss 0.00240621, acc 1
2018-05-12T21:45:15.504478: step 3152, loss 0.00862093, acc 1
2018-05-12T21:45:15.527236: step 3153, loss 0.00939198, acc 1
2018-05-12T21:45:15.550769: step 3154, loss 0.0164872, acc 0.984375
2018-05-12T21:45:15.580232: step 3155, loss 0.0505921, acc 0.984375
2018-05-12T21:45:15.603919: step 3156, loss 0.00621999, acc 1
2018-05-12T21:45:15.628022: step 3157, loss 0.0421605, a

2018-05-12T21:45:18.486928: step 3274, loss 0.00802733, acc 1
2018-05-12T21:45:18.512011: step 3275, loss 0.00234879, acc 1
2018-05-12T21:45:18.533507: step 3276, loss 0.0568945, acc 0.984375
2018-05-12T21:45:18.557847: step 3277, loss 0.00647637, acc 1
2018-05-12T21:45:18.582188: step 3278, loss 0.00336985, acc 1
2018-05-12T21:45:18.605902: step 3279, loss 0.00290066, acc 1
2018-05-12T21:45:18.615644: step 3280, loss 0.000205336, acc 1
2018-05-12T21:45:18.637249: step 3281, loss 0.018211, acc 0.984375
2018-05-12T21:45:18.661964: step 3282, loss 0.00400515, acc 1
2018-05-12T21:45:18.688196: step 3283, loss 0.000720871, acc 1
2018-05-12T21:45:18.718661: step 3284, loss 0.00170981, acc 1
2018-05-12T21:45:18.742587: step 3285, loss 0.00631855, acc 1
2018-05-12T21:45:18.767471: step 3286, loss 0.00340223, acc 1
2018-05-12T21:45:18.792702: step 3287, loss 0.139403, acc 0.96875
2018-05-12T21:45:18.818357: step 3288, loss 0.00167061, acc 1
2018-05-12T21:45:18.841079: step 3289, loss 0.0015634

2018-05-12T21:45:21.750617: step 3406, loss 0.00563528, acc 1
2018-05-12T21:45:21.774403: step 3407, loss 0.00440686, acc 1
2018-05-12T21:45:21.800391: step 3408, loss 0.00485411, acc 1
2018-05-12T21:45:21.826311: step 3409, loss 0.00732973, acc 1
2018-05-12T21:45:21.850994: step 3410, loss 0.0015703, acc 1
2018-05-12T21:45:21.876519: step 3411, loss 0.0119872, acc 1
2018-05-12T21:45:21.900581: step 3412, loss 0.00721127, acc 1
2018-05-12T21:45:21.923702: step 3413, loss 0.00765001, acc 1
2018-05-12T21:45:21.948934: step 3414, loss 0.00595433, acc 1
2018-05-12T21:45:21.980605: step 3415, loss 0.0128854, acc 1
2018-05-12T21:45:22.005484: step 3416, loss 0.0680181, acc 0.984375
2018-05-12T21:45:22.033029: step 3417, loss 0.00358194, acc 1
2018-05-12T21:45:22.056129: step 3418, loss 0.0107966, acc 1
2018-05-12T21:45:22.080724: step 3419, loss 0.00606833, acc 1
2018-05-12T21:45:22.093711: step 3420, loss 0.00161931, acc 1
2018-05-12T21:45:22.119307: step 3421, loss 0.000720007, acc 1
2018-

2018-05-12T21:45:25.034016: step 3539, loss 0.00520557, acc 1
2018-05-12T21:45:25.059375: step 3540, loss 0.000119094, acc 1
2018-05-12T21:45:25.090161: step 3541, loss 0.0439716, acc 0.984375
2018-05-12T21:45:25.115164: step 3542, loss 0.00124501, acc 1
2018-05-12T21:45:25.139824: step 3543, loss 0.00467939, acc 1
2018-05-12T21:45:25.168656: step 3544, loss 0.00229348, acc 1
2018-05-12T21:45:25.191630: step 3545, loss 0.00114781, acc 1
2018-05-12T21:45:25.216672: step 3546, loss 0.00173535, acc 1
2018-05-12T21:45:25.246088: step 3547, loss 0.00386227, acc 1
2018-05-12T21:45:25.270899: step 3548, loss 0.0134327, acc 1
2018-05-12T21:45:25.295628: step 3549, loss 0.00334625, acc 1
2018-05-12T21:45:25.318993: step 3550, loss 0.00893004, acc 1
2018-05-12T21:45:25.342760: step 3551, loss 0.078228, acc 0.984375
2018-05-12T21:45:25.366387: step 3552, loss 0.00235051, acc 1
2018-05-12T21:45:25.388809: step 3553, loss 0.00093861, acc 1
2018-05-12T21:45:25.413745: step 3554, loss 0.00469771, acc

2018-05-12T21:45:28.221517: step 3670, loss 0.00537663, acc 1
2018-05-12T21:45:28.245987: step 3671, loss 0.00195609, acc 1
2018-05-12T21:45:28.269739: step 3672, loss 0.0021846, acc 1
2018-05-12T21:45:28.295769: step 3673, loss 0.00222635, acc 1
2018-05-12T21:45:28.320973: step 3674, loss 0.00442933, acc 1
2018-05-12T21:45:28.345780: step 3675, loss 0.00407161, acc 1
2018-05-12T21:45:28.369704: step 3676, loss 0.00545187, acc 1
2018-05-12T21:45:28.390778: step 3677, loss 0.00111945, acc 1
2018-05-12T21:45:28.414058: step 3678, loss 0.0361207, acc 0.984375
2018-05-12T21:45:28.439556: step 3679, loss 0.00128117, acc 1
2018-05-12T21:45:28.448945: step 3680, loss 4.0829e-06, acc 1
2018-05-12T21:45:28.471864: step 3681, loss 0.00107276, acc 1
2018-05-12T21:45:28.498477: step 3682, loss 0.00926879, acc 1
2018-05-12T21:45:28.520803: step 3683, loss 0.00047308, acc 1
2018-05-12T21:45:28.543001: step 3684, loss 0.00614211, acc 1
2018-05-12T21:45:28.565721: step 3685, loss 0.0605793, acc 0.9843

2018-05-12T21:45:31.493893: step 3803, loss 0.00204037, acc 1
2018-05-12T21:45:31.517374: step 3804, loss 0.011296, acc 0.984375
2018-05-12T21:45:31.541090: step 3805, loss 0.00519302, acc 1
2018-05-12T21:45:31.566831: step 3806, loss 0.00196818, acc 1
2018-05-12T21:45:31.590525: step 3807, loss 0.0032553, acc 1
2018-05-12T21:45:31.616084: step 3808, loss 0.00245009, acc 1
2018-05-12T21:45:31.641037: step 3809, loss 0.0746701, acc 0.984375
2018-05-12T21:45:31.664492: step 3810, loss 0.000518053, acc 1
2018-05-12T21:45:31.688232: step 3811, loss 0.0751283, acc 0.984375
2018-05-12T21:45:31.713673: step 3812, loss 0.00217808, acc 1
2018-05-12T21:45:31.736035: step 3813, loss 0.00309983, acc 1
2018-05-12T21:45:31.758702: step 3814, loss 0.0102373, acc 1
2018-05-12T21:45:31.784276: step 3815, loss 0.0051278, acc 1
2018-05-12T21:45:31.808991: step 3816, loss 0.0690353, acc 0.984375
2018-05-12T21:45:31.835441: step 3817, loss 0.00776436, acc 1
2018-05-12T21:45:31.860495: step 3818, loss 0.004

2018-05-12T21:45:34.761997: step 3937, loss 0.0524384, acc 0.984375
2018-05-12T21:45:34.785050: step 3938, loss 0.0495795, acc 0.984375
2018-05-12T21:45:34.807380: step 3939, loss 0.00314603, acc 1
2018-05-12T21:45:34.819509: step 3940, loss 0.000115309, acc 1
2018-05-12T21:45:34.842218: step 3941, loss 0.00183546, acc 1
2018-05-12T21:45:34.867233: step 3942, loss 0.00234094, acc 1
2018-05-12T21:45:34.891379: step 3943, loss 0.00351348, acc 1
2018-05-12T21:45:34.919728: step 3944, loss 0.00339666, acc 1
2018-05-12T21:45:34.942312: step 3945, loss 0.0715071, acc 0.984375
2018-05-12T21:45:34.965380: step 3946, loss 0.00330914, acc 1
2018-05-12T21:45:34.989824: step 3947, loss 0.00388412, acc 1
2018-05-12T21:45:35.020587: step 3948, loss 0.00979102, acc 1
2018-05-12T21:45:35.045915: step 3949, loss 0.0100441, acc 1
2018-05-12T21:45:35.069894: step 3950, loss 0.00168879, acc 1
2018-05-12T21:45:35.094908: step 3951, loss 0.0536209, acc 0.984375
2018-05-12T21:45:35.119404: step 3952, loss 0.

### Run RNN

### Sandbox

In [None]:
# Length of every element of `cc` (presumably the tokenised documents -- `cc` is defined outside this cell).
a = list(map(len, cc))

In [None]:
# Histogram of the lengths collected in `a`.
# plt.show() renders the figure without echoing the (counts, bins, patches)
# tuple that plt.hist returns as the cell output.
plt.hist(a)
plt.xlabel('length')
plt.ylabel('count')
plt.show()

---------

## Classification Model

## Use findings and impression to predict present and past

In [None]:
# Peek at the 'history' field of the row labelled 1 (explicit label-based lookup
# instead of chained indexing).
df.loc[1, 'history']

In [None]:
# Features/labels for PAST, excluding inconsistent reports.
# NOTE(review): rows with Count == 1 are dropped; presumably Count == 1 marks an
# inconsistent annotation -- confirm against the labelling sheet.
fields = [
    'history',
    'findings',
    'comparison',
    'impression',
]
n_train = 1240  # shuffled rows used for training; the remainder is the test set

# `&` binds tighter than `!=`, so the comparison has to be parenthesised;
# without parentheses the expression is evaluated as `(mask & column) != 1`.
keep = df['Past'].notnull() & (df['Count'] != 1)
# Fixed seed so the shuffle, and therefore the train/test split, is reproducible.
df_shuffled = df[keep].sample(frac=1, random_state=1)

df_train = df_shuffled.iloc[:n_train]
y_train = np.array(df_train['Past'].astype(int))
df_test = df_shuffled.iloc[n_train:]
y_test = np.array(df_test['Past'].astype(int))

# Fit the TF-IDF vocabulary on the training rows only, then transform both splits.
obj = utils.Df2TFIDF()
obj.fit(df_train, fields, ngram=5, min_count=5)
output_train = obj.transform(df_train, fields)
output_test = obj.transform(df_test, fields)

# Concatenate the per-field sparse matrices column-wise (hstack is imported at the top).
x_train = hstack([field_out['bow_tfidf'] for field_out in output_train.values()])
# TODO (xiao)
x_test = hstack([field_out['bow_tfidf'] for field_out in output_test.values()])
print(x_train.shape)
print(x_test.shape)

In [None]:
# Features/labels for PRESENT.
fields = [
    'history',
    'findings',
    'comparison',
    'impression',
]
n_train = 1100  # shuffled rows used for training; the remainder is the test set

# Keep rows whose Present label is neither missing nor 0.
keep = df['Present'].notnull() & (df['Present'] != 0)
# Fixed seed so the shuffle, and therefore the train/test split, is reproducible.
df_shuffled = df[keep].sample(frac=1, random_state=1)

df_train = df_shuffled.iloc[:n_train]
y_train = np.array(df_train['Present'].astype(int))
df_test = df_shuffled.iloc[n_train:]
y_test = np.array(df_test['Present'].astype(int))

# Fit the TF-IDF vocabulary on the training rows only, then transform both splits.
obj = utils.Df2TFIDF()
obj.fit(df_train, fields, ngram=5, min_count=2)
output_train = obj.transform(df_train, fields)
output_test = obj.transform(df_test, fields)

# Concatenate the per-field sparse matrices column-wise.
x_train = hstack([field_out['bow_tfidf'] for field_out in output_train.values()])
x_test = hstack([field_out['bow_tfidf'] for field_out in output_test.values()])
print(x_train.shape)
print(x_test.shape)

In [None]:
# Features/labels for the Left grade.
fields = ['history', 'findings', 'comparison', 'impression']
n_train = 210  # shuffled rows used for training; the remainder is the test set

# `&` binds tighter than `!=`, so the comparison has to be parenthesised;
# without parentheses the expression is evaluated as `(mask & column) != 0`.
keep = df['Left'].notnull() & (df['Left'] != 0)
# Fixed seed so the shuffle, and therefore the train/test split, is reproducible.
df_shuffled = df[keep].sample(frac=1, random_state=1)

# NOTE(review): the labels were previously read from 'Past' although the rows are
# selected on 'Left'; this looks like a copy-paste slip, so 'Left' is used here.
# astype(int) assumes the grades are whole numbers -- confirm.
df_train = df_shuffled.iloc[:n_train]
y_train = np.array(df_train['Left'].astype(int))
df_test = df_shuffled.iloc[n_train:]
y_test = np.array(df_test['Left'].astype(int))

# Fit the TF-IDF vocabulary on the training rows only, then transform both splits.
obj = utils.Df2TFIDF()
obj.fit(df_train, fields, ngram=5, min_count=5)
output_train = obj.transform(df_train, fields)
output_test = obj.transform(df_test, fields)

# Concatenate the per-field sparse matrices column-wise (hstack is imported at the top).
x_train = hstack([field_out['bow_tfidf'] for field_out in output_train.values()])
# TODO (xiao)
x_test = hstack([field_out['bow_tfidf'] for field_out in output_test.values()])
print(x_train.shape)
print(x_test.shape)

In [None]:
# Features/labels for the Right grade.
fields = ['history', 'findings', 'comparison', 'impression']
n_train = 200  # shuffled rows used for training; the remainder is the test set

# `&` binds tighter than `!=`, so the comparison has to be parenthesised;
# without parentheses the expression is evaluated as `(mask & column) != 0`.
keep = df['Right'].notnull() & (df['Right'] != 0)
# Fixed seed so the shuffle, and therefore the train/test split, is reproducible.
df_shuffled = df[keep].sample(frac=1, random_state=1)

# NOTE(review): the labels were previously read from 'Past' although the rows are
# selected on 'Right'; this looks like a copy-paste slip, so 'Right' is used here.
# astype(int) assumes the grades are whole numbers -- confirm.
df_train = df_shuffled.iloc[:n_train]
y_train = np.array(df_train['Right'].astype(int))
df_test = df_shuffled.iloc[n_train:]
y_test = np.array(df_test['Right'].astype(int))

# Fit the TF-IDF vocabulary on the training rows only, then transform both splits.
obj = utils.Df2TFIDF()
obj.fit(df_train, fields, ngram=5, min_count=5)
output_train = obj.transform(df_train, fields)
output_test = obj.transform(df_test, fields)

# Concatenate the per-field sparse matrices column-wise (hstack is imported at the top).
x_train = hstack([field_out['bow_tfidf'] for field_out in output_train.values()])
# TODO (xiao)
x_test = hstack([field_out['bow_tfidf'] for field_out in output_test.values()])
print(x_train.shape)
print(x_test.shape)

### run classifier

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC

# Linear SVM fitted on whatever x_train / y_train the previous data-prep cell produced.
# Alternative estimators kept for quick swapping:
#clf = SVC()
#clf = LogisticRegression(C=3)
clf = LinearSVC(C=1, loss='squared_hinge').fit(x_train, y_train)

# Drop singleton dimensions of the learned weights so they can be indexed/plotted directly.
coef = np.squeeze(clf.coef_)

In [None]:
# Sanity check: dimensions of the feature matrix passed to clf.fit.
x_train.shape

In [None]:
# Predict on both splits and show the two reports side by side, one row per split.
y_train_pred = clf.predict(x_train)
y_test_pred = clf.predict(x_test)

report_train = utils.my_classification_report(y_train, y_train_pred)
report_test = utils.my_classification_report(y_test, y_test_pred)

results = pd.concat([report_train, report_test], axis=1).T
results.index = ['training', 'testing']
display(results)

In [None]:
# Rank features by absolute weight, largest first, and plot the sorted weights.
idx = np.argsort(np.abs(coef))[::-1]
plt.plot(coef[idx])
plt.show()

# Look up the 20 heaviest features in the vectoriser's index-to-word table.
idx2word_agg = pd.Series(obj.idx2word_concat)
top20 = idx[:20]
display(idx2word_agg[top20])

In [None]:
# Show the test rows where the predicted label is greater than the true label.
# A separate name is used so the feature list in `fields` (read by the data-prep
# cells) is not overwritten with a list that also contains the label column.
display_fields = [
    'Past',
    'history',
    'findings',
    'comparison',
    'impression',
]
false_positive = (y_test_pred - y_test) > 0

# Lift the column-width limit only for this display instead of changing the
# global pandas option for every later cell.
# NOTE(review): newer pandas releases expect None rather than -1 for "no limit" --
# confirm against the installed version.
with pd.option_context('display.max_colwidth', -1):
    display(df_test.loc[false_positive, display_fields].applymap(utils.list2str))

---
# Semi-Supervised Learning

## Prepare data

### Labeled Data

In [None]:
# Load the labeled workbook and build `df_labeled`: label columns + parsed report fields.
filename = 'Data/upto1528.xlsx'
# Let pandas open the workbook from the path: an .xlsx file is binary, so a
# text-mode handle is wrong, and the handle was never closed.
df_raw = pd.read_excel(filename)

# Split the raw report text into one column per section.
ps = utils.Parser()
ps.parse(df_raw)  # same entry point as the loading cell at the top of the notebook
df = ps.df

# Replace each 'findings' entry by its parsed text and store the extracted
# (name, value) pairs in columns of the same row. Parsing stays best-effort,
# but failures are counted instead of being silently discarded.
n_failed = 0
for idx, row in df['findings'].items():
    try:
        text, velos = utils.parse_findings(row)
        df.at[idx, 'findings'] = text
        for n, v in velos:
            df.at[idx, n] = v
    except Exception:
        n_failed += 1
print('findings rows that could not be parsed: %d' % n_failed)

# Column order: the known report sections first, then every other column alphabetically.
discardField = ['Report Text']
excluded = set(ordered_names + discardField)
extra_cols = sorted(col for col in df.columns.tolist() if col not in excluded)
CORE_COL = ordered_names + extra_cols
df = df[CORE_COL]
df_labeled = pd.concat([df_raw[['Past', 'Present', 'Left', 'Right', 'Count']], df], axis=1)

### Unlabeled Data

In [None]:
# Load the unlabeled workbook and build `df_unlabeled` from the parsed report fields.
filename = 'Data/2011-2016.xlsx'
# Let pandas open the workbook from the path: an .xlsx file is binary, so a
# text-mode handle is wrong, and the handle was never closed.
df_raw = pd.read_excel(filename)

# Split the raw report text into one column per section.
ps = utils.Parser()
ps.parse(df_raw)  # same entry point as the loading cell at the top of the notebook
df = ps.df

# Replace each 'findings' entry by its parsed text and store the extracted
# (name, value) pairs in columns of the same row. Parsing stays best-effort,
# but failures are counted instead of being silently discarded.
n_failed = 0
for idx, row in df['findings'].items():
    try:
        text, velos = utils.parse_findings(row)
        df.at[idx, 'findings'] = text
        for n, v in velos:
            df.at[idx, n] = v
    except Exception:
        n_failed += 1
print('findings rows that could not be parsed: %d' % n_failed)

# Column order: the known report sections first, then every other column alphabetically.
discardField = ['Report Text']
excluded = set(ordered_names + discardField)
extra_cols = sorted(col for col in df.columns.tolist() if col not in excluded)
CORE_COL = ordered_names + extra_cols
# Explicit copy: this frame is modified later, which should not act on a slice of `df`.
# Unlike the labeled frame, no label columns are concatenated here.
df_unlabeled = df[CORE_COL].copy()

----

In [None]:
# TF-IDF features for PAST: labeled train/test split plus the unlabeled pool.
fields = [
    'history',
    'findings',
    'comparison',
    'impression',
]
n_train = 1220  # shuffled labeled rows used for training; the remainder is the test set

# --- labeled data ---
df_labeled = utils.null2empty(df_labeled, fields)
# `&` binds tighter than `!=`, so the comparison has to be parenthesised;
# without parentheses the expression is evaluated as `(mask & column) != 0`.
keep = df_labeled['Past'].notnull() & (df_labeled['Past'] != 0)
df_shuffled = df_labeled[keep].sample(frac=1, random_state=1)

df_train_labeled = df_shuffled.iloc[:n_train]
y_train_labeled = np.array(df_train_labeled['Past'].astype(int))
# Turn -1 into 0: the stacking cell below uses -1 as the label of unlabeled rows.
y_train_labeled[y_train_labeled == -1] = 0
df_test = df_shuffled.iloc[n_train:]
y_test = np.array(df_test['Past'].astype(int))
y_test[y_test == -1] = 0

# Fit the TF-IDF vocabulary on the labeled training rows only.
obj = utils.Df2TFIDF()
obj.fit(df_train_labeled, fields, ngram=5, min_count=5)
output_train_labeled = obj.transform(df_train_labeled, fields)
output_test = obj.transform(df_test, fields)

# Concatenate the per-field sparse matrices column-wise.
x_train_labeled = hstack([field_out['bow_tfidf'] for field_out in output_train_labeled.values()])
x_test = hstack([field_out['bow_tfidf'] for field_out in output_test.values()])
print(x_train_labeled.shape)
print(x_test.shape)

# --- unlabeled data ---
# Transformed with the vectoriser fitted above (no refit), so the unlabeled
# rows are encoded with the same fitted vocabulary as the labeled ones.
df_unlabeled = utils.null2empty(df_unlabeled, fields)
output_train_unlabeled = obj.transform(df_unlabeled, fields)
x_train_unlabeled = hstack([field_out['bow_tfidf'] for field_out in output_train_unlabeled.values()])
print(x_train_unlabeled.shape)

In [None]:
# Stack the labeled rows on top of the unlabeled rows; every unlabeled row gets the label -1.
n_unlabeled = x_train_unlabeled.shape[0]
x_train = vstack([x_train_labeled, x_train_unlabeled])
y_train = np.concatenate([y_train_labeled, np.full(n_unlabeled, -1, dtype=int)])

In [None]:
from sklearn.semi_supervised import LabelPropagation

# RBF-kernel label propagation over labeled + unlabeled rows, using all cores.
# The stacked matrix is converted with .toarray() before being handed to fit.
lp_params = dict(kernel='rbf', gamma=20, n_jobs=-1)
semi_clf = LabelPropagation(**lp_params)
semi_clf.fit(x_train.toarray(), y_train)

In [None]:
# Score the semi-supervised model on the labeled training rows and on the test rows,
# then show the two reports side by side, one row per split.
y_train_pred = semi_clf.predict(x_train_labeled.toarray())
y_test_pred = semi_clf.predict(x_test.toarray())

report_train = utils.my_classification_report(y_train_labeled, y_train_pred)
report_test = utils.my_classification_report(y_test, y_test_pred)

results = pd.concat([report_train, report_test], axis=1).T
results.index = ['training', 'testing']
display(results)