In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm
plt.style.use('fivethirtyeight')

%matplotlib inline

In [2]:
input_X = np.load("../outputs/10_user_X_Nov_11.npy")
input_y = np.load("../outputs/10_user_y_Nov_11.npy")
num_classes_for_prediction = 10

# put X in the right format
X = np.array([_ for _ in input_X])

# put y in the right format
def ohe(n):
    vec = np.zeros(num_classes_for_prediction)
    vec[n] = 1
    return vec

y = np.array([ohe(n) for n in input_y])

# constants
vocab_size = 254
embedding_dims = 300
tweet_size = 140
batch_size = 100
epochs = 10


In [3]:
# Model

# Placeholders for inputs
train_inputs = tf.placeholder(tf.int32, shape=[None, tweet_size], name='train_inputs')
train_labels = tf.placeholder(tf.int32, shape=[None, num_classes_for_prediction], name='train_labels')

# Create the embedding matrix
embeddings = tf.Variable(tf.random_uniform([vocab_size, embedding_dims], -1.0, 1.0), name='embeddings')

# Extract the character embeddings for the tweet
embed =  tf.reshape(tf.nn.embedding_lookup(embeddings, train_inputs), [-1, tweet_size, embedding_dims, 1],
                    name='embed')

# The convolutional part
n_gram = 2
num_filters = 32
W_conv = tf.Variable(tf.truncated_normal([n_gram, embedding_dims, 1, num_filters], stddev=0.05), name="W_conv")
b_conv = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b_conv")

conv = tf.nn.conv2d(
        embed,
        W_conv, 
        strides=[1, 1, 1, 1], 
        padding="VALID", 
        name="convolution")

h = tf.nn.tanh(tf.nn.bias_add(conv, b_conv), name="relu")

pooled = tf.nn.max_pool(
        h,
        ksize=[1, h.get_shape()[1].value, 1, 1],
        strides=[1, 1, 1, 1],
        padding='VALID',
        name="max_pool")

# reshape and add FC
h_pool_flat = tf.reshape(pooled, [-1, num_filters], name='flatten')

W_fc = tf.Variable(tf.truncated_normal([num_filters, num_classes_for_prediction], stddev=0.05), name="W_fc")
b_fc = tf.Variable(tf.constant(0.1, shape=[num_classes_for_prediction]), name="b_fc")
fc_output = tf.nn.tanh(tf.matmul(h_pool_flat, W_fc) + b_fc, name="fc")

# cross entropy loss function
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
    _sentinel=None,
    labels=train_labels,
    logits=fc_output,
    dim=-1,
    name='loss'
))

preds = tf.argmax(fc_output, 1, name='predictions')
true_vals = tf.argmax(train_labels, 1, name='true_vals')
acc = tf.reduce_mean(tf.cast(tf.equal(preds, true_vals), tf.float32), name = 'acc')

# SGD optimizer
learning_rate = 0.5
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate, name='optimizer')
train_step = optimizer.minimize(loss, name='train_step')

In [4]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [7]:
# Train data
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True)

cv_accuracy_val = []
cv_loss_val = []

split_num = 0
for train_index, test_index in kf.split(X):
    
    # reset the network parameters
    sess.run(init)
    
    print("Split: {}".format(split_num))
    split_num += 1
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # around 2 minutes per epoch
    for e in range(epochs):
        print("Epoch: {}".format(e))
        pairs = zip(np.split(X, indices_or_sections=np.arange(0, len(X), batch_size)), 
                                    np.split(y, indices_or_sections=np.arange(0, len(X), batch_size)))
        train_acc = []
        train_loss = []
        for batch_x, batch_y in tqdm(pairs):
            if len(batch_x)==batch_size:
                feed_dict = {train_inputs: batch_x, train_labels: batch_y}
                _, train_l, train_a = sess.run([train_step, loss, acc], feed_dict=feed_dict)
                train_acc.append(train_a)
                train_loss.append(train_l)
        
        val_acc, val_loss = sess.run([acc, loss], feed_dict={train_inputs: X_test, train_labels: y_test})
        
        print("Training accuracy: {}".format(np.mean(train_acc)))
        print("Training loss: {}".format(np.mean(train_loss)))
        print("Validation accuracy: {}".format(val_acc))
        print("Validation loss: {}".format(val_loss))
    
    cv_loss_val.append(val_loss)
    cv_accuracy_val.append(val_acc)

0it [00:00, ?it/s]

Split: 0
Epoch: 0


901it [01:54,  7.86it/s]
0it [00:00, ?it/s]

Training accuracy: 0.5954171419143677
Training loss: 1.388081669807434
Validation accuracy: 0.7697353959083557
Validation loss: 1.1556872129440308
Epoch: 1


901it [01:54,  7.88it/s]
2it [00:00, 11.46it/s]

Training accuracy: 0.8565406203269958
Training loss: 1.0096009969711304
Validation accuracy: 0.8420614004135132
Validation loss: 1.063541054725647
Epoch: 2


901it [01:54,  7.88it/s]
2it [00:00, 13.24it/s]

Training accuracy: 0.894126832485199
Training loss: 0.9689398407936096
Validation accuracy: 0.870413601398468
Validation loss: 1.0205819606781006
Epoch: 3


901it [01:54,  7.88it/s]
2it [00:00, 11.73it/s]

Training accuracy: 0.9014126658439636
Training loss: 0.9543343782424927
Validation accuracy: 0.8693573474884033
Validation loss: 1.0040658712387085
Epoch: 4


901it [01:56,  7.76it/s]
2it [00:00, 12.27it/s]

Training accuracy: 0.9066963195800781
Training loss: 0.9472568035125732
Validation accuracy: 0.8705248236656189
Validation loss: 0.9964889883995056
Epoch: 5


901it [01:53,  7.90it/s]
2it [00:00, 11.77it/s]

Training accuracy: 0.9061846137046814
Training loss: 0.9440957903862
Validation accuracy: 0.8858683705329895
Validation loss: 0.9705634117126465
Epoch: 6


901it [01:58,  7.63it/s]
2it [00:00, 12.02it/s]

Training accuracy: 0.9036596417427063
Training loss: 0.941411554813385
Validation accuracy: 0.8749166131019592
Validation loss: 0.9851270318031311
Epoch: 7


901it [01:54,  7.88it/s]
2it [00:00, 12.12it/s]

Training accuracy: 0.9078198075294495
Training loss: 0.9381839632987976
Validation accuracy: 0.8849788904190063
Validation loss: 0.9650824069976807
Epoch: 8


901it [01:54,  7.87it/s]
2it [00:00, 12.80it/s]

Training accuracy: 0.9092102646827698
Training loss: 0.93940269947052
Validation accuracy: 0.8958750367164612
Validation loss: 0.9555873870849609
Epoch: 9


901it [02:03,  7.28it/s]


Training accuracy: 0.9128475785255432
Training loss: 0.9354503154754639
Validation accuracy: 0.8940404653549194
Validation loss: 0.9596749544143677
Split: 1


2it [00:00, 13.76it/s]

Epoch: 0


901it [01:56,  7.75it/s]
2it [00:00, 11.50it/s]

Training accuracy: 0.5710678696632385
Training loss: 1.3993964195251465
Validation accuracy: 0.6810095906257629
Validation loss: 1.224310040473938
Epoch: 1


901it [01:59,  7.57it/s]
2it [00:00, 13.13it/s]

Training accuracy: 0.8247052431106567
Training loss: 1.016623854637146
Validation accuracy: 0.8098732233047485
Validation loss: 1.0419102907180786
Epoch: 2


901it [01:54,  7.84it/s]
2it [00:00, 11.34it/s]

Training accuracy: 0.857630729675293
Training loss: 0.9760024547576904
Validation accuracy: 0.8507338166236877
Validation loss: 0.9990748763084412
Epoch: 3


901it [01:58,  7.61it/s]
2it [00:00, 11.62it/s]

Training accuracy: 0.8771412372589111
Training loss: 0.9617418646812439
Validation accuracy: 0.8656882643699646
Validation loss: 0.981870174407959
Epoch: 4


901it [01:58,  7.58it/s]
2it [00:00, 12.14it/s]

Training accuracy: 0.8834593892097473
Training loss: 0.9497621059417725
Validation accuracy: 0.877307116985321
Validation loss: 0.9645670056343079
Epoch: 5


901it [01:55,  7.77it/s]
2it [00:00, 12.28it/s]

Training accuracy: 0.8886095285415649
Training loss: 0.9414439797401428
Validation accuracy: 0.8681899309158325
Validation loss: 0.9775855541229248
Epoch: 6


901it [01:55,  7.78it/s]
2it [00:00, 12.51it/s]

Training accuracy: 0.8909121155738831
Training loss: 0.9371137022972107
Validation accuracy: 0.8745274543762207
Validation loss: 0.9649292826652527
Epoch: 7


901it [01:56,  7.74it/s]
2it [00:00, 13.09it/s]

Training accuracy: 0.892903208732605
Training loss: 0.9339619278907776
Validation accuracy: 0.882199227809906
Validation loss: 0.9455469250679016
Epoch: 8


901it [01:56,  7.71it/s]
2it [00:00, 12.57it/s]

Training accuracy: 0.8941267728805542
Training loss: 0.9296714067459106
Validation accuracy: 0.8778074383735657
Validation loss: 0.9587898850440979
Epoch: 9


901it [01:56,  7.71it/s]


Training accuracy: 0.8952279686927795
Training loss: 0.9277146458625793
Validation accuracy: 0.8891482949256897
Validation loss: 0.9421476125717163
Split: 2


2it [00:00, 12.79it/s]

Epoch: 0


901it [01:54,  7.84it/s]
2it [00:00, 12.87it/s]

Training accuracy: 0.6058843731880188
Training loss: 1.354637622833252
Validation accuracy: 0.7838438749313354
Validation loss: 1.102437973022461
Epoch: 1


901it [01:54,  7.88it/s]
2it [00:00, 12.65it/s]

Training accuracy: 0.84369295835495
Training loss: 1.0053112506866455
Validation accuracy: 0.8319897651672363
Validation loss: 1.058595061302185
Epoch: 2


901it [01:54,  7.84it/s]
2it [00:00, 12.97it/s]

Training accuracy: 0.8689765930175781
Training loss: 0.9812381267547607
Validation accuracy: 0.8592316508293152
Validation loss: 1.0152692794799805
Epoch: 3


901it [01:54,  7.89it/s]
2it [00:00, 13.81it/s]

Training accuracy: 0.8795661926269531
Training loss: 0.9608388543128967
Validation accuracy: 0.8739645481109619
Validation loss: 0.9817913174629211
Epoch: 4


901it [01:54,  7.89it/s]
2it [00:00, 10.85it/s]

Training accuracy: 0.8853059411048889
Training loss: 0.9502231478691101
Validation accuracy: 0.8789681196212769
Validation loss: 0.9740529656410217
Epoch: 5


901it [01:57,  7.64it/s]
2it [00:00, 12.66it/s]

Training accuracy: 0.889276921749115
Training loss: 0.9433402419090271
Validation accuracy: 0.8825262784957886
Validation loss: 0.9640811681747437
Epoch: 6


901it [02:05,  7.15it/s]
0it [00:00, ?it/s]

Training accuracy: 0.890745222568512
Training loss: 0.9376601576805115
Validation accuracy: 0.884472131729126
Validation loss: 0.9614943265914917
Epoch: 7


901it [02:06,  7.10it/s]
2it [00:00, 11.13it/s]

Training accuracy: 0.8932925462722778
Training loss: 0.9344272017478943
Validation accuracy: 0.8831378221511841
Validation loss: 0.9646820425987244
Epoch: 8


901it [02:05,  7.19it/s]
2it [00:00, 13.46it/s]

Training accuracy: 0.8944493532180786
Training loss: 0.9327582716941833
Validation accuracy: 0.886918306350708
Validation loss: 0.9495725035667419
Epoch: 9


901it [02:20,  6.40it/s]


Training accuracy: 0.8965961933135986
Training loss: 0.930208146572113
Validation accuracy: 0.8895313143730164
Validation loss: 0.9462740421295166
Split: 3


2it [00:00, 12.93it/s]

Epoch: 0


901it [02:04,  7.22it/s]
2it [00:00, 12.20it/s]

Training accuracy: 0.5895216464996338
Training loss: 1.3914705514907837
Validation accuracy: 0.7928503751754761
Validation loss: 1.111039400100708
Epoch: 1


901it [02:04,  7.23it/s]
2it [00:00, 12.37it/s]

Training accuracy: 0.8448053002357483
Training loss: 1.010575532913208
Validation accuracy: 0.8307110667228699
Validation loss: 1.0239912271499634
Epoch: 2


901it [02:08,  7.01it/s]
2it [00:00, 10.62it/s]

Training accuracy: 0.86725252866745
Training loss: 0.9825337529182434
Validation accuracy: 0.8462778925895691
Validation loss: 1.0162235498428345
Epoch: 3


901it [02:04,  7.25it/s]
2it [00:00, 11.32it/s]

Training accuracy: 0.8846830129623413
Training loss: 0.9665583968162537
Validation accuracy: 0.8675154447555542
Validation loss: 0.9984362721443176
Epoch: 4


901it [02:12,  6.80it/s]
2it [00:00, 12.05it/s]

Training accuracy: 0.8960400223731995
Training loss: 0.953342616558075
Validation accuracy: 0.8818591237068176
Validation loss: 0.9701226353645325
Epoch: 5


901it [02:02,  7.38it/s]
0it [00:00, ?it/s]

Training accuracy: 0.9090990424156189
Training loss: 0.9411808252334595
Validation accuracy: 0.897092342376709
Validation loss: 0.9545575380325317
Epoch: 6


901it [01:58,  7.60it/s]
2it [00:00, 12.75it/s]

Training accuracy: 0.9158954620361328
Training loss: 0.9328134059906006
Validation accuracy: 0.9012064337730408
Validation loss: 0.9527600407600403
Epoch: 7


901it [02:07,  7.08it/s]
0it [00:00, ?it/s]

Training accuracy: 0.9175305962562561
Training loss: 0.9281209111213684
Validation accuracy: 0.906599223613739
Validation loss: 0.9444603323936462
Epoch: 8


901it [01:59,  7.54it/s]
2it [00:00, 11.16it/s]

Training accuracy: 0.9195550084114075
Training loss: 0.9244049191474915
Validation accuracy: 0.9070995450019836
Validation loss: 0.9406130313873291
Epoch: 9


901it [02:02,  7.33it/s]


Training accuracy: 0.9200222492218018
Training loss: 0.9223052859306335
Validation accuracy: 0.9117140173912048
Validation loss: 0.9363650679588318
Split: 4


2it [00:00, 12.02it/s]

Epoch: 0


901it [02:00,  7.47it/s]
0it [00:00, ?it/s]

Training accuracy: 0.5524805188179016
Training loss: 1.4498342275619507
Validation accuracy: 0.7145716547966003
Validation loss: 1.2293505668640137
Epoch: 1


901it [02:00,  7.47it/s]
2it [00:00, 10.57it/s]

Training accuracy: 0.8174304962158203
Training loss: 1.0263327360153198
Validation accuracy: 0.803913950920105
Validation loss: 1.0631372928619385
Epoch: 2


901it [01:58,  7.62it/s]
2it [00:00, 10.83it/s]

Training accuracy: 0.8377196788787842
Training loss: 0.9858655333518982
Validation accuracy: 0.8223717212677002
Validation loss: 1.015085220336914
Epoch: 3


901it [02:00,  7.50it/s]
2it [00:00, 10.82it/s]

Training accuracy: 0.8444048762321472
Training loss: 0.983432412147522
Validation accuracy: 0.8303775191307068
Validation loss: 1.0166418552398682
Epoch: 4


901it [01:59,  7.56it/s]
0it [00:00, ?it/s]

Training accuracy: 0.8453392386436462
Training loss: 0.9715567231178284
Validation accuracy: 0.8332128524780273
Validation loss: 0.9959913492202759
Epoch: 5


901it [01:59,  7.53it/s]
0it [00:00, ?it/s]

Training accuracy: 0.854282557964325
Training loss: 0.9617449641227722
Validation accuracy: 0.8493912220001221
Validation loss: 0.9794803261756897
Epoch: 6


901it [02:06,  7.12it/s]
2it [00:00, 12.61it/s]

Training accuracy: 0.8757174611091614
Training loss: 0.951949417591095
Validation accuracy: 0.8744648694992065
Validation loss: 0.9639996290206909
Epoch: 7


901it [02:00,  7.47it/s]
0it [00:00, ?it/s]

Training accuracy: 0.887552797794342
Training loss: 0.9466041326522827
Validation accuracy: 0.8919775485992432
Validation loss: 0.9579125046730042
Epoch: 8


901it [02:09,  6.96it/s]
0it [00:00, ?it/s]

Training accuracy: 0.8967742323875427
Training loss: 0.9411112070083618
Validation accuracy: 0.9009284377098083
Validation loss: 0.9562973380088806
Epoch: 9


901it [02:03,  7.29it/s]


Training accuracy: 0.901067852973938
Training loss: 0.937191367149353
Validation accuracy: 0.8982042670249939
Validation loss: 0.9546306729316711


In [8]:
print("CV Accuracy: {}".format(np.mean(cv_loss_val)))
print("CV Loss: {}".format(np.mean(cv_accuracy_val)))

CV Accuracy: 0.9478184580802917
CV Loss: 0.8965276479721069


In [9]:
# Saved Model, DO NOT RERUN
# save_dir = "../models/"
# model_name = "10_class_1_layerconv_89%"
# saver = tf.train.Saver()
# saver.save(sess, save_dir+model_name)

'../models/10_class_1_layerconv_89%'

In [5]:
# restore model
saver = tf.train.Saver()
saver.restore(sess, "../models/10_class_1_layerconv_89%")

INFO:tensorflow:Restoring parameters from ../models/10_class_1_layerconv_89%


In [6]:
sess.run([acc, loss], feed_dict={train_inputs: X[:1000], train_labels: y[:1000]})

[0.89899999, 0.95786172]