In [1]:
%load_ext autoreload
%autoreload 2
import tensorflow as tf
# from elmo import elmo_embedding
import numpy as np
import os
from tqdm import tqdm
import json
import pickle
import logging
from utils import get_logger

In [37]:
def dan(inputs, labels, seq_lens, n_layers, n_units, n_classes, dropout, name='classifier', is_training=True, reuse=False):
    """Build a sum-pooled feed-forward classifier and return logits and metrics.

    The input is summed over axis 1, passed through ``n_layers`` affine layers
    (each followed by dropout when ``is_training`` is true) and finally
    projected to ``n_classes`` logits.

    Args:
        inputs: tensor that is reduced over axis 1 (presumably
            (batch, time, dim) -- confirm against the caller).
        labels: integer class ids, compared against the int32 predictions.
        seq_lens: accepted for signature parity with ``cnn``; not used here,
            so every position along axis 1 contributes to the sum.
        n_layers: number of hidden affine layers.
        n_units: width of every hidden layer.
        n_classes: number of output classes.
        dropout: drop probability; ``1.0 - dropout`` is used as keep_prob.
        name: variable scope that owns all variables created here.
        is_training: Python flag; dropout ops are only added when true.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        Tuple ``(logits, pred, loss, acc)``: raw logits, int32 argmax
        predictions, mean sparse softmax cross-entropy, and the fraction of
        correct predictions in the batch.
    """
    with tf.variable_scope(name, reuse=reuse):
        hidden = tf.reduce_sum(inputs, axis=1, keepdims=False)

        for layer in range(n_layers):
            weights = tf.get_variable(
                name='W_{}'.format(layer),
                shape=(hidden.shape[-1], n_units),
                trainable=True,
                initializer=tf.glorot_uniform_initializer())
            bias = tf.get_variable(
                name='b_{}'.format(layer),
                shape=(n_units,),
                trainable=True,
                initializer=tf.zeros_initializer())
            hidden = tf.nn.xw_plus_b(hidden, weights, bias, name='dense_{}'.format(layer))
            if is_training:
                hidden = tf.nn.dropout(hidden, keep_prob=1.0-dropout)

        out_weights = tf.get_variable(
            name='W_out',
            shape=(hidden.shape[-1], n_classes),
            trainable=True,
            initializer=tf.glorot_uniform_initializer())
        out_bias = tf.get_variable(
            name='b_out',
            shape=(n_classes,),
            trainable=True,
            initializer=tf.zeros_initializer())
        logits = tf.nn.xw_plus_b(hidden, out_weights, out_bias, name='dense_out')

        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
        loss = tf.reduce_mean(per_example_loss)
        pred = tf.argmax(logits, axis=-1, name='predict', output_type=tf.int32)

        # Accuracy = number of matches divided by the dynamic batch size.
        n_correct = tf.reduce_sum(tf.to_float(tf.equal(pred, labels)))
        batch_size = tf.to_float(tf.shape(hidden)[0])
        acc = n_correct / batch_size
        return logits, pred, loss, acc

In [43]:
def cnn(inputs, labels, seq_lens, k_size, n_filters, n_classes, n_layers, n_units, dropout, l2_coef=1e-3, name='classifier', is_training=True, reuse=False):
    """Build a multi-kernel 1-D CNN classifier and return logits and metrics.

    One ``tf.layers.conv1d`` branch is created per (kernel size, filter count)
    pair, each branch is max-pooled over axis 1, the pooled vectors are
    concatenated and fed through ``n_layers`` affine layers and an output
    projection.

    Args:
        inputs: tensor fed to ``tf.layers.conv1d`` (presumably
            (batch, time, channels) -- confirm against the caller).
        labels: integer class ids, compared against the int32 predictions.
        seq_lens: accepted for interface compatibility but not used.
            NOTE(review): because no masking is applied, padded positions
            take part in the max-pooling.
        k_size: iterable of convolution kernel sizes.
        n_filters: iterable of filter counts, zipped with ``k_size``.
        n_classes: number of output classes.
        n_layers: number of hidden affine layers after pooling.
        n_units: width of every hidden layer.
        dropout: drop probability; ``1 - dropout`` is used as keep_prob.
        l2_coef: scale of the L2 regularizers attached to the convolution
            kernels and dense weight matrices.
        name: variable scope that owns all variables created here.
        is_training: Python flag; dropout ops are only added when true.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        Tuple ``(logits, pred, loss, acc)``. ``loss`` is the mean sparse
        softmax cross-entropy only; the L2 terms are attached through the
        ``regularizer`` arguments and are not added to it here.
    """
    with tf.variable_scope(name, reuse=reuse):
        pooled = []
        for ksz, nf in zip(k_size, n_filters):
            conv = tf.layers.conv1d(inputs,
                                    nf,
                                    ksz,
                                    padding='same',
                                    kernel_initializer=tf.glorot_uniform_initializer(),
                                    bias_initializer=tf.zeros_initializer(),
                                    kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_coef))
            if is_training:
                # noise_shape (1, 1, nf): one keep/drop decision per filter,
                # broadcast over the first two axes.
                conv = tf.nn.dropout(conv, 1-dropout, noise_shape=(1, 1, nf))
            # Max-over-axis-1 pooling collapses each branch to one vector per example.
            pooled.append(tf.reduce_max(conv, axis=1))
        features = tf.concat(pooled, axis=-1)

        for i in range(n_layers):
            W = tf.get_variable(name='W_{}'.format(i), shape=(features.shape[-1], n_units), trainable=True, initializer=tf.glorot_uniform_initializer(), regularizer=tf.contrib.layers.l2_regularizer(scale=l2_coef))
            b = tf.get_variable(name='b_{}'.format(i), shape=(n_units,), trainable=True, initializer=tf.zeros_initializer())
            features = tf.nn.xw_plus_b(features, W, b, name='dense_{}'.format(i))
            if is_training:
                features = tf.nn.dropout(features, keep_prob=1.0-dropout)

        W = tf.get_variable(name='W_out', shape=(features.shape[-1], n_classes), trainable=True, initializer=tf.glorot_uniform_initializer(), regularizer=tf.contrib.layers.l2_regularizer(scale=l2_coef))
        b = tf.get_variable(name='b_out', shape=(n_classes,), trainable=True, initializer=tf.zeros_initializer())
        logits = tf.nn.xw_plus_b(features, W, b, name='dense_out')

        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
        pred = tf.argmax(logits, axis=-1, name='predict', output_type=tf.int32)
        # Accuracy = number of matches divided by the dynamic batch size.
        acc = tf.reduce_sum(tf.to_float(tf.equal(pred, labels))) / tf.to_float(tf.shape(inputs)[0])
        return logits, pred, loss, acc

In [4]:
# Label vocabulary: a class name's position in this tuple is its integer id.
_CLASS_NAMES = ('fashion', 'technology', 'music', 'lifestyle', 'travel')

# Forward (name -> id) and reverse (id -> name) lookups.
class2idx = {label: index for index, label in enumerate(_CLASS_NAMES)}
idx2class = dict(enumerate(_CLASS_NAMES))

# Run identifier, interpolated into the log and checkpoint paths.
VERSION = 12
# Run logger from the project's utils.get_logger; the argument embeds VERSION so
# each run gets its own string (how get_logger interprets it is defined in utils).
logger = get_logger('classify data/{}/logs'.format(VERSION))

In [5]:
def pad_sequences(sequences):
    """Zero-pad variable-length sequences into one batch-major float array.

    Args:
        sequences: sequence of per-example items. If the first item is a
            ``np.ndarray`` its trailing dimensions (``shape[1:]``) are kept
            for every example; otherwise each item is treated as a flat
            sequence of scalars.

    Returns:
        ``np.ndarray`` of dtype float64 with shape
        ``(len(sequences), max_len) + trailing_dims``; positions past the end
        of each example are zero. An empty input yields an array of shape
        ``(0, 0)`` instead of raising.
    """
    if len(sequences) == 0:
        return np.zeros((0, 0))

    maxlen = max(len(x) for x in sequences)
    first = sequences[0]
    trailing = tuple(first.shape[1:]) if isinstance(first, np.ndarray) else ()

    # Allocate batch-major directly so the result is C-contiguous and no
    # transpose of a time-major buffer is needed.
    arr = np.zeros((len(sequences), maxlen) + trailing)
    for i, x in enumerate(sequences):
        arr[i, :len(x)] = x
    return arr
def get_batch(X, y, batch_size, shuffle=True):
    """Yield padded mini-batches of (features, label ids, sequence lengths).

    Args:
        X: indexable collection of examples; element ``[0]`` of each example
            is the sequence that gets padded.
        y: class names, one per example; each is mapped through ``class2idx``.
        batch_size: maximum number of examples per batch (must be positive).
        shuffle: when true, examples are visited in a random permutation
            drawn from ``np.random``.

    Yields:
        Tuple ``(embed, labels, seq_len)``: the output of ``pad_sequences``
        for the batch, a NumPy array of integer label ids, and a list with
        the unpadded length of each sequence.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    assert len(X) == len(y)
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got {}'.format(batch_size))

    idx = np.arange(0, len(X))
    y = np.array([class2idx[x] for x in y])
    if shuffle:
        idx = np.random.permutation(idx)

    for i in range(0, len(idx), batch_size):
        indices = idx[i:i+batch_size]
        # Index the original container directly instead of converting X with
        # np.array: examples mix arrays of different lengths with other
        # fields, which NumPy cannot reliably pack into one array.
        batch = [X[j] for j in indices]
        embed = pad_sequences([x[0] for x in batch])
        labels = y[indices]
        seq_len = [len(x[0]) for x in batch]
        yield embed, labels, seq_len

In [44]:
# The top-of-notebook import of elmo_embedding is commented out, so import it
# here to keep this cell runnable after a kernel restart.
from elmo import elmo_embedding

# Start from a clean graph and a session that grows GPU memory on demand.
tf.reset_default_graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

# Placeholders: raw input features, integer labels, sequence lengths, and the
# two dropout rates (classifier dropout and embedding-level dropout).
x = tf.placeholder(dtype=tf.float32, shape=(None, None, 1024, 4), name='x')
y = tf.placeholder(dtype=tf.int32, shape=(None,), name='y')
seq_len = tf.placeholder(dtype=tf.int32, shape=(None,), name='seq_len')
drop = tf.placeholder(dtype=tf.float32, shape=(), name='drop')
drop_e = tf.placeholder(dtype=tf.float32, shape=(), name='drop_e')

# elmo_embedding returns the combined embedding and its own L2 penalty term
# (presumably a (batch, time, dim) tensor given the 3-element noise_shape
# below -- confirm in the elmo module).
elmo, elmo_l2_reg = elmo_embedding(x, seq_lens=seq_len, l2_coef=1e-3, layer_norm=True)
shape = tf.shape(elmo)
# Drop whole positions: the noise shape shares one keep/drop decision across
# the last axis. tf.nn.dropout rescales kept values by 1/keep_prob; multiplying
# by keep_prob again cancels that, so surviving positions keep their magnitude.
elmo = tf.nn.dropout(elmo, 1-drop_e, noise_shape=(shape[0], shape[1], 1)) * (1-drop_e)

# CNN head: kernel sizes 3/4/5 with 128 filters each, 5 classes, 2 hidden
# layers of 256 units.
outputs, pred, loss, acc = cnn(elmo, y, seq_len, (3, 4, 5), (128, 128, 128), 5, 2, 256, drop, 1e-3, name='classifier')

# Optimise cross-entropy plus the ELMo L2 term plus every registered
# regularization loss, clipping gradients to a global norm of 1.0.
optimizer = tf.train.AdamOptimizer(1e-3)
grads, _vars = zip(*optimizer.compute_gradients(loss + elmo_l2_reg + tf.losses.get_regularization_loss()))
grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
global_step = tf.Variable(0, name="global_step", trainable=False)
train_op = optimizer.apply_gradients(
    zip(grads, _vars),
    global_step=global_step
)
session.run(tf.global_variables_initializer())

In [2]:
def _load_pickle(path):
    """Read and return the object pickled at ``path``."""
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Train / test / validation splits, loaded in the same order as before.
X_train = _load_pickle('classify data/X_train.pkl')
y_train = _load_pickle('classify data/y_train.pkl')
X_test = _load_pickle('classify data/X_test.pkl')
y_test = _load_pickle('classify data/y_test.pkl')
X_val = _load_pickle('classify data/X_val.pkl')
y_val = _load_pickle('classify data/y_val.pkl')

In [45]:
# Checkpoint writer/reader over all global variables; max_to_keep=1000 means
# up to 1000 checkpoints are retained before older ones are removed.
saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

In [46]:
# Class probabilities: softmax over the last axis of the classifier logits.
prob = tf.nn.softmax(outputs, axis=-1)

In [47]:
# Training schedule.
N_EPOCHS = 20
BATCH_SIZE = 32

for epoch in range(N_EPOCHS):
    # --- one pass over the shuffled training set (dropout enabled) ---
    for embed, labels, lengths in get_batch(X_train, y_train, BATCH_SIZE):
        _, step, batch_loss, batch_acc = session.run([train_op, global_step, loss, acc], feed_dict={
            x: embed, y: labels, seq_len: lengths, drop: 0.5, drop_e: 0.3
        })
        logger.info('Step {}: loss {}, acc {}'.format(step, batch_loss, batch_acc))

    # Checkpoint once per epoch. Passing the global_step variable lets the
    # saver read the current step itself, so this also works when the epoch
    # produced no training batches.
    saver.save(session, 'classify data/{}/classifier.cpkt'.format(VERSION), global_step=global_step)

    # --- validation pass (both dropout rates fed as 0.0) ---
    total_loss = 0.0
    total_acc = 0.0
    count = 0
    for embed, labels, lengths in get_batch(X_val, y_val, BATCH_SIZE, shuffle=False):
        batch_loss, batch_acc = session.run([loss, acc], feed_dict={
            x: embed, y: labels, seq_len: lengths, drop: 0.0, drop_e: 0.0
        })
        logger.info('Evaluation: loss {}, acc {}'.format(batch_loss, batch_acc))
        # Weight per-batch means by batch size so the final figures are
        # per-example averages even when the last batch is smaller.
        n_examples = len(embed)
        total_loss += batch_loss * n_examples
        total_acc += batch_acc * n_examples
        count += n_examples

    if count:
        logger.info('Evaluation: total loss {}, total acc {}'.format(total_loss / count, total_acc / count))
    else:
        logger.info('Evaluation: skipped, validation set is empty')

Step 1: loss 8.368650436401367, acc 0.21875
Step 2: loss 10.25060749053955, acc 0.34375
Step 3: loss 11.906867980957031, acc 0.21875
Step 4: loss 8.783586502075195, acc 0.3125
Step 5: loss 10.477054595947266, acc 0.15625
Step 6: loss 8.430365562438965, acc 0.1875
Step 7: loss 10.568829536437988, acc 0.15625
Step 8: loss 12.06379508972168, acc 0.25
Step 9: loss 7.990574836730957, acc 0.28125
Step 10: loss 8.672176361083984, acc 0.3125
Step 11: loss 6.602428436279297, acc 0.28125
Step 12: loss 5.82124662399292, acc 0.3125
Step 13: loss 5.628190994262695, acc 0.28125
Step 14: loss 5.920144081115723, acc 0.34375
Step 15: loss 8.235280990600586, acc 0.40625
Step 16: loss 8.71013355255127, acc 0.125
Step 17: loss 8.240959167480469, acc 0.375
Step 18: loss 10.240030288696289, acc 0.375
Step 19: loss 9.54597282409668, acc 0.375
Step 20: loss 6.971106052398682, acc 0.28125
Step 21: loss 4.875779628753662, acc 0.5625
Step 22: loss 10.827559471130371, acc 0.34375
Step 23: loss 6.370181560516357, 

Step 156: loss 0.947742760181427, acc 0.75
Step 157: loss 2.443552255630493, acc 0.6875
Step 158: loss 1.812637209892273, acc 0.71875
Step 159: loss 1.4152841567993164, acc 0.78125
Step 160: loss 2.24153995513916, acc 0.78125
Step 161: loss 2.136155843734741, acc 0.6875
Step 162: loss 0.8268330097198486, acc 0.8125
Step 163: loss 1.083641529083252, acc 0.8125
Step 164: loss 0.36385291814804077, acc 0.875
Step 165: loss 1.9893064498901367, acc 0.8125
Step 166: loss 2.5044660568237305, acc 0.71875
Step 167: loss 1.1988790035247803, acc 0.84375
Step 168: loss 0.45629820227622986, acc 0.84375
Step 169: loss 0.7578085660934448, acc 0.84375
Step 170: loss 0.80647212266922, acc 0.875
Step 171: loss 3.0229344367980957, acc 0.59375
Step 172: loss 1.963712453842163, acc 0.75
Step 173: loss 3.1151351928710938, acc 0.59375
Step 174: loss 1.7754801511764526, acc 0.6875
Step 175: loss 1.5830552577972412, acc 0.78125
Step 176: loss 3.2678065299987793, acc 0.75
Step 177: loss 1.3625426292419434, acc 0

Evaluation: loss 0.6176065802574158, acc 0.9375
Evaluation: loss 0.5060036182403564, acc 0.875
Evaluation: loss 0.93328857421875, acc 0.90625
Evaluation: loss 0.601937472820282, acc 0.8125
Evaluation: loss 0.6562756299972534, acc 0.875
Evaluation: loss 0.33802729845046997, acc 0.90625
Evaluation: loss 0.38499918580055237, acc 0.9375
Evaluation: loss 0.8944710493087769, acc 0.90625
Evaluation: loss 0.17032106220722198, acc 0.96875
Evaluation: loss 0.35014134645462036, acc 0.9375
Evaluation: loss 0.8319045901298523, acc 0.9375
Evaluation: loss 0.20064260065555573, acc 0.96875
Evaluation: loss 0.34839096665382385, acc 0.9375
Evaluation: loss 0.001117022125981748, acc 1.0
Evaluation: total loss 0.47239104326826903, total acc 0.9208173690932312
Step 295: loss 0.24042591452598572, acc 0.90625
Step 296: loss 0.7924025058746338, acc 0.84375
Step 297: loss 1.1788486242294312, acc 0.8125
Step 298: loss 1.6621551513671875, acc 0.78125
Step 299: loss 1.252346396446228, acc 0.78125
Step 300: loss 1

Step 428: loss 0.7481011748313904, acc 0.90625
Step 429: loss 0.008864284493029118, acc 1.0
Step 430: loss 0.10742142796516418, acc 0.96875
Step 431: loss 0.7441555261611938, acc 0.8125
Step 432: loss 0.28971076011657715, acc 0.90625
Step 433: loss 0.035596247762441635, acc 1.0
Step 434: loss 2.1263158321380615, acc 0.71875
Step 435: loss 0.29460608959198, acc 0.9375
Step 436: loss 1.6538668870925903, acc 0.78125
Step 437: loss 0.5897615551948547, acc 0.84375
Step 438: loss 0.7346423864364624, acc 0.90625
Step 439: loss 0.5333775877952576, acc 0.875
Step 440: loss 0.6215585470199585, acc 0.875
Step 441: loss 0.2309616208076477, acc 0.9375
Step 442: loss 0.721362829208374, acc 0.9375
Step 443: loss 0.2950933277606964, acc 0.90625
Step 444: loss 1.390824317932129, acc 0.90625
Step 445: loss 1.0132497549057007, acc 0.8125
Step 446: loss 0.4211035370826721, acc 0.90625
Step 447: loss 0.3328767716884613, acc 0.96875
Step 448: loss 0.09652187675237656, acc 0.96875
Step 449: loss 0.7647643089

Step 577: loss 0.042701683938503265, acc 0.96875
Step 578: loss 0.10164424777030945, acc 0.96875
Step 579: loss 0.4069756865501404, acc 0.90625
Step 580: loss 0.3274441361427307, acc 0.9375
Step 581: loss 0.013998333364725113, acc 1.0
Step 582: loss 0.40453600883483887, acc 0.90625
Step 583: loss 0.234242781996727, acc 0.9375
Step 584: loss 0.3861144781112671, acc 0.90625
Step 585: loss 0.2716157138347626, acc 0.9375
Step 586: loss 0.2867230772972107, acc 0.90625
Step 587: loss 0.08088687062263489, acc 0.96875
Step 588: loss 1.2486234903335571, acc 0.8333333134651184
Evaluation: loss 0.46587708592414856, acc 0.9375
Evaluation: loss 0.6872004270553589, acc 0.875
Evaluation: loss 0.1721072494983673, acc 0.9375
Evaluation: loss 0.16851598024368286, acc 0.96875
Evaluation: loss 0.09414703398942947, acc 0.96875
Evaluation: loss 0.05249331146478653, acc 1.0
Evaluation: loss 0.048605673015117645, acc 0.96875
Evaluation: loss 0.3583870530128479, acc 0.9375
Evaluation: loss 0.20325998961925507,

Step 698: loss 0.07349042594432831, acc 1.0
Step 699: loss 0.08610281348228455, acc 0.96875
Step 700: loss 0.13419032096862793, acc 0.9375
Step 701: loss 0.30904728174209595, acc 0.90625
Step 702: loss 0.17114686965942383, acc 0.9375
Step 703: loss 0.27275165915489197, acc 0.9375
Step 704: loss 0.5788636207580566, acc 0.90625
Step 705: loss 0.03841838613152504, acc 0.96875
Step 706: loss 0.3359706401824951, acc 0.9375
Step 707: loss 0.08788441121578217, acc 0.9375
Step 708: loss 0.07630050927400589, acc 0.96875
Step 709: loss 0.34493720531463623, acc 0.90625
Step 710: loss 0.24872639775276184, acc 0.96875
Step 711: loss 0.027485229074954987, acc 1.0
Step 712: loss 0.07507552206516266, acc 0.96875
Step 713: loss 0.005898648872971535, acc 1.0
Step 714: loss 0.6278833746910095, acc 0.84375
Step 715: loss 0.6480093598365784, acc 0.78125
Step 716: loss 0.04221909120678902, acc 1.0
Step 717: loss 0.4215630888938904, acc 0.8125
Step 718: loss 0.042033787816762924, acc 1.0
Step 719: loss 0.648

Step 846: loss 0.08801991492509842, acc 0.96875
Step 847: loss 0.1994192898273468, acc 0.96875
Step 848: loss 0.39223089814186096, acc 0.875
Step 849: loss 0.028875410556793213, acc 0.96875
Step 850: loss 0.021604441106319427, acc 1.0
Step 851: loss 0.08713112771511078, acc 0.96875
Step 852: loss 0.6818763017654419, acc 0.90625
Step 853: loss 0.28926318883895874, acc 0.90625
Step 854: loss 0.31649795174598694, acc 0.9375
Step 855: loss 0.2521525025367737, acc 0.90625
Step 856: loss 0.3864356279373169, acc 0.96875
Step 857: loss 0.5376142859458923, acc 0.90625
Step 858: loss 0.31323325634002686, acc 0.875
Step 859: loss 0.12234657257795334, acc 0.9375
Step 860: loss 0.49384939670562744, acc 0.875
Step 861: loss 0.4014724791049957, acc 0.90625
Step 862: loss 0.050656162202358246, acc 0.96875
Step 863: loss 0.2596834897994995, acc 0.90625
Step 864: loss 0.5507178902626038, acc 0.90625
Step 865: loss 0.12724635004997253, acc 0.90625
Step 866: loss 0.9630866050720215, acc 0.78125
Step 867: 

Evaluation: loss 0.2862081527709961, acc 0.84375
Evaluation: loss 0.24751362204551697, acc 0.875
Evaluation: loss 0.28466105461120605, acc 0.90625
Evaluation: loss 0.3529320955276489, acc 0.90625
Evaluation: loss 0.32473504543304443, acc 0.96875
Evaluation: loss 0.01791136898100376, acc 1.0
Evaluation: loss 0.25163501501083374, acc 0.9375
Evaluation: loss 0.16280777752399445, acc 0.96875
Evaluation: loss 0.17744798958301544, acc 0.9375
Evaluation: loss 0.13563929498195648, acc 0.96875
Evaluation: loss 0.10132645070552826, acc 0.96875
Evaluation: loss 0.05195702239871025, acc 0.96875
Evaluation: loss 0.10648390650749207, acc 0.9333333373069763
Evaluation: total loss 0.19136559675860373, total acc 0.9386973180837863
Step 981: loss 0.3300226032733917, acc 0.9375
Step 982: loss 0.04664270579814911, acc 0.96875
Step 983: loss 0.2734096348285675, acc 0.9375
Step 984: loss 0.2155536413192749, acc 0.90625
Step 985: loss 0.34830325841903687, acc 0.90625
Step 986: loss 0.03257312625646591, acc 1

Step 1111: loss 0.007951566949486732, acc 1.0
Step 1112: loss 0.0003447572235018015, acc 1.0
Step 1113: loss 0.02495299093425274, acc 1.0
Step 1114: loss 0.0816260352730751, acc 0.96875
Step 1115: loss 0.12281276285648346, acc 0.9375
Step 1116: loss 0.10949210822582245, acc 0.9375
Step 1117: loss 0.012304211035370827, acc 1.0
Step 1118: loss 0.09596918523311615, acc 0.9375
Step 1119: loss 0.12156867980957031, acc 0.9375
Step 1120: loss 0.13738463819026947, acc 0.90625
Step 1121: loss 0.1555471271276474, acc 0.90625
Step 1122: loss 0.03748627007007599, acc 0.96875
Step 1123: loss 0.22939762473106384, acc 0.9375
Step 1124: loss 0.028462707996368408, acc 1.0
Step 1125: loss 0.1697939932346344, acc 0.96875
Step 1126: loss 0.007997121661901474, acc 1.0
Step 1127: loss 0.31781190633773804, acc 0.90625
Step 1128: loss 0.197098970413208, acc 0.9375
Step 1129: loss 0.09556201100349426, acc 0.96875
Step 1130: loss 0.04806264489889145, acc 1.0
Step 1131: loss 0.11425893008708954, acc 0.9375
Step 

Step 1256: loss 0.1130068302154541, acc 0.96875
Step 1257: loss 0.025771427899599075, acc 1.0
Step 1258: loss 0.02492440491914749, acc 1.0
Step 1259: loss 0.0635264664888382, acc 0.96875
Step 1260: loss 0.08749943971633911, acc 0.96875
Step 1261: loss 0.08788551390171051, acc 0.96875
Step 1262: loss 0.1680191308259964, acc 0.90625
Step 1263: loss 0.09337057918310165, acc 0.96875
Step 1264: loss 0.0031908280216157436, acc 1.0
Step 1265: loss 0.12883047759532928, acc 0.9375
Step 1266: loss 0.06227579712867737, acc 0.96875
Step 1267: loss 0.3010922074317932, acc 0.9375
Step 1268: loss 0.19264942407608032, acc 0.96875
Step 1269: loss 0.25473514199256897, acc 0.90625
Step 1270: loss 0.08387593179941177, acc 0.96875
Step 1271: loss 0.08099175989627838, acc 0.96875
Step 1272: loss 0.1069248616695404, acc 0.96875
Step 1273: loss 0.07118212431669235, acc 0.96875
Step 1274: loss 0.008357873186469078, acc 1.0
Evaluation: loss 0.20152612030506134, acc 0.96875
Evaluation: loss 0.3162189722061157, a

Step 1374: loss 0.04122915863990784, acc 1.0
Step 1375: loss 0.39161497354507446, acc 0.90625
Step 1376: loss 0.16592180728912354, acc 0.96875
Step 1377: loss 0.04179070144891739, acc 0.96875
Step 1378: loss 0.0711667612195015, acc 0.96875
Step 1379: loss 0.06755584478378296, acc 0.96875
Step 1380: loss 0.06929628551006317, acc 0.9375
Step 1381: loss 0.08958274126052856, acc 0.96875
Step 1382: loss 0.060952380299568176, acc 0.96875
Step 1383: loss 0.08390617370605469, acc 0.96875
Step 1384: loss 0.0956115573644638, acc 0.96875
Step 1385: loss 0.011310548521578312, acc 1.0
Step 1386: loss 0.04014670103788376, acc 0.96875
Step 1387: loss 0.06722161173820496, acc 0.96875
Step 1388: loss 0.03513919562101364, acc 1.0
Step 1389: loss 0.03842136263847351, acc 0.96875
Step 1390: loss 0.03950856998562813, acc 1.0
Step 1391: loss 0.06948693096637726, acc 0.96875
Step 1392: loss 0.1848481148481369, acc 0.9375
Step 1393: loss 0.030798176303505898, acc 1.0
Step 1394: loss 0.027017885819077492, acc 

Step 1520: loss 0.0634344294667244, acc 0.96875
Step 1521: loss 0.14654192328453064, acc 0.9375
Step 1522: loss 0.008596089668571949, acc 1.0
Step 1523: loss 0.0433114618062973, acc 0.96875
Step 1524: loss 0.0725896880030632, acc 0.96875
Step 1525: loss 0.01049319189041853, acc 1.0
Step 1526: loss 0.1761133223772049, acc 0.96875
Step 1527: loss 0.14543657004833221, acc 0.96875
Step 1528: loss 0.05001024156808853, acc 0.96875
Step 1529: loss 0.02024715580046177, acc 1.0
Step 1530: loss 0.020783405750989914, acc 1.0
Step 1531: loss 0.15256141126155853, acc 0.9375
Step 1532: loss 0.0676843672990799, acc 0.9375
Step 1533: loss 0.039253056049346924, acc 0.96875
Step 1534: loss 0.2967089116573334, acc 0.9375
Step 1535: loss 0.07828914374113083, acc 0.96875
Step 1536: loss 0.16351182758808136, acc 0.9375
Step 1537: loss 0.014354180544614792, acc 1.0
Step 1538: loss 0.21156051754951477, acc 0.96875
Step 1539: loss 0.03575577586889267, acc 1.0
Step 1540: loss 0.002535476814955473, acc 1.0
Step 

Step 1666: loss 0.01352540124207735, acc 1.0
Evaluation: loss 0.2672136127948761, acc 0.9375
Evaluation: loss 0.5222494602203369, acc 0.90625
Evaluation: loss 0.07764410227537155, acc 0.96875
Evaluation: loss 0.23144076764583588, acc 0.9375
Evaluation: loss 0.016361281275749207, acc 1.0
Evaluation: loss 0.039780717343091965, acc 1.0
Evaluation: loss 0.020320694893598557, acc 1.0
Evaluation: loss 0.1793491244316101, acc 0.96875
Evaluation: loss 0.28592097759246826, acc 0.90625
Evaluation: loss 0.270925372838974, acc 0.96875
Evaluation: loss 0.2490372210741043, acc 0.875
Evaluation: loss 0.12233748286962509, acc 0.96875
Evaluation: loss 0.5989097356796265, acc 0.875
Evaluation: loss 0.5477827787399292, acc 0.875
Evaluation: loss 0.34082552790641785, acc 0.9375
Evaluation: loss 0.46183550357818604, acc 0.875
Evaluation: loss 0.347992479801178, acc 0.9375
Evaluation: loss 0.030194150283932686, acc 0.96875
Evaluation: loss 0.27939367294311523, acc 0.90625
Evaluation: loss 0.0650056079030036

Step 1785: loss 0.022826850414276123, acc 1.0
Step 1786: loss 0.2778991460800171, acc 0.9375
Step 1787: loss 0.0175552386790514, acc 1.0
Step 1788: loss 0.007432244718074799, acc 1.0
Step 1789: loss 0.019197925925254822, acc 1.0
Step 1790: loss 0.018632613122463226, acc 1.0
Step 1791: loss 0.30679067969322205, acc 0.9375
Step 1792: loss 0.0034257262013852596, acc 1.0
Step 1793: loss 0.09098061919212341, acc 0.9375
Step 1794: loss 0.27026480436325073, acc 0.875
Step 1795: loss 0.002493253443390131, acc 1.0
Step 1796: loss 0.02405066229403019, acc 1.0
Step 1797: loss 0.0369098037481308, acc 1.0
Step 1798: loss 0.07696408033370972, acc 0.96875
Step 1799: loss 0.04424169659614563, acc 1.0
Step 1800: loss 0.0748668983578682, acc 0.96875
Step 1801: loss 0.0019786711782217026, acc 1.0
Step 1802: loss 0.06667664647102356, acc 0.96875
Step 1803: loss 0.034773655235767365, acc 1.0
Step 1804: loss 0.01706225983798504, acc 1.0
Step 1805: loss 0.031573452055454254, acc 1.0
Step 1806: loss 0.0374608

Step 1931: loss 0.03965652361512184, acc 0.96875
Step 1932: loss 0.1607753187417984, acc 0.96875
Step 1933: loss 0.012558305636048317, acc 1.0
Step 1934: loss 0.018382716923952103, acc 1.0
Step 1935: loss 0.04054037109017372, acc 1.0
Step 1936: loss 0.11003036051988602, acc 0.96875
Step 1937: loss 0.010942963883280754, acc 1.0
Step 1938: loss 0.029635220766067505, acc 1.0
Step 1939: loss 0.01753026619553566, acc 1.0
Step 1940: loss 0.024715464562177658, acc 1.0
Step 1941: loss 0.024224672466516495, acc 1.0
Step 1942: loss 0.01865004003047943, acc 1.0
Step 1943: loss 0.06687086820602417, acc 0.96875
Step 1944: loss 0.009048078209161758, acc 1.0
Step 1945: loss 0.0455133281648159, acc 1.0
Step 1946: loss 0.11170967668294907, acc 0.96875
Step 1947: loss 0.22112582623958588, acc 0.9375
Step 1948: loss 0.14543963968753815, acc 0.96875
Step 1949: loss 0.05772865191102028, acc 0.96875
Step 1950: loss 0.016759762540459633, acc 1.0
Step 1951: loss 0.03916454687714577, acc 0.96875
Step 1952: los

In [18]:
# Load variable values from the checkpoint written at global step 3822.
# NOTE(review): the run directory '12' is hard-coded rather than derived from
# VERSION -- confirm they are meant to stay in sync.
saver.restore(session, 'classify data/12/classifier.cpkt-3822')

INFO:tensorflow:Restoring parameters from classify data/12/classifier.cpkt-3822


Restoring parameters from classify data/12/classifier.cpkt-3822


In [48]:
# Evaluate on the test set in its original order, collecting per-example
# predictions and probabilities for the reports below.
all_pred, all_prob = [], []
total_loss = total_acc = 0.0
count = 0
for embed, labels, lengths in get_batch(X_test, y_test, 32, shuffle=False):
    feed = {x: embed, y: labels, seq_len: lengths, drop: 0.0, drop_e: 0.0}
    batch_prob, batch_pred, batch_loss, batch_acc = session.run([prob, pred, loss, acc], feed_dict=feed)
    print('Evaluation: loss {}, acc {}'.format(batch_loss, batch_acc))

    # Weight the batch means by batch size to get per-example totals.
    n_examples = len(embed)
    total_loss += batch_loss * n_examples
    total_acc += batch_acc * n_examples
    count += n_examples

    all_pred.extend(batch_pred)
    all_prob.extend(batch_prob)
print('Evaluation: total loss {}, total acc {}'.format(total_loss / count, total_acc / count))

Evaluation: loss 0.2792838513851166, acc 0.9375
Evaluation: loss 0.4210464656352997, acc 0.90625
Evaluation: loss 0.12554848194122314, acc 0.96875
Evaluation: loss 0.003417950589209795, acc 1.0
Evaluation: loss 0.31914669275283813, acc 0.9375
Evaluation: loss 0.3984847664833069, acc 0.9375
Evaluation: loss 0.1848779171705246, acc 0.9375
Evaluation: loss 0.15837159752845764, acc 0.9375
Evaluation: loss 0.19729937613010406, acc 0.9375
Evaluation: loss 0.5918911695480347, acc 0.875
Evaluation: loss 0.19225803017616272, acc 0.96875
Evaluation: loss 0.5521550178527832, acc 0.9375
Evaluation: loss 0.32669249176979065, acc 0.9375
Evaluation: loss 0.021329544484615326, acc 1.0
Evaluation: loss 0.06677235662937164, acc 0.96875
Evaluation: loss 0.2697557210922241, acc 0.9375
Evaluation: loss 0.25986945629119873, acc 0.90625
Evaluation: loss 0.06645567715167999, acc 0.96875
Evaluation: loss 0.005481920205056667, acc 1.0
Evaluation: loss 0.6838467717170715, acc 0.875
Evaluation: loss 0.02223216556

In [19]:
from sklearn.metrics import classification_report, accuracy_score

In [50]:
# Map predicted class ids back to label names, then score against y_test.
pred_labels = [idx2class[c] for c in all_pred]
print(classification_report(y_true=y_test, y_pred=pred_labels))

             precision    recall  f1-score   support

    fashion       0.97      0.93      0.95       583
  lifestyle       0.96      0.95      0.95       981
      music       0.96      0.93      0.94       597
 technology       0.95      0.98      0.97      1029
     travel       0.94      0.97      0.95       722

avg / total       0.96      0.96      0.95      3912



In [51]:
# List every misclassified test example with its class probabilities.
count = 0
for yp, yt, (_, mess), pr in zip(all_pred, y_test, X_test, all_prob):
    yp = idx2class[yp]
    if yp != yt:
        count += 1
        print('True:', yt)
        print('Pred:', yp)
        # Key each probability by the class its index maps to, rather than
        # relying on class2idx's key order matching the id order.
        print('Prob:', {idx2class[i]: p for i, p in enumerate(pr)})
        print(mess)
        print('-' * 80)
print('Total:', count)

True: travel
Pred: technology
Prob: {'fashion': 6.3692255e-07, 'technology': 0.9964719, 'music': 4.807285e-07, 'lifestyle': 7.049488e-06, 'travel': 0.0035201053}
Nếu bạn thích chinh phục những cung đường mạo hiểm thì đây chính là gói bảo hiểm dành cho bạn. Bảo hiểm FWD đang tặng MIỄN PHÍ 100,000 gói bảo hiểm tai nạn cá nhân, bảo hiểm đến 300 TRIỆU ĐỒNG. Nhanh tay đăng ký và nhận ngay gói bảo hiểm chỉ trong vòng 3 phút tại: https://tiki.vn/top/fwd
#FWDbảohiểm #Sốngđầytừhômnay
--------------------------------------------------------------------------------
True: travel
Pred: music
Prob: {'fashion': 0.0035031168, 'technology': 0.18375571, 'music': 0.6855716, 'lifestyle': 0.086963035, 'travel': 0.040206585}
Album được sưu tập từ FB bạn Huyền Phạm (https://www.facebook.com/HuyenCherry93) và FB Món Ngon Hà Nội (https://www.facebook.com/HNmonngon)

ALBUM vẫn còn đang cập nhật tiếp. Ôi nhiều món quá update không xuể được ạ.

#Nhim
---------------------------------------------------------------

In [3]:
from sklearn.linear_model import LogisticRegression

In [15]:
# Logistic-regression model: L-BFGS solver, 4 parallel jobs, verbose fitting output.
classifier = LogisticRegression(verbose=1, solver='lbfgs', multi_class='auto', n_jobs=4)

In [10]:
# Stack element [0] of every training example into one array along a new axis 0.
# NOTE(review): np.stack needs all of those elements to share one shape, so
# X_train presumably holds fixed-size features here -- confirm.
train_data = np.stack([x[0] for x in X_train], axis=0)

In [16]:
# Fit the logistic-regression model on the stacked training features and their labels.
classifier.fit(train_data, y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:   11.6s finished


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='auto', n_jobs=4,
          penalty='l2', random_state=None, solver='lbfgs', tol=0.0001,
          verbose=1, warm_start=False)

In [17]:
# Stack element [0] of every test example, mirroring how train_data is built.
test_data = np.stack([x[0] for x in X_test], axis=0)

In [18]:
# Predicted labels for the test features from the fitted logistic-regression model.
test_pred = classifier.predict(test_data)

In [23]:
# Per-class precision/recall/F1 of the logistic-regression predictions on the test set.
print(classification_report(y_true=y_test, y_pred=test_pred))

              precision    recall  f1-score   support

     fashion       0.60      0.58      0.59       583
   lifestyle       0.61      0.64      0.63       981
       music       0.61      0.58      0.59       597
  technology       0.66      0.68      0.67      1029
      travel       0.69      0.67      0.68       722

   micro avg       0.64      0.64      0.64      3912
   macro avg       0.63      0.63      0.63      3912
weighted avg       0.64      0.64      0.64      3912



In [21]:
# List every test example the logistic-regression model got wrong.
count = 0
for predicted, actual, (_, mess) in zip(test_pred, y_test, X_test):
    if predicted == actual:
        continue
    count += 1
    print('True:', actual)
    print('Pred:', predicted)
    print(mess)
    print('-' * 80)
print('Total:', count)

True: technology
Pred: music
Trên tay Samsung Galaxy A8/A8+. Anh em nhớ bình luận để trúng Loa Bluetooth JBL GO và chân máy Mefoto Mini Selfie.

88 khách hàng đầu tiên đặt hàng trước A8, A8 Plus tại Tiki.vn sẽ được nhận thêm Adapter Sạc Không Dây Tronsmart 10W QC2.0 - WQ10 trị giá 600k. Link đặt hàng Tiki: http://bit.ly/SamsungA8_Tiki.

Livestream sponsor by Tiki #Tinhte #SamsungA8
--------------------------------------------------------------------------------
True: technology
Pred: lifestyle
Dù chưa thật sự hoàn hảo, nhưng đây chính là niềm hi vọng cho những bệnh nhân có khuôn mặt bị biến dạng nặng, do tại nạn hoặc bệnh tật mà không thể khôi phục bằng phẫu thuật thẩm mỹ.

#GenK
--------------------------------------------------------------------------------
True: travel
Pred: technology
Nếu bạn thích chinh phục những cung đường mạo hiểm thì đây chính là gói bảo hiểm dành cho bạn. Bảo hiểm FWD đang tặng MIỄN PHÍ 100,000 gói bảo hiểm tai nạn cá nhân, bảo hiểm đến 300 TRIỆU ĐỒNG. Nhanh 

True: fashion
Pred: technology
"Áo dài, đôi cánh, nón lá và cả lá cờ trong VIFW đã thoát khỏi sức ỳ tâm lý rằng "cái gì dân tộc là sến". Công Trí đã mở màn ngoạn mục cho một Tuần lễ của văn minh và hội nhập. Về chuyên môn, đây là định nghĩa chính xác nhất về thời trang" - Tùng Leo
--------------------------------------------------------------------------------
True: technology
Pred: lifestyle
Google luôn thích làm những điều điên rồ, những dự án không tưởng nhưng đó mới là điều khiến người ta thêm yêu công ty này <3

http://genk.vn/internet/hay-tin-vao-chung-toi-google-se-khong-that-bai-20150604174308379.chn
--------------------------------------------------------------------------------
True: technology
Pred: lifestyle
TUYỂN GẤP vị trí SENIOR MOBILE DEVERLOPER tại LUXOFT Việt Nam: 

- Tham gia các dự án của Ngân Hàng 
- Mức lương hấp dẫn. 
- Lương tháng thứ 13
- Đi công tác nước ngoài và làm việc trong môi trường quốc tế

>> YÊU CẦU: Có kiến thức về nền tảng iOS và Android.

NỘP CV và

ELLE chúc mừng 3 bạn sau đây đã trở thành chủ nhân 3 giải thưởng cao nhất của ELLE OFFICE CHIC 2012:

Giải nhất: Bạn Doãn Thu Hằng
Giải nhì: Bạn Bùi Thu Thủy
Giải ba: Bạn Lê Phương Vy

ELLE sẽ liên hệ trực tiếp với các bạn về hình thức trao quà nhé!
--------------------------------------------------------------------------------
True: technology
Pred: lifestyle
Các chuyên gia từ Cyradar phân tích kỹ thuật của mã độc lây lan qua facebook mấy ngày gần đây.
Họ còn biết rõ thông tin của kẻ phát tán và hiện tại mã này chưa lây qua điện thoại
Tuy nhiên người dùng cần phải đề phòng biến thể có thể sắp tới.
Nguồn Cyradar team
--------------------------------------------------------------------------------
True: music
Pred: fashion
Sự kết hợp của EPIK HIGH và IU với ca khúc Love Story đã đạt Perfect All Kill lần đầu vào 1h30 KST ngày hôm nay. Hiện tại ca khúc vẫn đang #1 tại tất cả các BXH ngày + RealTime của Hàn Quốc.
#lề: Bài hát này cực hay luôn đó, mỗi ngày nghe mấy chục lần không chán =)))