In [11]:
# Softmax Review Classifier
import tensorflow as tf
import numpy as np
# Pin TensorFlow's random seed so repeated runs are reproducible.
tf.set_random_seed(777)

# Training set for the movie-review classifier: in every CSV row all columns
# except the last are features, and the last column is the label.
xy = np.loadtxt(
    '../naver_movie_review/review.csv',
    dtype=np.float32,
    delimiter=',',
)
# Slice (rather than index) the last column so the labels stay 2-D: (N, 1).
x_data, y_data = xy[:, :-1], xy[:, -1:]

print(x_data.shape, y_data.shape)

nb_classes = 2  # 0(negative) or 1(positive)
# The number of token types (feature columns). Derived from the loaded
# training data instead of being hard-coded, so the graph always matches the
# CSV; it is 279 for the original review.csv.
nb_features = x_data.shape[1]

# Graph inputs: a batch of feature rows and the matching label column.
X = tf.placeholder(tf.float32, [None, nb_features])
Y = tf.placeholder(tf.int32, [None, 1])  # 0 or 1
# one_hot appends a class axis to the (N, 1) labels -> (N, 1, nb_classes) ...
Y_one_hot = tf.one_hot(Y, nb_classes)  # one hot
print("one_hot", Y_one_hot)
# ... so drop the singleton axis to get (N, nb_classes), matching the logits.
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])
print("reshape", Y_one_hot)

# Single linear layer: one weight column and one bias per class.
W = tf.Variable(tf.random_normal([nb_features, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

# tf.nn.softmax computes softmax activations
# softmax = exp(logits) / reduce_sum(exp(logits), dim)
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

# Cross entropy cost/loss.
# The loss op takes the raw logits (not `hypothesis`) together with the
# one-hot labels and yields one loss value per example; the mean is minimized.
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                 labels=Y_one_hot)
cost = tf.reduce_mean(cost_i)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Predicted class = index of the largest probability; accuracy is the
# fraction of rows whose predicted class equals the one-hot label's class.
prediction = tf.argmax(hypothesis, 1)
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Train on the full training set, then score the held-out test file.
nb_steps = 15000

# Test set uses the same layout as the training CSV: features, then label.
xy_test = np.loadtxt(
    '../naver_movie_review/test.csv',
    dtype=np.float32,
    delimiter=',',
)
x_test, y_test = xy_test[:, :-1], xy_test[:, -1:]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Every step feeds the whole training set (full-batch gradient descent).
    train_feed = {X: x_data, Y: y_data}
    for step in range(nb_steps):
        sess.run(optimizer, feed_dict=train_feed)
        if step % 100:
            continue
        # Every 100th step: report loss and accuracy on the training data.
        loss, acc = sess.run([cost, accuracy], feed_dict=train_feed)
        print("Step: {:5}\tLoss: {:.3f}\tAcc: {:.2%}".format(step, loss, acc))

    # Predict the class of every test row.
    pred = sess.run(prediction, feed_dict={X: x_test})
    # y_test is (N, 1); flatten it to (N,) so it lines up with pred.
    true_cnt = 0
    for guess, raw_label in zip(pred, y_test.ravel()):
        label = int(raw_label)
        hit = guess == label
        print("[{}] Prediction: {} True Y: {}".format(hit, guess, label))
        if hit:
            true_cnt += 1
    # Number of correctly classified test rows.
    print(true_cnt)

(150000, 279) (150000, 1)
one_hot Tensor("one_hot_5:0", shape=(?, 1, 2), dtype=float32)
reshape Tensor("Reshape_20:0", shape=(?, 2), dtype=float32)
Step:     0	Loss: 1.467	Acc: 48.84%
Step:   100	Loss: 1.287	Acc: 49.46%
Step:   200	Loss: 1.190	Acc: 50.94%
Step:   300	Loss: 1.111	Acc: 52.78%
Step:   400	Loss: 1.046	Acc: 54.14%
Step:   500	Loss: 0.992	Acc: 55.74%
Step:   600	Loss: 0.945	Acc: 56.94%
Step:   700	Loss: 0.906	Acc: 58.12%
Step:   800	Loss: 0.871	Acc: 58.98%
Step:   900	Loss: 0.841	Acc: 59.68%
Step:  1000	Loss: 0.815	Acc: 60.48%
Step:  1100	Loss: 0.792	Acc: 61.14%
Step:  1200	Loss: 0.771	Acc: 61.88%
Step:  1300	Loss: 0.752	Acc: 62.62%
Step:  1400	Loss: 0.735	Acc: 63.14%
Step:  1500	Loss: 0.720	Acc: 63.77%
Step:  1600	Loss: 0.706	Acc: 64.39%
Step:  1700	Loss: 0.693	Acc: 64.87%
Step:  1800	Loss: 0.681	Acc: 65.22%
Step:  1900	Loss: 0.670	Acc: 65.57%
Step:  2000	Loss: 0.660	Acc: 65.89%
Step:  2100	Loss: 0.651	Acc: 66.22%
Step:  2200	Loss: 0.643	Acc: 66.53%
Step:  2300	Loss: 0.635	