In [1]:
# this is a code cell that contains Python code
# we usually start with the imports
# these are the imports we usually use for machine learning
import numpy as np
import scipy
import scipy.sparse as sps
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import pandas as pd
import tensorflow as tf
from nltk.corpus import movie_reviews

In [2]:
num_neurons = 20
num_features = 5000

In [3]:
# getting the data, like last time

corpus, targets = zip(*[(movie_reviews.raw(fileid), category)
                         for category in movie_reviews.categories() for fileid in movie_reviews.fileids(category)])

count_vectorizer = CountVectorizer(stop_words='english', max_df=0.95, min_df=2, max_features=num_features)
bows = count_vectorizer.fit_transform(corpus)

# convert targets to numbers
targets = np.array([0 if target == 'neg' else 1 for target in targets])

bows = bows.astype(np.float32)
targets = targets.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(bows, targets, test_size=0.1, shuffle=True)

# the problem: we have sparse arrays, but neural network need dense arrays!
# the solution will be word embeddings, here we just convert to dense arrays
X_train = X_train.toarray()
X_test = X_test.toarray()

In [4]:
tf.enable_eager_execution()

In [5]:
# now we can see the tensor instantly
tf.ones((4, 4))

<tf.Tensor: id=2, shape=(4, 4), dtype=float32, numpy=
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]], dtype=float32)>

In [6]:
# we also see the results of an operation instantly
tf.add(tf.constant([1, 2, 3]), tf.constant([4, 5, 6]))

<tf.Tensor: id=6, shape=(3,), dtype=int32, numpy=array([5, 7, 9], dtype=int32)>

In [7]:
tf.zeros((4, 4)) + 4

<tf.Tensor: id=12, shape=(4, 4), dtype=float32, numpy=
array([[4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.]], dtype=float32)>

In [8]:
# variables are for the parameters (weights) for the NN
W1 = tf.Variable(tf.random_normal((num_neurons, num_features), stddev=0.01), name='W1')
b1 = tf.Variable(tf.random_normal((num_neurons, 1), stddev=0.01), name='b1')
W2 = tf.Variable(tf.random_normal((1, num_neurons), stddev=0.01), name='W2')
b2 = tf.Variable(tf.random_normal((1, 1), stddev=0.01), name='b2')
W1, b1, W2, b2

(<tf.Variable 'W1:0' shape=(20, 5000) dtype=float32, numpy=
 array([[ 0.00861787,  0.02128106,  0.00960503, ...,  0.01153171,
         -0.01031314, -0.00281827],
        [ 0.00824988,  0.00486534, -0.00140077, ..., -0.0033846 ,
          0.00329555,  0.0065271 ],
        [ 0.02080767, -0.00551045,  0.00208974, ...,  0.00487724,
         -0.00768933, -0.01369358],
        ...,
        [-0.03269031, -0.00355082,  0.00315598, ...,  0.01347431,
         -0.00355484,  0.01264856],
        [-0.01359317, -0.00880538,  0.00645966, ..., -0.00399274,
         -0.00305909,  0.00597724],
        [ 0.00789742,  0.008413  , -0.01262239, ...,  0.01687056,
          0.00802961,  0.00191226]], dtype=float32)>,
 <tf.Variable 'b1:0' shape=(20, 1) dtype=float32, numpy=
 array([[-0.02093641],
        [-0.0023646 ],
        [-0.00238637],
        [-0.00131463],
        [ 0.00071911],
        [-0.00318027],
        [ 0.0073009 ],
        [ 0.00193804],
        [ 0.0144193 ],
        [ 0.00543289],
        [ 

In [None]:
# we don't need the placeholders

In [9]:
# the network itself
def forward_pass(X):
    z = tf.add(tf.matmul(W1, X), b1)
    a = tf.nn.relu(z)
    z = tf.add(tf.matmul(W2, a), b2)
    #a = tf.nn.sigmoid(z) # already in loss function
    return z

In [10]:
# the loss function
def loss(a, y):
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=y))

In [11]:
def minibatches(X, y, minibatch_size):

# shuffling    
    perm = np.random.permutation(X.shape[0])
    X_shuffled = X[perm]
    y_shuffled = y[perm]
    
    mini_batches = [(X[i*minibatch_size:(i+1)*minibatch_size], y[i*minibatch_size:(i+1)*minibatch_size])
                   for i in range(X.shape[0] // minibatch_size)]
# we lost some examples at the end, doesn't really matter for this example
    return mini_batches

In [12]:
num_epochs = 10

$W_1 := W_1 - lr*\frac{loss}{dW_1}$

In [13]:
learning_rate = 0.3
minibatch_size = 32
for epoch in range(num_epochs):
# minibatch training
    epoch_loss = 0
    num_minibatches = X_train.shape[0] // minibatch_size
    for X_mini, y_mini in minibatches(X_train, y_train, minibatch_size):
# we use a GradientTape to record the gradient for each minibatch
        with tf.GradientTape() as t:
            # important: we have to transpose here! each example is in a column
            mini_loss = loss(forward_pass(X_mini.T), y_mini[None, :])
        # update the weights
        dW1, db1, dW2, db2 = t.gradient(mini_loss, [W1, b1, W2, b2])
        W1.assign_sub(learning_rate * dW1)
        b1.assign_sub(learning_rate * db1)
        W2.assign_sub(learning_rate * dW2)
        b2.assign_sub(learning_rate * db2)
        epoch_loss += mini_loss
    epoch_loss /= num_minibatches
    predictions = forward_pass(X_train.T)
    predictions = np.array([0 if pred < 0 else 1 for pred in tf.squeeze(predictions)])
    accuracy = (y_train == predictions).sum() / len(y_train)
    print("Loss in epoch {}: {}, accuracy: {}".format(epoch, epoch_loss, accuracy))
# accuracy on test set
predictions = forward_pass(X_test.T)
predictions = np.array([0 if pred < 0 else 1 for pred in tf.squeeze(predictions)])
accuracy = (y_test == predictions).sum() / len(y_test)
print("Accuracy on test set: {}".format(accuracy))

Loss in epoch 0: 0.6551772952079773, accuracy: 0.8216666666666667
Loss in epoch 1: 0.4975118339061737, accuracy: 0.6933333333333334
Loss in epoch 2: 0.40052077174186707, accuracy: 0.9477777777777778
Loss in epoch 3: 0.2790760397911072, accuracy: 0.8588888888888889
Loss in epoch 4: 0.26597705483436584, accuracy: 0.9283333333333333
Loss in epoch 5: 0.20803622901439667, accuracy: 0.9672222222222222
Loss in epoch 6: 0.2595866322517395, accuracy: 0.7794444444444445
Loss in epoch 7: 0.15296688675880432, accuracy: 0.9755555555555555
Loss in epoch 8: 0.035927943885326385, accuracy: 0.9972222222222222
Loss in epoch 9: 0.009744838811457157, accuracy: 0.9988888888888889
Accuracy on test set: 0.875
