In [1]:
import tensorflow as tf
import numpy as np
import time

from logistic_regression import LogisticRegression
from sklearn.feature_extraction.text import TfidfTransformer


VOCAB_SIZE = 20000


def transform(X, tfidf, *, fit=True, vocab_size=None):
    """Turn sequences of token ids into TF-IDF weighted features.

    A dense document-term count matrix is built first (one row per
    document, one column per token id) and then passed through ``tfidf``.
    Elapsed time of both stages is printed.

    Args:
        X: Sized iterable of documents, each an iterable of integer token
            ids in ``[0, vocab_size)``.
        tfidf: Object exposing ``fit_transform(counts)`` and
            ``transform(counts)`` (a ``TfidfTransformer`` in this script).
        fit: When true (default) the IDF statistics are learned from ``X``
            via ``fit_transform``. Pass ``False`` for held-out data so the
            statistics learned on the training split are reused instead of
            being re-estimated from the evaluation data.
        vocab_size: Number of columns of the count matrix. Defaults to the
            module-level ``VOCAB_SIZE``.

    Returns:
        Whatever ``tfidf.fit_transform`` / ``tfidf.transform`` returns for
        the count matrix.

    Raises:
        ValueError: If a token id is negative or not below ``vocab_size``.
    """
    if vocab_size is None:
        vocab_size = VOCAB_SIZE

    t0 = time.time()
    count = np.zeros((len(X), vocab_size))
    for row, indices in enumerate(X):
        ids = np.asarray(indices, dtype=np.intp)
        if ids.size:
            # bincount tallies every id of the document in a single pass;
            # an id >= vocab_size yields a longer vector and the assignment
            # fails loudly instead of silently dropping tokens.
            count[row] = np.bincount(ids, minlength=vocab_size)
    print("%.2f secs ==> Document-Term Matrix"%(time.time()-t0))

    t0 = time.time()
    # Fitting must only happen on training data; held-out data is merely
    # projected with the already learned IDF weights.
    X = tfidf.fit_transform(count) if fit else tfidf.transform(count)
    print("%.2f secs ==> TF-IDF transform"%(time.time()-t0))
    return X


if __name__ == '__main__':
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
        num_words=VOCAB_SIZE)

    tfidf = TfidfTransformer()
    # Learn the IDF weights on the training split only ...
    X_train = transform(X_train, tfidf)
    # ... and reuse them for the test split so both splits share one
    # feature space and no test-set statistics leak into the features.
    X_test = transform(X_test, tfidf, fit=False)

    model = LogisticRegression(VOCAB_SIZE, 2)
    model.fit(X_train, y_train, n_epoch=2, batch_size=32, val_data=(X_test, y_test))
    y_pred = model.predict(X_test)

    final_acc = (y_pred == y_test).mean()
    print("final testing accuracy: %.4f" % final_acc)


3.87 secs ==> Document-Term Matrix
10.46 secs ==> TF-IDF transform
3.95 secs ==> Document-Term Matrix
9.61 secs ==> TF-IDF transform
Train (25000, 20000) | Test (25000, 20000)
Epoch 1/2 | Step 50/781 | train_loss: 0.6363 | train_acc: 0.8125 | lr: 0.0050
Epoch 1/2 | Step 100/781 | train_loss: 0.6032 | train_acc: 0.7812 | lr: 0.0050
Epoch 1/2 | Step 150/781 | train_loss: 0.5807 | train_acc: 0.7188 | lr: 0.0050
Epoch 1/2 | Step 200/781 | train_loss: 0.4916 | train_acc: 0.8438 | lr: 0.0050
Epoch 1/2 | Step 250/781 | train_loss: 0.4711 | train_acc: 0.8438 | lr: 0.0050
Epoch 1/2 | Step 300/781 | train_loss: 0.4072 | train_acc: 0.8750 | lr: 0.0050
Epoch 1/2 | Step 350/781 | train_loss: 0.4182 | train_acc: 0.8750 | lr: 0.0050
Epoch 1/2 | Step 400/781 | train_loss: 0.4244 | train_acc: 0.8750 | lr: 0.0050
Epoch 1/2 | Step 450/781 | train_loss: 0.3532 | train_acc: 0.9375 | lr: 0.0050
Epoch 1/2 | Step 500/781 | train_loss: 0.3727 | train_acc: 0.8750 | lr: 0.0050
Epoch 1/2 | Step 550/781 | train_lo