In [1]:
import tensorflow as tf
import numpy as np
import time

from logistic_regression import LogisticRegression
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics import classification_report


VOCAB_SIZE = 50000


def transform(X, tfidf):
    t0 = time.time()
    count = np.zeros((len(X), VOCAB_SIZE))
    for i, indices in enumerate(X):
        for idx in indices:
            count[i, idx] += 1
    print("%.2f secs ==> Document-Term Matrix"%(time.time()-t0))

    t0 = time.time()
    X = tfidf.fit_transform(count)
    print("%.2f secs ==> TF-IDF transform"%(time.time()-t0))
    return X


if __name__ == '__main__':
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
        num_words=VOCAB_SIZE)
    
    tfidf = TfidfTransformer()
    X_train = transform(X_train, tfidf)
    X_test = transform(X_test, tfidf)

    model = LogisticRegression(VOCAB_SIZE, 2)
    model.fit(X_train, y_train, n_epoch=4, batch_size=64, val_data=(X_test, y_test))
    
    print(classification_report(y_test, model.predict(X_test)))

4.29 secs ==> Document-Term Matrix
37.73 secs ==> TF-IDF transform
4.37 secs ==> Document-Term Matrix
29.20 secs ==> TF-IDF transform
Train (25000, 50000) | Test (25000, 50000)
Epoch 1/4 | Step 50/390 | train_loss: 0.6247 | train_acc: 0.7969 | lr: 0.0050
Epoch 1/4 | Step 100/390 | train_loss: 0.5613 | train_acc: 0.8594 | lr: 0.0050
Epoch 1/4 | Step 150/390 | train_loss: 0.5235 | train_acc: 0.8281 | lr: 0.0050
Epoch 1/4 | Step 200/390 | train_loss: 0.4868 | train_acc: 0.8125 | lr: 0.0050
Epoch 1/4 | Step 250/390 | train_loss: 0.4666 | train_acc: 0.8750 | lr: 0.0050
Epoch 1/4 | Step 300/390 | train_loss: 0.3884 | train_acc: 0.9219 | lr: 0.0050
Epoch 1/4 | Step 350/390 | train_loss: 0.4240 | train_acc: 0.8281 | lr: 0.0050
Epoch 1/4 | train_loss: 0.3725 | train_acc: 0.9000 | test_loss: 0.3878 | test_acc: 0.8748 | lr: 0.0050
Epoch 2/4 | Step 50/390 | train_loss: 0.3390 | train_acc: 0.8906 | lr: 0.0050
Epoch 2/4 | Step 100/390 | train_loss: 0.3726 | train_acc: 0.8281 | lr: 0.0050
Epoch 2/4 |