In [2]:
# Third-party dependencies for the whole notebook, grouped in one place.
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.2.0


In [3]:
# Load the IMDB movie-review dataset together with its metadata object.
imdb, info = tfds.load(
    "imdb_reviews",
    with_info=True,      # second return value is the DatasetInfo shown below
    as_supervised=True,  # examples are iterated as (text, label) pairs
)

In [7]:
# Display the dataset metadata returned by tfds.load (features, splits, citation).
info

tfds.core.DatasetInfo(
    name='imdb_reviews',
    version=1.0.0,
    description='Large Movie Review Dataset.
This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.',
    homepage='http://ai.stanford.edu/~amaas/data/sentiment/',
    features=FeaturesDict({
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
        'text': Text(shape=(), dtype=tf.string),
    }),
    total_num_examples=100000,
    splits={
        'test': 25000,
        'train': 25000,
        'unsupervised': 50000,
    },
    supervised_keys=('text', 'label'),
    citation="""@InProceedings{maas-EtAl:2011:ACL-HLT2011,
      author    = {Maas, Andrew L.  and  Daly, Raymond E.  and  Pham, Peter T.  and  Huang, Dan  and  Ng, Andrew Y.  and  Potts, Christopher},
      title     = {Learning Word

In [6]:
train_data, test_data = imdb['train'], imdb['test']


def _split_examples(dataset):
    """Materialise a supervised dataset into Python lists.

    Args:
        dataset: iterable yielding (text_tensor, label_tensor) pairs, where
            each element exposes ``.numpy()`` and the text decodes as UTF-8.

    Returns:
        A ``(sentences, labels)`` tuple: ``sentences`` is a list of ``str`` and
        ``labels`` is a list of the values returned by ``label.numpy()``.
    """
    sentences, labels = [], []
    for text_tensor, label_tensor in dataset:
        # The text arrives as a bytes tensor; decode it so the tokenizer gets str.
        sentences.append(text_tensor.numpy().decode('utf8'))
        labels.append(label_tensor.numpy())
    return sentences, labels


# One helper for both splits so train and test are always extracted the same way.
training_sentences, training_labels = _split_examples(train_data)
testing_sentences, testing_labels = _split_examples(test_data)

# Keras expects array-like labels rather than plain Python lists.
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)

training_labels_final

array([0, 0, 0, ..., 0, 0, 1])

In [13]:
# Text-preprocessing hyperparameters.
vocab_size = 1000      # passed to Tokenizer(num_words=...) and reused as the Embedding input size
embedding_dim = 16     # size of each learned word vector
trunc_type = 'post'    # when a review is too long, drop tokens from the end
oov_token = '<OOV>'    # placeholder token for words outside the vocabulary
max_length = 120       # every review is padded/truncated to this many tokens

# Fit the vocabulary on the training text only, so no test-set words leak in.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences, maxlen=max_length, truncating=trunc_type)

# Apply the SAME truncation policy to the test set. Previously `truncating`
# was omitted here, so pad_sequences fell back to its default ('pre') and long
# test reviews kept their last tokens while training reviews kept their first.
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, truncating=trunc_type)

In [21]:
# Small sentiment classifier: embedding -> flatten -> dense -> sigmoid output.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length)
)
# Collapse the per-token embeddings into a single feature vector per review.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(6, activation='relu'))
# Single sigmoid unit for the binary label.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 120, 16)           16000     
_________________________________________________________________
flatten_3 (Flatten)          (None, 1920)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 6)                 11526     
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 7         
Total params: 27,533
Trainable params: 27,533
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Train on the padded training reviews and validate on the test split each epoch.
num_epochs = 10
model.fit(
    x=padded,
    y=training_labels_final,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels_final),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fab0da38ac0>

In [23]:
# The first layer of the model is the Embedding layer; its first weight array
# is the learned embedding matrix.
e = model.layers[0]
layer_weights = e.get_weights()
weights = layer_weights[0]
print(weights.shape)

(1000, 16)


In [24]:
!pip install git+https://github.com/vik228/deeplearning_ai.git

Collecting git+https://github.com/vik228/deeplearning_ai.git
  Cloning https://github.com/vik228/deeplearning_ai.git to /private/var/folders/65/lws8zxy92071v3hg9g9mt8wc0000gn/T/pip-req-build-s0_2ihl3
  Running command git clone -q https://github.com/vik228/deeplearning_ai.git /private/var/folders/65/lws8zxy92071v3hg9g9mt8wc0000gn/T/pip-req-build-s0_2ihl3
Building wheels for collected packages: deeplearning-ai-vik228
  Building wheel for deeplearning-ai-vik228 (setup.py) ... [?25ldone
[?25h  Created wheel for deeplearning-ai-vik228: filename=deeplearning_ai_vik228-0.0.1-py3-none-any.whl size=15495 sha256=da445fcbe6b4627a397888b8e002dcb1f3059c4a5be5c16e2bc261412dc93691
  Stored in directory: /private/var/folders/65/lws8zxy92071v3hg9g9mt8wc0000gn/T/pip-ephem-wheel-cache-mp3lgvmg/wheels/bb/52/04/8d929c60d351aa4db7a5c1bae56f17267680b45da408576ae1
Successfully built deeplearning-ai-vik228
