In [3]:
# ======================================================================
# There are 5 questions in this exam with increasing difficulty from 1-5.
# Please note that the weight of the grade for the question is relative
# to its difficulty. So your Category 1 question will score significantly
# less than your Category 5 question.
#
# Don't use lambda layers in your model.
# You do not need them to solve the question.
# Lambda layers are not supported by the grading infrastructure.
#
# You must use the Submit and Test button to submit your model
# at least once in this category before you finally submit your exam,
# otherwise you will score zero for this category.
# ======================================================================
#
# NLP QUESTION
#
# Build and train a classifier for the sarcasm dataset.
# The classifier should have a final layer with 1 neuron activated by sigmoid as shown.
# It will be tested against a number of sentences that the network hasn't previously seen
# and you will be scored on whether sarcasm was correctly detected in those sentences.

import json
import tensorflow as tf
import numpy as np
import urllib
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


def solution_model():
    """Train and return a binary sarcasm classifier for headlines.

    Downloads the sarcasm dataset to ``sarcasm.json`` in the current working
    directory, reads each record's ``headline`` and ``is_sarcastic`` fields,
    tokenizes and pads the headlines, and fits a small embedding-based
    network whose final layer is a single sigmoid-activated neuron.

    The first ``training_size`` records are used for training; the remaining
    records are passed to ``fit`` as validation data.

    Returns:
        The trained ``tf.keras.Sequential`` model.
    """
    # A bare ``import urllib`` does not load the ``request`` submodule, so
    # ``urllib.request`` would only resolve if some other module happened to
    # import it first. Import it explicitly here so the download always works.
    import urllib.request

    url = 'https://storage.googleapis.com/download.tensorflow.org/data/sarcasm.json'
    urllib.request.urlretrieve(url, 'sarcasm.json')

    # Explicit encoding so the read does not depend on the platform default.
    with open("./sarcasm.json", 'r', encoding='utf-8') as f:
        datastore = json.load(f)

    # DO NOT CHANGE THIS CODE OR THE TESTS MAY NOT WORK
    vocab_size = 1000
    embedding_dim = 16
    max_length = 120
    trunc_type = 'post'
    padding_type = 'post'
    oov_tok = "<OOV>"
    training_size = 20000

    sentences = [item['headline'] for item in datastore]
    labels = [item['is_sarcastic'] for item in datastore]

    training_sentences = sentences[:training_size]
    testing_sentences = sentences[training_size:]
    # Keras expects array-like labels, so convert the Python lists up front.
    training_labels = np.array(labels[:training_size])
    testing_labels = np.array(labels[training_size:])

    # Fit the vocabulary on the training split only so the held-out
    # headlines do not influence which words are kept.
    tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
    tokenizer.fit_on_texts(training_sentences)

    # pad_sequences already returns NumPy arrays, so no further conversion
    # is needed before handing them to ``fit``.
    training_padded = pad_sequences(
        tokenizer.texts_to_sequences(training_sentences),
        maxlen=max_length,
        padding=padding_type,
        truncating=trunc_type,
    )
    testing_padded = pad_sequences(
        tokenizer.texts_to_sequences(testing_sentences),
        maxlen=max_length,
        padding=padding_type,
        truncating=trunc_type,
    )

    # KEEP THE OUTPUT LAYER (1 neuron, sigmoid) INTACT OR TESTS MAY FAIL
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    num_epochs = 100
    model.fit(
        training_padded,
        training_labels,
        epochs=num_epochs,
        validation_data=(testing_padded, testing_labels),
        verbose=2,
    )
    return model


# Script entry point: train the classifier and write it to disk.
#
# Note that you'll need to save your model as a .h5 file like this.
# When you press the Submit and Test button, your saved .h5 model will
# be sent to the testing infrastructure for scoring,
# and the score will be returned to you.
if __name__ == '__main__':
    # Train the classifier returned by solution_model().
    model = solution_model()
    # Save to "mymodel.h5" in the current working directory.
    model.save("mymodel.h5")


Epoch 1/100
625/625 - 2s - loss: 0.6870 - accuracy: 0.5601 - val_loss: 0.6841 - val_accuracy: 0.5633
Epoch 2/100
625/625 - 2s - loss: 0.6837 - accuracy: 0.5603 - val_loss: 0.6785 - val_accuracy: 0.5633
Epoch 3/100
625/625 - 2s - loss: 0.6654 - accuracy: 0.5659 - val_loss: 0.6347 - val_accuracy: 0.6244
Epoch 4/100
625/625 - 2s - loss: 0.6047 - accuracy: 0.6650 - val_loss: 0.5397 - val_accuracy: 0.7626
Epoch 5/100
625/625 - 2s - loss: 0.5341 - accuracy: 0.7401 - val_loss: 0.4776 - val_accuracy: 0.7991
Epoch 6/100
625/625 - 2s - loss: 0.4894 - accuracy: 0.7721 - val_loss: 0.4446 - val_accuracy: 0.8068
Epoch 7/100
625/625 - 2s - loss: 0.4558 - accuracy: 0.7965 - val_loss: 0.4240 - val_accuracy: 0.8038
Epoch 8/100
625/625 - 2s - loss: 0.4370 - accuracy: 0.7991 - val_loss: 0.4105 - val_accuracy: 0.8126
Epoch 9/100
625/625 - 2s - loss: 0.4222 - accuracy: 0.8134 - val_loss: 0.4016 - val_accuracy: 0.8183
Epoch 10/100
625/625 - 2s - loss: 0.4101 - accuracy: 0.8188 - val_loss: 0.3958 - val_accura