In [1]:
!nvidia-smi

Tue May 12 14:27:24 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 435.21       Driver Version: 435.21       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208...  Off  | 00000000:07:00.0  On |                  N/A |
| 35%   34C    P5    25W / 260W |   1295MiB / 11016MiB |      2%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [2]:
import tensorflow as tf
from transformers import T5Tokenizer, TFT5Model, TFT5ForConditionalGeneration
import tensorflow_datasets as tfds

In [3]:
BATCH_SIZE = 16

SHUFFEL_SIZE = 1024

In [4]:
cnn_dailymail = tfds.load(name="cnn_dailymail")

INFO:absl:No config specified, defaulting to first: cnn_dailymail/plain_text
INFO:absl:Load dataset info from /home/yannik/tensorflow_datasets/cnn_dailymail/plain_text/3.0.0
INFO:absl:Field info.description from disk and from code do not match. Keeping the one from code.
INFO:absl:Field info.citation from disk and from code do not match. Keeping the one from code.
INFO:absl:Reusing dataset cnn_dailymail (/home/yannik/tensorflow_datasets/cnn_dailymail/plain_text/3.0.0)
INFO:absl:Constructing tf.data.Dataset for split None, from /home/yannik/tensorflow_datasets/cnn_dailymail/plain_text/3.0.0


In [5]:
train_tfds = cnn_dailymail['train']
test_tfds = cnn_dailymail['test']
val_tfds = cnn_dailymail['validation']

In [6]:
tokenizer = T5Tokenizer.from_pretrained('t5-small')

In [7]:
def normalize_text(text):
    """Lowercase and remove quotes from a TensorFlow string."""
    text = tf.strings.lower(text)
    text = tf.strings.regex_replace(text,"'(.*)'", r"\1")
    return text.numpy().decode('UTF-8')

def tokenize_text(text, needs_sum):
    text = normalize_text(text)
    if needs_sum:
        text = "summarize: " + text
        ids = tokenizer.encode(text, return_tensors="tf", max_length=512) 
    else:
        ids = tokenizer.encode(text, return_tensors="tf", max_length=150)
    return tf.squeeze(ids)



def map_func(features):

    article_ids = tf.py_function(tokenize_text, inp=[features["article"], True], Tout=tf.int32)
    highlights_ids = tf.py_function(tokenize_text, inp=[features["highlights"], False], Tout=tf.int32)

    return article_ids, highlights_ids



def map_to_dict(x, y):
    return {"inputs": x, "decoder_input_ids": y}, y

def map_tfds_to_dict(x):
    return {"inputs": x['article'], "decoder_input_ids": x['highlights']}, 

In [8]:
train_ds = train_tfds.map(map_func)\
    .shuffle(SHUFFEL_SIZE)\
    .padded_batch(BATCH_SIZE, padded_shapes=([512],[150]))\
    .prefetch(tf.data.experimental.AUTOTUNE)

val_ds = val_tfds.map(map_func)\
    .shuffle(SHUFFEL_SIZE)\
    .padded_batch(BATCH_SIZE, padded_shapes=([512],[150]))\
    .prefetch(tf.data.experimental.AUTOTUNE)

test_ds = test_tfds.map(map_func)\
    .shuffle(SHUFFEL_SIZE)\
    .padded_batch(BATCH_SIZE, padded_shapes=([512],[150]))\
    .prefetch(tf.data.experimental.AUTOTUNE)

In [9]:
model = TFT5ForConditionalGeneration.from_pretrained('t5-small')

In [10]:
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

val_loss = tf.keras.metrics.Mean(name='val_loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

model.summary()

Model: "tf_t5for_conditional_generation"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
shared (TFSharedEmbeddings)  multiple                  16449536  
_________________________________________________________________
encoder (TFT5MainLayer)      multiple                  18881280  
_________________________________________________________________
decoder (TFT5MainLayer)      multiple                  25176064  
Total params: 60,506,880
Trainable params: 60,506,880
Non-trainable params: 0
_________________________________________________________________


In [11]:
#x = next(iter(train_ds))
#model.fit({'inputs': x[0], 'decoder_input_ids':x[1]}, epochs=3)

In [12]:
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        # prediction_scores: (bs, 150, 32128)
        # decoder_past_key_value_states: (bs, 512, 512), (bs, 8, 150, 64)
        # z: (bs, 512, 512)
        predictions, _, _ = model(x, decoder_input_ids=y, training=True)
        loss = loss_object(y, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    train_loss(loss)
    train_accuracy(y, predictions)

In [13]:
@tf.function
def val_step(x, y):
    predictions, _, _ = model(x,  decoder_input_ids=y, training=False)
    v_loss = loss_object(y, predictions)

    val_loss(v_loss)
    val_accuracy(y, predictions)

In [14]:
from rouge_score import rouge_scorer
from rouge_score import scoring

class RougeScore:
    '''
    mostly from https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/evaluation/metrics.py 
    '''
    
    def __init__(self, score_keys=None)-> None:
        super().__init__()
        if score_keys is None:  
            self.score_keys = ["rouge1", "rouge2", "rougeLsum"]
        
        self.scorer = rouge_scorer.RougeScorer(self.score_keys)
        self.aggregator = scoring.BootstrapAggregator()
        
        
    @staticmethod
    def prepare_summary(summary):
            # Make sure the summary is not bytes-type
            # Add newlines between sentences so that rougeLsum is computed correctly.
            summary = summary.replace(" . ", " .\n")
            return summary
    
    def __call__(self, target, prediction):
        """Computes rouge score.''
        Args:
        targets: string
        predictions: string
        """

        target = self.prepare_summary(target)
        prediction = self.prepare_summary(prediction)
        
        self.aggregator.add_scores(self.scorer.score(target=target, prediction=prediction))

        return 
    
    def reset_states(self):
        self.rouge_list = []

    def result(self):
        result = self.aggregator.aggregate()
        
        for key in self.score_keys:
            score_text = "%s = %.2f, 95%% confidence [%.2f, %.2f]"%(
                key,
                result[key].mid.fmeasure*100,
                result[key].low.fmeasure*100,
                result[key].high.fmeasure*100
            )
            print(score_text)
        
        return {key: result[key].mid.fmeasure*100 for key in self.score_keys}

In [15]:
rouge_score = RougeScore()

rouge_score("I want some ice cream, what are you doing","What I want is ice cream, what the hell")
rouge_score.result()

rouge1 = 55.56, 95% confidence [55.56, 55.56]
rouge2 = 37.50, 95% confidence [37.50, 37.50]
rougeLsum = 55.56, 95% confidence [55.56, 55.56]


{'rouge1': 55.55555555555556, 'rouge2': 37.5, 'rougeLsum': 55.55555555555556}

In [16]:
@tf.function
def test_step(x, y):
    predictions, _, _ = model(x, decoder_input_ids=y, training=False)
    t_loss = loss_object(y, predictions)
    test_loss(t_loss)
    test_accuracy(y, predictions)
    
    predicted_ids = tf.math.argmax(predictions, 2)
    for i in range(predicted_ids.shape[0]):
        decoded_prediction = tokenizer.decode(predicted_ids[i])
        decoded_label = tokenizer.decode(y[i])
        rouge_score(decoded_label, decoded_prediction)
        


@tf.function
def predict(x, y):
    predicted_ids = model.generate(x)
    
    decoded_prediction = tokenizer.decode(predicted_ids)
    decoded_label = tokenizer.decode(y)
    
    print("------")
    print("Pred Sentence:", decoded_prediction)
    print("True Sentence:", decoded_label)
    print("------")

In [None]:
EPOCHS = 1
for epoch in range(EPOCHS):
    train_loss.reset_states()
    train_accuracy.reset_states()
    
    test_loss.reset_states()
    test_accuracy.reset_states()
    
    val_loss.reset_states()
    val_accuracy.reset_states()
    
    val_batches = iter(val_ds)
    
    for i, (x, y) in enumerate(train_ds):
        train_step(x, y)
        if i % 200 == 0:
            x_val, y_val = next(val_batches)
            val_step(x_val, y_val)
            template = 'Epoch {}, i: {}, train loss: {}, train acc: {}, val loss: {}, val acc: {}'
            print(template.format(epoch + 1,
                        i,
                        train_loss.result(),
                        train_accuracy.result() * 100,
                        val_loss.result(),
                        val_accuracy.result() * 100))

    
    for x, y in test_ds:
        test_step(x, y)
        
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                        train_loss.result(),
                        train_accuracy.result() * 100,
                        test_loss.result(),
                        test_accuracy.result() * 100))
    
    print(rouge_score.result())





Epoch 1, i: 0, train loss: 13.879189491271973, train acc: 3.0833334922790527, val loss: 13.301065444946289, val acc: 0.4583333134651184
Epoch 1, i: 200, train loss: 4.01372766494751, train acc: 40.46165084838867, val loss: 7.6712117195129395, val acc: 26.625001907348633
