# Cross Lingual summarization CNN Daily Mail Results
We will try out the trained t5 network from the tpu

In [1]:
import tensorflow as tf
import pandas as pd
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
import time
from rouge_score import rouge_scorer
from rouge_score import scoring

In [2]:
if not tf.config.list_physical_devices('GPU'):
    print("Change runtime to \"GPU runtime\" for faster computations")

In [3]:
tf.test.is_gpu_available()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

## Params

In [4]:
BATCH_SIZE = 8

SHUFFEL_SIZE = 1024

learning_rate = 3e-5

model_size = "t5-base"

MAX_ARTICLE_LEN = 512

MAX_HIGHLIGHT_LEN = 150

## Model

In [5]:
tokenizer = T5Tokenizer.from_pretrained(model_size)
model = TFT5ForConditionalGeneration.from_pretrained(model_size)

task_specific_params = model.config.task_specific_params
if task_specific_params is not None:
    model.config.update(task_specific_params.get("summarization", {}))
    
pad_token_id = tokenizer.pad_token_id

Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [6]:
val_loss = tf.keras.metrics.Mean(name='val_loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

model.summary()

Model: "tf_t5for_conditional_generation"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
shared (TFSharedEmbeddings)  multiple                  24674304  
_________________________________________________________________
encoder (TFT5MainLayer)      multiple                  84954240  
_________________________________________________________________
decoder (TFT5MainLayer)      multiple                  113275008 
Total params: 222,903,552
Trainable params: 222,903,552
Non-trainable params: 0
_________________________________________________________________


In [8]:
ckpt_file = "../models/checkpoint_cross_lingual_plus_translation.ckpt"
model.load_weights(ckpt_file)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f81f85ceeb8>

## Dataset
We will load the translated CNN Daily Mail dataset from the tfrecords files

In [1]:
en_de_prefix = tf.reshape(tokenizer("summarize: en_to_de ", return_tensors="tf").input_ids, (-1,))
de_en_prefix = tf.reshape(tokenizer("summarize: de_to_en ", return_tensors="tf").input_ids, (-1,))
en_en_prefix = tf.reshape(tokenizer("summarize: en_to_en ", return_tensors="tf").input_ids, (-1,))
de_de_prefix = tf.reshape(tokenizer("summarize: de_to_de ", return_tensors="tf").input_ids, (-1,))
trans_en_de_prefix = tf.reshape(tokenizer("translate: en_to_de ", return_tensors="tf").input_ids, (-1,))
trans_de_en_prefix = tf.reshape(tokenizer("translate: de_to_en ", return_tensors="tf").input_ids, (-1,))

NameError: name 'tf' is not defined

In [10]:
prefix_length = de_de_prefix.shape[0]
prefix_length

9

In [11]:
import numpy as np
MAX_ARTICLE_LEN = 512
MAX_HIGHLIGHT_LEN = 150
GLOBAL_BATCH_SIZE = 8

def get_tfrecord_dataset(file_name):
    features = {
        'ger_x': tf.io.FixedLenFeature([MAX_ARTICLE_LEN-8], tf.int64),
        'ger_x_mask': tf.io.FixedLenFeature([MAX_ARTICLE_LEN-8], tf.int64),
        'ger_y': tf.io.FixedLenFeature([MAX_HIGHLIGHT_LEN], tf.int64),
        'ger_y_ids': tf.io.FixedLenFeature([MAX_HIGHLIGHT_LEN], tf.int64),

        'en_x': tf.io.FixedLenFeature([MAX_ARTICLE_LEN-8], tf.int64),
        'en_x_mask': tf.io.FixedLenFeature([MAX_ARTICLE_LEN-8], tf.int64),
        'en_y': tf.io.FixedLenFeature([MAX_HIGHLIGHT_LEN], tf.int64),
        'en_y_ids': tf.io.FixedLenFeature([MAX_HIGHLIGHT_LEN], tf.int64),
    }

    dataset = tf.data.TFRecordDataset(f"../data/{file_name}.tfrecord")

    # Taken from the TensorFlow models repository: https://github.com/tensorflow/models/blob/befbe0f9fe02d6bc1efb1c462689d069dae23af1/official/nlp/bert/input_pipeline.py#L24
    def decode_record(record, features):
        """Decodes a record to a TensorFlow example."""
        example = tf.io.parse_single_example(record, features)

        # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
        # So cast all int64 to int32.
        for name in list(example.keys()):
            t = example[name]
            if t.dtype == tf.int64:
                t = tf.cast(t, tf.int32)
            example[name] = t
        return example


    def select_data_from_record(record):
        return [
            tf.strided_slice(tf.concat([de_de_prefix, record['ger_x']], axis=0), [0], [-1]), tf.concat([tf.ones(8, dtype=tf.int32), record['ger_x_mask']], axis=0), record['ger_y'], record['ger_y_ids'],
            tf.strided_slice(tf.concat([en_de_prefix, record['en_x']], axis=0), [0], [-1]), tf.concat([tf.ones(8, dtype=tf.int32), record['en_x_mask']], axis=0), record['ger_y'], record['ger_y_ids'],
            tf.strided_slice(tf.concat([de_en_prefix, record['ger_x']], axis=0), [0], [-1]), tf.concat([tf.ones(8, dtype=tf.int32), record['ger_x_mask']], axis=0), record['en_y'], record['en_y_ids'],
            tf.strided_slice(tf.concat([en_en_prefix, record['en_x']], axis=0), [0], [-1]), tf.concat([tf.ones(8, dtype=tf.int32), record['en_x_mask']], axis=0), record['en_y'], record['en_y_ids'],
        ]
    
    dataset = dataset.map(lambda record: decode_record(record, features))
    dataset = dataset.map(select_data_from_record)
    dataset = dataset.shuffle(100)
    return dataset.batch(GLOBAL_BATCH_SIZE)

test_ds = get_tfrecord_dataset("corss_lingual_test_cnn_daily_mail")

In [12]:
def get_summaries(ds):
    for i in range(1,5):
        yield ds[(i-1)*4], ds[i*4-3], ds[i*4-2], ds[i*4-1]


for ds in test_ds.take(1):
    for i in get_summaries(ds):
        print(i[0].shape, i[1].shape, i[2].shape, i[3].shape)

(8, 512) (8, 512) (8, 150) (8, 150)
(8, 512) (8, 512) (8, 150) (8, 150)
(8, 512) (8, 512) (8, 150) (8, 150)
(8, 512) (8, 512) (8, 150) (8, 150)


## Evaluation
### Define Rouge Score

In [14]:
class RougeScore:
    '''
    mostly from https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/evaluation/metrics.py 
    '''
    
    def __init__(self, score_keys=None)-> None:
        super().__init__()
        if score_keys is None:  
            self.score_keys = ["rouge1", "rouge2", "rougeLsum"]
        
        self.scorer = rouge_scorer.RougeScorer(self.score_keys)
        self.aggregator = scoring.BootstrapAggregator()
        
        
    @staticmethod
    def prepare_summary(summary):
            # Make sure the summary is not bytes-type
            # Add newlines between sentences so that rougeLsum is computed correctly.
            summary = summary.replace(" . ", " .\n")
            return summary
    
    def __call__(self, target, prediction):
        """Computes rouge score.''
        Args:
        targets: string
        predictions: string
        """

        target = self.prepare_summary(target)
        prediction = self.prepare_summary(prediction)
        
        self.aggregator.add_scores(self.scorer.score(target=target, prediction=prediction))

        return 
    
    def reset_states(self):
        self.rouge_list = []

    def result(self):
        result = self.aggregator.aggregate()
        
        for key in self.score_keys:
            score_text = "%s = %.2f, 95%% confidence [%.2f, %.2f]"%(
                key,
                result[key].mid.fmeasure*100,
                result[key].low.fmeasure*100,
                result[key].high.fmeasure*100
            )
            print(score_text)
        
        return {key: result[key].mid.fmeasure*100 for key in self.score_keys}

### Compute Summaries

In [37]:
predictions = []
start_time = time.time()

for i, ds_item in enumerate(test_ds): 
    for (input_ids, input_mask, y, y_ids) in get_summaries(ds_item):
        summaries = model.generate(
            input_ids=input_ids, 
            attention_mask=input_mask, 
            num_beams=4, 
            length_penalty=0.6,
            early_stopping=True, 
            max_length=150
        )

        articles = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in input_ids]

        pred = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summaries]
        real = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in y]
    
        for pred_sent, real_sent, article_sent in zip(pred, real, articles):
            predictions.append(str("article: " + article_sent + "\n\npred sentence: " + pred_sent + "\n\nreal sentence: " + real_sent))
    
    if (i % 10) == 0:
        elapsed = (time.time() - start_time) / 10
        print(i,": time genreate batch:", elapsed)
        start_time = time.time()
    if i > 20:
        # otherwise it will take ages
        break


# rouge_score.result()

0 : time genreate batch: 15.252158188819886
10 : time genreate batch: 165.11613590717315
20 : time genreate batch: 165.9109206199646


### Lets have a look at some of these predicted summaries

In [38]:
import numpy as np
len_predictions = len(predictions)

def get_random_prediction():
    return predictions[np.random.randint(len_predictions)]

In [39]:
print(get_random_prediction())

article: summarize: ger_to_en Marion "Suge" Knight wurde wegen eines nicht näher beschriebenen medizinischen Notfalls ins Krankenhaus gebracht, nachdem ein Richter am Donnerstag entschieden hatte, dass er sich wegen Mordes und versuchten Mordes vor Gericht verantworten muss. Der ehemalige Rap-Musikmogul hatte im Januar mit seinem Pickup zwei Männer angefahren, einen getötet und den anderen schwer verletzt. Die Sprecherin des Sheriffs von Los Angeles, Nicole Nishida, sagte, Knight sei vom Gerichtsgebäude in der Innenstadt in ein Krankenhaus gebracht worden. Weitere Details könnten nicht veröffentlicht werden, sagte sie. Die Eltern von Suge Knight (im Bild links) und seiner Freundin (rechts) trösten sich gegenseitig vor dem Gerichtsgebäude in Los Angeles, nachdem sie am Donnerstag erfahren hatten, dass er sich wegen Mordes vor Gericht verantworten muss. Richter Ronald Coen am Obersten Gerichtshof fällte das Urteil am Donnerstag, nachdem er eine Anhörung abgeschlossen hatte, die sich star

In [40]:
print(get_random_prediction())

article: summarize: en_to_ger Ed Miliband has overtaken David Cameron as the most popular political leader for the first time. In a shock new poll that will send shockwaves through Downing Street, the Labour leader has jumped ahead of the Prime Minister in personal approval ratings. The Labour Party has also taken a commanding four point lead over the Tories with just 28 days until polling day, according to the pollsters Survation. Labour leader Ed Miliband has jumped ahead of the Prime Minister in personal approval ratings, a Survation poll has revealed . The revelation comes in the wake of a furious political row over the Labour leader’s personal character – after the Defence Secretary Michael Fallon said Mr Miliband could stab Britain in the back like he did to his brother. Mr Miliband responded by accusing the Tories of dragging the election campaign ‘into the gutter’. The Tories have pinned much of their election hopes on turning the campaign into a straight choice between Mr Came

In [41]:
print(get_random_prediction())

article: summarize: en_to_ger Voters are still in the dark' about the scale and depth of spending cuts being planned by all the main parties with just two weeks until polling day, economic experts warned today. Analysts from the Institute for Fiscal Studies said none of the major parties had given 'anything like full details' on how they will tackle the nations' debts after the election. The Tories were accused of giving 'no detail' about their deficit reduction plan, which relies on £30billion of cuts, while Labour has left the door open to borrowing an extra £26billion-a-year. Scroll down for video . The Institute for Fiscal Studies analysed the policies of all the main parties to see how their policies would increase borrowing . The IFS warned that the promise of tackling the deficit in the next Parliament is based on 'almost entirely unspecified spending cuts and tax increases'. The think-tank reached its conclusions after a detailed study of the party manifestos ahead of May's Gen

In [42]:
print(get_random_prediction())

article: summarize: ger_to_ger Der Oberste Gerichtshof von Tennessee verschob Hinrichtungstermine für vier Häftlinge und stoppte damit praktisch alle Hinrichtungen, während die Gerichte entscheiden, ob die derzeitigen Protokolle zur Tötung von Menschen verfassungsgemäß sind. Tennessee hatte zuletzt 2009 einen Gefangenen hingerichtet. Seitdem haben rechtliche Herausforderungen und Probleme bei der Beschaffung tödlicher Injektionsmedikamente neue Hinrichtungen zum Stillstand gebracht. In den Jahren 2013 und 2014 versuchte der Staat, den Prozess mit einer neuen Methode der Giftspritze und der Wiedereinführung des elektrischen Stuhls als Backup anzukurbeln. Der Oberste Gerichtshof von Tennessee hat die Hinrichtungstermine für die letzten vier Männer in der Todeszelle des Bundesstaates freigegeben, da neue Methoden der Giftspritze in Frage gestellt werden. Vor allem wurde der Stromschlag als Sicherungsmethode bezeichnet. Ab Dezember 2013 setzte das Gericht neue Hinrichtungstermine für elf H

In [43]:
print(get_random_prediction())

article: summarize: en_to_en Obese workers are lazy' and 'unable to fulfil their roles' and as a result are less likely to be hired, a new survey has revealed. Almost half of 1,000 British companies questioned said they are less inclined to recruit an applicant after interview if they are obese or overweight. Among the reasons given were that overweight workers 'are unable to play a full role in the business', 'they're lazy', and 'they wouldn't be able to do the job required', the research shows. Beverley Sunderland, managing director, at Crosslands Employment Solicitors, who commissioned the research, said: 'Our research shows that prejudiced attitudes towards hiring obese workers are rife among British employers.' A survey of 1,000 British companies revealed almost half are less inclined to recruit an obese applicant, fearing 'they're lazy', and 'unable to fulfil their roles as required', because of their weight . The findings come in the wake of a European court case brought by unio

In [44]:
for i in range(10):
    print(get_random_prediction())

article: summarize: en_to_en Former NFL cornerback Will Allen and his business partner are facing civil fraud charges from federal regulators over allegations the pair ran a multi-million dollar Ponzi scheme. Theyre accused of reaping more than $31 million in a Ponzi scheme that promised high returns to investors from funding loans to cash-strapped pro athletes. The Securities and Exchange Commission announced the charges Monday against William D. Allen, Susan Daub and their Capital Financial Partners investment firms. Allen, 36, was a cornerback in the NFL from 2001 to 2012, playing for the New York Giants and the Miami Dolphins. Accused: Former NFL cornerback Will Allen and his business partner are facing civil fraud charges from federal regulators over allegations the pair ran a multi-million dollar Ponzi scheme (file photo) He was signed by the New England Patriots in March 2012 but was placed on injured reserve the following August, and he left football in March 2013. Daub, 54, is

## Save results to text file

In [45]:
result_path = "../results/t5_cross_lingual_plus_translate_result.txt"
open(result_path, "w")
for pred in predictions:
    with open(result_path, "a") as file:
        file.write(pred + "\n")

## Load save File

In [46]:
data_points = []
result_path = "../results/t5_cross_lingual_plus_translate_result.txt"
file = open(result_path, "r")
for line in file:
    data_points.append(line)

In [47]:
data_points[4]

'real sentence: Tinder zeigt nur die letzten 34 Fotos an - aber Nutzer können problemlos mehr sehen.\n'

In [48]:
class SummaryData():
    
    def __init__(self):
        self.language_tag = ''
        self.real_data = ''
        self.pred_data = ''        

In [27]:
count = 0
summary_data = SummaryData()
summary_data_list = []
for point in data_points:
    count += 1
    
    if count == 1:
        summary_data.language_tag = point.split(" ")[2]
    elif count == 3:
        summary_data.pred_data = ": ".join(point.split(":")[1:])
    elif count == 5:
        summary_data.real_data = ": ".join(point.split(":")[1:])
        summary_data_list.append(summary_data)
        summary_data = SummaryData()
        count = 0
summary_data_list[0].__dict__

{'language_tag': 'ger_to_ger',
 'real_data': ' David Messerschmitt, 30, der seine Frau auf dem College kennengelernt hatte, hatte Craigslist laut einer eidesstattlichen Versicherung am Dienstag benutzt, um Männer anzuwerben. Jamyra Gallmon, 21, "kontaktierte das Opfer über eine männlich klingende E-Mail-Adresse und ging in das Hotel mit der Absicht, ihn auszurauben". Das Paar geriet am 9. Februar in Streit und sie stach ihm wiederholt in Bauch, Rücken und Leisten. Der Anwalt wurde tot im noblen Hotel in Washington DC aufgefunden, nachdem ihn seine Frau als vermisst gemeldet hatte. Gallmon\n',
 'pred_data': ' Jamyra Gallmon, 21, wurde des Mordes ersten Grades angeklagt, während sie bewaffnet war, weil sie David Messerschmitt, 30, am 9. Februar in Washington, D.C. getötet haben soll. Die Frau gab gegenüber der Polizei zu, dass sie den Anwalt mit einer männlich klingenden E-Mail-Adresse gegründet hatte und in das noble Donovan Hotel ging, um ihn auszurauben. Eine eidesstattliche Versicher

In [28]:
from ast import literal_eval

results_en_trans = []
results_en_path = "../results/en_en_results"
file = open(results_en_path, "r")
for i, line in enumerate(file):
    results_en_trans.append(literal_eval(line))
    
results_ger_trans = []
results_ger_path = "../results/en_en_results"
file = open(results_ger_path, "r")
for i, line in enumerate(file):
    results_ger_trans.append(literal_eval(line))

In [29]:
rouge_scores_dict = dict()
rouge_scores_dict['en_to_en'] = RougeScore()
rouge_scores_dict['en_to_en_trans'] = RougeScore()

rouge_scores_dict['en_to_ger'] = RougeScore()
rouge_scores_dict['ger_to_en'] = RougeScore()
rouge_scores_dict['ger_to_ger'] = RougeScore()
rouge_scores_dict['ger_to_ger_trans'] = RougeScore()

rouge_scores_dict

{'en_to_en': <__main__.RougeScore at 0x7f80b4509e48>,
 'en_to_en_trans': <__main__.RougeScore at 0x7f80b45099b0>,
 'en_to_ger': <__main__.RougeScore at 0x7f80b4509be0>,
 'ger_to_en': <__main__.RougeScore at 0x7f8084615908>,
 'ger_to_ger': <__main__.RougeScore at 0x7f80846156d8>,
 'ger_to_ger_trans': <__main__.RougeScore at 0x7f80846159e8>}

In [30]:
for summary_data in summary_data_list:
    rouge_scores_dict[summary_data.language_tag](summary_data.real_data, summary_data.pred_data)
    

In [31]:
results_ger_trans[0]

{'id': 24,
 'item_id': 1,
 'language_tag': 'en_to_en',
 'real_data': 'Beatrice seen watching race on terrace with the Gulf states Crown Prince . Marks 13th holiday since November last year, and fourth in a month . Princess quit her job at Sony Pictures in New York before Christmas . Despite that she is described as working full-time on her fathers website .\n',
 'pred_data': 'Princess Beatrice spotted at Bahrain Grand Prix with long-term boyfriend Dave Clark . Onlooker said 26-year-old was walking behind the Crown Prince of Bahrain . Princes regime accused of violently repressing pro-democracy protests . Sir Jackie Stewart and comedian Rory Bremner watched race from the tower .\n',
 'translated': 'Prinzessin Beatrice beim Großen Preis von Bahrain mit ihrem langjährigen Freund Dave Clark.Beobachter sagten, der 26-Jährige sei hinter dem Kronprinzen von Bahrain hergelaufen.Prinzen-Regime beschuldigt, prodemokratische Proteste gewaltsam zu unterdrücken.Sir Jackie Stewart und Komiker Rory B

In [32]:
for item in results_en_trans:
    rouge_scores_dict['en_to_en_trans'](item['translated_real'], item['translated'])

In [33]:
for item in results_ger_trans:
    rouge_scores_dict['ger_to_ger_trans'](item['translated_real'], item['translated'])

In [34]:
for key, rouge_score_item in rouge_scores_dict.items():
    print(key, rouge_score_item.result())
    print()

rouge1 = 42.41, 95% confidence [40.04, 44.86]
rouge2 = 20.07, 95% confidence [17.70, 22.69]
rougeLsum = 39.26, 95% confidence [37.13, 41.63]
en_to_en {'rouge1': 42.41134305679392, 'rouge2': 20.065374961983395, 'rougeLsum': 39.26425894255747}

rouge1 = 33.23, 95% confidence [32.50, 33.99]
rouge2 = 14.30, 95% confidence [13.60, 14.97]
rougeLsum = 23.23, 95% confidence [22.54, 23.97]
en_to_en_trans {'rouge1': 33.231375771303064, 'rouge2': 14.298427277035335, 'rougeLsum': 23.226494546959657}

rouge1 = 33.35, 95% confidence [31.51, 35.20]
rouge2 = 12.66, 95% confidence [11.28, 14.42]
rougeLsum = 22.88, 95% confidence [21.25, 24.54]
en_to_ger {'rouge1': 33.35185694157777, 'rouge2': 12.655929617599487, 'rougeLsum': 22.875681499416785}

rouge1 = 37.02, 95% confidence [35.33, 38.71]
rouge2 = 13.72, 95% confidence [12.34, 15.22]
rougeLsum = 34.52, 95% confidence [32.89, 36.18]
ger_to_en {'rouge1': 37.02213087712113, 'rouge2': 13.716584706085392, 'rougeLsum': 34.52148308376297}

rouge1 = 34.37, 9

# Special Example

In [29]:
input_text = "In a world where we have to read and understand a lot of documents automatic text summarization has an obvious demand. To have the option to get a brief summary of a text in your language can be very useful. But would it not be even more useful, if you could have the option to get a summary of a text in a language, you do not understand, in your wished language? Sometimes we don’t want an exact translation, sometimes we just want to know a brief overview of a text in a language we don’t understand. That is the case where Cross-Lingual summarization would be prefered over normal translation. Cross-Lingual summarization can give you a short overview of a text in a language you do not understand."

In [30]:
input_text_test = "The dramatic growth of data on the internet leads to the need to automatically process and understand the data. A big part of the data is text data in many languages. This overwhelming amount of information causes a demand for automatic text summarization and other Natural Language Processing (NLP) taks.\n The Field of NLP had some high points in the last couple of years, when the field got revolutionized by Neural Language Models. With publications like Attention is all you need \cite{vaswani2017attention} or GPT3\cite{brown2020language} the limits of the field are pushed even further. Because of large data corpuses, scraped from the internet, and advanced models which contain up to 175 Billion parameters like the GPT3 it is possible to generate text, answer questions, summarize text, translate or many other things.\n In this master thesis we will take a deeper look into summarization. There are extractive and abstractive techniques to summarization. The extractive summary technique tries to find subsets of sentences, which representante the original text well and uses them to summarize the original text \cite{allahyari2017text}. The abstractive technique uses advanced language models to generate a new text, which should be much shorter than the original one, contain all the key information and preserve the overall meaning.\n In this master thesis we will use the abstractive technique, because it is closer to a human-like interpretation. It combines the ability to understand what the context of a given text is and the ability to generate fluent and grammatically correct text to that given context.\n The summarization in one language is an interesting topic but cross lingual approach is even more interesting. The goal of Cross Lingual Summarization is to summarize a text from one language into another language. It combines the ability to summarize and the ability to translate. Where state of the art models perform well on normal summarization, it will be interesting to see how well they perform doing Cross-Lingual Summarization."

In [31]:
x = tokenizer.encode_plus("summarize: en_to_ger " + input_text, max_length=512, return_tensors="tf", padding='max_length', truncation=True)
input_ids = tf.reshape(x['input_ids'], (1,-1))
attention_mask = tf.reshape(x['attention_mask'], (1,-1))

In [32]:
print(input_ids.shape, attention_mask.shape)

(1, 512) (1, 512)


In [33]:
summaries = model.generate( 
    input_ids=input_ids, 
    attention_mask=attention_mask
)

In [34]:
summaries

<tf.Tensor: shape=(1, 74), dtype=int32, numpy=
array([[    0,    86,   645,  3779,     6,    16,    74,   558,  2584,
            3, 20127,    15,   110,    35,    64, 19163,  3766,     6,
          229, 15820,    15,  5027,     7,    63,    29, 19712,   266,
        30410,    15,  5222,  6367,     5,  4098,  6199,     3,    15,
            7,   311, 30001,    49,     6,  1301,   292,    67,  7251,
         8219,    29,     6,   266, 11068, 14449,   266,     7,  5027,
           15,     7,    16,   645, 16933,   170,  8837,     6,    67,
          292,   311, 19163,     6,    16,  1197,    52, 24054,    29,
        16933,    58]], dtype=int32)>

In [35]:
tokenizer.decode(summaries[0])

'<pad> In einer Welt, in der wir viele Dokumente lesen und verstehen müssen, ist automatische Textsynthese eine offensichtliche Forderung. Aber wäre es nicht nützlicher, wenn Sie die Möglichkeit hätten, eine Zusammenfassung eines Textes in einer Sprache zu bekommen, die Sie nicht verstehen, in Ihrer gewünschten Sprache?'