In [8]:
import torch

from transformers import *
from transformers import pipeline
import spacy
spacy_en = spacy.load('en_core_web_sm')

## Tokenization

In [3]:
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')

In [11]:
article = "I like apples because "

In [12]:
inputs = tokenizer(article, return_tensors='pt')

In [13]:
inputs['input_ids'].shape

torch.Size([1, 7])

In [14]:
part1 = torch.cat([inputs['input_ids'][0][:1023], inputs['input_ids'][0][-1:]]).unsqueeze(0)

In [15]:
summary_ids = model.generate(part1, num_beams=4, max_length=130, min_length=20, early_stopping=True)
print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])

['I like apples because they taste so good. _______________________.advertisementadvertisementadvertisementadvertisementadvertisement']


In [16]:
begin = 512
end = 512 + 511

In [17]:
part2 = torch.cat([inputs['input_ids'][0][:1], inputs['input_ids'][0][-511:]]).unsqueeze(0)

In [18]:
summary_ids = model.generate(part2, num_beams=4, max_length=130, min_length=30, early_stopping=True)
print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])

['I like apples because ____________________ ____________________ ____________________I like apples because ____________________ ____________________ ____________________ ____________________']


## BART - pretrained using `pipeline`

In [19]:
summarizer = pipeline("summarization")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1621.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898822.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=26.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1222317369.0, style=ProgressStyle(descr…




In [20]:
targets = [
    'The Finance Coordinator did not back brief his staff following formal EOC coordination meetings.',
    'Finance Section personnel had not received any formal Finance Section training prior the exercise.',
    'While not specific to the Finance Section, exercise evaluators stated that no guidance was provided pertaining to breaks and lunch.'
]

In [21]:
comments = [
    'Exercise evaluators stated they did not observe the Finance Section Coordinator back brief his personnel following EOC coordination meetings. They attributed this deficiency to the understaffing of the Finance Section. Evaluators stated that when the Finance Section Coordinator would return to the Section following the coordination meetings, he would be inundated with questions and tasks that required his immediate attention.',
    'Exercise evaluators stated that neither of three Finance Section personnel had been formally trained on how to function within the Finance Section. Evaluators noted that in the absence of training, Finance Section personnel were proactive in their problem solving, but were unsure if their approach was the right one.',
    'Exercise evaluators indicated that they did not observe EOC staff provide any guidance on when to take breaks or how to maintain EOC operations while obtaining lunch. Evaluators indicated that when lunch finally arrived, everyone dropped what they were doing and went to lunch.'    
]


In [22]:
summaries = summarizer(comments, max_length=30, min_length=5)

In [23]:
for i, summary in enumerate(summaries):
    txt = summary['summary_text']
    sents = [sent.text for sent in spacy_en(txt).sents]
    print(f"=== predicted ===\n{' '.join(sents[:-1])}\n=== target ===\n{targets[i]}\n")

=== predicted ===
 Exercise evaluators stated they did not observe the Finance Section Coordinator back brief his personnel following EOC coordination meetings .
=== target ===
The Finance Coordinator did not back brief his staff following formal EOC coordination meetings.

=== predicted ===
 Neither of three Finance Section personnel had been formally trained on how to function within the Finance Section .
=== target ===
Finance Section personnel had not received any formal Finance Section training prior the exercise.

=== predicted ===

=== target ===
While not specific to the Finance Section, exercise evaluators stated that no guidance was provided pertaining to breaks and lunch.



## T5 - pretrained using model

In [36]:
from transformers import AutoModelWithLMHead, AutoTokenizer

model = AutoModelWithLMHead.from_pretrained("t5-base")
tokenizer = AutoTokenizer.from_pretrained("t5-base")

# T5 uses a max_length of 512 so we cut the article to 512 tokens.
inputs = tokenizer("summarize: " + comments[0], return_tensors="pt", truncation=True)
outputs = model.generate(inputs['input_ids'], max_length=25, min_length=5, length_penalty=2.0, num_beams=4, early_stopping=True)

print(tokenizer.decode(outputs[0]))

exercise evaluators did not observe the Finance Section Coordinator back brief his personnel. they attributed this defic


In [37]:
inputs = tokenizer(["summarize: " + c for c in comments], return_tensors="pt", padding=True, truncation=True)

outputs = model.generate(inputs['input_ids'], max_length=25, min_length=5, 
                         length_penalty=4.0, num_beams=4, early_stopping=True)

In [38]:
for i, pred in enumerate(outputs):
    txt = tokenizer.decode(pred)
    print(f"=== predicted ===\n{txt}\n=== target ===\n{targets[i]}\n")
    
#     sents = [sent.text for sent in spacy_en(txt).sents]
#     print(f"=== predicted ===\n{' '.join(sents[:-1])}\n=== target ===\n{targets[i]}\n")

=== predicted ===
exercise evaluators did not observe the Finance Section Coordinator back brief his personnel. they attributed this defic
=== target ===
The Finance Coordinator did not back brief his staff following formal EOC coordination meetings.

=== predicted ===
exercise evaluators stated that neither of three Finance Section personnel had been formally trained on how to function.
=== target ===
Finance Section personnel had not received any formal Finance Section training prior the exercise.

=== predicted ===
exercise evaluators did not observe EOC staff provide any guidance on when to take breaks or how to maintain E
=== target ===
While not specific to the Finance Section, exercise evaluators stated that no guidance was provided pertaining to breaks and lunch.

