In [3]:
import spacy
import torch
from transformers import *
from transformers import pipeline
# Small English spaCy pipeline; a later cell uses it to split generated summaries into sentences.
spacy_en = spacy.load(name='en_core_web_sm')

## BART - pretrained, using the tokenizer and model directly

In [98]:
# The tokenizer and the generation model are loaded from the same pretrained BART checkpoint.
# NOTE(review): newer transformers releases may only resolve the namespaced id
# 'facebook/bart-large-cnn' — confirm against the installed version.
bart_checkpoint = 'bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(bart_checkpoint)
model = BartForConditionalGeneration.from_pretrained(bart_checkpoint)

In [148]:
# Tiny smoke-test input; the surrounding newlines are part of the text that gets tokenized.
article = "\ni like apples\n"

In [149]:
# Encode the article as PyTorch tensors; the token ids live under inputs['input_ids'].
inputs = tokenizer.encode_plus(
    article,
    return_tensors='pt',
)

In [150]:
# Inspect the (batch, sequence length) size of the encoded article.
inputs['input_ids'].size()

torch.Size([1, 6])

In [151]:
# First chunk for the model: at most 1023 leading tokens followed by the closing token
# of the sequence, re-wrapped as a batch of one.
# The closing token is split off *before* truncating so that an input shorter than the
# cap is passed through unchanged; slicing the full sequence with [:1023] and then
# appending [-1:] would append the closing token a second time for short inputs.
token_ids = inputs['input_ids'][0]
leading_ids, closing_id = token_ids[:-1], token_ids[-1:]
part1 = torch.cat([leading_ids[:1023], closing_id]).unsqueeze(0)

In [152]:
# Beam-search a summary from the `part1` token ids, then decode and print every returned sequence.
generation_options = dict(num_beams=4, max_length=130, min_length=20, early_stopping=True)
summary_ids = model.generate(part1, **generation_options)

decoded_summaries = []
for sequence_ids in summary_ids:
    decoded_summaries.append(
        tokenizer.decode(sequence_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
    )
print(decoded_summaries)

['i like apples like apples. i.e. I like apples, I like to eat apples.']


In [104]:
# Index bounds of a 511-token window that starts at position 512.
# NOTE(review): neither name is read by any other cell visible here — confirm they are still needed.
begin = 512
end = begin + 511

In [105]:
# Second chunk for the model: the opening token of the sequence followed by at most the
# last 511 tokens, re-wrapped as a batch of one.
# The opening token is split off *before* taking the tail so that an input shorter than
# the window is passed through unchanged; slicing the full sequence with [-511:] would
# include the opening token again and duplicate it for short inputs.
token_ids = inputs['input_ids'][0]
opening_id, remaining_ids = token_ids[:1], token_ids[1:]
part2 = torch.cat([opening_id, remaining_ids[-511:]]).unsqueeze(0)

In [106]:
# Beam-search a summary from the `part2` token ids, then decode and print every returned sequence.
generation_options = dict(num_beams=4, max_length=130, min_length=30, early_stopping=True)
summary_ids = model.generate(part2, **generation_options)

decoded_summaries = []
for sequence_ids in summary_ids:
    decoded_summaries.append(
        tokenizer.decode(sequence_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
    )
print(decoded_summaries)

['26.  I think that 24 is ironic for UCSD because I dont know a single person that does not try and welcome people of all different backgrounds.28.  There is HORRIBLE cooperation in my department and in External Affairs in general.29.  The head of our human resources is perhaps one of the worst I have ever experienced in my work history.']


## BART - pretrained using `pipeline`

In [2]:
# Ready-made summarization pipeline; no model is named, so the library's default checkpoint is used.
summarizer = pipeline(task="summarization")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1300.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1625270765.0, style=ProgressStyle(descr…




In [62]:
# Reference one-sentence summaries; targets[k] is printed next to the prediction for comments[k].
targets = [
    # back brief after coordination meetings
    "The Finance Coordinator did not back brief his staff following formal EOC coordination meetings.",
    # missing formal training
    "Finance Section personnel had not received any formal Finance Section training prior the exercise.",
    # no guidance on breaks and lunch
    "While not specific to the Finance Section, exercise evaluators stated that no guidance was provided pertaining to breaks and lunch.",
]

In [63]:
# Evaluator comments to be summarized; comments[k] is compared against targets[k] in later cells.
comments = [
    # back brief after coordination meetings
    "Exercise evaluators stated they did not observe the Finance Section Coordinator back brief his personnel following EOC coordination meetings. They attributed this deficiency to the understaffing of the Finance Section. Evaluators stated that when the Finance Section Coordinator would return to the Section following the coordination meetings, he would be inundated with questions and tasks that required his immediate attention.",
    # missing formal training
    "Exercise evaluators stated that neither of three Finance Section personnel had been formally trained on how to function within the Finance Section. Evaluators noted that in the absence of training, Finance Section personnel were proactive in their problem solving, but were unsure if their approach was the right one.",
    # no guidance on breaks and lunch
    "Exercise evaluators indicated that they did not observe EOC staff provide any guidance on when to take breaks or how to maintain EOC operations while obtaining lunch. Evaluators indicated that when lunch finally arrived, everyone dropped what they were doing and went to lunch.",
]


In [64]:
# Summarize every comment in one pipeline call, bounding the generated length on both sides.
length_limits = {'max_length': 30, 'min_length': 5}
summaries = summarizer(comments, **length_limits)

In [67]:
# Print each generated summary next to its reference target.
# The summaries are generated under a max_length cap, so the last sentence may be cut
# off mid-way. That trailing fragment is dropped, but only when it does not end in
# sentence-final punctuation and is not the sole sentence — dropping it unconditionally
# would print an empty prediction for one-sentence summaries and discard complete sentences.
for summary, target in zip(summaries, targets):
    txt = summary['summary_text']
    sents = [sent.text for sent in spacy_en(txt).sents]
    last_is_fragment = bool(sents) and not sents[-1].rstrip().endswith(('.', '!', '?'))
    if len(sents) > 1 and last_is_fragment:
        sents = sents[:-1]
    print(f"=== predicted ===\n{' '.join(sents)}\n=== target ===\n{target}\n")

=== predicted ===
EOC evaluators did not observe the Finance Section Coordinator back brief his personnel following EOC coordination meetings.
=== target ===
The Finance Coordinator did not back brief his staff following formal EOC coordination meetings.

=== predicted ===
Evaluators found that neither of three Finance Section personnel had been formally trained on how to function within the Finance Section.
=== target ===
Finance Section personnel had not received any formal Finance Section training prior the exercise.

=== predicted ===
EOC staff did not provide guidance on when to take breaks or how to maintain EOC operations while obtaining lunch.
=== target ===
While not specific to the Finance Section, exercise evaluators stated that no guidance was provided pertaining to breaks and lunch.



## T5 - pretrained, using the model directly

In [89]:
from transformers import AutoModelWithLMHead, AutoTokenizer

# Load the pretrained T5 checkpoint.
# NOTE: this rebinds `model` and `tokenizer`, which the BART cells earlier in the notebook
# also assign; re-running those cells after this one will pick up the T5 objects.
model = AutoModelWithLMHead.from_pretrained("t5-base")
tokenizer = AutoTokenizer.from_pretrained("t5-base")

# T5 uses a max_length of 512 so we cut the article to 512 tokens.
# The "summarize: " prefix is prepended to the text before encoding.
# NOTE(review): some transformers releases only truncate when truncation is requested
# explicitly — confirm max_length alone is honored by the installed version.
inputs = tokenizer.encode("summarize: " + comments[0], return_tensors="pt", max_length=512)
outputs = model.generate(
    inputs,
    max_length=25,
    min_length=5,
    length_penalty=2.0,
    num_beams=4,
    early_stopping=True,
)

# Skip special tokens when decoding, matching the BART cells, so markers such as
# padding / end-of-sequence never leak into the printed summary.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Exercise evaluators stated they did not observe the Finance Section Coordinator back brief his personnel following EOC coordination meetings 


In [92]:
# Encode all comments as one batch, each prefixed with "summarize: " and padded to
# max_length so the rows stack into a single tensor.
inputs = tokenizer.batch_encode_plus(["summarize: " + c for c in comments], 
                                     return_tensors="pt", 
                                     max_length=512, 
                                     pad_to_max_length=True)

# The batch is padded, so hand the attention mask to generate() explicitly instead of
# relying on the library to infer which positions are padding.
# NOTE(review): assumes the tokenizer returns an 'attention_mask' entry — confirm for the
# installed transformers version.
outputs = model.generate(inputs['input_ids'],
                         attention_mask=inputs['attention_mask'],
                         max_length=25, min_length=5,
                         length_penalty=4.0, num_beams=4, early_stopping=True)

In [93]:
# Print each decoded T5 prediction next to its reference target.
# Special tokens are skipped when decoding, matching the BART cells, so padding /
# end-of-sequence markers never leak into the printed text.
for pred, target in zip(outputs, targets):
    txt = tokenizer.decode(pred, skip_special_tokens=True)
    print(f"=== predicted ===\n{txt}\n=== target ===\n{target}\n")

=== predicted ===
Exercise evaluators stated they did not observe the Finance Section Coordinator back brief his personnel following EOC coordination meetings 
=== target ===
The Finance Coordinator did not back brief his staff following formal EOC coordination meetings.

=== predicted ===
exercise evaluators stated that neither of three Finance Section personnel had been formally trained on how to function within the
=== target ===
Finance Section personnel had not received any formal Finance Section training prior the exercise.

=== predicted ===
Exercise evaluators indicated that they did not observe EOC staff provide any guidance on when to take breaks or how
=== target ===
While not specific to the Finance Section, exercise evaluators stated that no guidance was provided pertaining to breaks and lunch.

