In [1]:
#pipeline
from transformers import pipeline
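# `pipeline(task, ...)` creates a suitable inference pipeline for a given task.
# Besides the names listed below, this notebook also uses the aliases "sentiment-analysis"
# (for "text-classification") and "ner" (for "token-classification").
# The currently accepted tasks are: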
#    - `"audio-classification"`
#    - `"automatic-speech-recognition"`
#    - `"conversational"`
#    - `"feature-extraction"`
#    - `"fill-mask"`
#    - `"image-classification"`
#    - `"question-answering"`
#    - `"table-question-answering"`
#    - `"text2text-generation"`
#    - `"text-classification"`
#    - `"text-generation"`
#    - `"token-classification"`
#    - `"translation"`
#    - `"translation_xx_to_yy"`
#    - `"summarization"`
#    - `"zero-shot-classification"`

In [2]:
#sentiment analysis
classifier = pipeline("sentiment-analysis")

# The same two sentences are scored one at a time and then together as a batch,
# which shows that a list input yields one result dict per sentence.
sample_sentences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]

for sentence in sample_sentences:
    print('\t', classifier(sentence))

print('\t', classifier(sample_sentences))

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


	 [{'label': 'POSITIVE', 'score': 0.9598049521446228}]
	 [{'label': 'NEGATIVE', 'score': 0.9994558691978455}]
	 [{'label': 'POSITIVE', 'score': 0.9598049521446228}, {'label': 'NEGATIVE', 'score': 0.9994558691978455}]


In [3]:
def pretty_print_zero_shot(_classifier, _sequences, _candidate_labels):
    """Run a zero-shot classifier and print every field of its result.

    Args:
        _classifier: Callable accepting ``sequences=`` and ``candidate_labels=``
            keyword arguments and returning a mapping.
        _sequences: Text to classify.
        _candidate_labels: Labels the text is scored against.

    Returns:
        None. One ``key:`` / ``--> value`` pair is printed per result field.
    """
    # Use the classifier that was passed in. The previous body called the
    # notebook-level `classifier` global, so the argument was silently ignored
    # and the function depended on which cell had last rebound that name.
    result = _classifier(
        sequences=_sequences,
        candidate_labels=_candidate_labels,
    )
    for k, v in result.items():
        print(f"\t{k}:\n\t\t--> {v}")
        
# Zero-shot inputs keyed by the heading printed above each result.
example_info = {
    "EXAMPLE 1": dict(
        sequences="This is a course about the Transformers library",
        candidate_labels=["education", "politics", "business"],
    ),
}

classifier = pipeline("zero-shot-classification")

for ex_name in example_info:
    ex_map = example_info[ex_name]
    print(f"\n\n\n{ex_name}:")
    pretty_print_zero_shot(
        _classifier=classifier,
        _sequences=ex_map["sequences"],
        _candidate_labels=ex_map["candidate_labels"],
    )

No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.





EXAMPLE 1:
	sequence:
		--> This is a course about the Transformers library
	labels:
		--> ['education', 'business', 'politics']
	scores:
		--> [0.8445988297462463, 0.11197438091039658, 0.04342680424451828]


In [4]:
#text generation
generator = pipeline("text-generation")

# First prompt: no length cap, and only the first continuation is shown.
# NOTE(review): ten sequences are sampled here but only index 0 is printed.
original_text = "In this course, we will teach you how to"
pred_text = generator(original_text, return_full_text=False, num_return_sequences=10)
print(f"\n===== Original Phrase =====\n\t--> {original_text} ... ")
print(f"\n----- Generated Text -----\n\t--> ...{pred_text[0]['generated_text']} ...")

# Remaining prompts differ only in prompt, length cap and number of continuations,
# and every continuation is printed.
for original_text, length_cap, n_sequences in (
    ("While Dumbledore began to climb the spiral staircase he motioned for Harry to follow", 50, 3),
    ("Oh boy oh boy oh boy!", 20, 5),
):
    pred_text_seqs = generator(
        original_text,
        return_full_text=False,
        max_length=length_cap,
        num_return_sequences=n_sequences,
    )
    print(f"\n===== Original Phrase =====\n\t--> {original_text} ... ")
    for pred_text in pred_text_seqs:
        print(f"\n----- Generated Text -----\n\t--> ...{pred_text['generated_text']} ...")

No model was supplied, defaulted to gpt2 and revision 6c0e608 (https://huggingface.co/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



===== Original Phrase =====
	--> In this course, we will teach you how to ... 

----- Generated Text -----
	--> ... create an object's class method from Haskell.

Step 1: Identify the object you want to create, add it to the class method, and then use this method to perform actions.
 ...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



===== Original Phrase =====
	--> While Dumbledore began to climb the spiral staircase he motioned for Harry to follow ... 

----- Generated Text -----
	--> ... him onto the staircase.

"You are going as slowly as you can," Dumbledore reassured Harry, his voice cracking under the strain; "I have nothing left to lose ...

----- Generated Text -----
	--> ... him, Harry's wand flying toward the other side of it as if to block it. "Harry..." Harry whispered as he took the wand back to the top of the spiral ...

----- Generated Text -----
	--> ....

"Why do you want me to come to your castle? I've known Snape before and you've always been my best friend." Harry said finally saying.
 ...

===== Original Phrase =====
	--> Oh boy oh boy oh boy! ... 

----- Generated Text -----
	--> ... He was in a hurry." He went down the stairwell and ...

----- Generated Text -----
	--> ... Yeah…you're gonna cry!"

"You cry too ...

----- Generated Text -----
	--> ... Oh boy oh boy! Oh boy oh boy! Oh boy oh 

In [5]:
#load specific model
original_text = "In this course, we will teach you how to"

# (banner, checkpoint) pairs. A checkpoint of None is the same as omitting `model=`,
# so `pipeline` falls back to its default text-generation model.
model_examples = (
    ("\n\n\n... DEFAULT MODEL (GPT2) EXAMPLE ...\n", None),
    ("\n\n\n... DISTILGPT2 MODEL EXAMPLE ...\n", "distilgpt2"),
    ("\n\n\n... WIKI FINETUNED DISTILGPT2 MODEL EXAMPLE ...\n", "rbhushan/distilgpt2-finetuned-wikitext2"),
)

# Same prompt and generation settings for every model, so only the checkpoint varies.
for banner, checkpoint in model_examples:
    print(banner)
    generator = pipeline("text-generation", model=checkpoint)
    pred_text_seqs = generator(
        original_text,
        return_full_text=False,
        max_length=50,
        num_return_sequences=3,
    )
    print(f"\n===== Original Phrase =====\n\t--> {original_text} ... ")
    for pred_text in pred_text_seqs:
        print(f"\n----- Generated Text -----\n\t--> ...{pred_text['generated_text']} ...")

No model was supplied, defaulted to gpt2 and revision 6c0e608 (https://huggingface.co/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.





... DEFAULT MODEL (GPT2) EXAMPLE ...



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



===== Original Phrase =====
	--> In this course, we will teach you how to ... 

----- Generated Text -----
	--> ... create and set up a unique application called the webkit-redirect-redirect library. When you run the tutorial, you will be shown several tutorials about the basics of implementing this library using the ...

----- Generated Text -----
	--> ... install Android, how to install and use the Java source with JDK and How to find Java binaries files on Google services. The course will take you through the process of building a Java project with Android ...

----- Generated Text -----
	--> ... create and implement functional interfaces for Scala.

Scala

Scala uses a special implementation of a class called a Type. It implements a new class, type. The type (and methods ...



... DISTILGPT2 MODEL EXAMPLE ...



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



===== Original Phrase =====
	--> In this course, we will teach you how to ... 

----- Generated Text -----
	--> ... use the word "giggar" to describe everything that is a problem, how to think about it, and how to make problems as easily as possible within an easy (or easy, easy) ...

----- Generated Text -----
	--> ... master this approach by adding some concepts to each class. Learn more. ...

----- Generated Text -----
	--> ... identify yourself as a smart person with a better understanding of the world and understanding the limitations of the world that surrounds you. ...



... WIKI FINETUNED DISTILGPT2 MODEL EXAMPLE ...



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



===== Original Phrase =====
	--> In this course, we will teach you how to ... 

----- Generated Text -----
	--> ... do it and then do your best to answer. To contact your loved ones please send a message on 4th of January to 2nd of July. If you've got any questions call 101 to see ...

----- Generated Text -----
	--> ... use the power to power that day and use to become more aware of you. A few days before I walk you through the course, in the kitchen we are asking how we can help you. I ...

----- Generated Text -----
	--> ... win the prize. Please be safe in the centre of the room - you won't get anywhere. Then send it. A reply is very welcome at 12.45am & to 8,000K ...


In [6]:
#mask--encoder-only
# Deliberate failure demo: the prompt uses "<mask>", which the bert-base-cased
# pipeline does not accept, so the call below is expected to raise.
unmasker = pipeline("fill-mask", model="bert-base-cased")
try:
    for candidate in unmasker("This course will teach you all about <mask> models.", top_k=2):
        print(candidate)
except Exception as err:
    # `Exception` (not a bare `except:`) so Ctrl-C / kernel interrupts still propagate,
    # and the underlying error is shown instead of being silently discarded.
    print("This command raises an error because it can't find the [MASK] token...\n"
          "NOTE: this is a different token than the <mask> token we mentioned previously.")
    print(f"\t--> {type(err).__name__}: {err}")

# The default fill-mask model does accept "<mask>", so the same prompt now works.
unmasker = pipeline("fill-mask")
for candidate in unmasker("This course will teach you all about <mask> models.", top_k=2):
    print(candidate)

for candidate in unmasker("2+2=<mask>.", top_k=2):
    print(candidate)

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
No model was supplied, defaulted to distilroberta-

This command raises an error because it can't find the [MASK] token...
NOTE: this is a different token than the <mask> token we mentioned previously.


Downloading (…)lve/main/config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

{'score': 0.1961977779865265, 'token': 30412, 'token_str': ' mathematical', 'sequence': 'This course will teach you all about mathematical models.'}
{'score': 0.0405273362994194, 'token': 38163, 'token_str': ' computational', 'sequence': 'This course will teach you all about computational models.'}
{'score': 0.08762217313051224, 'token': 29225, 'token_str': 'true', 'sequence': '2+2=true.'}
{'score': 0.06843051314353943, 'token': 134, 'token_str': '1', 'sequence': '2+2=1.'}


In [7]:
#NER
from spacy import displacy
import matplotlib
import matplotlib.pyplot as plt

# Tag -> integer id, used only to pick a distinct colour per tag for the displacy
# renders below. NOTE(review): presumably mirrors the POS model's label set; verify
# against the model config.
ner_tag_ids = {
    "CC": 1, "CD": 2, "DT": 3, "EX": 4, "FW": 5, "IN": 6, "JJ": 7, "JJR": 8, "JJS": 9, "MD": 10, "NN": 11, "NNP": 12, 
    "NNPS": 13, "NNS": 14, "O": 0, "PDT": 15, "POS": 16, "PRP": 17, "RB": 18, "RBR": 19, "RBS": 20, "RP": 21, "SYM": 22,
    "TO": 23, "UH": 24, "VB": 25, "VBD": 26, "VBG": 27, "VBN": 28, "VBP": 29, "VBZ": 30, "WDT": 31, "WP": 32, "WRB": 33
}

# `plt.get_cmap` replaces `plt.cm.get_cmap`, which is deprecated/removed in recent
# Matplotlib releases. The colormap is resampled to exactly one colour per tag.
cmap = plt.get_cmap('rainbow', len(ner_tag_ids))

# Ids already run 0..len-1, so index the colormap with the id itself. The previous
# `cmap(v-1)` asked for index -1 for "O", which falls back to the first colour and
# therefore duplicated the colour of "CC" while leaving the last colour unused.
ner_colors = {tag: matplotlib.colors.rgb2hex(cmap(tag_id)) for tag, tag_id in ner_tag_ids.items()}

def convert_hf_to_displacy_format(hf_pred, _original_text, _title=None):
    """Repackage grouped token-classification output for displacy's manual mode.

    Args:
        hf_pred: Iterable of dicts, each providing ``start``, ``end``,
            ``entity_group`` and ``score`` keys.
        _original_text: The text the predictions refer to.
        _title: Optional title stored alongside the text.

    Returns:
        A one-element list holding a dict with ``text``, ``ents`` and ``title`` keys,
        where each entry of ``ents`` has ``start``, ``end``, ``label`` and ``score``.
    """
    entity_spans = []
    for ent in hf_pred:
        # displacy expects the tag under "label"; the prediction stores it as "entity_group".
        entity_spans.append({
            "start": ent["start"],
            "end": ent["end"],
            "label": ent["entity_group"],
            "score": ent["score"],
        })

    document = {"text": _original_text, "ents": entity_spans, "title": _title}
    return [document]
    
# `aggregation_strategy="simple"` merges word pieces into whole entities and produces the
# `entity_group` field read by convert_hf_to_displacy_format. It replaces the deprecated
# `grouped_entities=True` flag, which selected this same strategy.
ner_1 = pipeline("ner", aggregation_strategy="simple")
ner_2 = pipeline("ner", model="mrm8488/mobilebert-finetuned-pos", aggregation_strategy="simple")

# Example 1: one short sentence, rendered first with the default NER model and then with
# the MobileBERT part-of-speech model (the banners previously mislabelled it as "FLAIR").
print("\n\n... EXAMPLE #1 WITH DEFAULT MODEL ...")
original_text = "My name is Sylvain and I work at Hugging Face in Brooklyn."
ner_pred = ner_1(original_text)
displacy.render(convert_hf_to_displacy_format(ner_pred, original_text), style="ent", manual=True)

print("\n\n... EXAMPLE #1 WITH MOBILEBERT POS MODEL ...")
ner_pred = ner_2(original_text)
displacy.render(convert_hf_to_displacy_format(ner_pred, original_text), style="ent", manual=True, options = {"colors": ner_colors})


# Example 2: a multi-sentence passage, run through the same two models.
print("\n\n\n\n... EXAMPLE #2 WITH DEFAULT MODEL...")
original_text =  \
    """ 
        Italy, officially the Italian Republic is a country consisting of a peninsula 
        delimited by the Alps and several islands surrounding it, whose territory 
        largely coincides with the homonymous geographical region. Italy is located 
        in the centre of the Mediterranean Sea, in Southern Europe; it is 
        also considered part of Western Europe. A unitary parliamentary republic 
        with Rome as its capital and largest city. The country covers a total area of 
        301,340 km2 (116,350 sq mi) and shares land borders with France, Switzerland, 
        Austria, Slovenia, as well as the enclaved microstates of Vatican City and San 
        Marino. Italy has a territorial exclave in Switzerland (Campione) and a maritime 
        exclave in Tunisian waters (Lampedusa). With around 60 million inhabitants, 
        Italy is the third-most populous member state of the European Union.
    """
ner_pred = ner_1(original_text)
displacy.render(convert_hf_to_displacy_format(ner_pred, original_text), style="ent", manual=True)

print("\n\n... EXAMPLE #2 WITH MOBILEBERT POS MODEL ...")
ner_pred = ner_2(original_text)
displacy.render(convert_hf_to_displacy_format(ner_pred, original_text), style="ent", manual=True, options = {"colors": ner_colors})

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)okenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]



Downloading (…)lve/main/config.json:   0%|          | 0.00/1.84k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/98.5M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



... EXAMPLE #1 WITH DEFAULT MODEL ...


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.




... EXAMPLE #1 WITH FLAIR POS MODEL ...






... EXAMPLE #2 WITH DEFAULT MODEL...




... EXAMPLE #2 WITH FLAIR POS MODEL ...


In [8]:
#QA
def pretty_print_qa(_model, _questions, _context, show_context=True):
    """Ask one or more questions about a context and print each answer with its score.

    Args:
        _model: Callable invoked as ``_model(question=..., context=...)`` that returns
            a mapping with ``'answer'`` and ``'score'`` keys.
        _questions: A single question string, or an iterable (list, tuple, ...) of
            question strings.
        _context: Text the answers are looked up in.
        show_context: When True, print the context between horizontal rules first.

    Returns:
        None. All output goes to stdout.
    """
    # A lone string is a single question. Testing for `str` instead of
    # `type(...) != list` means tuples and other iterables of questions are
    # iterated rather than being passed to the model as one "question".
    if isinstance(_questions, str):
        _questions = [_questions]

    if show_context:
        print(f"\n{'-'*100}\nCONTEXT:\n{'-'*100}\n{_context}\n{'-'*100}")

    # Print QA (questions are numbered from 1)
    for i, _q in enumerate(_questions, start=1):
        _a = _model(question=_q, context=_context)
        print(f"\n\tQUESTION #{i}: {_q}")
        print(f"\t\tANSWER:\t--> {_a['answer']}")
        print(f"\t\tSCORE:\t--> {_a['score']}")

question_answerer = pipeline("question-answering")

# Example 1: one question, passed as a plain string, against a one-sentence context.
context_text, question_text = (
    "My name is Sylvain and I work at Hugging Face in Brooklyn",
    "Where do I work?",
)
pretty_print_qa(
    _model=question_answerer,
    _questions=question_text,
    _context=context_text,
)


# Example 2: a longer passage about Italy used as the QA context. Bracketed citation
# markers such as [15] are left in the text and are therefore part of the model input.
context_text = \
    """
        Italy, officially the Italian Republic is a country consisting of a peninsula 
        delimited by the Alps and several islands surrounding it,[15] whose territory 
        largely coincides with the homonymous geographical region.[16] Italy is located 
        in the centre of the Mediterranean Sea, in Southern Europe;[17][18][19] it is 
        also considered part of Western Europe.[20][21] A unitary parliamentary republic 
        with Rome as its capital and largest city, the country covers a total area of 
        301,340 km2 (116,350 sq mi) and shares land borders with France, Switzerland, 
        Austria, Slovenia, as well as the enclaved microstates of Vatican City and San 
        Marino. Italy has a territorial exclave in Switzerland (Campione) and a maritime 
        exclave in Tunisian waters (Lampedusa). With around 60 million inhabitants, 
        Italy is the third-most populous member state of the European Union.
    """
# Ten questions about the passage, passed as a list so each is answered in turn.
question_text = ["Where is Italy located?", 
                 "What is the largest city in Italy?", 
                 "What is the most populous EU member state?", 
                 "What countries border Italy?", 
                 "How large is Italy?", 
                 "How large is Italy in Miles?", 
                 "What continent is Italy located within?",
                 "Is Italy a country or state?",
                 "What is the relationship between the enclaved microstates of Vatican City and Italy?",
                 "What mountain range is close to Italy?"]
pretty_print_qa(question_answerer, question_text, context_text)

# Example 3: a long essay-style passage on the commercialisation of art, used to probe
# the QA model with both factual and interpretive questions.
print("\n\n\n\n... EXAMPLE #3 ...")
context_text = \
    """
        'Strange Bedfellows!' lamented the title of a recent letter to Museum News, in which a certain
        Harriet Sherman excoriated the National Gallery of Art in Washington for its handling of 
        200,000 tickets to the much-ballyhooed “Van Gogh’s van Goghs” exhibit. A huge proportion 
        of the free tickets were snatched up by the opportunists in the dead of winter, who 
        then scalped those tickets at $85 apiece to less hardy connoiseurs. 
        Yet, Sherman’s bedfellows are far from strange. Art, despite its religious and magical 
        origins, very soon became a commercial venture. From bourgeois patrons funding art they 
        barely understood in order to share their protegee’s prestige, to museum curators 
        stage-managing the cult of artists in order to enhance the market value of museum 
        holdings, entrepreneurs have found validation and profit in big-name art. Speculators, 
        thieves, and promoters long ago created and fed a market where cultural icons could 
        be traded like commodities. This trend toward commodification of high-brow art took 
        an ominous, if predictable, turn in the 1980s during the Japanese 'bubble economy.' 
        At a time when Japanese share prices more than doubled, individual tycoons and industrial 
        giants alike invested record amounts in some of the West’s greatest masterpieces. 
        Ryoei Saito, for example, purchased van Gogh’s Portrait of Dr. Gachet for a record-breaking 
        $82.5 million. The work, then on loan to the Metropolitan Museum of Modern Art, suddenly 
        vanished from the public domain. Later learning that he owed the Japanese government $24 
        million in taxes, Saito remarked that he would have the paining cremated with him to spare 
        his heirs the inheritance tax. This statement, which he later dismissed as a joke, alarmed 
        and enraged many. A representative of the Van Gogh museum, conceding that he had no legal 
        redress, made an ethical appeal to Mr. Saito, asserting, 'a work of art remains the 
        possession of the world at large'. Ethical appeals notwithstanding, great art will increasingly 
        devolve into big business. Firstly, great art can only be certified by its market value. 
        Moreover, the 'world at large' hasn’t the means of acquisition. Only one museum currently 
        has the funding to contend for the best pieces–the J. Paul Getty Museum, founded by the 
        billionaire oilman. The art may disappear into private hands, but its transfer will 
        disseminate once static fortunes into the hands of various investors, collectors, and 
        occasionally the artist.
    """

# Eight questions about the passage, passed as a list so each is answered in turn.
question_text = ["What is the main idea being communicated by this passage?", 
                 "Which museum might be able to afford to keep or obtain top art pieces?",
                 "What famous artist does this article reference?",
                 "What painting is referenced in this article?",
                 "How much did Ryoei pay for Van Gogh's portrait of Dr. Gachet?",
                 "What did Saito joke about?",
                 "Why would Saito cremate a painting?",
                 "Which group of people does the author of this article like the least?"]
pretty_print_qa(question_answerer, question_text, context_text)

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]


----------------------------------------------------------------------------------------------------
CONTEXT:
----------------------------------------------------------------------------------------------------
My name is Sylvain and I work at Hugging Face in Brooklyn
----------------------------------------------------------------------------------------------------

	QUESTION #1: Where do I work?
		ANSWER:	--> Hugging Face
		SCORE:	--> 0.6949770450592041

----------------------------------------------------------------------------------------------------
CONTEXT:
----------------------------------------------------------------------------------------------------

        Italy, officially the Italian Republic is a country consisting of a peninsula 
        delimited by the Alps and several islands surrounding it,[15] whose territory 
        largely coincides with the homonymous geographical region.[16] Italy is located 
        in the centre of the Mediterranean Sea, in Southern Eu

In [None]:
#summarization
def pretty_print_summary(_model, _text, show_text=True, **kwargs):
    """Summarize a text with the given model and print the summary between rules.

    Args:
        _model: Callable invoked as ``_model(_text, **kwargs)``; the first element of
            its return value must provide a ``'summary_text'`` entry.
        _text: The text to summarize.
        show_text: When True, the original text is printed before the summary.
        **kwargs: Forwarded unchanged to ``_model``.

    Returns:
        None. All output goes to stdout.
    """
    # The model runs before anything is printed, so any output it emits precedes both sections.
    model_output = _model(_text, **kwargs)
    summary = model_output[0]["summary_text"]

    rule = '-' * 100
    if show_text:
        print(f"\n{rule}\nORIGINAL TEXT:\n{rule}\n{_text}\n{rule}")
    print(f"\n{rule}\nSUMMARY TEXT:\n{rule}\n{summary}\n{rule}")

# Two summarizers are created so extractive-style and abstractive-style summaries of
# the same texts can be compared:
#    - abstractive_summarizer --> the google/pegasus-xsum checkpoint
#    - extractive_summarizer  --> whatever `pipeline` defaults to (DistilBart, per the original note)
# NOTE(review): the "extractive" label presumably describes how the default model's
# summaries tend to read rather than its architecture -- confirm.
summarization_task = "summarization"
abstractive_summarizer = pipeline(summarization_task, model="google/pegasus-xsum")
extractive_summarizer = pipeline(summarization_task)

# Example #1 input: two paragraphs on the decline of traditional engineering education
# in America compared with China, India and other industrial countries.
text = \
    """
    America has changed dramatically during recent years. Not only has the number of 
    graduates in traditional engineering disciplines such as mechanical, civil, 
    electrical, chemical, and aeronautical engineering declined, but in most of 
    the premier American universities engineering curricula now concentrate on 
    and encourage largely the study of engineering science. As a result, there 
    are declining offerings in engineering subjects dealing with infrastructure, 
    the environment, and related issues, and greater concentration on high 
    technology subjects, largely supporting increasingly complex scientific 
    developments. While the latter is important, it should not be at the expense 
    of more traditional engineering.

    Rapidly developing economies such as China and India, as well as other 
    industrial countries in Europe and Asia, continue to encourage and advance 
    the teaching of engineering. Both China and India, respectively, graduate 
    six and eight times as many traditional engineers as does the United States. 
    Other industrial countries at minimum maintain their output, while America 
    suffers an increasingly serious decline in the number of engineering graduates 
    and a lack of well-educated engineers.
    """

# We don't include a 'short' answer as it will just be a truncated version of the default.
# Each summarizer runs twice on example #1: once with default lengths, once with a longer budget.
long_answer_kwargs = dict(min_length=40, max_length=160)
for banner, summarizer, length_kwargs in (
    ("\n\n\n... EXTRACTIVE MODEL - EXAMPLE #1 - DEFAULT ANSWER ...", extractive_summarizer, {}),
    ("\n\n\n... EXTRACTIVE MODEL - EXAMPLE #1 - LONG ANSWER ...", extractive_summarizer, long_answer_kwargs),
    ("\n\n... ABSTRACTIVE MODEL - EXAMPLE #1 - DEFAULT ANSWER ...", abstractive_summarizer, {}),
    ("\n\n... ABSTRACTIVE MODEL - EXAMPLE #1 - LONG ANSWER ...", abstractive_summarizer, long_answer_kwargs),
):
    print(banner)
    pretty_print_summary(summarizer, text, show_text=True, **length_kwargs)


# Example #2 input: a four-paragraph essay on the commercialisation of fine art
# (paragraphs are separated by whitespace-only lines inside the literal).
text = \
    """
        'Strange Bedfellows!' lamented the title of a recent letter to Museum News, in which a certain
        Harriet Sherman excoriated the National Gallery of Art in Washington for its handling of 
        200,000 tickets to the much-ballyhooed “Van Gogh’s van Goghs” exhibit. A huge proportion 
        of the free tickets were snatched up by the opportunists in the dead of winter, who 
        then scalped those tickets at $85 apiece to less hardy connoiseurs. 
        
        Yet, Sherman’s bedfellows are far from strange. Art, despite its religious and magical 
        origins, very soon became a commercial venture. From bourgeois patrons funding art they 
        barely understood in order to share their protegee’s prestige, to museum curators 
        stage-managing the cult of artists in order to enhance the market value of museum 
        holdings, entrepreneurs have found validation and profit in big-name art. Speculators, 
        thieves, and promoters long ago created and fed a market where cultural icons could 
        be traded like commodities. This trend toward commodification of high-brow art took 
        an ominous, if predictable, turn in the 1980s during the Japanese 'bubble economy.' 
        At a time when Japanese share prices more than doubled, individual tycoons and industrial 
        giants alike invested record amounts in some of the West’s greatest masterpieces. 
        
        Ryoei Saito, for example, purchased van Gogh’s Portrait of Dr. Gachet for a record-breaking 
        $82.5 million. The work, then on loan to the Metropolitan Museum of Modern Art, suddenly 
        vanished from the public domain. Later learning that he owed the Japanese government $24 
        million in taxes, Saito remarked that he would have the paining cremated with him to spare 
        his heirs the inheritance tax. This statement, which he later dismissed as a joke, alarmed 
        and enraged many. A representative of the Van Gogh museum, conceding that he had no legal 
        redress, made an ethical appeal to Mr. Saito, asserting, 'a work of art remains the 
        possession of the world at large'. 
        
        Ethical appeals notwithstanding, great art will increasingly devolve into big business. 
        Firstly, great art can only be certified by its market value. Moreover, the 'world at 
        large' hasn’t the means of acquisition. Only one museum currently has the funding to 
        contend for the best pieces–the J. Paul Getty Museum, founded by the billionaire oilman. 
        The art may disappear into private hands, but its transfer will disseminate once static 
        fortunes into the hands of various investors, collectors, and occasionally the artist.
    """

# Each summarizer runs twice on example #2: once with default lengths, once with a longer budget.
long_answer_kwargs = dict(min_length=40, max_length=160)
for banner, summarizer, length_kwargs in (
    ("\n\n\n... EXTRACTIVE MODEL - EXAMPLE #2 - DEFAULT ANSWER ...", extractive_summarizer, {}),
    ("\n\n\n... EXTRACTIVE MODEL - EXAMPLE #2 - LONG ANSWER ...", extractive_summarizer, long_answer_kwargs),
    ("\n\n... ABSTRACTIVE MODEL - EXAMPLE #2 - DEFAULT ANSWER ...", abstractive_summarizer, {}),
    ("\n\n... ABSTRACTIVE MODEL - EXAMPLE #2 - LONG ANSWER ...", abstractive_summarizer, long_answer_kwargs),
):
    print(banner)
    pretty_print_summary(summarizer, text, show_text=True, **length_kwargs)


# Example #3 input: a three-paragraph essay on Roy Lichtenstein and pop art
# (paragraphs are separated by whitespace-only lines inside the literal).
text = \
    """
        The painter Roy Lichtenstein helped to define pop art—the movement that incorporated commonplace 
        objects and commercial-art techniques into paintings—by paraphrasing the style of comic books in 
        his work. His merger of a popular genre with the forms and intentions of fine art generated a 
        complex result: while poking fun at the pretensions of the art world, Lichtenstein’s work also 
        managed to convey a seriousness of theme that enabled it to transcend mere parody.
        
        That Lichtenstein’s images were fine art was at first difficult to see, because, with their word 
        balloons and highly stylized figures, they looked like nothing more than the comic book panels 
        from which they were copied. Standard art history holds that pop art emerged as an impersonal 
        alternative to the histrionics of abstract expressionism, a movement in which painters conveyed 
        their private attitudes and emotions using nonrepresentational techniques. The truth is that by the 
        time pop art first appeared in the early 1960s, abstract expressionism had already lost much of its 
        force. Pop art painters weren’t quarreling with the powerful early abstract expressionist work of the 
        late 1940s but with a second generation of abstract expressionists whose work seemed airy, high-minded, 
        and overly lyrical. Pop art paintings were full of simple black lines and large areas of primary color. 
        Lichtenstein’s work was part of a general rebellion against the fading emotional power of abstract 
        expressionism, rather than an aloof attempt to ignore it.
        
        But if rebellion against previous art by means of the careful imitation of a popular genre were all 
        that characterized Lichtenstein’s work, it would possess only the reflective power that parodies have 
        in relation to their subjects. Beneath its cartoonish methods, his work displayed an impulse toward 
        realism, an urge to say that what was missing from contemporary painting was the depiction of contemporary 
        life. The stilted romances and war stories portrayed in the comic books on which he based his canvases, 
        the stylized automobiles, hot dogs, and table lamps that appeared in his pictures, were reflections of the 
        culture Lichtenstein inhabited. But, in contrast to some pop art, Lichtenstein’s work exuded not a jaded 
        cynicism about consumer culture, but a kind of deliberate naiveté, intended as a response to the excess 
        of sophistication he observed not only in the later abstract expressionists but in some other pop artists. 
        With the comics—typically the domain of youth and innocence—as his reference point, a nostalgia fills his 
        paintings that gives them, for all their surface bravado, an inner sweetness. 
    """

# Each summarizer runs twice on example #3: once with default lengths, once with a longer budget.
long_answer_kwargs = dict(min_length=40, max_length=160)
for banner, summarizer, length_kwargs in (
    ("\n\n\n... EXTRACTIVE MODEL - EXAMPLE #3 - DEFAULT ANSWER ...", extractive_summarizer, {}),
    ("\n\n\n... EXTRACTIVE MODEL - EXAMPLE #3 - LONG ANSWER ...", extractive_summarizer, long_answer_kwargs),
    ("\n\n... ABSTRACTIVE MODEL - EXAMPLE #3 - DEFAULT ANSWER ...", abstractive_summarizer, {}),
    ("\n\n... ABSTRACTIVE MODEL - EXAMPLE #3 - LONG ANSWER ...", abstractive_summarizer, long_answer_kwargs),
):
    print(banner)
    pretty_print_summary(summarizer, text, show_text=True, **length_kwargs)

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]