In [8]:
# Summarization pipelines used zero-shot (pre-trained checkpoints, no fine-tuning here). Summarization models available on Hugging Face: https://huggingface.co/models?pipeline_tag=summarization&sort=downloads

from transformers import pipeline

# Checkpoints to compare, in the order they are loaded and later iterated.
# Adding or removing a model is a one-line change here; the pipelines and
# `list_of_summarizer` below are derived from this single list so they cannot
# drift out of sync.
SUMMARIZATION_MODEL_IDS = [
    "facebook/bart-large-cnn",             # https://huggingface.co/facebook/bart-large-cnn
    "sshleifer/distilbart-cnn-12-6",       # https://huggingface.co/sshleifer/distilbart-cnn-12-6
    "philschmid/bart-large-cnn-samsum",    # https://huggingface.co/philschmid/bart-large-cnn-samsum
    "google/pegasus-xsum",                 # https://huggingface.co/google/pegasus-xsum
    "Falconsai/text_summarization",        # https://huggingface.co/Falconsai/text_summarization
    # "google/pegasus-large",              # disabled — https://huggingface.co/google/pegasus-large
    "sshleifer/distilbart-cnn-6-6",        # https://huggingface.co/sshleifer/distilbart-cnn-6-6
    "Azma-AI/bart-large-text-summarizer",  # https://huggingface.co/Azma-AI/bart-large-text-summarizer
]

# Build one summarization pipeline per checkpoint. Dicts preserve insertion
# order, so iteration order matches SUMMARIZATION_MODEL_IDS.
summarizers_by_model_id = {
    model_id: pipeline("summarization", model=model_id)
    for model_id in SUMMARIZATION_MODEL_IDS
}

# Individual names kept so any cell that refers to a specific pipeline still works.
summarizer_facebook_bart = summarizers_by_model_id["facebook/bart-large-cnn"]
summarizer_sshleifer_distilbart_12_6 = summarizers_by_model_id["sshleifer/distilbart-cnn-12-6"]
summarizer_philschmid_bart = summarizers_by_model_id["philschmid/bart-large-cnn-samsum"]
summarizer_google_pegasus = summarizers_by_model_id["google/pegasus-xsum"]
summarizer_falconsai = summarizers_by_model_id["Falconsai/text_summarization"]
summarizer_sshleifer_distilbart_6_6 = summarizers_by_model_id["sshleifer/distilbart-cnn-6-6"]
summarizer_azma_bart = summarizers_by_model_id["Azma-AI/bart-large-text-summarizer"]

# Ordered list consumed by the comparison loop.
list_of_summarizer = list(summarizers_by_model_id.values())

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Sample passage that every summarizer is asked to condense.
ARTICLE = """ 
The principal activities of the Company are the provision of airframe maintenance, component overhaul services and inventory
technical management, the provision of line maintenance and technical ground handling services and investment holdings.
The principal activities of the subsidiary companies are disclosed in Note 18 to the financial statements. There have been no
significant changes in the nature of these activities during the financial year.
"""

# Run the same passage through every loaded summarizer and print the raw
# pipeline result next to the checkpoint name for side-by-side comparison.
for summarizer in list_of_summarizer:
    if hasattr(summarizer, 'model'):
        model_name = summarizer.model.name_or_path
    else:
        # Plain callables expose __name__; arbitrary objects do not, so fall
        # back to the type name instead of raising AttributeError.
        model_name = getattr(summarizer, '__name__', type(summarizer).__name__)
    # do_sample=False turns sampling off; max_length/min_length are forwarded
    # to generation as length bounds for the summary.
    summary = summarizer(ARTICLE, max_length=60, min_length=30, do_sample=False)
    print(f"Summary from {model_name}: {summary}")

Summary from facebook/bart-large-cnn: [{'summary_text': 'The principal activities of the subsidiary companies are disclosed in Note 18 to the financial statements. There have been no significant changes in the nature of these activities during the financial year.'}]
Summary from sshleifer/distilbart-cnn-12-6: [{'summary_text': ' The principal activities of the subsidiary companies are disclosed in Note 18 to the financial statements . There have been no significant changes in the nature of these activities during the financial year .'}]
Summary from philschmid/bart-large-cnn-samsum: [{'summary_text': 'The principal activities of the Company are the provision of airframe maintenance, component overhaul services, inventory management, line maintenance and technical ground handling services and investment holdings. There were no significant changes in the nature of these activities during the financial year.'}]
Summary from google/pegasus-xsum: [{'summary_text': "The results of the Compan

In [12]:
from transformers import pipeline
# Zero-shot classification pipeline using the facebook/bart-large-mnli checkpoint
# (https://huggingface.co/facebook/bart-large-mnli).
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

Downloading config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [13]:
# Text to label and the candidate topics it is scored against.
sequence_to_classify = "one day I will see the world"
candidate_labels = ["travel", "cooking", "dancing"]

# Bare last expression so the notebook displays the returned dict.
classifier(sequence_to_classify, candidate_labels=candidate_labels)
# Reference result: a dict with 'sequence', 'labels' and 'scores', where labels
# are ordered by descending score, e.g.
#   labels: ['travel', 'dancing', 'cooking']
#   scores: [~0.9939, ~0.0033, ~0.0029]


{'sequence': 'one day I will see the world',
 'labels': ['travel', 'dancing', 'cooking'],
 'scores': [0.9938650727272034, 0.0032737995497882366, 0.0028610355220735073]}