In [1]:
from transformers import pipeline

qa_pipeline = pipeline("question-answering")

context = """The Eiffel Tower is one of the most famous landmarks in the world.
It was constructed in 1889 in Paris, France, and stands at a height of 330 meters."""

question = "Where is the Eiffel Tower located?"

result = qa_pipeline(question=question, context=context)
print(result)


No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Device set to use cpu


{'score': 0.8780861496925354, 'start': 97, 'end': 110, 'answer': 'Paris, France'}


In [2]:
summarizer = pipeline("summarization")

text = """Artificial Intelligence (AI) is transforming industries by automating
tasks, improving efficiency, and enabling data-driven decision-making.
Companies across healthcare, finance, and transportation are integrating AI
to enhance productivity and customer experiences."""

summary = summarizer(text, max_length=30, min_length=10, do_sample=False)
print(summary)


No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Device set to use cpu


[{'summary_text': ' Artificial Intelligence (AI) is transforming industries by automating tasks, improving efficiency, and enabling data-driven decision-making . Companies across'}]


In [3]:
from transformers import pipeline
import pandas as pd

# Load table QA pipeline
table_qa = pipeline("table-question-answering")

# Create a simple table
data = {
    "Name": ["Alice", "Bob"],
    "Age": ["25", "30"],
    "City": ["New York", "San Francisco"]
}
table = pd.DataFrame.from_dict(data)

# Ask a question about the table
question = "Where does Bob live?"
result = table_qa(table=table, query=question)
print(result)


No model was supplied, defaulted to google/tapas-base-finetuned-wtq and revision e3dde19 (https://huggingface.co/google/tapas-base-finetuned-wtq).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.66k [00:00<?, ?B/s]

TAPAS models are not usable since `tensorflow_probability` can't be loaded. It seems you have `tensorflow_probability` installed with the wrong tensorflow version. Please try to reinstall it following the instructions here: https://github.com/tensorflow/probability.


pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/490 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/262k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

Device set to use cpu
  text = normalize_for_match(row[col_index].text)
  cell = row[col_index]


{'answer': 'San Francisco', 'coordinates': [(1, 2)], 'cells': ['San Francisco'], 'aggregator': 'NONE'}


In [4]:
text2text = pipeline("text2text-generation")

text = "Convert this sentence into a question: The sky is blue."
result = text2text(text)
print(result)


No model was supplied, defaulted to google-t5/t5-base and revision a9723ea (https://huggingface.co/google-t5/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


[{'generated_text': 'False'}]


In [5]:
classifier = pipeline("text-classification")

text = "I absolutely love this new AI model!"
result = classifier(text)
print(result)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9998767375946045}]


In [6]:
generator = pipeline("text-generation")

prompt = "The future of AI is"
result = generator(prompt, max_length=30, num_return_sequences=1)
print(result)


No model was supplied, defaulted to openai-community/gpt2 and revision 607a30d (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "The future of AI is uncertain, but if we don't act as a kind of benevolent benefactor and help to improve their efficiency rather than as the"}]


In [7]:
ner_pipeline = pipeline("token-classification", grouped_entities=True)

text = "Elon Musk founded SpaceX in 2002 and acquired Twitter in 2022."
result = ner_pipeline(text)
print(result)


No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Device set to use cpu


[{'entity_group': 'PER', 'score': 0.99783283, 'word': 'Elon Musk', 'start': 0, 'end': 9}, {'entity_group': 'ORG', 'score': 0.999151, 'word': 'SpaceX', 'start': 18, 'end': 24}, {'entity_group': 'ORG', 'score': 0.99849725, 'word': 'Twitter', 'start': 46, 'end': 53}]


In [8]:
translator = pipeline("translation_en_to_fr")

text = "Hello, how are you?"
result = translator(text)
print(result)


No model was supplied, defaulted to google-t5/t5-base and revision a9723ea (https://huggingface.co/google-t5/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[{'translation_text': 'Bonjour, comment êtes-vous?'}]


In [9]:
zero_shot = pipeline("zero-shot-classification")

text = "I love playing football during the weekends."
labels = ["sports", "technology", "food"]

result = zero_shot(text, candidate_labels=labels)
print(result['labels'])
print(result['scores'])


No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


['sports', 'technology', 'food']
[0.9965085983276367, 0.0019449957180768251, 0.0015463430900126696]
