In [None]:
# Smoke test: confirm that the Hugging Face transformers package is installed and usable

from transformers import pipeline

# Build the pipeline first, then run it on one sentence and print the result
sentiment = pipeline('sentiment-analysis')
print(sentiment('we love you really so much'))


In [None]:
# Transcribe an audio file through a pipeline.
# Only the task is named here, so the library picks its default checkpoint
# (Facebook's Wav2Vec2, according to the original note).

audio_url = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac"

transcriber = pipeline(task="automatic-speech-recognition")

transcriber(audio_url)


In [None]:
# Same transcription, this time with OpenAI's Whisper large-v2 checkpoint.
# Only `model` is passed; no `task` argument is given.

audio_url = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac"

transcriber = pipeline(model="openai/whisper-large-v2")

transcriber(audio_url)

In [None]:
# Whisper large-v2 again, but called with a list of inputs instead of a single one

audio_urls = [
    "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac",
    "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac",
]

transcriber = pipeline(model="openai/whisper-large-v2")

# Passing the whole list in one call yields the transcriptions for both files
transcriber(audio_urls)

In [None]:
# Install Accelerate package for device map setting

! pip install --upgrade accelerate


In [None]:
# Transcribe the audio files with the device_map parameter set to "auto".
# TODO: the original author was not sure this setting works — verify on the target machine.

from transformers import pipeline

audio_urls = [
    "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac",
    "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac",
]

transcriber = pipeline(model="openai/whisper-large-v2", device_map="auto")

transcriber(audio_urls)

In [None]:
# Configure the pipeline with a batch_size

# The four sample clips are named 1.flac through 4.flac
audio_filenames = [
    f"https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/{i}.flac"
    for i in range(1, 5)
]

transcriber = pipeline(model="openai/whisper-large-v2", batch_size=2)

# The result is kept in `texts`; nothing is displayed by this cell
texts = transcriber(audio_filenames)

In [None]:
# Task-specific parameters: ask the pipeline to return timestamps

audio_url = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac"

transcriber = pipeline(model="openai/whisper-large-v2", return_timestamps=True)

transcriber(audio_url)

In [None]:
# Run a pipeline over a `datasets` dataset

import torch
from datasets import load_dataset
from transformers import pipeline
# KeyDataset is a utility that yields only the item (column) we are interested in.
from transformers.pipelines.pt_utils import KeyDataset

# Use the first CUDA GPU when one is available and fall back to the CPU (-1)
# otherwise, so the cell also runs on machines without a GPU instead of failing
# on a hard-coded device index.
device = 0 if torch.cuda.is_available() else -1

pipe = pipeline(model="hf-internal-testing/tiny-random-wav2vec2", device=device)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:10]")

# Feed only the "audio" column to the pipeline and print each result as it arrives
for out in pipe(KeyDataset(dataset, "audio")):
    print(out)

In [None]:
# Install soundfile module

! pip install --upgrade soundfile

In [None]:
# Image pipeline example: classify a single picture

from transformers import pipeline

image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"

vision_classifier = pipeline(model="google/vit-base-patch16-224")

raw_preds = vision_classifier(images=image_url)

# Keep only score and label, with the score rounded to four decimals for readability
preds = [
    {"score": round(entry["score"], 4), "label": entry["label"]}
    for entry in raw_preds
]

preds

In [3]:
# NLP example: zero-shot classification

from transformers import pipeline

# This checkpoint is a `zero-shot-classification` model: it classifies text
# against whatever labels you choose to supply.
classifier = pipeline(model="facebook/bart-large-mnli")

text = "I have a problem with my iphone that needs to be resolved asap!!"
labels = ["urgent", "not urgent", "phone", "tablet", "computer"]

classifier(text, candidate_labels=labels)

{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!',
 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'],
 'scores': [0.5036354660987854,
  0.47879987955093384,
  0.012600123882293701,
  0.0026557897217571735,
  0.0023087516892701387]}

In [None]:
# Install pytesseract first to run the multimodal examples

! sudo apt install -y tesseract-ocr
! pip install pytesseract


In [None]:
# Multimodal case: ask a question about an image

from transformers import pipeline

invoice_url = "https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png"
question = "What is the invoice number?"

vqa = pipeline(model="impira/layoutlm-document-qa")

vqa(image=invoice_url, question=question)


In [None]:
# Text generation with facebook/opt-1.3b loaded in bfloat16
# pip install accelerate
# NOTE(review): nothing in this cell passes device_map, so it is unclear whether
# accelerate is actually needed here — confirm.
import torch
from transformers import pipeline, set_seed

# do_sample=True draws tokens at random; seeding first makes reruns repeatable
# on the same setup.
set_seed(42)

pipe = pipeline(model="facebook/opt-1.3b", torch_dtype=torch.bfloat16)

output = pipe("This is a cool example!", do_sample=True, top_p=0.95)

# Display the generated text; previously the result was computed but never shown
output