In [None]:
# For AMD RDNA 2 support

import os

# Both variables are written into this process's environment in one call.
# NOTE(review): HSA_OVERRIDE_GFX_VERSION presumably makes the ROCm runtime treat
# the GPU as gfx 10.3.0, and CUDA_VISIBLE_DEVICES presumably limits PyTorch to
# device 0 — confirm against the ROCm documentation. Run this cell before any
# cell that touches the GPU.
os.environ.update(
    {
        "HSA_OVERRIDE_GFX_VERSION": "10.3.0",
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

In [None]:
# Run this cell to check if ROCm is installed and working properly.
#
# Each check prints its own GOOD/BAD line. A failure in one check is reported
# where it happens and does not hide the results of the remaining checks.

import grp
import os
import pwd
import re
import subprocess

import torch


def list_rocm_gpus(rocminfo_output):
    """Return the names of the GPU agents listed in ``rocminfo`` output.

    The text is cut into one section per ``Agent <n>`` header line. Inside a
    section every ``key: value`` line is collected, keeping only the first
    occurrence of each key, so the agent-level ``Name:`` wins over any
    indented ``Name:`` line that appears later in the same section.

    Args:
        rocminfo_output: Full standard output of the ``rocminfo`` command.

    Returns:
        A list with the ``Name:`` value of every agent whose ``Device Type:``
        is ``GPU``, in listing order. CPU agents are skipped because they are
        not devices PyTorch can run ``cuda`` tensors on. The list is empty
        when no agent section is present.
    """
    gpu_names = []
    # Element 0 of the split is the banner before the first agent; drop it.
    agent_sections = re.split(
        r"(?m)^[ \t]*Agent[ \t]+\d+[ \t]*$", rocminfo_output
    )[1:]
    for section in agent_sections:
        fields = {}
        for line in section.splitlines():
            key, colon, value = line.partition(":")
            if colon:
                fields.setdefault(key.strip(), value.strip())
        if fields.get("Device Type") == "GPU" and fields.get("Name"):
            gpu_names.append(fields["Name"])
    return gpu_names


def current_user_groups():
    """Return ``(user_name, group_names)`` for the user running this kernel.

    The user is looked up from the process UID rather than ``os.getlogin()``,
    which needs a controlling terminal and therefore fails in many notebook
    servers and containers.

    Returns:
        The account name and a list of group names: every group that lists
        the account as a member, plus the account's primary group.

    Raises:
        KeyError: If the UID or the primary GID has no database entry.
    """
    account = pwd.getpwuid(os.getuid())
    group_names = [
        entry.gr_name for entry in grp.getgrall() if account.pw_name in entry.gr_mem
    ]
    group_names.append(grp.getgrgid(account.pw_gid).gr_name)
    return account.pw_name, group_names


devices = []

print("\n\nChecking ROCM support...")
try:
    rocminfo_run = subprocess.run(["rocminfo"], stdout=subprocess.PIPE)
except OSError as err:
    # Only a failure to launch rocminfo (missing or not executable) lands here.
    print(
        "Cannot find rocminfo command information. Unable to determine if AMDGPU drivers with ROCM support were installed."
    )
    print("    Reason:", err)
else:
    devices = list_rocm_gpus(rocminfo_run.stdout.decode("utf-8", errors="replace"))
    if devices:
        print("GOOD: ROCM devices found: ", len(devices))
    else:
        print("BAD: No ROCM devices found.")

print("Checking PyTorch...")
try:
    # A CPU tensor of the requested shape is enough to prove torch works.
    has_torch = tuple(torch.rand(5, 3).shape) == (5, 3)
except RuntimeError as err:
    has_torch = False
    print("    Reason:", err)
if has_torch:
    print("GOOD: PyTorch is working fine.")
else:
    print("BAD: PyTorch is NOT working.")

print("Checking user groups...")
try:
    user, groups = current_user_groups()
except KeyError as err:
    print("BAD: Could not look up the current user or its primary group:", err)
else:
    if "render" in groups and "video" in groups:
        print("GOOD: The user", user, "is in RENDER and VIDEO groups.")
    else:
        print(
            "BAD: The user",
            user,
            "is NOT in RENDER and VIDEO groups. This is necessary in order to PyTorch use HIP resources",
        )

# This cell treats torch.cuda availability as the sign of ROCm support.
if torch.cuda.is_available():
    print("GOOD: PyTorch ROCM support found.")
    print("Testing PyTorch ROCM support...")
    try:
        t = torch.tensor([5, 5, 5], dtype=torch.int64, device="cuda")
        # Check the values and placement directly instead of the repr string.
        gpu_tensor_ok = t.is_cuda and t.tolist() == [5, 5, 5]
    except RuntimeError as err:
        gpu_tensor_ok = False
        print("    Reason:", err)
    if gpu_tensor_ok:
        print("Everything fine! You can run PyTorch code inside of: ")
        for gpu_name in devices:
            print("---> ", gpu_name)
    else:
        print("BAD: PyTorch could not create a tensor on the ROCM device.")
else:
    print("BAD: PyTorch ROCM support NOT found.")

In [None]:
# Feature extraction with GPT-2
#
# GPT2Model is the bare transformer: calling it returns hidden states for the
# input tokens. It does not generate text.

import torch
from transformers import GPT2Tokenizer, GPT2Model

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors="pt")

# Inference only: skip autograd bookkeeping so no graph is kept alive and the
# displayed tensors carry no grad_fn.
with torch.no_grad():
    output = model(**encoded_input)
output

In [None]:
# Embedding

import torch
from transformers import AutoModel, AutoTokenizer

# list of sentences
sentences = [
    "sentence_0",
    "sentence_1",
]

# init model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("maidalun1020/bce-embedding-base_v1")
model = AutoModel.from_pretrained("maidalun1020/bce-embedding-base_v1")

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU so the
# cell still runs on a machine without a usable device.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# get inputs
inputs = tokenizer(
    sentences, padding=True, truncation=True, max_length=512, return_tensors="pt"
)
inputs_on_device = {k: v.to(device) for k, v in inputs.items()}

# get embeddings; inference only, so autograd is switched off for the forward pass
with torch.no_grad():
    outputs = model(**inputs_on_device, return_dict=True)
embeddings = outputs.last_hidden_state[:, 0]  # cls pooler
embeddings = embeddings / embeddings.norm(dim=1, keepdim=True)  # normalize
embeddings

In [None]:
# Reranking

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# your query and corresponding passages
query = "input_query"
passages = ["passage_0", "passage_1"]

# construct sentence pairs
sentence_pairs = [[query, passage] for passage in passages]

# init model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("maidalun1020/bce-reranker-base_v1")
model = AutoModelForSequenceClassification.from_pretrained(
    "maidalun1020/bce-reranker-base_v1"
)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU so the
# cell still runs on a machine without a usable device.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# get inputs
inputs = tokenizer(
    sentence_pairs, padding=True, truncation=True, max_length=512, return_tensors="pt"
)
inputs_on_device = {k: v.to(device) for k, v in inputs.items()}

# calculate scores; inference only, so autograd is switched off for the forward pass
with torch.no_grad():
    scores = model(**inputs_on_device, return_dict=True).logits.view(-1).float()
# One sigmoid score per (query, passage) pair, in the order of `passages`.
scores = torch.sigmoid(scores)
scores

### Pipeline

In [None]:
# Text Generation
from transformers import pipeline

# Build a text-generation pipeline for Phi-3 mini on device 0. The pipeline is
# only constructed in this cell; no prompt is passed to it here.
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# model repository to run locally — confirm it is still required.
pipe = pipeline(
    task="text-generation",
    model="microsoft/Phi-3-mini-4k-instruct",
    device=0,
    trust_remote_code=True,
)

In [None]:
# Text Classification
from transformers import pipeline

# No model is named here, so the choice of checkpoint is left to the library.
pipe = pipeline(task="text-classification", device=0)

# Classify both reviews in one call; the result is displayed as the cell output.
reviews = ["This restaurant is awesome", "This restaurant is awful"]
pipe(reviews)

In [None]:
# Automatic Speech Recognition with Datasets

import datasets
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
from tqdm.auto import tqdm

pipe = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
    device=0,
)
dataset = datasets.load_dataset("superb", name="asr", split="test")

# KeyDataset (only *pt*) hands the pipeline just the "file" entry of each
# dataset item, since the *target* part is not needed here.
# For sentence pairs use KeyPairDataset instead.
audio_files = KeyDataset(dataset, "file")
transcripts = pipe(audio_files)

# Print each result as it is produced, with a progress bar.
for transcript in tqdm(transcripts):
    print(transcript)
    # {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
    # {"text": ....}
    # ....

In [None]:
# Summarization

from transformers import pipeline

# No model is named here, so the choice of checkpoint is left to the library.
summarizer = pipeline(task="summarization", device=0)

# Summarise one short sentence, bounding the summary length on both sides.
proverb = "An apple a day, keeps the doctor away"
summarizer(proverb, min_length=5, max_length=30)

In [None]:
# Object Detection

from transformers import pipeline

# No model is named here, so the choice of checkpoint is left to the library.
detector = pipeline(task="object-detection", device=0)

image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"

# Keep score, label and box for every detection, rounding the score to four
# decimals so the displayed output stays compact.
preds = []
for detection in detector(image_url):
    preds.append(
        {
            "score": round(detection["score"], 4),
            "label": detection["label"],
            "box": detection["box"],
        }
    )
preds

In [None]:
# Audio Classification

from transformers import pipeline

# No model is named here, so the choice of checkpoint is left to the library.
classifier = pipeline(task="audio-classification", device=0)

# Classify a single remote audio sample; the result is the cell output.
sample_url = "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac"
classifier(sample_url)