In [None]:
# Install the transformers module (run once in the kernel's environment)
# pip install transformers
# Hugging Face model used in the next cell: google/vit-base-patch16-224

In [5]:
# Image classification with a Vision Transformer (google/vit-base-patch16-224).
from transformers import ViTImageProcessor, ViTForImageClassification
from PIL import Image
import requests
import torch

url = 'https://cdn.pixabay.com/photo/2016/07/15/15/55/dachshund-1519374_640.jpg'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of handing PIL an error page.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
# Normalise to three channels so grayscale / RGBA / palette images also work.
image = Image.open(response.raw).convert("RGB")

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

inputs = processor(images=image, return_tensors="pt")  # "pt" = PyTorch tensors
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])

Predicted class: black-and-tan coonhound


In [12]:
# Object detection with DETR (Hugging Face model: facebook/detr-resnet-50).
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests

url = "https://cdn.pixabay.com/photo/2016/07/15/15/55/dachshund-1519374_640.jpg"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of handing PIL an error page.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
# Normalise to three channels so grayscale / RGBA / palette images also work.
image = Image.open(response.raw).convert("RGB")

# you can specify the revision tag if you don't want the timm dependency
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

inputs = processor(images=image, return_tensors="pt")
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# let's only keep detections with score > 0.9
# PIL's image.size is (width, height); reversing it yields (height, width).
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
        f"Detected {model.config.id2label[label.item()]} with confidence "
        f"{round(score.item(), 3)} at location {box}"
    )


Detected dog with confidence 1.0 at location [1.56, 15.5, 593.54, 475.03]


In [13]:
# Semantic segmentation with SegFormer.
# Hugging Face model: https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512

from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from PIL import Image
import requests
import torch

processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of handing PIL an error page.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
# Normalise to three channels so grayscale / RGBA / palette images also work.
image = Image.open(response.raw).convert("RGB")

inputs = processor(images=image, return_tensors="pt")
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)


In [14]:
# Display the raw segmentation logits tensor (the repr is large and elided with "...").
logits

tensor([[[[ -4.6310,  -5.5232,  -6.2356,  ...,  -4.9868,  -4.7341,  -4.6612],
          [ -5.1921,  -6.1444,  -6.5996,  ...,  -5.1771,  -5.0288,  -5.1761],
          [ -5.4424,  -6.2790,  -6.7574,  ...,  -5.2748,  -5.1669,  -5.0999],
          ...,
          [ -8.5836,  -9.0887,  -9.5409,  ...,  -8.7190,  -8.5183,  -8.3098],
          [ -8.4320,  -8.8555,  -9.1848,  ...,  -7.7831,  -7.4822,  -7.3598],
          [ -8.3224,  -8.8764,  -9.1849,  ...,  -7.1564,  -6.8759,  -6.6428]],

         [[-12.1391, -13.3122, -13.9554,  ..., -11.8693, -11.5761, -11.3418],
          [-12.8732, -13.9352, -14.3563,  ..., -12.3348, -12.1524, -12.3176],
          [-12.9438, -13.8226, -14.2513,  ..., -12.3360, -12.3081, -12.2396],
          ...,
          [-13.9108, -14.2715, -14.6169,  ..., -13.2829, -13.3424, -13.3222],
          [-13.8718, -14.2715, -14.3808,  ..., -12.5270, -12.4334, -12.3057],
          [-13.6848, -14.2857, -14.5154,  ..., -11.8523, -11.8534, -11.6054]],

         [[-12.5134, -13.4687,

In [15]:
# Show the dimensions of the logits tensor.
logits.shape

torch.Size([1, 150, 128, 128])

In [18]:
# Hugging Face model: openai-community/gpt2

from transformers import GPT2Tokenizer, TFGPT2Model
# Load the pretrained GPT-2 tokenizer and the TensorFlow GPT-2 model.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2Model.from_pretrained('gpt2')
text = "My name is Sean. I am "
# return_tensors='tf' asks the tokenizer for TensorFlow tensors.
encoded_input = tokenizer(text, return_tensors='tf')
output = model(encoded_input)

# Display the full model output object (last_hidden_state plus past_key_values);
# this is hidden-state data, not generated text.
output

All PyTorch model weights were used when initializing TFGPT2Model.

All the weights of TFGPT2Model were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2Model for predictions without further training.


TFBaseModelOutputWithPastAndCrossAttentions(last_hidden_state=<tf.Tensor: shape=(1, 8, 768), dtype=float32, numpy=
array([[[-0.03373868, -0.04289959, -0.28264883, ..., -0.1524211 ,
          0.01575812, -0.11777692],
        [-0.5587513 ,  0.08880288, -0.7682316 , ...,  0.6419477 ,
         -0.15694837,  0.23887528],
        [ 0.04018354, -0.02829874,  0.12519777, ...,  0.25838193,
         -0.1078774 ,  0.62750804],
        ...,
        [-0.09572621, -0.34613052,  0.6800951 , ...,  0.25507423,
         -0.17687407,  0.1280706 ],
        [-0.73651135,  0.1363889 , -0.06959349, ...,  0.5516236 ,
          0.14938027,  0.7436497 ],
        [ 0.21519922, -0.32922813,  1.692186  , ...,  1.0434346 ,
         -0.32980028,  0.3046119 ]]], dtype=float32)>, past_key_values=(<tf.Tensor: shape=(2, 1, 12, 8, 64), dtype=float32, numpy=
array([[[[[-1.09587240e+00,  1.84698129e+00,  8.98728311e-01, ...,
           -1.29997396e+00, -7.13939130e-01,  1.15244353e+00],
          [-2.82440877e+00,  2.6346