In [81]:
import torch

# Hub entrypoint name of the architecture to load.
# Options include: alexnet, vgg16, vgg19, resnet50, resnet152, etc.
PRE_TRAINED = 'resnet50'

# Load the chosen architecture, with pretrained weights, from the pinned
# torchvision v0.10.0 hub repository.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    PRE_TRAINED,
    pretrained=True,
)

# Put the network in evaluation mode for inference. Left as the cell's last
# expression so the notebook also displays the module structure.
model.eval()

Using cache found in /home/ciheul/.cache/torch/hub/pytorch_vision_v0.10.0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [82]:
# Download an example image (dog.jpg) from the pytorch/hub GitHub repository (uncomment to run)
# import urllib

# url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
# try: urllib.URLopener().retrieve(url, filename)
# except: urllib.request.urlretrieve(url, filename)

In [83]:
from PIL import Image
from torchvision import transforms

# Load the image file as a PIL image.
# Force 3-channel RGB so grayscale, palette, or RGBA files still match the
# 3-value mean/std passed to Normalize below. The context manager closes the
# underlying file once convert() has produced an in-memory copy.
with Image.open("dog.jpg") as image_file:
    input_image = image_file.convert("RGB")

# Pipeline that turns a PIL image into a normalized tensor:
# resize, center-crop to 224 x 224, convert to tensor, then normalize per channel.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Image as a tensor (3 x 224 x 224)
input_tensor = preprocess(input_image)

# Add a leading batch dimension: (1 x 3 x 224 x 224)
input_batch = input_tensor.unsqueeze(0)

# Use CUDA if it is available; the model and its input must be on the same device
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

# Predict the image class; no_grad() disables gradient tracking during inference
with torch.no_grad():
    output = model(input_batch)

# probabilities has 1000 entries, one per output neuron of the selected
# model's final layer (whichever architecture PRE_TRAINED names), which
# targets the 1000 ImageNet object classes.
# The position within the tensor matters: index i corresponds to the i-th
# object name in the ImageNet label list.
probabilities = torch.nn.functional.softmax(output[0], dim=0)

In [84]:
# Download the ImageNet labels and a few extra test images (uncomment to run)
# !wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
# !wget https://upload.wikimedia.org/wikipedia/commons/7/76/Bazoule_sacred_crocodiles_MS_6709cropped.JPG
# !wget https://upload.wikimedia.org/wikipedia/commons/9/9e/Coturnix_ypsilophora_-_Sydney_Olympic_Park.jpg
# !wget https://hips.hearstapps.com/hmg-prod.s3.amazonaws.com/images/rabbit-breeds-american-white-1553635287.jpg
# !wget https://awsimages.detik.net.id/community/media/visual/2021/11/04/zebra-di-etosha-national-park-namibia-afika.jpeg

In [85]:
# Load the class names, one per line; the line position is the class index
with open("imagenet_classes.txt", "r") as f:
    categories = [line.strip() for line in f]

# Pick the five most probable classes for the image
top5_prob, top5_catid = torch.topk(probabilities, 5)
print(top5_prob)
print(top5_catid)

# Report each class name next to its probability, most probable first
for prob, catid in zip(top5_prob.tolist(), top5_catid.tolist()):
    print(categories[catid], prob)

tensor([0.8733, 0.0303, 0.0197, 0.0111, 0.0092])
tensor([258, 259, 270, 261, 248])
Samoyed 0.8732959032058716
Pomeranian 0.03027082420885563
white wolf 0.019671205431222916
keeshond 0.01107352040708065
Eskimo dog 0.009204281494021416
