In [None]:
import os
import time
import torch
from PIL import Image
from torchvision import models
from torchvision import transforms


# Preprocessing pipeline applied to the input image: resize the shorter side
# to 256, take the central 224x224 crop, convert to a float tensor and
# normalize each channel.
# NOTE(review): std matches the usual ImageNet statistics but mean does not
# (ImageNet mean is [0.485, 0.456, 0.406]) -- presumably these are the values
# the checkpoint was trained with; confirm before changing.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.48235, 0.45882, 0.40784],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Normalize above is configured for exactly three channels, so force RGB:
# a grayscale, CMYK, palette or RGBA file would otherwise fail on the
# channel-count mismatch.
image = Image.open("../datasets/images/cat.jpg").convert("RGB")
# unsqueeze(0) adds the leading batch dimension the model expects.
inputs = transform(image).unsqueeze(0)

# Choose the execution device first so the checkpoint can be loaded
# independently of where it was saved.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Rebuild the two-class VGG16 and restore its weights.
# map_location="cpu" lets a checkpoint that was saved from a GPU session load
# on a CPU-only machine; the model is moved to `device` right afterwards.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted
# source.
model = models.vgg16(num_classes=2)
model.load_state_dict(torch.load("../models/VGG16.pt", map_location="cpu"))
model = model.to(device)
model.eval()  # put layers such as dropout into evaluation behaviour

# TorchScript archive of the quantized counterpart (presumably produced by
# post-training static quantization, going by the file name). It is not moved
# to `device`; the script later feeds it the un-moved `inputs` tensor.
model_static_quantized = torch.jit.load("../models/PTSQ_VGG16.pt")

def _timed_inference(net, batch):
    """Run a single forward pass without gradients and time it.

    Only the forward pass is measured: file-size lookups, printing and the
    host-to-device copy of the input are deliberately kept outside the timed
    region so that the two models are compared on equal terms.

    Args:
        net: Callable model (``nn.Module`` or TorchScript module).
        batch: Input tensor, already placed on the device ``net`` runs on.

    Returns:
        A ``(result, seconds)`` tuple: the model output and the wall-clock
        duration of the forward pass in seconds.
    """
    with torch.no_grad():
        started = time.perf_counter()  # monotonic, high-resolution clock
        result = net(batch)
        if batch.is_cuda:
            # CUDA kernels are launched asynchronously; wait for them to
            # finish so the measurement covers the actual computation.
            torch.cuda.synchronize()
        seconds = time.perf_counter() - started
    return result, seconds


# Baseline: the original float model on the selected device.
outputs, elapsed_time = _timed_inference(model, inputs.to(device))
file_size = os.path.getsize("../models/VGG16.pt") / 1e6  # bytes -> MB
print("양자화 적용 전:")
print(f"출력 결과: {outputs}")
print(f"추론 시간: {elapsed_time:.4f}s")
print(f"파일 크기: {file_size:.2f} MB")
print("\n")

# Quantized model: fed the original (un-moved) input tensor.
outputs, elapsed_time = _timed_inference(model_static_quantized, inputs)
file_size = os.path.getsize("../models/PTSQ_VGG16.pt") / 1e6  # bytes -> MB
print("양자화 적용 후:")
print(f"출력 결과: {outputs}")
print(f"추론 시간: {elapsed_time:.4f}s")
print(f"파일 크기: {file_size:.2f} MB")