## Imports

In [1]:
import gc
import time
from importlib import reload
from subprocess import Popen, PIPE
from threading import Thread
import logging

import numpy as np
from torch.nn import Module
from torchvision import models, transforms
import torch
from PIL import Image

from tqdm import tqdm

# Re-import the logging module before configuring it; presumably this lets
# basicConfig take effect again when the cell is re-run in a live kernel.
reload(logging)
logging.basicConfig(
    level=logging.INFO,
    datefmt='%I:%M:%S',
    format='%(asctime)s %(levelname)s: %(message)s',
)

## GPU Monitor

In [2]:
class GPUMonitor(Thread):
    """Background thread that periodically samples GPU power draw via ``nvidia-smi``.

    The thread starts itself on construction and keeps polling until
    :meth:`stop` is called. Every successful sample (the number printed by
    ``nvidia-smi --query-gpu=power.draw``) is appended to ``power_readings``.

    Args:
        delay: Seconds to sleep between two consecutive polls.
    """

    # Command line used for each sample; prints the bare power value without
    # CSV header or unit suffix.
    _NVIDIA_SMI_QUERY = ['nvidia-smi', '--query-gpu=power.draw', '--format=csv,noheader,nounits']

    def __init__(self, delay):
        super(GPUMonitor, self).__init__()
        # Daemon thread: a forgotten or skipped stop() must not keep the
        # interpreter alive once the main thread has finished.
        self.daemon = True
        self.delay = delay
        # Samples collected since construction or the last reset_energy().
        self.power_readings = []
        # Loop flag checked by run(); cleared by stop().
        self.running = True
        self.start()

    def run(self):
        """Poll ``nvidia-smi`` until :meth:`stop` is called, storing each reading."""
        while self.running:
            try:
                # The context manager closes the pipe and reaps the child even
                # if reading its output fails.
                with Popen(self._NVIDIA_SMI_QUERY, stdout=PIPE) as process:
                    stdout, _ = process.communicate()
                self.power_readings.append(float(stdout.strip()))
            except Exception as error:
                # Best effort: a failed sample is reported (with its cause) and
                # skipped so that monitoring continues.
                logging.error('Something went wrong while retrieving GPU readings... (%s)', error)
            time.sleep(self.delay)

    def stop(self):
        """Ask the polling loop to exit after its current iteration."""
        self.running = False

    def reset_energy(self):
        """Discard all readings collected so far."""
        self.power_readings = []

    def get_power_average(self):
        """Return the mean of the collected readings, or NaN if there are none."""
        # Snapshot first: the polling thread may append while we average.
        readings = list(self.power_readings)
        if not readings:
            # np.mean([]) also yields NaN but emits a RuntimeWarning.
            return float('nan')
        return np.mean(readings)

## Initialisation

In [3]:

# Preprocessing applied to every input image: resize, centre-crop to 224,
# convert to a tensor and normalise each channel with fixed mean/std values.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Class names, one per line of the text file; the name is the second
# ', '-separated field of each line.
with open('image_net_classes.txt') as file:
    classes = [entry.strip().split(', ')[1] for entry in file]

## Inference

In [4]:
def infer(model: Module, images, use_gpu=True, verbose=False):
    """Run one forward pass of ``model`` over ``images`` and optionally log the top-5 classes.

    Args:
        model: Network to evaluate; it is switched to eval mode and, when
            ``use_gpu`` is true, moved to the CUDA device.
        images: Iterable of inputs accepted by the module-level ``transform``
            (the demo cell passes PIL images). They are processed as a single batch.
        use_gpu: Run the forward pass on the GPU when true, on the CPU otherwise.
        verbose: When true, log rank, class name and probability (in percent)
            of the five highest-scoring classes for every image.

    Returns:
        None. An empty ``images`` iterable is a no-op.
    """
    model.eval()
    if use_gpu:
        model.cuda()

    tensors = [transform(image) for image in images]
    if not tensors:
        # Nothing to classify; stacking an empty list would raise.
        return

    batch = torch.stack(tensors)
    if use_gpu:
        batch = batch.cuda()
    with torch.no_grad():
        out = model(batch)

    # Always copy the result to host memory, even when nothing is logged:
    # the copy waits for the GPU work, so callers that time infer() measure
    # the completed forward pass.
    out = out.cpu()
    if not verbose:
        return

    for prediction in out:
        # Softmax once per image instead of once per reported class.
        probabilities = torch.nn.functional.softmax(prediction, dim=0)
        _, top_indices = torch.topk(prediction, k=min(5, prediction.shape[0]))
        percentages = (probabilities[top_indices] * 100).tolist()
        logging.info('Rank\tInferred class\tProbability(%)')
        for rank, (class_index, percentage) in enumerate(zip(top_indices.tolist(), percentages)):
            logging.info(f'#{rank}\t\t{classes[class_index]}\t{percentage}')
        logging.info('-----------------------------------------')

def run_experiment(model_, input_images_, batch_size_, gpu_monitor_):
    """Time GPU inference over ``input_images_`` in batches and report average power.

    Args:
        model_: Model handed to ``infer`` for every batch.
        input_images_: Sequence of inputs; it is sliced into consecutive
            chunks of ``batch_size_`` items.
        batch_size_: Number of inputs per ``infer`` call (must be positive).
        gpu_monitor_: Monitor whose readings are reset before the run and
            averaged afterwards.

    Returns:
        Tuple ``(elapsed_seconds, average_power)`` where ``average_power`` is
        whatever ``gpu_monitor_.get_power_average()`` reports for the run.
    """
    gpu_monitor_.reset_energy()
    t_0 = time.perf_counter()
    for i in tqdm(range(0, len(input_images_), batch_size_)):
        infer(model_, input_images_[i:i+batch_size_], use_gpu=True)
    elapsed = time.perf_counter() - t_0
    # Read from the monitor that was passed in (and reset above), not from a
    # same-named global that may not exist or may be a different instance.
    return elapsed, gpu_monitor_.get_power_average()

## Demo

In [5]:
img = Image.open('img/dog.jpg')
# img2 and img3 are loaded but not used by the experiments in this cell.
img2 = Image.open('img/strawberries.jpg')
img3 = Image.open('img/bald_eagle.jpg')
model = models.resnet101(pretrained=True)
input_images = [img] * 1024

# Start monitoring only once everything that can fail up front has loaded,
# and guarantee the polling thread is stopped even if an experiment raises.
gpu_monitor = GPUMonitor(0.1)
try:
    # Single inference
    t, P = run_experiment(model, input_images, 1, gpu_monitor)
    logging.info(f'Single inference stats: {P} watts for {t} s')

    # Batch inference
    t, P = run_experiment(model, input_images, 8, gpu_monitor)
    logging.info(f'Batch inference stats: {P} watts for {t} s')
finally:
    gpu_monitor.stop()
    gc.collect()
    torch.cuda.empty_cache()

100%|██████████| 1024/1024 [00:25<00:00, 40.45it/s]
01:20:44 INFO: Single inference stats: 82.98312101910827 watts for 25.314886432000094 s
100%|██████████| 128/128 [00:10<00:00, 11.66it/s]
01:20:55 INFO: Batch inference stats: 85.79307692307692 watts for 10.97552584999994 s
