## Imports

In [1]:
import gc
import logging
import queue
import random
import time
from importlib import reload
from multiprocessing import Process, Manager
from subprocess import Popen, PIPE

import numpy as np
import torch
from PIL import Image
from torch.nn import Module
from torchvision import models, transforms
from tqdm import tqdm

# Configure the root logger for the notebook. `force=True` (Python 3.8+) removes any
# handlers already attached to the root logger before installing this one, so the
# format below takes effect without re-executing the `logging` module via reload().
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO, datefmt='%I:%M:%S',
                    force=True)

## Async Processes

In [2]:
class GPUMonitor(Process):
    """Process that repeatedly samples GPU power draw by invoking ``nvidia-smi``.

    The process starts itself from the constructor. Each sample is appended to
    ``power_readings``, a ``Manager`` list proxy, so the readings collected in the
    child process can be read from the process that created the monitor.

    Args:
        delay: Seconds to sleep between two consecutive samples.
    """

    def __init__(self, delay):
        super(GPUMonitor, self).__init__()
        self.delay = delay
        self.power_readings = Manager().list()
        # Nothing in this class ever clears the flag, so the sampling loop runs
        # until the process is terminated from outside.
        self.running = True
        self.start()

    def run(self):
        """Sample the power draw every ``delay`` seconds and record each reading."""
        command = 'nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits'.split(' ')
        while self.running:
            try:
                p = Popen(command, stdout=PIPE)
                # communicate() waits for the command to exit, so there is nothing
                # left to terminate afterwards.
                stdout, _ = p.communicate()
                # The whole output is parsed as one number; output that is not a
                # single float raises ValueError and is reported below.
                self.power_readings.append(float(stdout.strip()))
            except Exception:
                # `Exception` rather than a bare `except` so KeyboardInterrupt and
                # SystemExit can still stop the loop; logging.exception keeps the
                # original message and adds the traceback of the actual failure.
                logging.exception('Something went wrong while retrieving GPU readings...')
            time.sleep(self.delay)

    def reset_energy(self):
        """Discard all readings collected so far."""
        self.power_readings[:] = []

    def get_power_average(self):
        """Return the mean of the collected readings.

        Returns:
            The arithmetic mean of ``power_readings``, or NaN when no reading has
            been collected yet (returned directly, without numpy's empty-mean warning).
        """
        # Copy into a plain list first so the mean is computed on a local snapshot.
        readings = list(self.power_readings)
        if not readings:
            return float('nan')
        return np.mean(readings)


class RequestQueue(Process):
    """Process that simulates incoming inference requests at a jittered rate.

    The process starts itself from the constructor and puts one opened image per
    request onto ``queue`` (a ``Manager`` queue bounded to ``nr_of_requests``),
    sleeping between requests.

    Args:
        id: Identifier used in the start-up log message.
        frequency: Target number of requests per second; must be positive.
        nr_of_requests: Total number of requests to emit.
        image_path: Path of the image opened for every request.

    Raises:
        ValueError: If ``frequency`` is not positive.
    """

    def __init__(self, id, frequency, nr_of_requests, image_path='img/dog.jpg'):
        # Fail in the caller instead of with a ZeroDivisionError inside the
        # already-started child process.
        if frequency <= 0:
            raise ValueError('frequency must be positive, got {}'.format(frequency))
        # Name this class (not its base) in super(), consistent with GPUMonitor.
        super(RequestQueue, self).__init__()
        self.id = id
        self.frequency = frequency
        self.nr_of_requests = nr_of_requests
        self.image_path = image_path
        self.queue = Manager().Queue(nr_of_requests)
        self.start()

    def run(self):
        """Emit ``nr_of_requests`` images, pausing roughly ``1 / frequency`` seconds after each."""
        logging.info("Started simulation with id: {}".format(self.id))
        while self.nr_of_requests > 0:
            self.queue.put(Image.open(self.image_path))
            self.nr_of_requests -= 1
            # Jitter the nominal period by +/-20% so arrivals are not perfectly regular.
            time.sleep(1 / self.frequency * random.uniform(0.8, 1.2))


## Initialisation

In [3]:

# Per-channel mean and standard deviation handed to transforms.Normalize below.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every input image before inference:
# resize to 256, centre-crop to 224, convert to a tensor, then normalise.
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
transform = transforms.Compose(preprocessing_steps)

# Each line of the class file is split on ', ' and its second field is kept as
# the class name; list position is what `infer` later uses as the class index.
with open('image_net_classes.txt') as file:
    classes = [entry.strip().split(', ')[1] for entry in file]

## Inference

In [4]:
def infer(model: Module, images, use_gpu=True, verbose=False, top_k=5):
    """Run ``model`` on a batch of images and return the top predictions per image.

    Every image is passed through the module-level ``transform`` and the results
    are stacked into one batch, which is fed through the model in eval mode with
    gradients disabled.

    Args:
        model: Network to evaluate; moved to the GPU when ``use_gpu`` is true.
        images: Iterable of inputs accepted by ``transform``.
        use_gpu: Run the forward pass on CUDA when true, on the CPU otherwise.
        verbose: Log the ranked classes and their probabilities for every image.
        top_k: Number of highest-scoring classes to keep per image.

    Returns:
        One list per image, each holding up to ``top_k`` ``(class_name, percentage)``
        tuples ordered from most to least likely. Empty when ``images`` is empty.
    """
    model.eval()
    images_t = [transform(im) for im in images]
    if not images_t:
        # Nothing to batch; stacking an empty list would raise.
        return []

    with torch.no_grad():
        if use_gpu:
            model.cuda()
        batch = torch.stack(images_t)
        if use_gpu:
            batch = batch.cuda()
        # Move the output to host memory once instead of once per prediction.
        out = model(batch).cpu()

    results = []
    for prediction in out:
        # Softmax is computed once per image rather than once per ranked class.
        probabilities = torch.nn.functional.softmax(prediction, dim=0) * 100
        _, indices = torch.sort(prediction, descending=True)
        top_indices = indices[:top_k].tolist()
        percentages = [probabilities[class_index].item() for class_index in top_indices]
        if verbose:
            logging.info('Rank\tInferred class\tProbability(%)')
            for idx, class_index in enumerate(top_indices):
                logging.info(f'#{idx}\t\t{classes[class_index]}\t{percentages[idx]}')
            logging.info('-----------------------------------------')
        results.append([(classes[class_index], percentage)
                        for class_index, percentage in zip(top_indices, percentages)])
    return results


def run_experiment(model_, input_images_):
    """Time one GPU call to ``infer`` for the given model and images.

    Args:
        model_: Model forwarded to ``infer``.
        input_images_: Images forwarded to ``infer`` as one batch.

    Returns:
        Elapsed time of the ``infer`` call in seconds, measured with ``time.perf_counter``.
    """
    started_at = time.perf_counter()
    infer(model_, input_images_, use_gpu=True)
    finished_at = time.perf_counter()
    return finished_at - started_at

## Simulation

In [41]:
batch_size = 16
frequency = 1
nr_of_requests = 64
# Longest time to wait for a single request before the queue is considered drained.
epsilon = 2 / frequency + 0.5
model = models.alexnet(pretrained=True)
gpu_monitor = GPUMonitor(0.1)
rq = RequestQueue('inference_simulation', frequency, nr_of_requests)
t_0 = time.perf_counter()
try:
    queue_drained = False
    while not queue_drained:
        # Collect requests one at a time so that a timeout in the middle of a batch
        # keeps the requests already received instead of discarding them.
        batch = []
        try:
            while len(batch) < batch_size:
                batch.append(rq.queue.get(block=True, timeout=epsilon))
        except queue.Empty:
            queue_drained = True
        if batch:
            t = run_experiment(model, batch)
            logging.info(f"Batch processed | {t=}")

    # epsilon is subtracted because the loop only ends after one final get() has
    # waited out the full timeout.
    logging.info(f'Average Power: {gpu_monitor.get_power_average()} over a duration of {time.perf_counter() - t_0 - epsilon} seconds')
finally:
    # Always stop the helper processes and release memory, even if inference failed.
    rq.terminate()
    gpu_monitor.terminate()
    rq.join(timeout=1)
    gpu_monitor.join(timeout=1)
    gc.collect()
    torch.cuda.empty_cache()

02:21:33 INFO: Started simulation with id: inference_simulation
02:21:48 INFO: Batch processed | t=0.13576744399961171
02:22:03 INFO: Batch processed | t=0.08052059100009501
02:22:20 INFO: Batch processed | t=0.06266189399957511
02:22:36 INFO: Batch processed | t=0.09271269900000334
02:22:39 INFO: Average Power: 53.314872521246464 over a duration of 63.09854310500032 seconds
