## Imports

In [23]:
import gc
import logging
import queue
import random
import subprocess
import time
from datetime import datetime
from importlib import reload
from multiprocessing import Process, Manager

import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from torch.nn import Module
from torchvision import models, transforms

# Re-import the logging module before configuring it.
# NOTE(review): presumably this resets handlers installed by the notebook
# kernel so that basicConfig below takes effect -- confirm.
reload(logging)
# Log at INFO level with a timestamp prefix. '%I' is the 12-hour clock and
# the format has no AM/PM marker.
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO, datefmt='%I:%M:%S')

## Async Processes

In [24]:
class GPUMonitor(Process):
    """Background process that periodically samples GPU power draw via ``nvidia-smi``.

    The process starts itself on construction. Every polling cycle it runs
    ``nvidia-smi --query-gpu=power.draw`` and appends the parsed value to
    ``power_readings``, a ``Manager`` list proxy that can be read from the
    creating process.
    """

    def __init__(self, delay):
        """Create the shared reading list and start polling immediately.

        Args:
            delay: Seconds to sleep between two consecutive ``nvidia-smi`` calls.
        """
        super().__init__()
        self.delay = delay
        self.power_readings = Manager().list()
        # NOTE: this is a plain attribute copied into the child process; nothing
        # in this class ever sets it to False, so the loop in run() only ends
        # when the process is terminated from outside.
        self.running = True
        self.command = ['nvidia-smi', '--query-gpu=power.draw', '--format=csv,noheader,nounits']
        self.start()

    def run(self):
        """Poll ``nvidia-smi`` forever, storing one float reading per cycle.

        Failures of a single poll are logged and the loop continues.
        ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT caught so
        that interrupting the notebook also stops this process.
        """
        while self.running:
            try:
                raw_output = subprocess.check_output(self.command)
                # float() raises ValueError if the output is not a single number
                # (e.g. one line per GPU); that poll is then logged and skipped.
                self.power_readings.append(float(raw_output.strip()))
            except Exception:
                logging.error('Something went wrong while retrieving GPU readings...')
            time.sleep(self.delay)

    def reset_energy(self):
        """Discard all readings collected so far."""
        self.power_readings[:] = []

    def get_power_average(self):
        """Return the arithmetic mean of all collected readings.

        Returns:
            The mean as computed by ``np.mean``; ``nan`` if there are no readings.
        """
        # Slice once to obtain a plain list instead of letting numpy pull
        # elements through the proxy one at a time.
        return np.mean(self.power_readings[:])

    def plot(self):
        """Plot the collected readings in sample order and show the figure."""
        plt.plot(self.power_readings[:])
        plt.show()


class RequestQueue(Process):
    """Process that simulates clients enqueuing image-classification requests.

    The process starts itself on construction. Its ``run`` loop puts
    ``(image_path, enqueue_timestamp)`` tuples on a ``Manager`` queue at a
    jittered rate; the creating process pulls them with ``get_request`` and
    accumulates queueing delay with ``update_wait_times``.
    """

    # Image paths the simulated requests are drawn from (uniformly at random).
    IMAGE_PATHS = ('img/dog.jpg', 'img/bald_eagle.jpg', 'img/strawberries.jpg')

    def __init__(self, id, frequency, nr_of_requests):
        """Create the shared state and start producing requests immediately.

        Args:
            id: Label for this simulation, used only in the start-up log line.
            frequency: Average number of requests generated per second.
            nr_of_requests: Total number of requests to generate; also used as
                the capacity of the queue.
        """
        super().__init__()
        self.id = id
        self.frequency = frequency
        self.nr_of_requests = nr_of_requests
        # One manager serves all three shared objects; the proxies keep it
        # alive, so it is intentionally not stored on self.
        manager = Manager()
        self.queue = manager.Queue(nr_of_requests)
        self.total_time_in_queue = manager.Value(float, 0.0)
        self.batch_start_times = manager.list()
        self.start()

    def run(self):
        """Enqueue ``nr_of_requests`` requests, sleeping between each of them.

        The pause after every request is ``1 / frequency`` seconds scaled by a
        random factor between 0.8 and 1.2.
        """
        logging.info("Started simulation with id: {}".format(self.id))
        for _ in range(self.nr_of_requests):
            # NOTE(review): this timestamp is taken in the producer process and
            # later compared with perf_counter() in the consumer -- confirm the
            # clock is comparable across processes on the target platform.
            self.queue.put((random.choice(self.IMAGE_PATHS), time.perf_counter()))
            time.sleep(1 / self.frequency * random.uniform(0.8, 1.2))

    def get_request(self, block=True, timeout=None):
        """Dequeue one request and remember when it was enqueued.

        Args:
            block: Passed through to the queue's ``get``.
            timeout: Passed through to the queue's ``get``.

        Returns:
            The image path of the dequeued request.

        Raises:
            queue.Empty: Propagated from the underlying queue's ``get``.
        """
        img, t_0 = self.queue.get(block, timeout)
        self.batch_start_times.append(t_0)
        return img

    def update_wait_times(self):
        """Add the waiting time of every pending request to the running total.

        For each timestamp recorded by ``get_request`` since the last call, the
        difference to the current time is added to ``total_time_in_queue``;
        the list of pending timestamps is then cleared.
        """
        curr_time = time.perf_counter()
        # Copy once and update the shared value once, instead of one proxy
        # round trip per element.
        start_times = self.batch_start_times[:]
        if start_times:
            self.total_time_in_queue.value += sum(curr_time - img_t0 for img_t0 in start_times)
        self.batch_start_times[:] = []



## Initialisation

In [25]:

# Preprocessing applied to every input image before it is fed to the model:
# resize, centre-crop to 224, convert to a tensor, then normalise per channel.
# NOTE(review): mean/std look like the usual ImageNet statistics -- confirm.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Class labels, indexed by model output position: the second ', '-separated
# field of each line in the label file.
with open('image_net_classes.txt') as file:
    classes = [line.strip().split(', ')[1] for line in file]

## Inference

In [26]:
def infer(model: Module, images, use_gpu=True, verbose=False):
    """Run one forward pass of ``model`` over ``images`` as a single batch.

    Every image is preprocessed with the module-level ``transform``; the
    results are stacked into one batch tensor and passed through the model
    with gradients disabled.

    Args:
        model: Network to evaluate. It is switched to eval mode and, when
            ``use_gpu`` is true, moved to the GPU.
        images: Iterable of images accepted by ``transform``.
        use_gpu: Run the model and the batch on the GPU when true.
        verbose: When true, log the five highest-scoring classes (looked up in
            the module-level ``classes`` list) and their softmax percentages
            for every image.

    Returns:
        None. An empty ``images`` is a no-op apart from the eval/GPU switch.
    """
    model.eval()
    with torch.no_grad():
        if use_gpu:
            model.cuda()
        images_t = [transform(im) for im in images]
        if not images_t:
            # Nothing to infer; stacking an empty list would raise.
            return
        # stack adds the batch dimension directly (same result as
        # concatenating the individually unsqueezed tensors).
        batch = torch.stack(images_t)
        if use_gpu:
            batch = batch.cuda()
        out = model(batch)
    if verbose:
        for prediction in out:
            prediction = prediction.cpu()
            _, indices = torch.sort(prediction, descending=True)
            top_indices = indices[:5]
            # Softmax over all classes is computed once per prediction rather
            # than once per reported class.
            probabilities = torch.nn.functional.softmax(prediction, dim=0)
            percentages = [(probabilities[class_index] * 100).item() for class_index in top_indices]

            logging.info('Rank\tInferred class\tProbability(%)')
            for idx, class_index in enumerate(top_indices):
                logging.info(f'#{idx}\t\t{classes[class_index]}\t{percentages[idx]}')
            logging.info('-----------------------------------------')


def run_experiment(model_, input_images_):
    """Time one GPU inference pass over a batch of images.

    Args:
        model_: Model handed to ``infer``.
        input_images_: Images handed to ``infer`` as one batch.

    Returns:
        Elapsed wall-clock time in seconds, measured with ``time.perf_counter``.
    """
    t_0 = time.perf_counter()
    infer(model_, input_images_, use_gpu=True)
    # CUDA kernels are launched asynchronously; wait for the queued work to
    # finish so the measured time covers the actual GPU computation.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.perf_counter() - t_0

## Configuration

In [27]:
# Number of requests per inference batch. A value <= 0 makes the simulation
# loop size each batch from the current queue length (clamped to 1..256).
batch_size = 64
# Average number of simulated requests generated per second.
frequency = 16
# Total number of requests the simulation generates.
nr_of_requests = 1024
# Model under test, loaded with pretrained weights.
model = models.convnext_base(pretrained=True)

## Simulation

In [28]:
# Release cached GPU memory and collect garbage before measuring.
torch.cuda.empty_cache()
gc.collect()
# Dequeue timeout in seconds: two average inter-arrival intervals plus one
# second. Hitting it is treated as "no more requests will arrive".
epsilon = 2 / frequency + 1
gpu_monitor = GPUMonitor(0.01)
batch_count = 0
processed_requests = 0

rq = None
try:
    rq = RequestQueue('inference_simulation', frequency, nr_of_requests)
    t_0 = time.perf_counter()
    while True:
        # Fixed batch size, or (batch_size <= 0) whatever is currently queued,
        # clamped to 1..256.
        target_size = max(min(rq.queue.qsize(), 256), 1) if batch_size <= 0 else batch_size
        batch = []
        queue_drained = False
        try:
            for _ in range(target_size):
                batch.append(Image.open(rq.get_request(block=True, timeout=epsilon)))
        except queue.Empty:
            queue_drained = True
        # Also run a partially filled final batch so that requests already
        # dequeued are inferred and counted in the wait-time total.
        if batch:
            t = run_experiment(model, batch)
            rq.update_wait_times()
            batch_count += 1
            processed_requests += len(batch)
            logging.info(f"{100 * processed_requests / nr_of_requests}% last batch ({len(batch)}) took {t}s")
        if queue_drained:
            break

    power = gpu_monitor.power_readings[:]
    average = np.mean(power)
    # The loop always ends with one timed-out dequeue of `epsilon` seconds,
    # which is not part of the workload.
    duration = time.perf_counter() - t_0 - epsilon
    wait_time = rq.total_time_in_queue.value / nr_of_requests
    if power:
        # Mean of the readings more than 25 W above the lowest reading; the
        # threshold is computed once instead of once per sample.
        peak_threshold = min(power) + 25
        peak_average = np.mean([p for p in power if p > peak_threshold])
    else:
        peak_average = float('nan')
    gpu_monitor.plot()
finally:
    # Stop both helper processes even when the run fails or is interrupted.
    if rq is not None:
        rq.terminate()
    gpu_monitor.terminate()

02:39:52 INFO: Started simulation with id: inference_simulation
02:40:01 INFO: 6.25% last batch (64) took 4.555557256999236s
02:40:05 ERROR: Something went wrong while retrieving GPU readings...
Process RequestQueue-21:
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/tmp/ipykernel_171218/4284442209.py", line 45, in run
    time.sleep(1 / self.frequency * random.uniform(0.8, 1.2))
KeyboardInterrupt


KeyboardInterrupt: 

## Write Results

In [None]:
# Results file name: model class name, request frequency, batch size and a
# day-month-year / hour-minute-second timestamp.
file_name = f'{model.__class__.__name__}_f{frequency}_{batch_size}_{datetime.today().strftime("%d%m%Y_%H%M%S")}.csv'
# The context manager closes the file even if one of the writes raises.
with open(f'results/{file_name}', 'a') as file:
    logging.info('Average Power(W)\t\tTime(s)\t\t\t\tEnergy(J)\t\t\tAverage Wait Time(s)\tAverage Peak Power (W)')
    file.write('Average Power(W),Time(s),Energy(J),Average Wait Time(s),Average Peak Power (W)\n')
    # Energy is reported as average power multiplied by the run duration.
    logging.info(f'{average}\t{duration}\t{average * duration}\t{wait_time}\t\t{peak_average}')
    file.write(f'{average},{duration},{average * duration},{wait_time},{peak_average}')
logging.info(f'Results logged to: results/{file_name}')