In [1]:
import random
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch as T
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


# Use the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
device = T.device("cuda:0") if T.cuda.is_available() else T.device("cpu")

In [2]:
class DuelingDeepQNetwork(nn.Module):
    """Dueling Q-network with a shared two-layer trunk and separate V / A heads.

    The forward pass combines the heads as ``Q = V + A - mean(A)``, where the
    mean is taken over the action (last) dimension.

    Args:
        alpha: Learning rate handed to the Adam optimizer stored on the module.
        state_dim: Size of the input feature vector.
        action_dim: Number of outputs of the advantage head (one per action).
        fc1_dim: Width of the first hidden layer.
        fc2_dim: Width of the second hidden layer.
    """

    def __init__(self, alpha, state_dim, action_dim, fc1_dim, fc2_dim):
        super().__init__()

        # Shared trunk.
        self.fc1 = nn.Linear(state_dim, fc1_dim)
        self.fc2 = nn.Linear(fc1_dim, fc2_dim)
        # Heads: a scalar state value and one advantage per action.
        self.V = nn.Linear(fc2_dim, 1)
        self.A = nn.Linear(fc2_dim, action_dim)

        # The optimizer lives on the module so callers can reach it as
        # ``network.optimizer``.
        self.optimizer = optim.Adam(self.parameters(), lr=alpha)
        # Move all parameters to the module-level ``device``.
        self.to(device)

    def forward(self, state):
        """Return the Q-values for ``state``.

        ``state`` is fed straight into the first linear layer; it is not moved
        to another device here.
        """
        hidden = F.relu(self.fc1(state))
        hidden = F.relu(self.fc2(hidden))

        value = self.V(hidden)
        advantage = self.A(hidden)
        # Subtract the mean advantage over the last dimension before adding V.
        q_values = value + advantage - advantage.mean(dim=-1, keepdim=True)

        return q_values

    def save(self, file_path):
        """Write this module to ``file_path`` with ``torch.save``.

        The whole module object is serialised, not only its ``state_dict``.
        """
        T.save(self, file_path)

# Action labels; a label's position in this list is used as its class index.
ACTIONS = ["PRESERVE", "DELETE"]

In [3]:
# Network to be pre-trained: 10 input features, one output per entry in ACTIONS.
maintanance_agent = DuelingDeepQNetwork(
    alpha=1e-3,
    state_dim=10,
    action_dim=len(ACTIONS),
    fc1_dim=64,
    fc2_dim=32,
)


class MaintainanceDataset(Dataset):
    """Synthetic supervised dataset of (observation, action label) pairs.

    Observations are dicts of 10 features produced by ``cold_start``; every
    generated sample is labelled ``"PRESERVE"``. ``__getitem__`` converts a
    sample into a float32 feature tensor and a float32 one-hot target over
    ``ACTIONS``.

    Args:
        num_samples: Number of samples to generate. Defaults to 1000, the
            size that was previously hard-coded.
        seed: Optional seed for a private ``random.Random`` generator so the
            data can be reproduced. When ``None`` (default) the global
            ``random`` module state is used, as before.
    """

    # Probability of a service being cached at L1 / L2; any other draw is L3.
    LOC_PROBS = {"L1": 0.8, "L2": 0.15, "L3": 0.05}

    def __init__(self, num_samples=1000, seed=None):
        self.num_samples = num_samples
        # Keep a private generator only when a seed is given; storing None
        # (rather than the ``random`` module) keeps the dataset picklable.
        self._rng = random.Random(seed) if seed is not None else None
        self.data = []
        self.labels = []
        self.generate()

    def _rand(self):
        """Return the random source: the seeded generator or the ``random`` module."""
        return self._rng if self._rng is not None else random

    def gen(self, func, num=1000):
        """Call ``func`` ``num`` times and store each returned (obs, label) pair."""
        for _ in range(num):
            obs, label = func()
            self.data.append(obs)
            self.labels.append(label)

    def generate(self):
        """Populate the dataset with ``num_samples`` cold-start samples."""
        self.gen(self.cold_start, self.num_samples)

    def get_loc(self):
        """Draw a cache level ("L1", "L2" or "L3") according to ``LOC_PROBS``."""
        probs = self.LOC_PROBS
        # Named ``draw`` rather than ``T`` so the torch alias is not shadowed.
        draw = self._rand().random()
        if draw < probs["L1"]:
            return "L1"
        elif draw < probs["L1"] + probs["L2"]:
            return "L2"
        else:
            return "L3"

    def cold_start(self):
        """Build one cold-start observation (request frequency 0), labelled "PRESERVE".

        The key order of the returned dict defines the feature order used by
        ``__getitem__``.
        """
        rng = self._rand()
        loc = self.get_loc()
        obs = {
            "free_space_ratio": rng.uniform(0.9, 1),
            "service_size_ratio": rng.uniform(0, 0.3),
            "cached_in_L1": loc == "L1",
            "cached_in_L2": loc == "L2",
            "service_charm": rng.randint(0, 10),
            "service_request_frequency": 0,
            "ES_request_frequency": rng.uniform(0, 10),
            "es_cache_miss_rate": rng.uniform(0.8, 1),
            "least_freq_index": rng.randint(0, 3),
            "is_ugent": 0
        }
        return obs, "PRESERVE"

    def __len__(self):
        """Number of stored samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features, one_hot_target)`` as float32 tensors for ``index``."""
        # Features follow the insertion order of the observation dict.
        features = T.tensor(list(self.data[index].values())).float()
        # Build the one-hot target directly in float32 instead of going
        # through a float64 NumPy array.
        target = T.zeros(len(ACTIONS), dtype=T.float32)
        target[ACTIONS.index(self.labels[index])] = 1.0
        return features, target

def train(network, epochs, dataset=None, batch_size=32):
    """Supervised pre-training of ``network`` with cross-entropy loss.

    Args:
        network: Module exposing an ``optimizer`` attribute; its output is
            used as the logits for ``nn.CrossEntropyLoss``.
        epochs: Number of full passes over the dataset.
        dataset: Optional dataset of ``(features, target)`` pairs. When
            ``None`` (default) a fresh ``MaintainanceDataset`` is generated.
        batch_size: Mini-batch size for the ``DataLoader`` (default 32).

    The loss tensor of every batch is printed.
    """
    loss_func = nn.CrossEntropyLoss()
    if dataset is None:
        dataset = MaintainanceDataset()
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = network.optimizer
    # Batches must be on the same device as the network's weights, otherwise
    # the forward pass fails with a device mismatch when the network is on a GPU.
    target_device = next(network.parameters()).device
    for _ in range(epochs):
        for data, labels in dataloader:
            data = data.to(target_device)
            labels = labels.to(target_device)
            optimizer.zero_grad()
            output = network(data)
            loss = loss_func(output, labels)
            loss.backward()
            optimizer.step()
            print(loss)

# Entry point: pre-train the agent for 10 epochs, then write it to disk.
if __name__ == "__main__":
    train(maintanance_agent, 10)
    # save() serialises the whole module object via torch.save.
    maintanance_agent.save("maintanance_agent.pt")


tensor(0.6740, grad_fn=<DivBackward1>)
tensor(0.6507, grad_fn=<DivBackward1>)
tensor(0.6063, grad_fn=<DivBackward1>)
tensor(0.5616, grad_fn=<DivBackward1>)
tensor(0.5104, grad_fn=<DivBackward1>)
tensor(0.4650, grad_fn=<DivBackward1>)
tensor(0.4336, grad_fn=<DivBackward1>)
tensor(0.4535, grad_fn=<DivBackward1>)
tensor(0.3705, grad_fn=<DivBackward1>)
tensor(0.3379, grad_fn=<DivBackward1>)
tensor(0.2838, grad_fn=<DivBackward1>)
tensor(0.2973, grad_fn=<DivBackward1>)
tensor(0.2458, grad_fn=<DivBackward1>)
tensor(0.2342, grad_fn=<DivBackward1>)
tensor(0.1972, grad_fn=<DivBackward1>)
tensor(0.1656, grad_fn=<DivBackward1>)
tensor(0.1746, grad_fn=<DivBackward1>)
tensor(0.1211, grad_fn=<DivBackward1>)
tensor(0.1514, grad_fn=<DivBackward1>)
tensor(0.0978, grad_fn=<DivBackward1>)
tensor(0.1018, grad_fn=<DivBackward1>)
tensor(0.0881, grad_fn=<DivBackward1>)
tensor(0.1065, grad_fn=<DivBackward1>)
tensor(0.0249, grad_fn=<DivBackward1>)
tensor(0.0606, grad_fn=<DivBackward1>)
tensor(0.0564, grad_fn=<D