In [1]:
import random
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch as T
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = T.device("cuda:0") if T.cuda.is_available() else T.device("cpu")
# Class labels; a label's position in this list is its class index.
ACTIONS = ["PRESERVE", "DELETE"]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class DuelingDeepQNetwork(nn.Module):
    """Two-hidden-layer MLP with a dueling output head.

    A shared trunk (``fc1`` -> ``fc2``, ReLU after each) feeds two linear
    heads: ``V`` (one scalar per input) and ``A`` (one value per action).
    The instance creates its own Adam optimizer and is moved to the
    module-level ``device`` at construction time.
    """

    def __init__(self, alpha, state_dim, action_dim, fc1_dim, fc2_dim):
        """Build the layers, the optimizer and place the module on ``device``.

        Args:
            alpha: Learning rate handed to Adam.
            state_dim: Number of input features.
            action_dim: Number of outputs of the advantage head.
            fc1_dim: Width of the first hidden layer.
            fc2_dim: Width of the second hidden layer.
        """
        super().__init__()

        # Shared trunk.
        self.fc1 = nn.Linear(state_dim, fc1_dim)
        self.fc2 = nn.Linear(fc1_dim, fc2_dim)
        # Dueling heads: scalar value and per-action advantage.
        self.V = nn.Linear(fc2_dim, 1)
        self.A = nn.Linear(fc2_dim, action_dim)

        self.optimizer = optim.Adam(self.parameters(), lr=alpha)
        self.to(device)

    def forward(self, state):
        """Return ``V + A - mean(A)`` computed over the last dimension.

        Args:
            state: Input tensor whose last dimension matches ``state_dim``.

        Returns:
            Tensor with ``action_dim`` entries along the last dimension.
        """
        hidden = T.relu(self.fc1(state))
        hidden = T.relu(self.fc2(hidden))

        value = self.V(hidden)
        advantage = self.A(hidden)
        # Subtract the mean advantage so the value/advantage split is identifiable.
        advantage_mean = advantage.mean(dim=-1, keepdim=True)

        return value + advantage - advantage_mean

    def save(self, file_path):
        """Serialize the whole module object (not only its weights) with ``torch.save``."""
        T.save(self, file_path)

# Network used for the PRESERVE/DELETE decision: 11 input features,
# one output per entry in ACTIONS, hidden widths 64 and 32.
maintanance_agent = DuelingDeepQNetwork(
    alpha=1e-3,
    state_dim=11,
    action_dim=len(ACTIONS),
    fc1_dim=64,
    fc2_dim=32,
)


In [4]:
import pandas as pd

class SimulatedDataset(Dataset):
    """Map-style dataset backed by a CSV file.

    Every column except the last is treated as a feature; the last column
    holds the label, which must be one of the strings in ``ACTIONS``.

    Attributes are plain Python lists so the dataset can be indexed cheaply:
    ``data`` holds one ``{column: value}`` dict per row and ``labels`` the
    matching label values.
    """

    def __init__(self, filepath="../../maintainance_df.csv"):
        """Load the CSV at ``filepath`` (defaults to the original hard-coded path).

        Args:
            filepath: Path or buffer accepted by ``pandas.read_csv``.
        """
        self.filepath = filepath
        self.data = []
        self.labels = []
        self.load_data()

    def load_data(self):
        """Read ``self.filepath`` and append its rows to ``data`` / ``labels``.

        Prints the first sample and label as a sanity check when at least one
        row was loaded; an empty file no longer raises ``IndexError``.
        """
        df = pd.read_csv(self.filepath)
        # One vectorised conversion instead of a per-row ``iloc`` loop.
        self.data.extend(df.iloc[:, :-1].to_dict("records"))
        self.labels.extend(df.iloc[:, -1].tolist())
        if self.data:
            print(self.data[0])
            print(self.labels[0])

    def __len__(self):
        """Return the number of loaded rows."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features, one_hot_label)`` as float32 tensors.

        Raises:
            ValueError: If the stored label is not present in ``ACTIONS``.
        """
        # Explicit float() so bool/int columns convert uniformly.
        features = [float(value) for value in self.data[index].values()]
        one_hot = T.zeros(len(ACTIONS), dtype=T.float32)
        one_hot[ACTIONS.index(self.labels[index])] = 1.0
        return T.tensor(features, dtype=T.float32), one_hot

def train(network, epochs, dataset=None, batch_size=32):
    """Fit ``network`` with cross-entropy loss using its own optimizer.

    Args:
        network: Module exposing an ``optimizer`` attribute; called on each
            batch of features to produce the logits.
        epochs: Number of full passes over the dataset.
        dataset: Dataset yielding ``(features, target)`` pairs. When omitted a
            ``SimulatedDataset`` is built, as before.
        batch_size: Mini-batch size passed to the ``DataLoader``.

    The loss of every mini-batch is printed.
    """
    loss_func = nn.CrossEntropyLoss()
    if dataset is None:
        dataset = SimulatedDataset()
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = network.optimizer

    # Batches come off the DataLoader on the CPU; they must follow the network
    # to whatever device its parameters live on, or the forward pass raises a
    # device-mismatch error on CUDA machines.
    first_param = next(network.parameters(), None)
    target_device = first_param.device if first_param is not None else T.device("cpu")

    for _ in range(epochs):
        for data, labels in dataloader:
            data = data.to(target_device)
            labels = labels.to(target_device)
            optimizer.zero_grad()
            output = network(data)
            loss = loss_func(output, labels)
            loss.backward()
            optimizer.step()
            print(loss)


if __name__ == "__main__":
    # Ten passes over the dataset, then pickle the trained module to disk.
    train(maintanance_agent, epochs=10)
    maintanance_agent.save("maintanance_agent.pth")

{'free_space_ratio': 0.9351953125, 'service_size_ratio': 0.0648046875, 'cached_in_L1': False, 'cached_in_L2': True, 'cached_in_L3': False, 'service_charm': 3.27, 'service_request_frequency': 0.0, 'es_request_frequency': 0.001, 'es_cache_miss_rate': 1.0, 'least_freq_index': 0, 'is_ugent': False}
PRESERVE
tensor(16.2185, grad_fn=<DivBackward1>)
tensor(11.7394, grad_fn=<DivBackward1>)
tensor(7.9240, grad_fn=<DivBackward1>)
tensor(3.0032, grad_fn=<DivBackward1>)
tensor(0.3075, grad_fn=<DivBackward1>)
tensor(0.0080, grad_fn=<DivBackward1>)
tensor(0.0013, grad_fn=<DivBackward1>)
tensor(0.0190, grad_fn=<DivBackward1>)
tensor(0.5208, grad_fn=<DivBackward1>)
tensor(0.7008, grad_fn=<DivBackward1>)
tensor(0.0185, grad_fn=<DivBackward1>)
tensor(2.4238, grad_fn=<DivBackward1>)
tensor(0.0259, grad_fn=<DivBackward1>)
tensor(2.9802e-08, grad_fn=<DivBackward1>)
tensor(0.0124, grad_fn=<DivBackward1>)
tensor(1.4640, grad_fn=<DivBackward1>)
tensor(9.4832e-06, grad_fn=<DivBackward1>)
tensor(2.4004e-05, gra