In [11]:
# pytorch mlp for regression
from numpy import vstack
from numpy import sqrt
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch.nn import Linear
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD
from torch.nn import MSELoss
from torch.nn.init import xavier_uniform_
from sklearn import preprocessing
from torch import nn
import torch


In [15]:
# dataset definition
class CSVDataset(Dataset):
    # load the dataset
    def __init__(self, path):
        # load the csv file as a dataframe
        df = read_csv(path, header=None)
        
        df.rename(columns=df.iloc[0]).drop(df.index[0])
        # store the inputs and outputs
        self.X = df.values[1:, 0:16].astype('float32')
        self.y = df.values[1:, 16:17].astype('float32')
        
        self.X = torch.from_numpy(self.X)
        self.y = torch.from_numpy(self.y)

        # ensure target has the right shape
        #self.y = self.y.reshape((len(self.y), 1))

    # number of rows in the dataset
    def __len__(self):
        return len(self.X)

    # get a row at an index
    def __getitem__(self, idx):
        return [self.X[idx], self.y[idx]]

    # get indexes for train and test rows
    def get_splits(self, n_test=0.33):
        # determine sizes
        test_size = round(n_test * len(self.X))
        train_size = len(self.X) - test_size
        # calculate the split
        return random_split(self, [train_size, test_size])

class MLP(nn.Module):
  '''
    Multilayer Perceptron for regression.
  '''
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
      nn.Linear(9, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
      nn.Linear(32, 1)
    )


  def forward(self, x):
    '''
      Forward pass
    '''
    return self.layers(x)



# prepare the dataset
def prepare_data(path):
    # load the dataset
    dataset = CSVDataset(path)
    # calculate split
    train, test = dataset.get_splits()
    # prepare data loaders
    train_dl = DataLoader(train, batch_size=32, shuffle=False)
    test_dl = DataLoader(test, batch_size=32, shuffle=False)
    return train_dl, test_dl

# train the model
def train_model(train_dl, model):
    # define the optimization
    criterion = MSELoss()
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
    # enumerate epochs
    for epoch in range(300):
        # enumerate mini batches
        for i, (inputs, targets) in enumerate(train_dl):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()

# evaluate the model
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        # evaluate the model on the test set
        output = model(inputs)
        # retrieve numpy array
        output = output.detach().numpy()
        actual = targets.numpy()
        actual = actual.reshape((len(actual), 1))
        # store
        predictions.append(output)
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    # calculate mse
    mse = mean_squared_error(actuals, predictions)
    return mse

# make a class prediction for one row of data
def predict(row, model):
    # convert row to data
    row = Tensor([row])
    # make prediction
    yhat = model(row)
    # retrieve numpy array
    yhat = yhat.detach().numpy()
    return yhat

In [16]:
import numpy as np
path = './data/qoe/federated_devices/pokemon_complet_ABR_BITRATE.csv'
df = read_csv(path, header=None)

        
df.rename(columns=df.iloc[0]).drop(df.index[0])
# store the inputs and outputs
X = df.values[1:, 0:16].astype('float32')
y = df.values[1:, 16:17].astype('float32')

y.shape

(1543, 1)

In [17]:
import numpy as np
path = './data/qoe/federated_devices/pokemon_complet_ABR_BITRATE.csv'
train_dl, test_dl = prepare_data(path)
print(len(train_dl.dataset), len(test_dl.dataset))
# define the network
model = MLP(16)
# train the model
train_model(train_dl, model)
# evaluate the model
mse = evaluate_model(test_dl, model)
print('MSE: %.3f, RMSE: %.3f' % (mse, sqrt(mse)))
#make a single prediction (expect class=1)
#row =  [22,1024,0,0.472,0.034,360,736,24,2,869,4,1,0,3,0,26]
row =  [38,1536,0.486,0.078,360,912,1,0,2,0,19,3,3,3,3,3]

yhat = predict(row, model)
print('Predicted:',yhat)

NameError: name 'scale_data' is not defined

In [18]:
import torch 
torch.save(model.state_dict(), './model/model_central_training.pt')

In [19]:
path = './data/qoe/federated_devices/pokemon_complet_ABR_BITRATE.csv'
df = read_csv(path, header=None)

X = df.values[1:, 0:16].astype('float32')
y = df.values[1:, 16:21].astype('float32')
print(y.shape)

(1543, 1)
