In [45]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import logging
from pandas import read_csv
import syft as sy
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch.nn import Linear
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD
from torch.nn import MSELoss, L1Loss
from torch.nn.init import xavier_uniform_
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression

In [46]:
from torch.nn import Linear
from torch.nn import Sigmoid ,ReLU
from torch.nn import Module
from torch.optim import SGD
from torch.nn import MSELoss
from torch.nn.init import xavier_uniform_
from torch import Tensor
# model definition
class MLP(Module):
    """Fully connected regression network with two ReLU hidden layers.

    Layer layout: ``n_inputs -> n_hidden1 -> n_hidden2 -> n_outputs``. The
    output layer is linear (no activation). Every weight matrix is
    re-initialised with Xavier uniform; biases are left as ``Linear``
    created them.

    Args:
        n_inputs: Number of input features per sample.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        n_outputs: Number of values predicted per sample.
    """

    def __init__(self, n_inputs, n_hidden1=32, n_hidden2=16, n_outputs=5):
        super().__init__()
        # The attribute names below become the state_dict keys, so they are
        # kept stable for checkpoints saved from this class.
        # input to first hidden layer
        self.hidden1 = Linear(n_inputs, n_hidden1)
        xavier_uniform_(self.hidden1.weight)
        self.act1 = ReLU()
        # second hidden layer
        self.hidden2 = Linear(n_hidden1, n_hidden2)
        xavier_uniform_(self.hidden2.weight)
        self.act2 = ReLU()
        # output layer
        self.hidden3 = Linear(n_hidden2, n_outputs)
        xavier_uniform_(self.hidden3.weight)

    def forward(self, X):
        """Propagate a batch ``X`` through both hidden layers and the output layer."""
        X = self.act1(self.hidden1(X))
        X = self.act2(self.hidden2(X))
        return self.hidden3(X)


In [47]:
# Load the complete dataset; read_csv takes the first CSV row as column names.
path = './data/qoe/federated_devices/pokemon_complet.csv'
df = read_csv(path)

# Hold out 20% of the rows for testing; random_state is the sampling seed.
train_set = df.sample(frac=0.8, random_state=200)
test_set = df.drop(train_set.index)

# test data
# A DataLoader cannot index a DataFrame row-wise (integer keys are looked up
# as column labels), so the held-out rows are wrapped in a tensor dataset
# using the same feature/target column split as the per-device training cells.
test_features = torch.from_numpy(test_set.iloc[:, 0:16].astype('float32').to_numpy())
test_targets = torch.from_numpy(test_set.iloc[:, 16:21].astype('float32').to_numpy())
test_dl = DataLoader(
    torch.utils.data.TensorDataset(test_features, test_targets),
    batch_size=32,
    shuffle=False,  # evaluation does not need a shuffled order
)

# split the training data into 3 device datasets by QoD_model value
# NOTE(review): value 9 appears in both the device-1 and device-2 filters and
# value 7 in none of them — confirm this overlap is intended.
train_set1 = train_set.loc[train_set['QoD_model'].isin([5, 8, 9])]
train_set2 = train_set.loc[train_set['QoD_model'].isin([2, 1, 9])]
train_set3 = train_set.loc[train_set['QoD_model'].isin([3, 4, 6])]

In [48]:
# device1 dataset
# read_csv(path) already used the first CSV row as column names, and the
# former rename(...).drop(...) expression discarded its result, so it was a
# no-op and has been removed.
# Columns 0-15 are the model inputs, columns 16-20 the regression targets.
train_set_data1 = train_set1.iloc[:, 0:16].astype('float32')
target_set_data1 = train_set1.iloc[:, 16:21].astype('float32')

train_tensor1 = torch.from_numpy(train_set_data1.to_numpy()).float()
target_tensor1 = torch.from_numpy(target_set_data1.to_numpy()).float()

In [49]:
# device2 dataset
# read_csv(path) already used the first CSV row as column names, and the
# former rename(...).drop(...) expression discarded its result, so it was a
# no-op and has been removed.
# Columns 0-15 are the model inputs, columns 16-20 the regression targets.
train_set_data2 = train_set2.iloc[:, 0:16].astype('float32')
target_set_data2 = train_set2.iloc[:, 16:21].astype('float32')

train_tensor2 = torch.from_numpy(train_set_data2.to_numpy()).float()
target_tensor2 = torch.from_numpy(target_set_data2.to_numpy()).float()

In [50]:
# device3 dataset
# read_csv(path) already used the first CSV row as column names, and the
# former rename(...).drop(...) expression discarded its result, so it was a
# no-op and has been removed.
# Columns 0-15 are the model inputs, columns 16-20 the regression targets.
train_set_data3 = train_set3.iloc[:, 0:16].astype('float32')
target_set_data3 = train_set3.iloc[:, 16:21].astype('float32')

train_tensor3 = torch.from_numpy(train_set_data3.to_numpy()).float()
target_tensor3 = torch.from_numpy(target_set_data3.to_numpy()).float()


In [51]:
# Select the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
import numpy as np

In [52]:
# Create the federated environment: hook PyTorch first, then build one
# virtual worker per simulated device (created in the order bob, alice, joe).
hook = sy.TorchHook(torch)

bob, alice, joe = (
    sy.VirtualWorker(hook, id=worker_id)
    for worker_id in ("bob", "alice", "joe")
)



In [53]:
# creating the federated dataset
# Targets are sent as float tensors. The integer (LongTensor) copies that used
# to be built here were never passed to any dataset, so they have been removed.

# sending the data set to the devices
bob_train_dataset = sy.BaseDataset(train_tensor1, target_tensor1).send(bob)
# NOTE: despite its name, this dataset is held by the worker `alice`.
anne_train_dataset = sy.BaseDataset(train_tensor2, target_tensor2).send(alice)
joe_train_dataset = sy.BaseDataset(train_tensor3, target_tensor3).send(joe)

# combine the three remote datasets and batch over them without shuffling
federated_train_dataset = sy.FederatedDataset([bob_train_dataset, anne_train_dataset, joe_train_dataset])
federated_train_loader = sy.FederatedDataLoader(federated_train_dataset, shuffle=False, batch_size=32)

In [54]:

# Print the federated dataset's text representation.
print(federated_train_dataset)

FederatedDataset
    Distributed accross: bob, alice, joe
    Number of datapoints: 1245



In [55]:
# Build the network for 16 input features and place it on the selected device.
model3 = MLP(16).to(device)

In [56]:
# Print the loader object (only its default repr is shown).
print(federated_train_loader)


<syft.frameworks.torch.fl.dataloader.FederatedDataLoader object at 0x7ffc29ee1ee0>


In [57]:
def train(model, federate_train_loader, optimizer, epoch):
    """Run one epoch of federated training with an L1 loss.

    For every batch the model is sent to the worker referenced by
    ``data.location``, one optimizer step is taken, and the model is fetched
    back with ``model.get()``. Progress is printed every 10 batches.

    Uses the module-level ``device`` for the ``.to(device)`` calls.

    Args:
        model: Model to train; must provide ``train``, ``send`` and ``get``.
        federate_train_loader: Sized iterable of ``(data, target)`` batches
            whose ``data`` exposes a ``location`` attribute.
        optimizer: Optimizer built over ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    criterion = L1Loss()
    # Take the batch size from the loader so the progress counters stay correct
    # if the loader is rebuilt with another size; 32 is the previous constant.
    batch_size = getattr(federate_train_loader, "batch_size", 32)
    n_batches = len(federate_train_loader)
    for batch_idx, (data, target) in enumerate(federate_train_loader):
        model.send(data.location)
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target.float())
        loss.backward()
        optimizer.step()
        model.get()
        if batch_idx % 10 == 0:
            # fetch the loss with .get() before reading its scalar value
            loss = loss.get()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch,
                    batch_idx * batch_size,  # samples processed so far (approximate)
                    n_batches * batch_size,  # upper bound on samples per epoch
                    100. * batch_idx / n_batches,
                    loss.item()
                 )
            )

In [59]:
# Train model3 with plain SGD for a fixed number of epochs.
# The epoch count has its own name so the loop variable no longer overwrites it.
N_EPOCHS = 100
optimizer = optim.SGD(model3.parameters(), lr=0.03)
for epoch in range(1, N_EPOCHS + 1):
    train(model3, federated_train_loader, optimizer, epoch)








In [16]:
# save the model
# Only the state_dict (parameter tensors) is written, not the module object.
# torch.save does not create the target directory, so ./model must already exist.
import torch 
torch.save(model3.state_dict(), './model/master_model.pt')

In [17]:
# predict the outputs for one row of data
def predict(row, model):
    """Run ``model`` on a single row of features and return a NumPy array.

    Args:
        row: Sequence of numeric feature values for one sample.
        model: Callable (typically a ``torch.nn.Module``) applied to a
            one-row float tensor built from ``row``.

    Returns:
        numpy.ndarray: The model output for the one-row batch.
    """
    # convert row to a one-row batch
    batch = Tensor([row])
    # Move the batch to the device of the model's parameters, if it has any;
    # otherwise a model living on the GPU would reject the CPU input.
    params = getattr(model, "parameters", None)
    first_param = next(iter(params()), None) if callable(params) else None
    if first_param is not None:
        batch = batch.to(first_param.device)
    # inference only: no autograd graph is needed
    with torch.no_grad():
        yhat = model(batch)
    # .cpu() is required before .numpy() when the output is on the GPU
    return yhat.detach().cpu().numpy()

In [18]:
# One example sample with 16 feature values, matching the 16 inputs of model3.
row =  [22,1024,0,0.472,0.034,360,736,24,2,869,4,1,0,3,0,26]
client = predict(row, model3)

In [19]:
# Show the values predicted for the example row.
print('Predicted:', client)

Predicted: [[4.1694155 5.2498083 3.6201644 4.387382  4.3095016]]


In [29]:
# dataset definition
class CSVDataset(Dataset):
    """Dataset of feature/target rows read from a CSV file with a header row.

    The file is read with ``header=None`` and the first row is then skipped,
    so every remaining cell is converted to ``float32``.

    Args:
        path: Location of the CSV file.
        n_inputs: Number of leading columns used as features.
        n_outputs: Number of columns, directly after the features, used as
            targets.
    """

    # load the dataset
    def __init__(self, path, n_inputs=16, n_outputs=5):
        # load the csv file as a dataframe
        df = read_csv(path, header=None)
        # Row 0 holds the column names, so only rows 1.. carry data. The
        # former rename(...).drop(...) expression discarded its result and
        # has been removed.
        values = df.values[1:]
        # store the inputs and outputs
        self.X = values[:, 0:n_inputs].astype('float32')
        self.y = values[:, n_inputs:n_inputs + n_outputs].astype('float32')

        # ensure target has the right shape
        self.y = self.y.reshape((len(self.y), n_outputs))

    # number of rows in the dataset
    def __len__(self):
        return len(self.X)

    # get a row at an index
    def __getitem__(self, idx):
        return [self.X[idx], self.y[idx]]

    # get indexes for train and test rows
    def get_splits(self, n_test=0.33):
        """Randomly split the dataset, reserving a fraction ``n_test`` for testing.

        Returns:
            The ``[train, test]`` subsets produced by ``random_split``.
        """
        # determine sizes
        test_size = round(n_test * len(self.X))
        train_size = len(self.X) - test_size
        # calculate the split
        return random_split(self, [train_size, test_size])

# prepare the dataset
def prepare_data(path):
    """Build the train and test loaders for the CSV file at ``path``.

    The file is loaded through ``CSVDataset`` and split with its
    ``get_splits()`` defaults; each part is wrapped in an unshuffled
    ``DataLoader`` with a batch size of 32.

    Returns:
        tuple: ``(train_dl, test_dl)``.
    """
    train_part, test_part = CSVDataset(path).get_splits()
    loaders = tuple(
        DataLoader(part, batch_size=32, shuffle=False)
        for part in (train_part, test_part)
    )
    return loaders

In [30]:
# Print the test loader object (only its default repr is shown).
print(test_dl)

<torch.utils.data.dataloader.DataLoader object at 0x7ffc154b6e50>


In [43]:
# pytorch mlp for regression
from numpy import vstack
from numpy import sqrt
from sklearn.metrics import mean_squared_error

# evaluate the model
def evaluate_model(test_dl, model):
    """Compute the mean squared error of ``model`` over every batch of ``test_dl``.

    Args:
        test_dl: Iterable of ``(inputs, targets)`` tensor batches.
        model: Callable (typically a ``torch.nn.Module``) producing the
            predictions for a batch of inputs.

    Returns:
        The result of ``mean_squared_error(actuals, predictions)`` over the
        stacked batches.

    Raises:
        ValueError: If ``test_dl`` yields no batches.
    """
    # Feed batches on the device of the model's parameters, if it has any, so
    # a model living on the GPU can be evaluated with CPU-side loaders.
    params = getattr(model, "parameters", None)
    first_param = next(iter(params()), None) if callable(params) else None

    predictions, actuals = [], []
    # evaluation only: no autograd graph is needed
    with torch.no_grad():
        for inputs, targets in test_dl:
            if first_param is not None:
                inputs = inputs.to(first_param.device)
            # .cpu() is required before .numpy() when tensors are on the GPU
            output = model(inputs).detach().cpu().numpy()
            actual = targets.detach().cpu().numpy()
            # align the targets with the prediction shape (rows x outputs)
            actual = actual.reshape(output.shape)
            # store
            predictions.append(output)
            actuals.append(actual)
    if not predictions:
        raise ValueError("test_dl yielded no batches to evaluate")
    predictions, actuals = vstack(predictions), vstack(actuals)
    # calculate mse
    mse = mean_squared_error(actuals, predictions)
    return mse

In [44]:
# Evaluate the trained model on the test part of a fresh random split of the CSV.
# NOTE(review): this split is drawn from the same file that the training rows
# were sampled from earlier in the notebook, so evaluation rows may overlap
# with training rows — confirm whether a held-out set should be used instead.
# train_dl is returned by prepare_data but not used in this cell.
path = './data/qoe/federated_devices/pokemon_complet.csv'
train_dl, test_dl = prepare_data(path)

mse = evaluate_model(test_dl, model3)
print('MSE: %.3f, RMSE: %.3f' % (mse, sqrt(mse)))

MSE: 1.315, RMSE: 1.147
