In [7]:
# pytorch mlp for regression
from numpy import vstack
from numpy import sqrt
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch.nn import Linear
from torch.nn import Sigmoid , ReLU
from torch.nn import Module
from torch.optim import SGD
from torch.nn import MSELoss
from torch.nn.init import xavier_uniform_
from sklearn import preprocessing


In [8]:
# dataset definition
class CSVDataset(Dataset):
    """Tabular regression dataset backed by a header-less CSV file.

    The first ``n_inputs`` columns are used as features and the column that
    follows them is the regression target. Exact duplicate rows are dropped
    (first occurrence kept) before the arrays are built.

    Attributes:
        X: float32 array of shape (rows, n_inputs) holding the features.
        y: float32 array of shape (rows, 1) holding the target.
    """

    def __init__(self, path, n_inputs=8):
        """Load ``path`` and split it into feature and target arrays.

        Args:
            path: location of a CSV file without a header row.
            n_inputs: number of leading feature columns; the target is read
                from column index ``n_inputs``. Defaults to 8.

        Raises:
            ValueError: if the file has fewer than ``n_inputs + 1`` columns.
        """
        # load the csv file as a dataframe
        df = read_csv(path, header=None)

        # report the frame size before and after removing duplicate rows
        print(df.shape)
        df = df.drop_duplicates(keep='first')
        print(df.shape)

        # Slicing past the last column would silently give an empty target,
        # so reject files that are too narrow with a clear message.
        if df.shape[1] < n_inputs + 1:
            raise ValueError(
                'expected at least %d columns (%d features + 1 target), found %d'
                % (n_inputs + 1, n_inputs, df.shape[1])
            )

        # store the inputs and outputs
        values = df.values
        self.X = values[:, 0:n_inputs].astype('float32')
        self.y = values[:, n_inputs:n_inputs + 1].astype('float32')

        # ensure target has the right shape
        self.y = self.y.reshape((len(self.y), 1))

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``[features, target]`` for the row at ``idx``."""
        return [self.X[idx], self.y[idx]]

    def get_splits(self, n_test=0.2):
        """Randomly split the dataset into train and test subsets.

        Args:
            n_test: fraction of rows assigned to the test subset.

        Returns:
            The ``[train, test]`` subsets produced by ``random_split``.
        """
        # determine sizes
        test_size = round(n_test * len(self.X))
        train_size = len(self.X) - test_size
        # calculate the split
        return random_split(self, [train_size, test_size])

# model definition
class MLP(Module):
    """Small regression perceptron: n_inputs -> 8 -> 4 -> 1.

    Both hidden layers are followed by a sigmoid; the output layer is linear.
    Each layer's weight matrix is re-initialised with Xavier-uniform values
    right after the layer is created.
    """

    def __init__(self, n_inputs):
        """Build the layers for inputs that have ``n_inputs`` features."""
        super().__init__()
        # first hidden layer: n_inputs -> 8
        self.hidden1 = Linear(n_inputs, 8)
        xavier_uniform_(self.hidden1.weight)
        self.act1 = Sigmoid()
        # second hidden layer: 8 -> 4
        self.hidden2 = Linear(8, 4)
        xavier_uniform_(self.hidden2.weight)
        self.act2 = Sigmoid()
        # output layer: 4 -> 1
        self.hidden3 = Linear(4, 1)
        xavier_uniform_(self.hidden3.weight)

    def forward(self, X):
        """Propagate the batch ``X`` through the network and return the output."""
        first = self.act1(self.hidden1(X))
        second = self.act2(self.hidden2(first))
        return self.hidden3(second)

# prepare the dataset
def prepare_data(path):
    """Load the CSV at ``path`` and return ``(train_dl, test_dl)`` data loaders.

    The rows are split with ``CSVDataset.get_splits`` using its default test
    fraction. Each subset is wrapped in a DataLoader that yields one row per
    batch without shuffling.
    """
    train_subset, test_subset = CSVDataset(path).get_splits()
    loader_options = dict(batch_size=1, shuffle=False)
    return (
        DataLoader(train_subset, **loader_options),
        DataLoader(test_subset, **loader_options),
    )

# train the model
def train_model(train_dl, model, epochs=100, lr=0.01, momentum=0.9):
    """Fit ``model`` on ``train_dl`` with SGD, minimising the mean squared error.

    Args:
        train_dl: iterable of ``(inputs, targets)`` batches.
        model: torch module to optimise in place.
        epochs: number of full passes over ``train_dl``. Defaults to 100.
        lr: SGD learning rate. Defaults to 0.01.
        momentum: SGD momentum. Defaults to 0.9.

    Returns:
        list of float: the mean batch loss of every epoch, in order
        (``nan`` for an epoch in which the loader yielded no batch).
    """
    # define the optimization
    criterion = MSELoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    # make sure the model is in training mode even if it was evaluated before
    model.train()
    history = []
    # enumerate epochs
    for _ in range(epochs):
        running_loss, n_batches = 0.0, 0
        # enumerate mini batches
        for inputs, targets in train_dl:
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1
        history.append(running_loss / n_batches if n_batches else float('nan'))
    return history

# evaluate the model
def evaluate_model(test_dl, model):
    """Return the mean squared error of ``model`` over all batches of ``test_dl``.

    Predictions and targets are gathered batch by batch as numpy arrays,
    stacked row-wise, and compared with ``mean_squared_error``.
    """
    batch_predictions = []
    batch_actuals = []
    for inputs, targets in test_dl:
        # model output for this batch as a numpy array
        batch_predictions.append(model(inputs).detach().numpy())
        # targets as a column vector with one row per sample
        truth = targets.numpy()
        batch_actuals.append(truth.reshape((len(truth), 1)))
    # calculate mse over the stacked batches
    return mean_squared_error(vstack(batch_actuals), vstack(batch_predictions))

# make a class prediction for one row of data
def predict(row, model):
    """Run ``model`` on a single feature row and return the output as a numpy array.

    ``row`` is wrapped in a list so the model receives a batch holding one sample.
    """
    batch = Tensor([row])
    return model(batch).detach().numpy()

In [9]:
import numpy as np
# Train and evaluate a stand-alone model on client 1's data.
path = './data/qoe/federated_devices/simtetic/small/client_1.csv'
train_dl, test_dl = prepare_data(path)
# sizes of the train and test subsets
print(len(train_dl.dataset), len(test_dl.dataset))
# define the network: 8 inputs, matching the 8 feature columns CSVDataset extracts
model = MLP(8)
# train the model
train_model(train_dl, model)
# evaluate the model on the held-out rows
# NOTE(review): the split comes from random_split and no seed is set in the visible
# cells, so the reported numbers presumably change between runs.
mse = evaluate_model(test_dl, model)
print('MSE: %.3f, RMSE: %.3f' % (mse, sqrt(mse)))

# make a single regression prediction for one hand-written feature row
# (8 values, one per model input)
row = [2560,0.395,0.07,1,0,4,2,1]

yhat = predict(row, model)
print('Predicted:',yhat)

(75, 9)
(74, 9)
59 15
MSE: 13949.146, RMSE: 118.107
Predicted: [[267.31522]]


In [None]:
import numpy as np
path = './data/qoe/federated_devices/simtetic/small/client_2.csv'
train_dl, test_dl = prepare_data(path)
print(len(train_dl.dataset), len(test_dl.dataset))
# define the network
model2 = MLP(5)
# train the model
train_model(train_dl, model)
# evaluate the model
mse = evaluate_model(test_dl, model)
print('MSE: %.3f, RMSE: %.3f' % (mse, sqrt(mse)))
#make a single prediction (expect class=1)
#row = [0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98]
#row =  np.array([60,754.5763,25.26666667,2,916,2,5,19,0,37])

row = [2048,1,0,3,3]
#vector = np.vectorize(np.float)
#row = np.array(list(map(np.float, row)))

yhat = predict(row, model)
print('Predicted:',yhat)

In [None]:
import torch 
torch.save(model.state_dict(), './model/model_client1.pt')

In [None]:
import torch 
torch.save(model.state_dict(), './model/model_client2.pt')

In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import logging
from pandas import read_csv

# import PySyft to help us simulate federated learning
import syft as sy

# hook PyTorch to PySyft, i.e. add extra functionality to support federated learning
# and other private AI tools
hook = sy.TorchHook(torch) 

In [None]:
# one PySyft virtual worker per simulated client, created in the order bob, anne, joe
bob, anne, joe = (
    sy.VirtualWorker(hook, id=worker_id) for worker_id in ("bob", "anne", "joe")
)


In [None]:
from torch.nn import Linear
from torch.nn import Sigmoid, ReLU
from torch.nn import Module
from torch.optim import SGD
from torch.nn import MSELoss
from torch.nn.init import xavier_uniform_
from torch import Tensor
# model definition
# NOTE: this redefines MLP with wider hidden layers (16 and 8 units) and
# replaces the earlier 8/4 definition for every cell that runs after it.
class MLP(Module):
    """Regression perceptron: n_inputs -> 16 -> 8 -> 1.

    Both hidden layers are followed by a sigmoid; the output layer is linear.
    Every layer's weight matrix is initialised with Xavier-uniform values.
    """

    # define model elements
    def __init__(self, n_inputs):
        """Build the layers for inputs that have ``n_inputs`` features."""
        super(MLP, self).__init__()
        # input to first hidden layer
        self.hidden1 = Linear(n_inputs, 16)
        xavier_uniform_(self.hidden1.weight)
        self.act1 = Sigmoid()
        # second hidden layer
        self.hidden2 = Linear(16, 8)
        xavier_uniform_(self.hidden2.weight)
        self.act2 = Sigmoid()
        # output layer
        self.hidden3 = Linear(8, 1)
        xavier_uniform_(self.hidden3.weight)

    # forward propagate input
    def forward(self, X):
        """Propagate the batch ``X`` through the network and return the output."""
        # input to first hidden layer
        X = self.hidden1(X)
        X = self.act1(X)
        # second hidden layer
        X = self.hidden2(X)
        X = self.act2(X)
        # output layer
        X = self.hidden3(X)
        return X


In [None]:
import pandas as pd
import torch

# determine the supported device
def get_device():
    """Return the first CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

# convert a df to tensor to be used in pytorch
def df_to_tensor(df):
    """Convert a DataFrame's values to a float tensor placed on ``get_device()``."""
    as_float = torch.from_numpy(df.values).float()
    return as_float.to(get_device())

In [None]:
path = './data/qoe/federated_devices/simtetic/small/client_1.csv'
df = read_csv(path, header=None)

# 80/20 row split; random_state is a seed value, so the split is repeatable
train_set=df.sample(frac=0.8,random_state=200)
test_set=df.drop(train_set.index)

# store the inputs (first 8 columns) and the output (column 8)
train_set_data  = train_set.iloc[:, 0:8].astype('float32')
target_set_data = train_set.iloc[:, 8:9].astype('float32')

train_tensor = torch.from_numpy(train_set_data.to_numpy()).float()
target_tensor  = torch.from_numpy(target_set_data.to_numpy()).float()

# all columns of the held-out rows (features followed by the target)
test_tensor = torch.from_numpy(test_set.to_numpy()).float()

# A DataLoader indexes its dataset by integer position, while a DataFrame is
# indexed by column label, so wrap the held-out tensors instead of the frame.
test_dl = DataLoader(
    torch.utils.data.TensorDataset(test_tensor[:, 0:8], test_tensor[:, 8:9]),
    batch_size=2,
    shuffle=False,
)


In [None]:
path2 = './data/qoe/federated_devices/simtetic/small/client_2.csv'
df2 = read_csv(path2, header=None)

# 80/20 row split; random_state is a seed value, so the split is repeatable
train_set_2=df2.sample(frac=0.8,random_state=200)
test_set_2=df2.drop(train_set_2.index)

# inputs are the first 8 columns, the output is column 8
train_set_data2  = train_set_2.iloc[:, 0:8]
target_set_data2 = train_set_2.iloc[:, 8:9]

train_tensor_2 = df_to_tensor(train_set_data2)
target_tensor_2 =  df_to_tensor(target_set_data2)
test_tensor_2 = df_to_tensor(test_set_2)

In [None]:
path3 = './data/qoe/federated_devices/simtetic/small/client_3.csv'
# read client 3's own file (path3), not client 2's
df3 = read_csv(path3, header=None)


# 80/20 row split; random_state is a seed value, so the split is repeatable
train_set_3=df3.sample(frac=0.8,random_state=200)
test_set_3=df3.drop(train_set_3.index)

# inputs are the first 8 columns, the output is column 8
train_set_data3  = train_set_3.iloc[:, 0:8]
target_set_data3 = train_set_3.iloc[:, 8:9]

train_tensor_3 = df_to_tensor(train_set_data3)
target_tensor_3 =  df_to_tensor(target_set_data3)
test_tensor_3 = df_to_tensor(test_set_3)

In [None]:
# compute device used by the federated model: first CUDA GPU if present, else CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
import numpy as np

In [None]:
# NOTE(review): this repeats the hook and worker setup done in earlier cells. The
# second worker is bound to `alice` here (earlier it was `anne`), and `alice` is the
# name the dataset cell sends data to — confirm the earlier worker cell is still needed.
hook = sy.TorchHook(torch)

# one virtual worker per simulated client
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
joe = sy.VirtualWorker(hook, id="joe")

In [None]:
# NOTE(review): converting the float targets to LongTensor truncates any fractional
# part; train() converts them back with .float() before the MSE loss. Confirm the
# targets are integer-valued or drop the cast.
target_tensor = target_tensor.type(torch.LongTensor)
target_tensor_2 = target_tensor_2.type(torch.LongTensor)
target_tensor_3 = target_tensor_3.type(torch.LongTensor)

# wrap each client's (features, targets) pair and send it to that client's worker;
# note that the variable named anne_train_dataset is sent to the worker `alice`
bob_train_dataset = sy.BaseDataset(train_tensor,target_tensor).send(bob) 
anne_train_dataset = sy.BaseDataset(train_tensor_2, target_tensor_2).send(alice)
joe_train_dataset = sy.BaseDataset(train_tensor_3, target_tensor_3).send(joe)

# combine the three remote datasets and iterate them in batches of 4 without shuffling
federated_train_dataset = sy.FederatedDataset([bob_train_dataset, anne_train_dataset,joe_train_dataset]) 
federated_train_loader = sy.FederatedDataLoader(federated_train_dataset, shuffle=False, batch_size=4)

In [None]:
# sanity check: show the federated dataset's printed representation
print(federated_train_dataset)

In [None]:
# The federated training tensors hold the feature columns sliced from the client
# CSVs, so size the input layer from them rather than hard-coding a width that
# does not match the data.
model3 = MLP(train_tensor.shape[1]).to(device)

In [None]:
# sanity check: show the federated data loader's printed representation
print(federated_train_loader)

In [None]:
def train(model, federate_train_loader, optimizer, epoch):
    """Run one pass of federated training over ``federate_train_loader``.

    For every batch the model is sent to ``data.location``, one optimizer step
    is taken on the MSE loss, and the model is fetched back with ``model.get()``.
    Progress is printed on every 10th batch.

    Args:
        model: the model to train; it is sent and fetched once per batch.
        federate_train_loader: iterable of ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.

    Note:
        ``device`` is read from the notebook's global scope, not passed in.
    """
    model.train()
    criterion = MSELoss()
    for batch_idx, (data, target) in enumerate(federate_train_loader):
        # send the model to where this batch lives (presumably the owning worker — confirm)
        model.send(data.location)
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # targets are converted to float so they match the float output in the MSE loss
        loss = criterion(output, target.float())
        loss.backward()
        optimizer.step()
        # fetch the updated model back before the next batch
        model.get()
        if batch_idx % 10 == 0:
            # fetch the loss so its value can be printed
            loss = loss.get()
            # NOTE(review): the sample counts below multiply by a hard-coded 10, while the
            # loader in this notebook is built with batch_size=4 — the counts look off.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, 
                    batch_idx * 10 , # approximate number of samples processed so far
                    len(federate_train_loader) * 10, # approximate total number of samples
                    100. * batch_idx / len(federate_train_loader), 
                    loss.item()
                 )
            )
                  


In [None]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # add losses together
            test_loss += F.mse_loss(output, target.float(), reduction='sum').item() 

            # get the index of the max probability class
            pred = output.argmax(dim=1, keepdim=True)  
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
      

In [None]:
# plain SGD (no momentum) over the federated model's parameters
optimizer = optim.SGD(model3.parameters(), lr=0.01)

# 30 federated training passes, numbered from 1
for epoch in range(1, 31):
    train(model3, federated_train_loader, optimizer, epoch)

In [None]:
import os
import torch

# torch.save does not create missing directories, so make sure the folder exists
os.makedirs('./model', exist_ok=True)
torch.save(model3.state_dict(), './model/master_model.pt')

In [None]:
# make a class prediction for one row of data
def predict(row, model):
    """Return the model's prediction for one feature row as a numpy array.

    The row is wrapped in a list so the model sees a batch with a single sample.
    """
    single_row_batch = Tensor([row])
    prediction = model(single_row_batch)
    return prediction.detach().numpy()

In [None]:
# predict for one hand-written feature row
# NOTE(review): the row needs one value per input feature of model3.
# NOTE(review): although the result is named `client`, it is produced by model3
# (the federated model) — confirm whether a per-client model was intended here.
row = [2048,1,0,3,3]
client = predict(row, model3)

In [None]:
# show the prediction stored in `client`
print('Predicted:', client)

In [None]:
# predict with the federated model for one hand-written feature row
# NOTE(review): the row needs one value per input feature of model3.

row2 = [2048,1,0,3,3]


master = predict(row2, model3)

In [None]:
# show the prediction stored in `master`
print('Predicted:', master)

In [None]:
# Evaluate the federated model on a held-out split of client 3's data.
model3.eval()
test_loss = 0
correct = 0

import numpy as np
path = './data/qoe/federated_devices/simtetic/small/client_3.csv'
train_dl, test_dl = prepare_data(path)
criterion = MSELoss()
predictions = []
actuals = []

with torch.no_grad():
    for inputs, targets in test_dl:
        # model output for this batch as a numpy array
        predictions.append(model3(inputs).detach().numpy())
        # targets as a column vector with one row per sample
        actual = targets.numpy()
        actuals.append(actual.reshape((len(actual), 1)))
    # stack the per-batch arrays into one array each
    predictions = vstack(predictions)
    actuals = vstack(actuals)
mse = mean_squared_error(actuals, predictions)
print('MSE: %.3f, RMSE: %.3f' % (mse, sqrt(mse)))