In [2]:
from sklearn.linear_model import SGDClassifier
import numpy as np
from simulation_util import client_update
import warnings

# hide the warning message temporarily
warnings.simplefilter("ignore")

# auto-reload the modules everytime a cell is run
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Client Update Example

In [3]:
# Local-training settings for the single simulated client.
epochs = 3
batch_size = 3

# Toy data set standing in for what the server would hand to the client:
# six samples with three features each, plus one binary label per sample.
features = [
    [1, 4, 3],
    [0, 2, 2],
    [1, 4, 0],
    [0, 5, 3],
    [1, 2, 1],
    [0, 2, 9],
]
labels = [1, 0, 1, 0, 1, 0]

# Starting model as [coefficients, intercepts]. The coefficients are given
# here as a flat vector with one entry per feature; the printed result comes
# back with one row of coefficients (shape (1, num_features)).
coefs = np.array([29.0, 0.0, 0.0])
intercepts = np.array([-9])
weights = [coefs, intercepts]

# Run the client-side update and show the weights it returns.
new_weights = client_update(weights, epochs, batch_size, features, labels)
print(new_weights)

[array([[28.48292602, -0.02897195, -0.04332721]]), array([-9.00973857])]


## Server Update Example

In [4]:
import numpy as np
from simulation_util import server_update

# Seed NumPy's global RNG so the synthetic data below is identical on every
# "Restart & Run All".
# NOTE(review): server_update may draw randomness from another source (e.g.
# for client sampling or shuffling); if so the printed weights can still vary
# between runs -- confirm in simulation_util.
np.random.seed(0)

# Settings handed to server_update (see the call at the bottom of the cell).
client_fraction = 0.5
num_rounds = 10
epoch = 10
batch_size = 25
display_weight_per_round = True

# Dimensions of the synthetic data set: one (samples, features) block per client.
num_client = 100
samples_per_client = 100
num_features = 3

# Zero-initialised model as [coefficients, intercept]. The arrays are float
# (matching the float64 initialisation used by the simulation runner) and are
# sized from num_features instead of repeating a hard-coded length.
init_weights = [np.zeros(num_features), np.zeros(1)]

# Random integer features in [0, 10) and random binary labels for every client.
features = np.random.randint(10, size=(num_client, samples_per_client, num_features))
labels = np.random.randint(2, size=(num_client, samples_per_client))

new_clf = server_update(
    init_weights,
    client_fraction,
    num_rounds,
    features,
    labels,
    epoch,
    batch_size,
    display_weight_per_round,
)

Updated Weights:  [[ 2.41126175 -1.86852129  2.54946438]] [-3.85193902]
Updated Weights:  [[-2.21508766  5.79140132 -6.35425011]] [-16.32732165]
Updated Weights:  [[-9.44761818  0.93745551 12.29904039]] [-3.7761171]
Updated Weights:  [[-7.71386106  2.48841301 13.20955494]] [-0.96720671]
Updated Weights:  [[-8.05148847 -8.54217737  1.10910935]] [-5.64997482]
Updated Weights:  [[-10.57993055   9.72163346  -1.77701689]] [-16.94230952]
Updated Weights:  [[-9.67600676 -3.74694828 -6.9874319 ]] [5.92436471]
Updated Weights:  [[ 7.48101602 -5.50368669  2.89677314]] [-13.22231835]
Updated Weights:  [[-1.30934672  3.0314917   9.82082531]] [-1.2742573]
Updated Weights:  [[-11.44089677  -7.01592399   8.84258137]] [-17.68213567]


## Simulation Runner

In [5]:
from sklearn.model_selection import ParameterGrid, train_test_split
from simulation_util import server_update
import numpy as np
import random_data_gen as rdata_gen
import pandas as pd

# Dimensions of the pre-generated data set. They configure the generator
# parameters and also determine which CSV file is loaded below.
NUM_SAMPLES = 20000
NUM_LABELS = 3
NUM_FEATURES = 4
NUM_CLIENTS = 100
g_prms = rdata_gen.InputGenParams(NUM_SAMPLES, NUM_LABELS, NUM_FEATURES, NUM_CLIENTS)

# Build the file name from the constants above so the loaded file can never
# silently disagree with g_prms (evaluates to
# "datasets/blob_S20000_L3_F4_U100.csv" for the current settings).
dataset_path = "datasets/blob_S{}_L{}_F{}_U{}.csv".format(
    NUM_SAMPLES, NUM_LABELS, NUM_FEATURES, NUM_CLIENTS
)
df = pd.read_csv(dataset_path)

# Convert the flat table into per-client arrays for the simulator.
sim_labels, sim_features = rdata_gen.transform_data_for_simulator_format(df, g_prms)
features = np.array(sim_features)
labels = np.array(sim_labels)

# Fail early if the data does not have the layout the rest of the notebook
# relies on: features as (client, sample, feature), labels as (client, sample).
assert features.ndim == 3 and labels.ndim == 2, "unexpected array rank"
assert features.shape[:2] == labels.shape, "features/labels disagree on clients or samples"
assert features.shape[0] == NUM_CLIENTS, "first axis should index the clients"
assert features.shape[2] == NUM_FEATURES, "last axis should index the features"

# With the settings above this prints:
# (100, 200, 4)
# (100, 200)
print(features.shape)
print(labels.shape)

# Hold out 40% of the entries along the first (client) axis for testing.
# The fixed random_state keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.4,
    random_state=0,
)

# Peek at the first sample of the first training client and its label.
print(X_train[0, 0])
print(y_train[0, 0])

# Training shapes; with the 100-client data set loaded above these are
# (60, 200, 4)
# (60, 200)
print(X_train.shape, y_train.shape, sep="\n")

# Zero-initialised model: one row of coefficients and one intercept per label.
# np.zeros already yields C-ordered float64 arrays, so no extra arguments are needed.
init_weights = np.zeros((NUM_LABELS, NUM_FEATURES))
init_intercept = np.zeros(NUM_LABELS)

# Alternative non-zero starting point, kept for reference:
# init_weights = np.array([[ 4.99547008,  5.62897696,  9.68194524, -6.54602355],
#        [ 6.49729334,  8.97500002,  9.1586204 , -2.78742303],
#        [-4.23699246, 10.53697248,  7.5595605 , -6.49384498]])
# init_intercept = np.array([-136.32219189, -138.92717176, -136.08247396])

# Candidate values for every tunable setting; ParameterGrid expands this
# mapping into all combinations.
param_grid = dict(
    client_fraction=[1],
    epoch=[30, 40],
    batch_size=[40, 80],  # TODO: need to implement an infinite batch size
    init_weight=[[init_weights, init_intercept]],
    num_rounds=[10],
)

# The held-out data does not depend on the hyper-parameters, so flatten it
# once before the loop instead of on every iteration.
# We need to remove the client dimension from our data for testing:
# ex: [[[1, 1], [2, 2]], [[3, 3], [4, 4]]] needs to become [[1, 1], [2, 2], [3, 3], [4, 4]] for features
# and [[1, 2], [3, 4]] needs to become [1, 2, 3, 4] for labels
reshaped_X_test = np.reshape(X_test, (X_test.shape[0] * X_test.shape[1], X_test.shape[2]))
reshaped_y_test = np.reshape(y_test, y_test.size)

# Best result seen so far; stays None if the grid turns out to be empty.
best_score = None
best_params = None

# Run training/testing over all parameter combinations to find the best one.
for params in ParameterGrid(param_grid):
    print("Training...")
    print("Params: ", params)
    classifier = server_update(
        params["init_weight"],
        params["client_fraction"],
        params["num_rounds"],
        X_train,
        y_train,
        params["epoch"],
        params["batch_size"],
        False,  # same slot as display_weight_per_round above: no per-round printing
    )
    weights = [classifier.coef_, classifier.intercept_]

    # Evaluate the trained model on the flattened held-out clients.
    score = classifier.score(reshaped_X_test, reshaped_y_test)

    print('Weights: {}\nScore: {:f}\n\n'.format(weights, score))

    # Remember the first combination that reaches the highest score.
    if best_score is None or score > best_score:
        best_score = score
        best_params = params

# Report the winning combination, which is the stated goal of this search.
if best_params is not None:
    print("Best params: ", best_params)
    print("Best score: {:f}".format(best_score))

(100, 200, 4)
(100, 200)
[-10.88348229  -2.93027725  -0.64782806  -7.41013639]
1
(60, 200, 4)
(60, 200)
Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 30, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[  3.44995639,   7.14690942,  30.59698956, -15.31640262],
       [  9.00978017,  16.03720563,  29.44427004,  -8.3968508 ],
       [ -6.54168793,  17.96046064,  22.10775999, -17.09432116]]), array([-282.3264304 , -283.41673775, -281.316077  ])]
Score: 0.345500


Training...
Params:  {'batch_size': 40, 'client_fraction': 1, 'epoch': 40, 'init_weight': [array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]]), array([0., 0., 0.])], 'num_rounds': 10}
Weights: [array([[ 16.71806359,  10.78111023,  28.63589043, -12.05894321],
       [ 10.23156596,  18.73107409,  11.81058198,  -0.42007112],
       [-17.77045187,   7.69868314,  13.56291205, -18.0167512