In [1]:
from sklearn.linear_model import SGDClassifier
import numpy as np
from simulation_util import client_update
import warnings

# Silence every warning for the rest of the session (the "ignore" action is
# installed for all categories).
# NOTE(review): this also hides any warnings raised by the libraries used
# below — drop this line once the noisy warnings have been dealt with.
warnings.simplefilter("ignore")

# Load IPython's autoreload extension. Mode 2 reloads all modules before each
# cell is executed, so edits to local modules such as simulation_util are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Client Update Example

In [2]:
# Toy inputs for one client: six samples with three features each and one
# 0/1 label per sample. In the real flow this data will be provided by the server.
features = [[1, 4, 3], [0, 2, 2], [1, 4, 0], [0, 5, 3], [1, 2, 1], [0, 2, 9]]
labels = [1, 0, 1, 0, 1, 0]

# Starting model parameters, packed as [coefficients, intercepts].
# NOTE(review): the recorded run of this cell ended with
#   TypeError: Argument 'weights' has incorrect type (expected numpy.ndarray, got numpy.float64)
# `coefs` below is 1-D (shape (3,)), while the comment on that line asks for
# num_classes * num_features. Presumably client_update indexes the coefficient
# array per class and therefore needs a 2-D array — confirm against
# simulation_util.client_update before changing the shape.
coefs = np.array([29., 0., 0.]) # should be of size num_classes * num_features
# NOTE(review): integer dtype here, float dtype for coefs — verify the helper accepts both.
intercepts = np.array([-9])
weights = [coefs, intercepts]

# Training settings forwarded to client_update (presumably the number of local
# passes over the data and the mini-batch size — verify against the helper).
epochs = 3
batch_size = 3

# Run one client-side update and show whatever client_update returns.
new_weights = client_update(weights, epochs, batch_size, features, labels)
print(new_weights)

TypeError: Argument 'weights' has incorrect type (expected numpy.ndarray, got numpy.float64)

## Server Update Example

In [8]:
import numpy as np
from simulation_util import server_update

# Seed NumPy's global RNG so the synthetic data below is identical on every
# run (and so is any np.random-based sampling server_update may do internally —
# that helper is not visible here).
np.random.seed(0)

# Size of the synthetic federation.
num_client = 100
samples_per_client = 100
num_features = 3

# Initial model as [coefficients, intercept]. Float zeros (not integer zeros)
# so the starting point has the same dtype as the float64 weights used by the
# Simulation Runner and cannot be truncated by in-place arithmetic.
init_weights = [np.zeros(num_features, dtype=np.float64), np.zeros(1, dtype=np.float64)]

# Federated-training settings passed straight through to server_update.
client_fraction = 0.5
num_rounds = 10
epoch = 10
batch_size = 25
display_weight_per_round = True

# Random integer features in [0, 10) and random binary labels, laid out as
# (client, sample, feature) and (client, sample) respectively.
features = np.random.randint(10, size=(num_client, samples_per_client, num_features))
labels = np.random.randint(2, size=(num_client, samples_per_client))

new_clf = server_update(init_weights, client_fraction, num_rounds, features, labels, epoch, batch_size, display_weight_per_round)

Updated Weights:  [[ 4.75450975 -2.44971341  4.40047858]] [-9.20703972]
Updated Weights:  [[  6.71901712 -10.38477388   3.39131083]] [5.5717736]
Updated Weights:  [[ 6.95792936 -7.29211908  6.28516806]] [-8.25096537]
Updated Weights:  [[3.51996821 7.01965866 5.21825586]] [7.89624394]
Updated Weights:  [[-8.30318559  6.09994066 -4.22280758]] [-7.68441356]
Updated Weights:  [[ 3.90333898  0.70948686 -1.87217782]] [13.40375092]
Updated Weights:  [[-11.7494913    3.47976377   4.0840828 ]] [-16.32461824]
Updated Weights:  [[-3.9286137  -2.47186698  6.09179214]] [5.84250549]
Updated Weights:  [[-4.21205324 -2.11821868  6.23597035]] [-3.37561152]
Updated Weights:  [[-5.16678509 10.0400736  -2.46470228]] [-3.46428207]


## Simulation Runner

In [None]:
from sklearn.model_selection import ParameterGrid, train_test_split
from simulation_util import server_update
import numpy as np
import random_data_gen as rdata_gen
import pandas as pd
from numpy import zeros, newaxis
from sklearn.utils import shuffle


# Load the data
print("start running!!!!!")
NUM_SAMPLES = 70000
NUM_LABELS = 10
NUM_FEATURES = 784
NUM_CLIENTS = 100
g_prms = rdata_gen.InputGenParams(NUM_SAMPLES, NUM_LABELS, NUM_FEATURES, NUM_CLIENTS)

# The CSV is read without a header row. The last column is used as the label
# and every other column as a feature.
df = pd.read_csv("datasets/test.csv",header=None)
# Seeded shuffle so the assignment of rows to clients is repeatable, matching
# the fixed random_state used for the train/test split further down.
df = shuffle(df, random_state=0)
data = df.to_numpy()
features = data[:, :-1]
labels = data[:, -1:]  # kept 2-D (n_rows, 1); flattened per client below

print(features.shape)
print(labels.shape)

# Split the rows into equally sized per-client chunks. The chunk size comes
# from the configured constants, but the number of chunks comes from the rows
# actually loaded: a trailing partial chunk would make the stacked array
# ragged, so rows that do not fill a whole client are dropped.
rows_per_client = NUM_SAMPLES // NUM_CLIENTS
num_full_clients = len(features) // rows_per_client
usable_rows = num_full_clients * rows_per_client
if usable_rows != len(features):
    print("dropping", len(features) - usable_rows, "trailing rows that do not fill a whole client chunk")

# (client, sample, feature) and (client, sample); copied into fresh contiguous
# arrays so they do not alias `data`.
reshaped_feature = np.ascontiguousarray(
    features[:usable_rows].reshape(num_full_clients, rows_per_client, features.shape[1])
)
reshaped_label = np.ascontiguousarray(
    labels[:usable_rows].reshape(num_full_clients, rows_per_client)
)
print("feature shape",reshaped_feature.shape)
print("label shape",reshaped_label.shape)


# features.reshape(NUM_CLIENTS,NUM_FEATURES,NUM_SAMPLES)
# labels.reshape(NUM_CLIENTS,NUM_FEATURES,NUM_SAMPLES)


# sim_labels, sim_features = rdata_gen.transform_data_for_simulator_format(df, g_prms)
# features = np.array(sim_features)
# labels = np.array(sim_labels)


# (100, 200, 4)
# (100, 200)


# Hold out 40% of the entries along the first axis (the per-client chunks) for
# testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    reshaped_feature,
    reshaped_label,
    test_size=0.4,
    random_state=0,
)

print("X_train", X_train.shape)
print("Y train", y_train.shape)

# All-zero starting model: one row of coefficients per label and one intercept
# per label, both as C-ordered float64 arrays.
init_weights = np.zeros(shape=(NUM_LABELS, NUM_FEATURES), dtype=np.float64, order="C")
init_intercept = np.zeros(shape=NUM_LABELS, dtype=np.float64, order="C")

# Find all the permutations of the parameters
param_grid = {"client_fraction": [0.5],
              "epoch": [10],
              "batch_size": [40, 80], # TODO: need to implement an infinite batch size
              "init_weight": [[init_weights, init_intercept]],
              "num_rounds": [2]}

# The held-out data is the same for every parameter combination, so remove its
# client dimension once, up front.
# ex: [[[1, 1], [2, 2]], [[3, 3], [4, 4]]] needs to become [[1, 1], [2, 2], [3, 3], [4, 4]] for features
# and [[1, 2], [3, 4]] needs to become [1, 2, 3, 4] for labels
reshaped_X_test = np.reshape(X_test, (X_test.shape[0] * X_test.shape[1], X_test.shape[2]))
reshaped_y_test = np.reshape(y_test, y_test.size)

# Best result seen so far (None until the first combination has been scored).
best_score = None
best_params = None

# run training/testing over all parameter combinations to get the best combination
for params in ParameterGrid(param_grid):
    print("Training...")
    print("Params: ", params)

    # Every combination must start from the same initial model. The grid holds
    # a single shared pair of arrays, so hand server_update private copies in
    # case training updates them in place (the helper is not visible here).
    starting_weights = [np.copy(w) for w in params["init_weight"]]

    classifier = server_update(starting_weights, params["client_fraction"], params["num_rounds"], X_train, y_train, params["epoch"], params["batch_size"], False)
    weights = [classifier.coef_, classifier.intercept_]

    score = classifier.score(reshaped_X_test, reshaped_y_test)

    print('Weights: {}\nScore: {:f}\n\n'.format(weights, score))

    # Remember the winning settings; the (large) initial weight arrays are left
    # out of the summary because they are identical for every combination.
    if best_score is None or score > best_score:
        best_score = score
        best_params = {key: value for key, value in params.items() if key != "init_weight"}

if best_params is not None:
    print('Best score: {:f}\nBest params: {}'.format(best_score, best_params))