<a href="https://colab.research.google.com/github/zhaolotelli/FedLearn/blob/main/AFL_for_Adult_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Adult dataset and data preprocessing

In [None]:
import numpy as np
import pandas as pd
import collections
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler, BatchSampler, RandomSampler

In [None]:
# Column names shared by both files; read_csv is told there is no header row.
ADULT_COLUMNS = ('age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
        'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
        'hours-per-week', 'native-country', '>50K')

# A separator longer than one character is interpreted as a regular expression,
# which only the Python parsing engine supports. Naming the engine explicitly
# keeps the parsing identical while avoiding pandas' ParserWarning about the
# implicit fallback.
train_data = pd.read_csv("adult.data", sep = ', ', header=None, names = ADULT_COLUMNS,
        na_values = '?', engine = 'python')
# skiprows = 1 drops the first line of adult.test before parsing.
test_data = pd.read_csv("adult.test", sep = ', ', header=None, names = ADULT_COLUMNS,
        na_values = '?', skiprows = 1, engine = 'python')

  This is separate from the ipykernel package so we can avoid doing imports until
  


Drop the continuous features and keep only the categorical features and the label.

In [None]:
# Column groups of the Adult dataset: continuous, categorical and label.
cont_cols = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
cato_cols = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
label_col = ['>50K']

# Remove the continuous columns from both splits.
cato_train_data = train_data.drop(columns = cont_cols)
cato_test_data = test_data.drop(columns = cont_cols)

# Separate features from the label; the label is kept as a one-column DataFrame.
X_train = cato_train_data.drop(columns = label_col)
y_train = cato_train_data[label_col]
X_test = cato_test_data.drop(columns = label_col)
y_test = cato_test_data[label_col]

One hot encoding by *pd.get_dummies*

In [None]:
# Encode train and test together so both splits get the same dummy columns,
# then cut the encoded frame back into its two parts by row position.
N = X_train.shape[0]
X_total = pd.concat([X_train, X_test], axis = 0)

OH_total = pd.get_dummies(X_total)
OH_train, OH_test = OH_total.iloc[:N], OH_total.iloc[N:]

In [None]:
# Convert the one-hot encoded frames to plain NumPy matrices.
new_X_train, new_X_test = OH_train.to_numpy(), OH_test.to_numpy()

In [None]:
# Label strings -> {0, 1}. The test-set mapping uses keys with a trailing period.
train_label_codes = {'<=50K': 0, '>50K': 1}
test_label_codes = {'<=50K.': 0, '>50K.': 1}

new_y_train = y_train['>50K'].map(train_label_codes).astype(int).to_numpy()
new_y_test = y_test['>50K'].map(test_label_codes).astype(int).to_numpy()

In [None]:
class Client_Data(object):
  """Subset of a dataset that belongs to one client group.

  The rows are split on a single indicator column of the feature matrix:
  the 'doctor' client gets the rows where that column equals 1, every other
  id gets the rows where it equals 0.

  Args:
    dataset: tuple ``(raw_X, raw_y)`` of a 2-D feature array and a label array
      with one entry per row.
    id: client identifier. Only the exact string 'doctor' selects the
      indicator == 1 rows; any other value is treated as the complement group.
    doctor_col: index of the indicator column. Defaults to 18, the value the
      notebook hard-coded (presumably the one-hot "Doctorate" education
      column - confirm against the get_dummies column order).

  Attributes:
    X: selected features cast to float32.
    y: selected labels cast to int64.
  """

  def __init__(self, dataset, id, doctor_col=18):
    raw_X, raw_y = dataset
    indicator = raw_X[:, doctor_col]
    # Both groups use an explicit comparison (not a negated mask) so rows whose
    # indicator is neither 0 nor 1 end up in neither group, as before.
    if id == 'doctor':
      mask = (indicator == 1)
    else:
      mask = (indicator == 0)
    self.X = raw_X[mask].astype(np.float32)
    self.y = raw_y[mask].astype(np.int64)

  def __len__(self):
    """Number of examples held by this client."""
    return len(self.y)

## Define Clients and Servers

In [None]:
class Client(object):
  """A federated-learning participant: an id, its local data and a local model.

  The model is attached later through ``create_model``; every other method
  simply forwards to that model using the client's own data.
  """

  def __init__(self, id, client_data):
    self.id, self.client_data = id, client_data

  def create_model(self, Learner, initial_params, learning_rate):
    """Instantiate the local model as ``Learner(initial_params, learning_rate)``."""
    local_model = Learner(initial_params, learning_rate)
    self.model = local_model

  def update_model(self, params):
    """Overwrite the local model parameters with ``params``."""
    self.model.assign_params(params)

  def train(self, epoch, batch_size):
    """Train locally, then return ``(number of local examples, local loss)``."""
    data = self.client_data
    self.model.train(data, epoch, batch_size)
    local_loss = self.model.solve_loss(data)
    return len(data), local_loss

  def sgd(self, batch_size):
    """Ask the model for one stochastic gradient.

    Returns ``(number of local examples, loss, grads)``.
    """
    data = self.client_data
    local_loss, local_grads = self.model.sgd(data, batch_size)
    return len(data), local_loss, local_grads

In [None]:
class Server(object):
  """Base federated-learning server: owns the global model and one client per id.

  Args:
    train_data: tuple ``(X, y)`` handed to ``Client_Data`` for every id.
    ids: sequence of client identifiers.
    Learner: model class, instantiated as ``Learner(initial_params, learning_rate)``
      for the server and for every client.
    initial_params: initial parameters for all models (None keeps each model's
      own initialisation).
    learning_rate: learning rate handed to every model.
  """

  def __init__(self, train_data, ids, Learner, initial_params, learning_rate):
    self.ids = ids
    self.learner = Learner
    # Kept on the instance so set_clients() uses the constructor arguments
    # instead of whatever notebook globals happen to carry the same names.
    self.initial_params = initial_params
    self.learning_rate = learning_rate
    self.clients = self.set_clients(train_data)
    self.model = self.learner(initial_params, learning_rate)

  def set_clients(self, train_data):
    """Build one ``Client`` (with its own model) per id, in the order of ``self.ids``."""
    clients = []
    for client_id in self.ids:
      c = Client(client_id, Client_Data(train_data, client_id))
      c.create_model(self.learner, self.initial_params, self.learning_rate)
      clients.append(c)
    return clients

  def send_model(self):
    """Broadcast the current global parameters to every client."""
    params = self.model.print_params()
    for c in self.clients:
      c.update_model(params)

  def select_client(self, select_rate):
    """Pick a random subset of clients without replacement.

    Args:
      select_rate: fraction of clients to pick; at least one client is always
        selected.

    Returns:
      List of the selected ``Client`` objects. The subset size is also stored
      in ``self.num_clients``.
    """
    # Built-in int() replaces np.int, which was removed in NumPy 1.24.
    self.num_clients = max(1, int(np.floor(len(self.ids) * select_rate)))
    # Sample positions rather than ids, so no id -> position search is needed.
    chosen = np.random.choice(len(self.ids), self.num_clients, replace=False)
    return [self.clients[int(k)] for k in chosen]

## Define Training model

Logistic Regression Model

In [None]:
# Number of input features of the model: the column count of the encoded training matrix.
INPUT_SIZE = new_X_train.shape[1]

In [None]:
class my_LogR(nn.Module):
  """Two-class logistic regression: one linear layer trained with cross-entropy.

  Parameters are exchanged as a list of two NumPy arrays:
  ``[weight flattened to length 2 * input_size, bias of length 2]``.

  NOTE: ``train`` below replaces ``nn.Module.train(mode)``, so the usual
  ``model.train()`` / ``model.eval()`` mode switches must not be used on
  this class.

  Args:
    initial_params: parameter list in the format above, or None to keep the
      default initialisation of ``nn.Linear``.
    learning_rate: step size used by Adagrad in ``train``; exposed as ``self.lr``.
    input_size: number of input features. None (the default) falls back to
      the notebook-level ``INPUT_SIZE``.

  In the methods below ``client_data`` is any object with array attributes
  ``X`` (features) and ``y`` (integer labels) that supports ``len()``.
  """

  def __init__(self, initial_params, learning_rate, input_size=None):
    super().__init__()

    # Resolved at construction time so the class itself does not require the
    # global, and so models of other widths can be built explicitly.
    self.input_size = INPUT_SIZE if input_size is None else input_size
    self.lr = learning_rate
    self.loss_fn = nn.CrossEntropyLoss()

    self.linear = nn.Linear(self.input_size, 2)

    if initial_params is not None:
      self.assign_params(initial_params)

  def forward(self, xb):
    """Return the two class logits for a batch of feature rows."""
    return self.linear(xb)

  def train(self, client_data, epoch, batch_size):
    """Run ``epoch`` passes of mini-batch Adagrad over ``client_data``.

    A fresh optimizer is created on every call, so Adagrad's accumulated
    state does not carry over between calls.
    """
    X = torch.as_tensor(client_data.X)
    y = torch.as_tensor(client_data.y)
    train_dl = DataLoader(TensorDataset(X, y), batch_size = batch_size)
    opt = optim.Adagrad(self.parameters(), lr=self.lr)

    for _ in range(epoch):
      for xb, yb in train_dl:
        # Clear first so no stale gradient can leak into the first step.
        opt.zero_grad()
        loss = self.loss_fn(self.forward(xb), yb)
        loss.backward()
        opt.step()

  def sgd(self, client_data, batch_size):
    """Compute one stochastic gradient without changing the parameters.

    A single random mini-batch (sampled without replacement) is drawn and the
    gradient of its loss is returned.

    Returns:
      ``(loss_value, grads)`` where ``loss_value`` is the loss over ALL of
      ``client_data`` and ``grads`` is ``[flattened weight grad, bias grad]``.
    """
    X = torch.as_tensor(client_data.X)
    y = torch.as_tensor(client_data.y)
    sampler = BatchSampler(RandomSampler(TensorDataset(X, y)),
        batch_size = batch_size, drop_last = False)
    batch_idx = next(iter(sampler))
    xb = X[batch_idx].view(-1, self.input_size)
    yb = y[batch_idx].view(-1)

    loss = self.loss_fn(self.forward(xb), yb)
    # autograd.grad returns fresh tensors and leaves .grad untouched, so
    # repeated calls cannot accumulate into each other's results.
    grad_w, grad_b = torch.autograd.grad(loss, (self.linear.weight, self.linear.bias))
    grads = [grad_w.reshape(-1).numpy(), grad_b.numpy()]

    loss_value = self.solve_loss(client_data)

    return loss_value, grads

  def assign_params(self, params):
    """Replace weight and bias with copies of ``params`` (format: see class docstring)."""
    weight = np.asarray(params[0]).reshape(2, self.input_size)
    self.linear.weight = nn.Parameter(torch.tensor(weight, dtype=torch.float32))
    self.linear.bias = nn.Parameter(torch.tensor(np.asarray(params[1]), dtype=torch.float32))

  def print_params(self):
    """Return ``[flattened weight, bias]`` as NumPy arrays.

    Copies are returned so that later in-place optimizer steps cannot change
    arrays a caller is still holding.
    """
    params = [self.linear.weight.detach().numpy().reshape(-1).copy(),
            self.linear.bias.detach().numpy().copy()]
    return params

  def solve_loss(self, client_data):
    """Cross-entropy loss over all of ``client_data`` as a Python float."""
    X = torch.as_tensor(client_data.X)
    y_true = torch.as_tensor(client_data.y)

    with torch.no_grad():  # evaluation only: no autograd graph needed
      return self.loss_fn(self.forward(X), y_true).item()

  def predict_accu(self, client_data):
    """Fraction of examples in ``client_data`` classified correctly."""
    X = torch.as_tensor(client_data.X)

    with torch.no_grad():
      # argmax of the logits equals argmax of their softmax.
      y_pred = self.forward(X).argmax(dim = 1).numpy()

    accuracy = np.count_nonzero(y_pred == client_data.y) / len(client_data)
    return accuracy

  # Despite its name this also returns the ACCURACY. It is provided because
  # other cells of the notebook call the model through this method name.
  predict_error = predict_accu

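Old version of the model (a plain-object wrapper around `nn.Linear`). Note that running this cell rebinds the name `my_LogR`, so it replaces the `nn.Module` version defined above.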

In [None]:
class my_LogR(object):
  """Two-class logistic regression kept as a plain object around ``nn.Linear``.

  Parameters are exchanged as a list of two NumPy arrays:
  ``[weight flattened to length 2 * input_size, bias of length 2]``.

  Args:
    initial_params: parameter list in the format above, or None to keep the
      default initialisation of ``nn.Linear``.
    learning_rate: step size used by Adagrad in ``train``; exposed as ``self.lr``.
    input_size: number of input features. None (the default) falls back to
      the notebook-level ``INPUT_SIZE``, looked up when the object is built
      rather than when the class is defined.

  In the methods below ``client_data`` is any object with array attributes
  ``X`` (features) and ``y`` (integer labels) that supports ``len()``.
  """

  def __init__(self, initial_params, learning_rate,
               input_size = None):
    self.input_size = INPUT_SIZE if input_size is None else input_size

    self.lr = learning_rate
    self.loss_fn = nn.CrossEntropyLoss()

    self.linear = nn.Linear(self.input_size, 2)

    if initial_params is not None:
      self.assign_params(initial_params)

  def model(self, xb):
    """Return the two class logits for a batch of feature rows."""
    return self.linear(xb)

  def train(self, client_data, epoch, batch_size):
    """Run ``epoch`` passes of mini-batch Adagrad over ``client_data``.

    A fresh optimizer is created on every call, so Adagrad's accumulated
    state does not carry over between calls.
    """
    X = torch.as_tensor(client_data.X)
    y = torch.as_tensor(client_data.y)
    train_dl = DataLoader(TensorDataset(X, y), batch_size = batch_size)
    opt = optim.Adagrad((self.linear.weight,
              self.linear.bias), lr=self.lr)

    for _ in range(epoch):
      for xb, yb in train_dl:
        # Clear first so no stale gradient can leak into the first step.
        opt.zero_grad()
        loss = self.loss_fn(self.model(xb), yb)
        loss.backward()
        opt.step()

  def sgd(self, client_data, batch_size):
    """Compute one stochastic gradient without changing the parameters.

    A single random mini-batch (sampled without replacement) is drawn and the
    gradient of its loss is returned.

    Returns:
      ``(loss_value, grads)`` where ``loss_value`` is the loss over ALL of
      ``client_data`` and ``grads`` is ``[flattened weight grad, bias grad]``.
    """
    X = torch.as_tensor(client_data.X)
    y = torch.as_tensor(client_data.y)
    sampler = BatchSampler(RandomSampler(TensorDataset(X, y)),
        batch_size = batch_size, drop_last = False)
    batch_idx = next(iter(sampler))
    xb = X[batch_idx].view(-1, self.input_size)
    yb = y[batch_idx].view(-1)

    loss = self.loss_fn(self.model(xb), yb)
    # autograd.grad returns fresh tensors and leaves .grad untouched, so
    # repeated calls cannot accumulate into each other's results.
    grad_w, grad_b = torch.autograd.grad(loss, (self.linear.weight, self.linear.bias))
    grads = [grad_w.reshape(-1).numpy(), grad_b.numpy()]

    loss_value = self.solve_loss(client_data)

    return loss_value, grads

  def assign_params(self, params):
    """Replace weight and bias with copies of ``params`` (format: see class docstring)."""
    weight = np.asarray(params[0]).reshape(2, self.input_size)
    self.linear.weight = nn.Parameter(torch.tensor(weight, dtype=torch.float32))
    self.linear.bias = nn.Parameter(torch.tensor(np.asarray(params[1]), dtype=torch.float32))

  def print_params(self):
    """Return ``[flattened weight, bias]`` as NumPy arrays.

    Copies are returned so that later in-place optimizer steps cannot change
    arrays a caller is still holding.
    """
    params = [self.linear.weight.detach().numpy().reshape(-1).copy(),
            self.linear.bias.detach().numpy().copy()]
    return params

  def solve_loss(self, client_data):
    """Cross-entropy loss over all of ``client_data`` as a Python float."""
    X = torch.as_tensor(client_data.X)
    y_true = torch.as_tensor(client_data.y)

    with torch.no_grad():  # evaluation only: no autograd graph needed
      return self.loss_fn(self.model(X), y_true).item()

  def predict_error(self, client_data):
    """Fraction of examples classified correctly.

    Despite the method name, the returned value is the ACCURACY, not the
    error rate.
    """
    X = torch.as_tensor(client_data.X)

    with torch.no_grad():
      # argmax of the logits equals argmax of their softmax.
      y_pred = self.model(X).argmax(dim = 1).numpy()

    accuracy = np.count_nonzero(y_pred == client_data.y) / len(client_data)
    return accuracy

  # Alias provided because other cells of the notebook call the model
  # through this method name.
  predict_accu = predict_error

## Aggregation

FedAvg

In [None]:
class WAVGM(Server):
  """FedAvg server: averages client parameters weighted by local example counts."""

  def __init__(self, train_data, ids, Learner, initial_params, learning_rate):
    super().__init__(train_data, ids, Learner, initial_params, learning_rate)

  def train(self, epoch, batch_size, select_rate=1):
    """Run one communication round and return the sum of the local losses.

    The global model is broadcast, a subset of clients is selected and trained
    locally (each local loss is printed), then the results are aggregated.
    """
    self.send_model()
    self.select_clients = self.select_client(select_rate)
    self.client_nums = []
    round_losses = []
    for participant in self.select_clients:
      n_examples, local_loss = participant.train(epoch, batch_size)
      round_losses.append(local_loss)
      self.client_nums.append(n_examples)
      print('Client: {}, Local_loss: {:f}'.format(participant.id, local_loss))
    self.aggregate()
    return np.sum(round_losses)

  def aggregate(self):
    """Set the global parameters to the example-weighted mean of the selected clients'.

    Returns the new parameter list.
    """
    averaged = [np.zeros(len(param)) for param in self.model.print_params()]
    n_total = sum(self.client_nums)
    for k, participant in enumerate(self.select_clients):
      share = self.client_nums[k] / n_total
      local_params = participant.model.print_params()
      for i in range(len(averaged)):
        averaged[i] = averaged[i] + share * local_params[i]
    self.model.assign_params(averaged)
    return averaged

Agnostic federated learning

In [None]:
def project(y):
  ''' Euclidean projection of a vector onto the probability simplex.

  algorithm comes from:
  https://arxiv.org/pdf/1309.1541.pdf

  With u the entries of y sorted in decreasing order, rho is the largest
  j (1-based) such that u_j + (1 - sum(u_1..u_j)) / j > 0, and the result is
  max(y + lambda, 0) with lambda = (1 - sum(u_1..u_rho)) / rho.

  Args:
    y: 1-D sequence of finite numbers.

  Returns:
    A list of non-negative floats, in the order of y, that sums to 1
    (an empty list for empty input).

  Raises:
    ValueError: if no valid rho exists, which happens for non-finite input.
  '''
  v = np.asarray(y, dtype=float)
  if v.size == 0:
    return []
  u = np.sort(v)[::-1]
  # Running sum INCLUDING the current element. Summing only the preceding
  # elements makes the test too lenient and can pick a rho whose projection
  # does not sum to one.
  css = np.cumsum(u)
  j = np.arange(1, v.size + 1)
  valid = u + (1.0 - css) / j > 0
  if not valid.any():
    raise ValueError('project() needs finite input values')
  rho = int(j[valid][-1])
  lambda_ = (1.0 - css[rho - 1]) / rho
  return np.maximum(v + lambda_, 0.0).tolist()

class AFL(Server):
  """Agnostic federated learning server.

  Keeps one mixture weight per client (``self.lambdas``, initially uniform),
  raises the weights of clients with larger loss, and moves the global model
  along the lambda-weighted sum of the client gradients.
  """

  def __init__(self, train_data, ids, Learner, initial_params, learning_rate, lambda_learning_rate):
    super().__init__(train_data, ids, Learner, initial_params, learning_rate)
    n_clients = len(self.clients)
    self.lambdas = np.ones(n_clients) / n_clients
    self.lambda_lr = lambda_learning_rate

  def train(self, batch_size):
    """Run one round over ALL clients and return the sum of their losses."""
    self.send_model()
    client_losses, client_grads = [], []
    for member in self.clients:
      _, member_loss, member_grads = member.sgd(batch_size)
      client_losses.append(member_loss)
      client_grads.append(member_grads)
    self.aggregate(client_losses, client_grads)
    return np.sum(client_losses)

  def aggregate(self, losses, grads):
    """Update the mixture weights, then take one gradient step on the global model.

    The weights are moved by ``lambda_lr * losses`` and passed through
    ``project``; the freshly updated weights are the ones used to mix the
    gradients. Returns the new parameter list.
    """
    self.lambdas = project(self.lambdas + self.lambda_lr * np.array(losses))

    mixed_grad = [np.zeros(len(g)) for g in grads[0]]
    for weight, client_grad in zip(self.lambdas, grads):
      for i, g in enumerate(client_grad):
        mixed_grad[i] = mixed_grad[i] + g * weight

    step = self.model.lr
    updated = list(self.model.print_params())
    for i, current in enumerate(updated):
      updated[i] = current - step * mixed_grad[i]
    self.model.assign_params(updated)
    return updated

q-Fair Federated Learning (q-FFL)

In [None]:
class qFFL(Server):
  """q-Fair federated learning server (q-FedAvg update rule).

  Args:
    q: fairness exponent applied to the client losses.
    L: Lipschitz-constant estimate used to scale the client updates.
    train_data, ids, Learner, initial_params, learning_rate: passed to ``Server``.
  """

  def __init__(self, q, L, train_data, ids, Learner, initial_params, learning_rate):
    self.L = L
    self.q = q
    super(qFFL, self).__init__(train_data, ids, Learner, initial_params, learning_rate)

  def train(self, epoch, batch_size, select_rate=1):
    """Run one communication round and return the sum of the post-training local losses.

    For every selected client the loss BEFORE local training is recorded in
    ``self.start_losses``; ``aggregate`` uses it to weight the update.
    """
    self.send_model()
    self.select_clients = self.select_client(select_rate)
    self.start_losses = []
    losses = []
    for client in self.select_clients:
      start_loss = client.model.solve_loss(client.client_data)
      self.start_losses.append(start_loss)
      _, client_loss = client.train(epoch, batch_size)
      losses.append(client_loss)
    self.aggregate()
    return np.sum(losses)

  def aggregate(self):
    """Apply the q-FedAvg update to the global model.

    With w the parameters sent out, w_k client k's parameters after local
    training and F_k its loss before training:

      dw_k  = L * (w - w_k)
      Delta = sum_k F_k**q * dw_k
      h     = sum_k ( q * F_k**(q-1) * ||dw_k||**2 + L * F_k**q )
      w_new = w - Delta / h

    Returns:
      The new parameter list.

    Raises:
      ValueError: if h is zero (for example q = 0 together with L = 0).
    """
    start_params = self.model.print_params()
    weighted_delta = [np.zeros(len(param)) for param in start_params]
    h_ = 0.0
    for loss, c in zip(self.start_losses, self.select_clients):
      client_params = c.model.print_params()
      # Per-client scaled update; its norm must be taken per client, not over
      # the running loss-weighted sum.
      delta_w = [self.L * (start - local) for start, local in zip(start_params, client_params)]
      sq_norm = sum(float(np.sum(np.square(d))) for d in delta_w)
      loss_q = np.power(loss, self.q)
      for i, d in enumerate(delta_w):
        weighted_delta[i] = weighted_delta[i] + loss_q * d
      h_ += self.q * np.power(loss, self.q - 1) * sq_norm + self.L * loss_q
    if h_ == 0:
      raise ValueError('q-FFL normaliser h is zero; check q and L')
    total_params = [start - delta / h_ for start, delta in zip(start_params, weighted_delta)]
    self.model.assign_params(total_params)
    return total_params

## Training

FedAvg

In [None]:
# Hyper-parameters for the FedAvg (WAVGM) run.
# One client is created per entry of IDs.
IDs = ('doctor', 'nondoctor')
# None keeps each model's own initialisation.
initial_params = None
learning_rate = 0.001
# Local epochs and mini-batch size passed to WAVGM.train in every round.
EPOCH = 5
BATCH_SIZE = 10

In [None]:
# FedAvg server over the encoded training split.
LR_WAVGM_fit = WAVGM(
    train_data = (new_X_train, new_y_train),
    ids = IDs,
    Learner = my_LogR,
    initial_params = initial_params,
    learning_rate = learning_rate,
)

In [None]:
# Run ITER FedAvg rounds, reporting the summed client loss after each one.
ITER = 10
progress_line = '----------iter: {:d}/{:d}, loss: {:f}----------'
for i in range(ITER):
  loss = LR_WAVGM_fit.train(EPOCH, BATCH_SIZE)
  print(progress_line.format(i+1, ITER, loss))

AFL

In [None]:
# Hyper-parameters for the AFL run.
IDs = ('doctor', 'nondoctor')
# None keeps each model's own initialisation.
initial_params = None
learning_rate = 0.05
# EPOCH is set here but AFL.train only takes the batch size.
EPOCH = 5
BATCH_SIZE = 20

In [None]:
# AFL server over the encoded training split.
LR_AFL_fit = AFL(
    train_data = (new_X_train, new_y_train),
    ids = IDs,
    Learner = my_LogR,
    initial_params = initial_params,
    learning_rate = learning_rate,
    lambda_learning_rate = 0.005,
)

In [None]:
# Run ITER AFL rounds; progress is reported on every 10th round only.
ITER = 2000
for i in range(ITER):
  loss = LR_AFL_fit.train(BATCH_SIZE)
  if (i+1) % 10:
    continue
  print('----------iter: {:d}/{:d}, loss: {:f}----------'.format(i+1, ITER, loss))

In [None]:
# Final mixture weights, one per client in the order of IDs.
LR_AFL_fit.lambdas

[0.9090541379898784, 0.09094586201012152]

q-FFL

In [None]:
# Hyper-parameters for the q-FFL run.
IDs = ('doctor', 'nondoctor')
# None keeps each model's own initialisation.
initial_params = None
learning_rate = 0.1
# q1 is not referenced by the cells shown in this notebook; q2 is the value used.
q1 = 0.01
q2 = 2
EPOCH = 2
BATCH_SIZE = 10
# Throw-away server built with q = 0 and L = 0: only its freshly initialised
# first client is used below, to estimate the constant L.
LR_qFFL_fit = qFFL(0, 0, (new_X_train, new_y_train), IDs, my_LogR, initial_params, learning_rate)

In [None]:
# One stochastic gradient of the first client at its current parameters
# (the example count and the loss are discarded).
_,_,grads = LR_qFFL_fit.clients[0].sgd(BATCH_SIZE)

In [None]:
# First sample point for the L estimate: flattened gradient and parameters.
g1 = np.concatenate(grads).ravel()
w1 = np.concatenate(LR_qFFL_fit.clients[0].model.print_params()).ravel()

In [None]:
# Second throw-away server, so its first client starts from new parameters.
LR_qFFL_fit = qFFL(
    q = 0,
    L = 0,
    train_data = (new_X_train, new_y_train),
    ids = IDs,
    Learner = my_LogR,
    initial_params = initial_params,
    learning_rate = learning_rate,
)

In [None]:
# One stochastic gradient of the new first client
# (the example count and the loss are discarded).
_,_,grads = LR_qFFL_fit.clients[0].sgd(BATCH_SIZE)

In [None]:
# Second sample point for the L estimate: flattened gradient and parameters.
g2 = np.concatenate(grads).ravel()
w2 = np.concatenate(LR_qFFL_fit.clients[0].model.print_params()).ravel()

In [None]:
# Two-point estimate of L: ||g2 - g1|| / ||w2 - w1||.
grad_gap = np.sqrt(np.sum(np.square(g2 - g1)))
param_gap = np.sqrt(np.sum(np.square(w2 - w1)))
L = grad_gap / param_gap

In [None]:
# Show the estimated constant.
L

0.413

In [None]:
# Hyper-parameters for the actual q-FFL training run.
IDs = ('doctor', 'nondoctor')
# None keeps each model's own initialisation.
initial_params = None
learning_rate = 0.1
# q1 is not referenced by the cells shown in this notebook; q2 is the value used.
q1 = 0.01
q2 = 2
EPOCH = 2
BATCH_SIZE = 10

In [None]:
# q-FFL server using exponent q2 and the estimated constant L.
LR_qFFL_fit = qFFL(
    q = q2,
    L = L,
    train_data = (new_X_train, new_y_train),
    ids = IDs,
    Learner = my_LogR,
    initial_params = initial_params,
    learning_rate = learning_rate,
)

In [None]:
# Train for at most ITER rounds. `losses` is a sliding window over the last 10
# round losses (seeded with a strictly decreasing dummy sequence); training
# stops as soon as every loss in the window is larger than the one before it.
ITER = 50
losses = np.arange(10, 0, -1, dtype = np.float64)
for i in range(ITER):
  loss = LR_qFFL_fit.train(EPOCH, BATCH_SIZE)
  losses = np.roll(losses, -1)
  losses[-1] = loss
  if not np.any(losses[:-1] >= losses[1:]):
    break
  print('----------iter: {:d}/{:d}, loss: {:f}----------'.format(i+1, ITER, loss))

----------iter: 1/50, loss: 0.826870----------
----------iter: 2/50, loss: 0.825796----------
----------iter: 3/50, loss: 0.825058----------
----------iter: 4/50, loss: 0.824283----------
----------iter: 5/50, loss: 0.823461----------
----------iter: 6/50, loss: 0.822604----------
----------iter: 7/50, loss: 0.820979----------
----------iter: 8/50, loss: 0.820078----------
----------iter: 9/50, loss: 0.818900----------
----------iter: 10/50, loss: 0.818300----------
----------iter: 11/50, loss: 0.817891----------
----------iter: 12/50, loss: 0.817589----------
----------iter: 13/50, loss: 0.817538----------
----------iter: 14/50, loss: 0.817581----------
----------iter: 15/50, loss: 0.817723----------
----------iter: 16/50, loss: 0.817900----------
----------iter: 17/50, loss: 0.818033----------
----------iter: 18/50, loss: 0.818186----------
----------iter: 19/50, loss: 0.818325----------
----------iter: 20/50, loss: 0.818424----------
----------iter: 21/50, loss: 0.818498----------


## Testing

In [None]:
# Evaluate the FedAvg global model separately on the two test groups.
final_model = LR_WAVGM_fit.model
doc_test, nondoc_test = (Client_Data((new_X_test, new_y_test), group) for group in IDs[:2])

acc1, acc2 = (final_model.predict_accu(subset) for subset in (doc_test, nondoc_test))
report = 'doctor data prediction accuracy: {:2f} \nnondoctor data prediction accuracy: {:2f}'
print(report.format(acc1*100, acc2*100))

doctor data prediction accuracy: 69.060773 
nondoctor data prediction accuracy: 83.496894


In [None]:
# Evaluate the AFL global model separately on the two test groups.
final_model = LR_AFL_fit.model
doc_test, nondoc_test = (Client_Data((new_X_test, new_y_test), group) for group in IDs[:2])

acc1, acc2 = (final_model.predict_error(subset) for subset in (doc_test, nondoc_test))
report = 'doctor data prediction accuracy: {:2f} \nnondoctor data prediction accuracy: {:2f}'
print(report.format(acc1*100, acc2*100))

doctor data prediction accuracy: 74.585635 
nondoctor data prediction accuracy: 82.347826


In [None]:
# Evaluate the q-FFL global model separately on the two test groups.
final_model = LR_qFFL_fit.model
doc_test, nondoc_test = (Client_Data((new_X_test, new_y_test), group) for group in IDs[:2])

acc1, acc2 = (final_model.predict_accu(subset) for subset in (doc_test, nondoc_test))
report = 'doctor data prediction accuracy: {:2f} \nnondoctor data prediction accuracy: {:2f}'
print(report.format(acc1*100, acc2*100))

doctor data prediction accuracy: 72.375691 
nondoctor data prediction accuracy: 81.403727


## Replications

FedAvg

In [None]:
# Hyper-parameters for the FedAvg replication study.
IDs = ('doctor', 'nondoctor')
# None keeps each model's own initialisation.
initial_params = None
learning_rate = 0.001
EPOCH = 5
BATCH_SIZE = 10

In [None]:
# Test subsets for the two client groups, built once and reused by every replication.
doc_test, nondoc_test = (Client_Data((new_X_test, new_y_test), group) for group in IDs[:2])

In [None]:
# Repeat the FedAvg experiment REP times from scratch. Each row of Results1
# holds [overall accuracy, doctor accuracy, non-doctor accuracy] for one run.
REP = 100
ITER = 10
Results1 = np.zeros((REP, 3))
n_doc, n_nondoc = len(doc_test), len(nondoc_test)
for i in range(REP):
  LR_WAVGM_fit = WAVGM((new_X_train, new_y_train), IDs, my_LogR, initial_params, learning_rate)
  for _ in range(ITER):
    loss = LR_WAVGM_fit.train(EPOCH, BATCH_SIZE)
  final_model = LR_WAVGM_fit.model

  acc1 = final_model.predict_error(doc_test)
  acc2 = final_model.predict_error(nondoc_test)
  # Overall accuracy: the group accuracies weighted by group size.
  acc0 = (n_doc * acc1 + n_nondoc * acc2) / (n_doc + n_nondoc)
  print('rep {:d} \ntotal prediction accuracy: {:2f} \ndoctor data prediction accuracy: {:2f} \nnondoctor data prediction accuracy: {:2f}'.format(i, acc0, acc1, acc2))

  Results1[i] = acc0, acc1, acc2


In [None]:
# One row per replication; columns: overall, doctor and non-doctor accuracy.
print(Results1)

In [None]:
# Column-wise mean over the replications.
Results1.mean(axis = 0)

array([0.83354954, 0.69546961, 0.83510186])

In [None]:
# Column-wise standard deviation over the replications.
Results1.std(axis = 0)

array([0.00041558, 0.01097433, 0.00042961])

AFL

In [None]:
# Hyper-parameters for the AFL replication study.
IDs = ('doctor', 'nondoctor')
# None keeps each model's own initialisation.
initial_params = None
learning_rate = 0.05
# EPOCH is set here but AFL.train only takes the batch size.
EPOCH = 5
BATCH_SIZE = 20

In [None]:
# Test subsets for the two client groups, built once and reused by every replication.
doc_test, nondoc_test = (Client_Data((new_X_test, new_y_test), group) for group in IDs[:2])

In [None]:
# Repeat the AFL experiment REP times from scratch. Each row of Results2
# holds [overall accuracy, doctor accuracy, non-doctor accuracy] for one run.
REP = 100
ITER = 2000
Results2 = np.zeros((REP, 3))
n_doc, n_nondoc = len(doc_test), len(nondoc_test)
for i in range(REP):
  LR_AFL_fit = AFL((new_X_train, new_y_train), IDs, my_LogR, initial_params, learning_rate, lambda_learning_rate = 0.005)
  for _ in range(ITER):
    loss = LR_AFL_fit.train(BATCH_SIZE)
  final_model = LR_AFL_fit.model

  acc1 = final_model.predict_error(doc_test)
  acc2 = final_model.predict_error(nondoc_test)
  # Overall accuracy: the group accuracies weighted by group size.
  acc0 = (n_doc * acc1 + n_nondoc * acc2) / (n_doc + n_nondoc)
  print('rep {:d} \ntotal prediction accuracy: {:2f} \ndoctor data prediction accuracy: {:2f} \nnondoctor data prediction accuracy: {:2f}'.format(i, acc0, acc1, acc2))

  Results2[i] = acc0, acc1, acc2

rep 0 
total prediction accuracy: 0.819114 
doctor data prediction accuracy: 0.729282 
nondoctor data prediction accuracy: 0.820124
rep 1 
total prediction accuracy: 0.820773 
doctor data prediction accuracy: 0.712707 
nondoctor data prediction accuracy: 0.821988
rep 2 
total prediction accuracy: 0.818562 
doctor data prediction accuracy: 0.701657 
nondoctor data prediction accuracy: 0.819876
rep 3 
total prediction accuracy: 0.819237 
doctor data prediction accuracy: 0.729282 
nondoctor data prediction accuracy: 0.820248
rep 4 
total prediction accuracy: 0.819913 
doctor data prediction accuracy: 0.723757 
nondoctor data prediction accuracy: 0.820994
rep 5 
total prediction accuracy: 0.821018 
doctor data prediction accuracy: 0.729282 
nondoctor data prediction accuracy: 0.822050
rep 6 
total prediction accuracy: 0.821264 
doctor data prediction accuracy: 0.723757 
nondoctor data prediction accuracy: 0.822360
rep 7 
total prediction accuracy: 0.819667 
doctor data prediction accuracy:

In [None]:
# One row per replication; columns: overall, doctor and non-doctor accuracy.
print(Results2)

[[0.81911431 0.72928177 0.82012422]
 [0.82077268 0.71270718 0.82198758]
 [0.81856151 0.70165746 0.81987578]
 [0.81923715 0.72928177 0.82024845]
 [0.81991278 0.72375691 0.82099379]
 [0.82101836 0.72928177 0.82204969]
 [0.82126405 0.72375691 0.82236025]
 [0.8196671  0.72375691 0.82074534]
 [0.81463055 0.72928177 0.81559006]
 [0.81856151 0.70718232 0.81981366]
 [0.8196671  0.71823204 0.82080745]
 [0.82163258 0.72375691 0.82273292]
 [0.81843867 0.72375691 0.81950311]
 [0.82193968 0.71823204 0.82310559]
 [0.82046557 0.71823204 0.82161491]
 [0.82273816 0.72928177 0.82378882]
 [0.81978994 0.72375691 0.82086957]
 [0.82126405 0.74585635 0.8221118 ]
 [0.82138689 0.71823204 0.82254658]
 [0.81874578 0.72928177 0.81975155]
 [0.81929857 0.73480663 0.82024845]
 [0.82249248 0.74033149 0.82341615]
 [0.82181684 0.73480663 0.82279503]
 [0.82071126 0.71270718 0.82192547]
 [0.82064984 0.72375691 0.82173913]
 [0.8225539  0.74585635 0.82341615]
 [0.82181684 0.71823204 0.82298137]
 [0.82009705 0.72375691 0.82

In [None]:
# Column-wise mean over the replications.
Results2.mean(axis = 0)

array([0.81956882, 0.72662983, 0.82061366])

In [None]:
# Column-wise standard deviation over the replications.
Results2.std(axis = 0)

array([0.00238644, 0.0103207 , 0.00242359])

q-FFL

In [None]:
# Hyper-parameters for the q-FFL replication study.
IDs = ('doctor', 'nondoctor')
# None keeps each model's own initialisation.
initial_params = None
learning_rate = 0.1
# q1 is not referenced by the cells shown in this notebook; q2 is the value used.
q1 = 0.01
q2 = 2
EPOCH = 2
BATCH_SIZE = 10

In [None]:
# Test subsets for the two client groups, built once and reused by every replication.
doc_test, nondoc_test = (Client_Data((new_X_test, new_y_test), group) for group in IDs[:2])

In [None]:
# Repeat the q-FFL experiment REP times from scratch. Each row of Results3
# holds [overall accuracy, doctor accuracy, non-doctor accuracy] for one run.
REP = 100
ITER = 50
Results3 = np.zeros((REP, 3))
n_doc, n_nondoc = len(doc_test), len(nondoc_test)
for i in range(REP):
  LR_qFFL_fit = qFFL(q2, L, (new_X_train, new_y_train), IDs, my_LogR, initial_params, learning_rate)
  # Sliding window over the last 10 round losses, seeded with a strictly
  # decreasing dummy sequence; stop once the whole window is strictly increasing.
  losses = np.arange(10, 0, -1, dtype = np.float64)
  for _ in range(ITER):
    loss = LR_qFFL_fit.train(EPOCH, BATCH_SIZE)
    losses = np.roll(losses, -1)
    losses[-1] = loss
    if not np.any(losses[:-1] >= losses[1:]):
      break

  final_model = LR_qFFL_fit.model
  acc1 = final_model.predict_accu(doc_test)
  acc2 = final_model.predict_accu(nondoc_test)
  # Overall accuracy: the group accuracies weighted by group size.
  acc0 = (n_doc * acc1 + n_nondoc * acc2) / (n_doc + n_nondoc)
  print('rep {:d} \ntotal prediction accuracy: {:2f} \ndoctor data prediction accuracy: {:2f} \nnondoctor data prediction accuracy: {:2f}'.format(i, acc0, acc1, acc2))

  Results3[i] = acc0, acc1, acc2

rep 0 
total prediction accuracy: 0.810147 
doctor data prediction accuracy: 0.723757 
nondoctor data prediction accuracy: 0.811118
rep 1 
total prediction accuracy: 0.813586 
doctor data prediction accuracy: 0.723757 
nondoctor data prediction accuracy: 0.814596
rep 2 
total prediction accuracy: 0.828512 
doctor data prediction accuracy: 0.729282 
nondoctor data prediction accuracy: 0.829627
rep 3 
total prediction accuracy: 0.813402 
doctor data prediction accuracy: 0.723757 
nondoctor data prediction accuracy: 0.814410
rep 4 
total prediction accuracy: 0.812726 
doctor data prediction accuracy: 0.723757 
nondoctor data prediction accuracy: 0.813727
rep 5 
total prediction accuracy: 0.829556 
doctor data prediction accuracy: 0.729282 
nondoctor data prediction accuracy: 0.830683
rep 6 
total prediction accuracy: 0.812788 
doctor data prediction accuracy: 0.723757 
nondoctor data prediction accuracy: 0.813789
rep 7 
total prediction accuracy: 0.813709 
doctor data prediction accuracy:

In [None]:
# One row per replication; columns: overall, doctor and non-doctor accuracy.
print(Results3)

[[0.8101468  0.72375691 0.81111801]
 [0.81358639 0.72375691 0.81459627]
 [0.82851176 0.72928177 0.82962733]
 [0.81340213 0.72375691 0.81440994]
 [0.81272649 0.72375691 0.81372671]
 [0.82955592 0.72928177 0.83068323]
 [0.81278791 0.72375691 0.81378882]
 [0.81370923 0.72375691 0.8147205 ]
 [0.8130336  0.72375691 0.81403727]
 [0.81094527 0.72375691 0.81192547]
 [0.81082243 0.72375691 0.81180124]
 [0.8291874  0.72928177 0.83031056]
 [0.81266507 0.72375691 0.8136646 ]
 [0.81346355 0.72375691 0.81447205]
 [0.81340213 0.72375691 0.81440994]
 [0.81284933 0.72375691 0.81385093]
 [0.81309502 0.72375691 0.81409938]
 [0.81260365 0.72375691 0.81360248]
 [0.81284933 0.72375691 0.81385093]
 [0.81364781 0.72375691 0.81465839]
 [0.81266507 0.72375691 0.8136646 ]
 [0.81450771 0.72375691 0.81552795]
 [0.80977827 0.72375691 0.81074534]
 [0.81278791 0.72375691 0.81378882]
 [0.80996253 0.72375691 0.81093168]
 [0.8092869  0.71823204 0.81031056]
 [0.81450771 0.72375691 0.81552795]
 [0.81364781 0.72375691 0.81

In [None]:
# Column-wise mean over the replications.
Results3.mean(axis = 0)

array([0.81449481, 0.72425414, 0.81550932])

In [None]:
# Column-wise standard deviation over the replications.
Results3.std(axis = 0)

array([0.00535218, 0.00192896, 0.00539236])