In [1]:
import sys
import os

# For Jupyter notebooks
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import commonly used modules
from data_utils import *
from model_utils import *
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from numba import cuda
import gc
from tensorflow.keras import backend as K
import numpy as np

from config_utils import load_config

# Load default config
CONFIG = load_config('../config/fl_template_config.yaml')

# Export commonly used items
__all__ = ['CONFIG']
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

2025-03-27 14:49:03.092863: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743108543.164071   19610 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743108543.188907   19610 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743108543.325030   19610 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743108543.325167   19610 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743108543.325171   19610 computation_placer.cc:177] computation placer alr

## FedAvg

### Federated Learning Averaging Pseudocode

### Server Initialization:
Initialize global model weights W₀

### Main Federated Learning Loop:
For each round t = 1 to T:
    
    1. Select a subset of clients Sₜ (or use all available clients)
    
    2. Broadcast the current global model weights Wₜ to all clients in Sₜ

    3. For each client k in Sₜ (executed in parallel):
         - Perform a local update:
           Wₜᵏ = ClientUpdate(Wₜ, local_dataₖ)
         - Return updated local model weights Wₜᵏ along with the number of samples nₖ

    4. Aggregate the updated weights using weighted averaging (FedAvg):
         - Compute total samples: N = Σₖ₍∈Sₜ₎ nₖ
         - Update global model weights:
           Wₜ₊₁ = Σₖ₍∈Sₜ₎ (nₖ / N) * Wₜᵏ

Return final global model weights W_T

### ClientUpdate Function:
Function ClientUpdate(W, local_data):
    
    - Set W_local = W
    
    - For each local epoch e = 1 to E:
         - For each batch b in local_data:
              - Compute gradient: grad = ∇(loss(W_local, b))
              - Update local weights: W_local = W_local - learning_rate * grad
              
    Return W_local

#### Central Model Initialization

In [2]:
import os
import numpy as np
from tensorflow.keras.models import Model
from typing import Dict
import os
import numpy as np
from typing import List, Dict
from tensorflow.keras.models import Model

def save_model_weights(model: Model, client_id: str, folder_dir: str):
    """Save a model's weights to a .npy file in folder_dir."""
    os.makedirs(folder_dir, exist_ok=True)
    weights = model.get_weights()
    filepath = os.path.join(folder_dir, f"{client_id}_weights.npz")
    np.savez(filepath, *weights)

def load_all_weights(folder_dir: str) -> Dict[str, list]:
    client_weights = {}
    for file in os.listdir(folder_dir):
        if file.endswith("_weights.npz"):
            client_id = file.replace("_weights.npz", "")
            data = np.load(os.path.join(folder_dir, file))
            weights = [data[f'arr_{i}'] for i in range(len(data.files))]
            client_weights[client_id] = weights
    return client_weights

def load_model_weights(weight_path: str):
    data = np.load(weight_path)
    weights = [data[f'arr_{i}'] for i in range(len(data.files))]
    return weights

def fed_avg_from_disk(folder_dir: str, client_scores: Dict[str, float]):
    """
    Load all weights from folder_dir, perform FedAvg, and update the model.
    - folder_dir: Directory containing client weights
    - client_scores: Dict mapping client_id to weight (e.g., sample count)
    """
    client_weights = load_all_weights(folder_dir)
    total_score = sum(client_scores.values())
    weighted_avg = None

    for client_id, weights in client_weights.items():
        score = client_scores.get(client_id, 0) / total_score
        if weighted_avg is None:
            weighted_avg = [score * layer for layer in weights]
        else:
            for i in range(len(weights)):
                weighted_avg[i] += score * weights[i]

    return weighted_avg

In [4]:
import sys
import os

# For Jupyter notebooks
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import commonly used modules
from data_utils import *
from model_utils import *
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from numba import cuda
import gc
from tensorflow.keras import backend as K
import numpy as np

from config_utils import load_config

# Load default config
CONFIG = load_config('../config/fl_template_config.yaml')

# Export commonly used items
__all__ = ['CONFIG']
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

history_dic = {}
model = create_model()
trainx,trainy = load_training_data(f'../experiments/{CONFIG['experiment_name']}/processed_data/init.npy')
history =  train_model(model,trainx,trainy)
history_dic['init'] = history
save_model(model,f'../experiments/{CONFIG['experiment_name']}/models/central_model.keras')
save_model_weights(model,'central_model',f'../experiments/fed_ml_experiment_1/model_weights/')
del model
K.clear_session()
gc.collect()

for i in range(CONFIG.get('num_clients',5)):
    model = create_model(seed = i+1)
    model._name = f'client_{i+1}'
    save_model(model,f'../experiments/{CONFIG['experiment_name']}/models/{model._name}.keras')
    history_dic[model._name] = {}
    del model
    K.clear_session()
    gc.collect()

for i in range(1,CONFIG['num_rounds']+1):
    
    local_training_history = {}
    
    central_weights = load_model_weights(f'../experiments/fed_ml_experiment_1/model_weights/central_model_weights.npz')
    
    for client in range(1,CONFIG['num_clients']+1):
        client_name = f'client_{client}'
        model_path = f'../experiments/{CONFIG['experiment_name']}/models/client_model_{client}.keras'
        model = load_model_from_disk(model_path)
        trainx,trainy = load_training_data(f'../experiments/{CONFIG['experiment_name']}/processed_data/client_{client}/round{i}.npy')

        #update model with global weight
        model.set_weights(central_weights)

        #training model on local data
        history = train_model(model,trainx,trainy)
        
        # local training history for central server
        local_training_history[client_name] = len(trainx)
        
        #model training history based on round
        history_dic[client_name][f'round_{i}'] = history

        #save model
        save_model(model,model_path)
        save_model_weights(model,client_name,f'../experiments/{CONFIG['experiment_name']}/model_weights/client_weights/')

        #clear model
        del model
        K.clear_session()
        gc.collect()
        
    ## Aggregate Weighted average
        fed_avg_from_disk(f'../experiments/{CONFIG['experiment_name']}/model_weights/client_weights/',local_training_history)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1743108662.660449   19610 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5582 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Epoch 1/10


I0000 00:00:1743108665.796871   19709 service.cc:152] XLA service 0x7fd50c00f980 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743108665.797011   19709 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3070 Laptop GPU, Compute Capability 8.6
2025-03-27 14:51:05.869099: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743108666.208393   19709 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m5/7[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 15ms/step - accuracy: 0.6344 - auc: 0.5248 - loss: 1.1898 - precision: 0.7197 - recall: 0.4259

I0000 00:00:1743108669.775522   19709 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 727ms/step - accuracy: 0.6235 - auc: 0.5452 - loss: 1.1168 - precision: 0.6877 - recall: 0.5327
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6147 - auc: 0.8527 - loss: 0.5932 - precision: 0.5867 - recall: 0.9803
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9183 - auc: 0.9530 - loss: 0.3457 - precision: 0.9136 - recall: 0.9321
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8928 - auc: 0.9644 - loss: 0.2260 - precision: 0.8990 - recall: 0.8816
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9097 - auc: 0.9635 - loss: 0.2466 - precision: 0.9131 - recall: 0.9173
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9465 - auc: 0.9799 - loss: 0.1888 - precision: 0.9452 - recall: 0.9607
E

0

#### Client Models Initialization

In [6]:
for i in range(CONFIG.get('num_clients',5)):
    model = create_model(seed = i+1)
    model._name = f'client_{i+1}'
    save_model(model,f'../experiments/{CONFIG['experiment_name']}/models/{model._name}.keras')
    history_dic[model._name] = {}
    del model
    K.clear_session()
    gc.collect()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


💾 Model saved to: ../experiments/fed_ml_experiment_1/models/client_1.keras
💾 Model saved to: ../experiments/fed_ml_experiment_1/models/client_2.keras
💾 Model saved to: ../experiments/fed_ml_experiment_1/models/client_3.keras
💾 Model saved to: ../experiments/fed_ml_experiment_1/models/client_4.keras
💾 Model saved to: ../experiments/fed_ml_experiment_1/models/client_5.keras


#### Local Train Client models and Global Aggregation

In [9]:
for i in range(1,CONFIG['num_rounds']+1):
    
    local_training_history = {}
    
    central_weights = load_model_weights(f'../experiments/fed_ml_experiment_1/model_weights/central_model_weights.npz')
    
    for client in range(1,CONFIG['num_clients']+1):
        client_name = f'client_{client}'
        model_path = f'../experiments/{CONFIG['experiment_name']}/models/client_model_{client}.keras'
        model = load_model_from_disk(model_path)
        trainx,trainy = load_training_data(f'../experiments/{CONFIG['experiment_name']}/processed_data/client_{client}/round{i}.npy')

        #update model with global weight
        model.set_weights(central_weights)

        #training model on local data
        history = train_model(model,trainx,trainy)
        
        # local training history for central server
        local_training_history[client_name] = len(trainx)
        
        #model training history based on round
        history_dic[client_name][f'round_{i}'] = history

        #save model
        save_model(model,model_path)
        save_model_weights(model,client_name,f'../experiments/{CONFIG['experiment_name']}/model_weights/client_weights/')

        #clear model
        del model
        K.clear_session()
        gc.collect()
        
    ## Aggregate Weighted average
        fed_avg_from_disk(f'../experiments/{CONFIG['experiment_name']}/model_weights/client_weights/',local_training_history)

📦 Model loaded from: ../experiments/fed_ml_experiment_1/models/client_model_1.keras
Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 422ms/step - accuracy: 0.8505 - auc: 0.9044 - loss: 0.5914 - precision: 0.8555 - recall: 0.8811
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8401 - auc: 0.9668 - loss: 0.2947 - precision: 0.8459 - recall: 0.8895 
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9494 - auc: 0.9900 - loss: 0.1770 - precision: 0.9800 - recall: 0.9291
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8982 - auc: 0.9696 - loss: 0.2310 - precision: 0.9398 - recall: 0.8669
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8999 - auc: 0.9866 - loss: 0.1818 - precision: 0.9455 - recall: 0.8737
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

In [10]:
history_dic

{'init': <keras.src.callbacks.history.History at 0x7fd623ba7f80>,
 'client_model_1': {},
 'client_model_2': {},
 'client_model_3': {},
 'client_model_4': {},
 'client_model_5': {},
 'client_1': {'round_1': <keras.src.callbacks.history.History at 0x7fd56451a480>,
  'round_2': <keras.src.callbacks.history.History at 0x7fd518242090>,
  'round_3': <keras.src.callbacks.history.History at 0x7fd51908c920>,
  'round_4': <keras.src.callbacks.history.History at 0x7fd51aa4fbf0>,
  'round_5': <keras.src.callbacks.history.History at 0x7fd5196b1460>,
  'round_6': <keras.src.callbacks.history.History at 0x7fd51029c920>,
  'round_7': <keras.src.callbacks.history.History at 0x7fd51226a090>,
  'round_8': <keras.src.callbacks.history.History at 0x7fd5134b6d50>,
  'round_9': <keras.src.callbacks.history.History at 0x7fd51203a480>,
  'round_10': <keras.src.callbacks.history.History at 0x7fd4d68b8920>},
 'client_2': {'round_1': <keras.src.callbacks.history.History at 0x7fd58443c440>,
  'round_2': <keras.src

In [51]:
save_model_weights(model,'client_1','../experiments/fed_ml_experiment_1/model_weights/client_weights')
save_model_weights(model,'client_2','../experiments/fed_ml_experiment_1/model_weights/client_weights')
save_model_weights(model,'client_3','../experiments/fed_ml_experiment_1/model_weights/client_weights')

In [52]:
weights = load_model_weights('../experiments/fed_ml_experiment_1/model_weights/client_weights/client_1_weights.npz')

In [58]:
client_scores = {
    "client_1":1,
    "client_2":1,
    "client_3":3,
}
avg = fed_avg_from_disk('../experiments/fed_ml_experiment_1/model_weights/client_weights/',client_scores)

[array([[[[ 0.10040593, -0.06353728, -0.10933036,  0.12259068,
           -0.00766506, -0.08605964,  0.01866475, -0.09534668,
            0.05138377, -0.0103279 , -0.10331773, -0.07375038,
            0.09672261, -0.0781009 ,  0.00721616,  0.08586128,
            0.1030162 ,  0.01892288, -0.06343832, -0.01815641,
           -0.08469357,  0.01792   , -0.07774559, -0.03122025,
            0.00111724, -0.09655579, -0.0255607 , -0.07044204,
           -0.01266762, -0.01372485, -0.02474237, -0.1419318 ],
          [ 0.12325376, -0.04511715, -0.10395281, -0.07862566,
           -0.05807437, -0.04046233, -0.10179633, -0.13416488,
           -0.09728852,  0.05542186,  0.02888137,  0.08262793,
           -0.07425914,  0.08575132,  0.10671686, -0.03873877,
           -0.00642935,  0.11563914,  0.02881887,  0.08512037,
           -0.11567664,  0.07414966, -0.04480654, -0.06381175,
           -0.08882004,  0.0845435 ,  0.00664516,  0.10599338,
            0.08692296,  0.01289197, -0.02154739, -0.0

## FedAvg Conditioned

### Modified FedAvg with Performance-based Weighting

### Server Initialization:
Initialize global model weights W₀

### Main Federated Learning Loop:

For each round t = 1 to T:

    1. Select a subset of clients Sₜ (or use all available clients)
    2. Broadcast the current global model weights Wₜ to all clients in Sₜ

    3. For each client k in Sₜ (executed in parallel):
         - Perform a local update:
           Wₜᵏ = ClientUpdate(Wₜ, local_dataₖ)
         - Evaluate the updated model on a common validation set:
           aₖ = Evaluate(Wₜᵏ, validation_set)  # e.g., accuracy
         - Return updated model Wₜᵏ, number of samples nₖ, and accuracy aₖ

    4. Aggregate the updated weights:
         - Compute the performance-weighted sum of samples:
           Total_weight = Σₖ₍∈Sₜ₎ (nₖ × aₖ)
         - Update global model weights:
           Wₜ₊₁ = Σₖ₍∈Sₜ₎ [(nₖ × aₖ) / Total_weight] × Wₜᵏ

Return final global model weights W_T

### ClientUpdate Function:

Function ClientUpdate(W, local_data):
    
    Set W_local = W
    For each local epoch e = 1 to E:
         For each batch b in local_data:
              - Compute gradient: grad = ∇(loss(W_local, b))
              - Update local weights: W_local = W_local - learning_rate * grad
    Return W_local


## Asynchronous Weight Updating Federated Learning

### Federated Learning with Partial Weight Sharing (Deep Layers Updated Frequently)

### Server Initialization:
Initialize global shallow weights W_shallow₀
Initialize global deep weights W_deep₀
Set shallow_update_interval K  # e.g., update shallow layers every K rounds, update deep layers every round

### Main Federated Learning Loop:

For each round t = 1 to T:

    1. Determine if this is a shallow update round:
         If (t mod K == 0):
             shallow_update = True
         Else:
             shallow_update = False

    2. Client Selection & Broadcast:
         Select a subset of clients Sₜ
         For each client in Sₜ, send:
             - Current deep weights: W_deepₜ  (always sent)
             - If shallow_update is True, also send current shallow weights: W_shallowₜ
             - Otherwise, clients use their locally stored shallow weights

    3. Clients' Local Update (executed in parallel):
         For each client k in Sₜ:
             - If shallow_update is True:
                  (W_shallowₜ^k, W_deepₜ^k) = ClientUpdate(W_shallowₜ, W_deepₜ, local_dataₖ, update_shallow=True)
             - Else:
                  (W_shallow_local, W_deepₜ^k) = ClientUpdate(W_shallow_local, W_deepₜ, local_dataₖ, update_shallow=False)
             - Evaluate the full updated model on a common validation set:
                  aₖ = Evaluate(FullModel(W_shallow, W_deep), validation_set)  # e.g., accuracy
             - Return to server:
                  - For shallow layers: if update_shallow is True, return updated W_shallowₜ^k; otherwise, no update (or the previous version)
                  - Updated deep weights: W_deepₜ^k
                  - Local sample count nₖ and performance metric aₖ

    4. Server Aggregation:
         # Always aggregate deep layers:
         Compute Total_weight_deep = Σₖ₍∈Sₜ₎ (nₖ × aₖ)
         Update global deep weights:
             W_deepₜ₊₁ = Σₖ₍∈Sₜ₎ [ (nₖ × aₖ) / Total_weight_deep ] × W_deepₜ^k

         # Aggregate shallow layers only on shallow update rounds:
         If shallow_update is True:
             Compute Total_weight_shallow = Σₖ₍∈Sₜ₎ (nₖ × aₖ)
             Update global shallow weights:
                 W_shallowₜ₊₁ = Σₖ₍∈Sₜ₎ [ (nₖ × aₖ) / Total_weight_shallow ] × W_shallowₜ^k
         Else:
             W_shallowₜ₊₁ = W_shallowₜ  # Keep shallow layers unchanged

    Return final global model: {W_shallow_T, W_deep_T}


### ClientUpdate Function:

Function ClientUpdate(shallow_weights, deep_weights, local_data, update_shallow):

    If update_shallow is True:
         Set local_shallow = shallow_weights    # Received from server
    Else:
         Set local_shallow = local_shallow      # Use previously stored shallow weights locally

    Set local_deep = deep_weights              # Always use the latest deep weights from server

    For each local epoch e = 1 to E:
         For each batch b in local_data:
              If update_shallow is True:
                  - Compute gradients for both layers:
                        grad_shallow, grad_deep = ∇(loss(FullModel(local_shallow, local_deep), b))
                  - Update shallow layers:
                        local_shallow = local_shallow - learning_rate * grad_shallow
              Else:
                  - Compute gradient only for deep layers (shallow remains fixed):
                        grad_deep = ∇(loss(FullModel(local_shallow, local_deep), b))
              - Update deep layers:
                    local_deep = local_deep - learning_rate * grad_deep

    If update_shallow is True:
         Return (local_shallow, local_deep)
    Else:
         Return (local_shallow, local_deep)  # Note: shallow remains unchanged from before the round
