In [None]:
import os
import numpy as np
import dp_accounting
import pandas as pd
#import logging

# Suppress TensorFlow logging (must be before importing tensorflow)
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  
# Suppress Python logging from TensorFlow
#logging.getLogger('tensorflow').setLevel(logging.FATAL)
import tensorflow as tf
import tensorflow_federated as tff
import tensorflow_privacy
from tensorflow_privacy import DPKerasSGDOptimizer, compute_dp_sgd_privacy_statement
 

# List available physical devices
gpus = tf.config.list_physical_devices('GPU')
print("Available GPUs:")
print(gpus)
# Mount Google Drive

dataset_path = "COVID-19_Radiography_Dataset"

# Define categories (Folders inside dataset)
categories = ["COVID", "Lung_Opacity", "Normal", "Viral Pneumonia"]
# Function to load and preprocess an image
def load_image(image_path, label):
    img = tf.io.read_file(image_path)  # Read image file
    img = tf.image.decode_jpeg(img, channels=3)  # Decode as RGB image
    img = tf.image.resize(img, (64, 64))  # Resize to 224x224for correct sizing in tensor flow
    img = img / 255.0  # Normalize pixel values between 0 and 1. 224/255 = inbetween 0 and 1
    return img, label
    # Create empty lists to store image paths and labels
image_paths = []
labels = []
k = 0
# Loop through each category and load image paths
i = 0
for i, category in enumerate(categories):
    category_path = os.path.join(dataset_path, category)
    category_path = os.path.join(category_path, "images")
    #print(category_path)
    #print(i)
    for img_name in os.listdir(category_path):
        #load_image(category_path, img_name)
        image_paths.append(os.path.join(category_path, img_name))
        labels.append(i)


# Convert lists to TensorFlow Dataset
image_dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels)) #creates a dataset
image_dataset = image_dataset.map(load_image).batch(200)#110 is ok
print(image_dataset)

#

<_BatchDataset element_spec=(TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>


In [10]:
#image_dataset is a 2dem array
#simulate clients
os.environ['TFF_CPP_MIN_LOG_LEVEL'] = '3' 
NUM_CLIENTS = 5
client_datasets = []
for i in range(NUM_CLIENTS):
  client_dataset = image_dataset.shard(NUM_CLIENTS, i)  # Split dataset
  client_datasets.append(client_dataset)#2dem array
  #print(len(client_dataset))
def create_model():
    model = tf.keras.Sequential([
    # 1st Convolutional Layer: Extracts basic features (edges, textures) #224 height and width and 3 channels for rgb color
    tf.keras.layers.Conv2D(8, (3,3), activation='relu', input_shape=(64, 64, 3), name='convo',padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2), name='maxpool'),  # Reduces image size
    # 2nd Convolutional Layer: Extracts more complex features
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', name='convo2'),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2),name='maxpool2'),  # Reduces image size
    tf.keras.layers.Flatten(),  # Converts 2D feature maps into 1D array
    # Fully Connected Layer: Decides important features
    tf.keras.layers.Dense(16, activation='relu',name= 'dense'),
    # Output Layer: 4 classes (COVID, Lung Opacity, Normal, Viral Pneumonia)
    tf.keras.layers.Dense(4, activation='softmax', name='dense2')
    ])
    #print(model.summary())
    #model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model
def model_fn():
    keras_model = create_model()  # Create the CNN model for images
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=image_dataset.element_spec,  # Use dataset format
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),  # Match loss with model
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]  # Track accuracy
    )
# Initialize Federated Averaging (FedAvg) process
trainer = tff.learning.algorithms.build_weighted_fed_avg(model_fn, client_optimizer_fn=tff.learning.optimizers.build_sgdm(learning_rate=0.0005, momentum=0.7))
# Initialize server state
state = trainer.initialize()
#Use DPKerasSGDOptimizer to add clipping and noise for differential privacy
dpopt = DPKerasSGDOptimizer(l2_norm_clip=0.8, noise_multiplier=0.05, num_microbatches=1)
dpstatement = compute_dp_sgd_privacy_statement(number_of_examples=1000, batch_size=2, noise_multiplier=0.3, num_epochs=1, delta=1/1000) #delta is typically 1/ num_of_examples
print(dpstatement)
# Simulate federated training round #This method updates weights of the model
def client_update(client_dataset, model_weights):
    model = create_model()
    model.set_weights(model_weights)  # Set weights from server
    model.compile(optimizer=dpopt, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False))  # Compile model with optimizer and loss
    model.fit(client_dataset, epochs=3)  # Train locally for 1 epoch
    model.summary()
    return model.get_weights()  # Return updated weights
#

DP-SGD performed over 1000 examples with 2 examples per iteration, noise
multiplier 0.3 for 1 epochs with microbatching, and no bound on number of
examples per user.

This privacy guarantee protects the release of all model checkpoints in addition
to the final model.

Example-level DP with add-or-remove-one adjacency at delta = 0.001 computed with
RDP accounting:
    Epsilon with each example occurring once per epoch:        45.239
    Epsilon assuming Poisson sampling (*):                    116.412

No user-level privacy guarantee is possible without a bound on the number of
examples per user.

(*) Poisson sampling is not usually done in training pipelines, but assuming
that the data was randomly shuffled, it is believed that the actual epsilon
should be closer to this value than the conservative assumption of an arbitrary
data order.



In [3]:
os.environ['TFF_CPP_MIN_LOG_LEVEL'] = '3' 
client_weights = []
for client in client_datasets:
  new_weights = client_update(client, state.global_model_weights.trainable)
  client_weights.append(new_weights)


Epoch 1/3
Epoch 2/3
Epoch 3/3
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 convo (Conv2D)              (None, 64, 64, 8)         224       
                                                                 
 maxpool (MaxPooling2D)      (None, 32, 32, 8)         0         
                                                                 
 convo2 (Conv2D)             (None, 30, 30, 16)        1168      
                                                                 
 maxpool2 (MaxPooling2D)     (None, 15, 15, 16)        0         
                                                                 
 flatten (Flatten)           (None, 3600)              0         
                                                                 
 dense (Dense)               (None, 16)                57616     
                                                                 
 dense2 (Dense)           

In [4]:
#[arr1,arr2,arr3]axis 1 -> # client weights array [arr1,arr2,arr3]
#[#   ,#   ,#   ]          #print(len(client_weights[0])) output = 8 y
#[#   ,#   ,#   ]          #print(len(client_weights[0][0])) output = 3 x
#[#   ,#   ,#   ]
#[#   ,#   ,#   ]
#axis0^
#1.get average weight of first column, 2nd and third. These are the averages ofthe layers.
os.environ['TFF_CPP_MIN_LOG_LEVEL'] = '3' 
averages = []
layer_weights = []
# Each client_weights is a list of arrays, where each array represents a layer's weights
for layer_index in range(len(client_weights[0])):  #[arr1,arr2,arr3] # take length of client_weights[0] to beacause all arrays should be equal in size #8
    #layer_index is what row you are on
    layer_weights = []  # Collect this layer’s weights from all clients

    # Collect the same layer weights from each client(column in the array)
    for client in client_weights:
        layer_weights.append(client[layer_index])

    # Average the collected weights for this layer
    layer_average = np.mean(layer_weights, axis=0)
    averages.append(layer_average)  # Store averaged layer weights

    # Debugging: Print info about this layer’s weights
    #print(f"Collected weights shape: {[w.shape for w in layer_weights]}")
    #print(f"Averaged weights shape: {layer_average.shape}\n")

federated_data = [client_datasets[i] for i in range(NUM_CLIENTS)]
print(federated_data) 
# Update global model weights with the averaged weights
for round_num in range(1, 4):
    print(f"Round {round_num}")
    state, metrics = trainer.next(state,federated_data)
    print(f"Round {round_num} metrics:", metrics)



print("Federated learning round complete!")

[<_ShardDataset element_spec=(TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>, <_ShardDataset element_spec=(TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>, <_ShardDataset element_spec=(TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>, <_ShardDataset element_spec=(TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>, <_ShardDataset element_spec=(TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>]
Round 1
Round 1 metrics: OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.13630995), ('loss', 1.5503649), ('num_examples', 21165), ('num_batches', 106)]))])), ('aggregator',