In [1]:
pip install tensorflow_federated

Collecting tensorflow_federated
  Downloading tensorflow_federated-0.75.0-py3-none-manylinux_2_31_x86_64.whl.metadata (4.0 kB)
Collecting cachetools~=5.3 (from tensorflow_federated)
  Downloading cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)
Collecting dp-accounting==0.4.3 (from tensorflow_federated)
  Downloading dp_accounting-0.4.3-py3-none-any.whl.metadata (1.8 kB)
Collecting farmhashpy==0.4.0 (from tensorflow_federated)
  Downloading farmhashpy-0.4.0.tar.gz (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting google-vizier==0.1.11 (from tensorflow_federated)
  Downloading google_vizier-0.1.11-py3-none-any.whl.metadata (10 kB)
Collecting jaxlib==0.4.14 (from tensorflow_federated)
  Downloading jaxlib-0.4.14-cp310-cp310-manylinux2014_x86_64.whl.metadata (2.0 kB)
Collecting jax==0.4.14 (from tensorflow_federated)
  Downloading jax-0.4.1

In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

# Check if GPU is available
device = '/GPU:0' if tf.config.experimental.list_physical_devices('GPU') else '/CPU:0'

# Define paths
HAM_PATH_PART1 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1"
HAM_PATH_PART2 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2"
METADATA_PATH = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv"

def load_data_ham(path, metadata_path):
    print("Loading data from:", path)
    data = []
    metadata = pd.read_csv(metadata_path)
    for img_file in os.listdir(path):
        img_id = img_file.split('.')[0]
        metadata_row = metadata[metadata['image_id'] == img_id]
        if not metadata_row.empty:
            lesion_type = metadata_row.iloc[0]['dx']
            img_path = os.path.join(path, img_file)
            img_resize = process_image(img_path)
            data.append([img_resize, lesion_type])
    return data

def process_image(img_path, target_size=(64, 192)):
    img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img_resize = cv2.resize(img_array, target_size)
    img_normalize = img_resize.astype(np.float32) / 255.0
    return img_normalize

ham_data_part1 = load_data_ham(HAM_PATH_PART1, METADATA_PATH)
ham_data_part2 = load_data_ham(HAM_PATH_PART2, METADATA_PATH)

all_ham_data = ham_data_part1 + ham_data_part2
column_names = ['image_data', 'skin_disease']
ham_df = pd.DataFrame(all_ham_data, columns=column_names)
print("Structure of the merged HAM dataset:")
print(ham_df.head())

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform the 'skin_disease' column
ham_df['skin_disease'] = label_encoder.fit_transform(ham_df['skin_disease'])

# Print the encoded DataFrame
print(ham_df.head())
df=pd.DataFrame(ham_df)

2024-04-17 14:38:20.747894: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-17 14:38:20.747953: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-17 14:38:20.747995: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading data from: /kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1
Loading data from: /kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2
Structure of the merged HAM dataset:
                                          image_data skin_disease
0  [[0.6666667, 0.6666667, 0.6745098, 0.654902, 0...           nv
1  [[0.5882353, 0.5921569, 0.5529412, 0.6039216, ...           nv
2  [[0.6666667, 0.6862745, 0.6745098, 0.7137255, ...           nv
3  [[0.54901963, 0.53333336, 0.5176471, 0.4941176...          bkl
4  [[0.5803922, 0.6039216, 0.57254905, 0.6, 0.623...          bkl
                                          image_data  skin_disease
0  [[0.6666667, 0.6666667, 0.6745098, 0.654902, 0...             5
1  [[0.5882353, 0.5921569, 0.5529412, 0.6039216, ...             5
2  [[0.6666667, 0.6862745, 0.6745098, 0.7137255, ...             5
3  [[0.54901963, 0.53333336, 0.5176471, 0.4941176...             2
4  [[0.5803922, 0.6039216, 0.57254905, 0.6, 0.623...             2

In [3]:
import random

num_client = 3

df["client"] = ["client_{}".format(random.randint(0, num_client)) for _ in range(df.shape[0])]

In [4]:
client_id_colname = 'client'

client_ids = df[client_id_colname].unique()

train_client_ids = pd.DataFrame(client_ids).sample(frac=0.8).values.ravel().tolist()
test_client_ids = [x for x in client_ids if x not in train_client_ids]

In [5]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

In [6]:
from collections import OrderedDict
import tensorflow as tf
import numpy as np

NUM_EPOCHS = 1
SHUFFLE_BUFFER = 100

def create_tf_dataset_for_client_fn(client_id):
    client_data = dataframe[dataframe[client_id_colname] == client_id]
    client_data_dict = OrderedDict()
    client_data_dict["image_data"] = np.array(client_data['image_data'].values.tolist(), dtype="float32")
    client_data_dict["skin_disease"] = np.array(client_data['skin_disease'].values.tolist(), dtype="int32")

    dataset = tf.data.Dataset.from_tensor_slices(client_data_dict)
    dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(1).repeat(NUM_EPOCHS)
    return dataset

In [7]:
import tensorflow_federated as tff
dataframe = train_df
train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=train_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

dataframe = test_df
test_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=test_client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn)

In [8]:
train_data.element_type_structure

OrderedDict([('image_data',
              TensorSpec(shape=(None, 192, 64), dtype=tf.float32, name=None)),
             ('skin_disease',
              TensorSpec(shape=(None,), dtype=tf.int32, name=None))])

In [9]:
import collections

NUM_EPOCHS = 1
BATCH_SIZE = 10
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

def preprocess(dataset):
    def batch_format_fn(element):
        return collections.OrderedDict(x=tf.reshape(element['image_data'], [-1,64,64,3]),
                                       y=tf.reshape(element['skin_disease'], [-1, 1]))

    return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(
      BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

In [10]:
from tqdm import tqdm

def make_federated_data(client_data, client_ids):
    return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in tqdm(client_ids)]

In [11]:
NUM_CLIENTS = len(np.unique(train_df[client_id_colname]))

sample_clients = train_data.client_ids[0:NUM_CLIENTS]

federated_train_data = make_federated_data(train_data, sample_clients)

print('Number of client datasets: {l}'.format(l=len(federated_train_data)))
print('First dataset: {d}'.format(d=federated_train_data[0]))

100%|██████████| 3/3 [00:00<00:00, 20.46it/s]

Number of client datasets: 3
First dataset: <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])>





In [49]:
from keras.applications import EfficientNetB0
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

model = Model(inputs=base_model.input, outputs=predictions)

def create_keras_model(input_shape=(64, 64, 3), num_classes=7):
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(64, 64, 3))

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    return model

NameError: name 'base_model' is not defined

In [None]:
example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])

example_element = next(iter(example_dataset))

example_element['skin_disease'].numpy()

In [50]:
preprocessed_example_dataset = preprocess(example_dataset)

sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))

In [51]:
preprocessed_example_dataset.element_spec

OrderedDict([('x',
              TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None)),
             ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])

In [52]:
from keras.metrics import Recall, Precision

def model_fn():
    keras_model = create_keras_model()
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=preprocessed_example_dataset.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [62]:
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.001))

In [63]:
str(iterative_process.initialize.type_signature)

'( -> <global_model_weights=<trainable=<float32[7,7,3,64],float32[64],float32[64],float32[64],float32[64],float32[1,1,64,128],float32[128],float32[128],float32[3,3,128,32],float32[96],float32[96],float32[1,1,96,128],float32[128],float32[128],float32[3,3,128,32],float32[128],float32[128],float32[1,1,128,128],float32[128],float32[128],float32[3,3,128,32],float32[160],float32[160],float32[1,1,160,128],float32[128],float32[128],float32[3,3,128,32],float32[192],float32[192],float32[1,1,192,128],float32[128],float32[128],float32[3,3,128,32],float32[224],float32[224],float32[1,1,224,128],float32[128],float32[128],float32[3,3,128,32],float32[256],float32[256],float32[1,1,256,128],float32[128],float32[128],float32[1,1,128,128],float32[128],float32[128],float32[3,3,128,32],float32[160],float32[160],float32[1,1,160,128],float32[128],float32[128],float32[3,3,128,32],float32[192],float32[192],float32[1,1,192,128],float32[128],float32[128],float32[3,3,128,32],float32[224],float32[224],float32[1,1,22

In [64]:
state = iterative_process.initialize()

In [65]:
NUM_ROUNDS = 2
for round_num in range(0, NUM_ROUNDS):
  result = iterative_process.next(state, federated_train_data)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

round  0, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.5667125), ('loss', 1.9504842), ('num_examples', 727), ('num_batches', 74)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.5818432), ('loss', 1.9135108), ('num_examples', 727), ('num_batches', 74)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])


In [66]:
evaluation_process = tff.learning.algorithms.build_fed_eval(model_fn)

In [67]:
evaluation_state = evaluation_process.initialize()
model_weights = iterative_process.get_model_weights(train_state)
evaluation_state = evaluation_process.set_model_weights(evaluation_state, model_weights)

In [68]:
federated_test_data = make_federated_data(test_data, test_client_ids)

len(federated_test_data), federated_test_data[0]

100%|██████████| 1/1 [00:00<00:00, 27.78it/s]


(1,
 <_PrefetchDataset element_spec=OrderedDict([('x', TensorSpec(shape=(None, 64, 64, 3), dtype=tf.float32, name=None)), ('y', TensorSpec(shape=(None, 1), dtype=tf.int32, name=None))])>)

In [69]:
evaluation_output = evaluation_process.next(evaluation_state, federated_test_data)

In [70]:
str(evaluation_output.metrics)

"OrderedDict([('distributor', ()), ('client_work', OrderedDict([('eval', OrderedDict([('current_round_metrics', OrderedDict([('sparse_categorical_accuracy', 0.6727273), ('loss', 4.5433383), ('num_examples', 275), ('num_batches', 28)])), ('total_rounds_metrics', OrderedDict([('sparse_categorical_accuracy', 0.6727273), ('loss', 4.5433383), ('num_examples', 275), ('num_batches', 28)]))]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', ())])"