In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
import pandas as pd
import xgboost as xgb
from imblearn.over_sampling import RandomOverSampler

df = pd.read_csv('diabetes_prediction_dataset.csv')
df.drop('smoking_history',axis=1,inplace=True)
categorical_columns = df.select_dtypes(include=['object']).columns
df = pd.get_dummies(df, columns=categorical_columns)

X = df.drop('diabetes', axis=1)
y = df['diabetes']

oversampler = RandomOverSampler()
X_resampled, y_resampled = oversampler.fit_resample(X, y)

df = pd.concat([X_resampled, y_resampled], axis=1)

X = df.drop('diabetes', axis=1)
y = df['diabetes']


In [2]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, StratifiedKFold


stand = preprocessing.StandardScaler()


X = stand.fit_transform(df.drop('diabetes', axis=1).values)
y = df['diabetes'].values
num_clients = 5

def split_data_among_clients(X, y, num_clients=num_clients):
    skf = StratifiedKFold(n_splits=num_clients, shuffle=True, random_state=42)
    splits = list(skf.split(X, y))

    clients_data = [(X[train_idx], y[train_idx]) for train_idx, _ in splits]
    return clients_data

In [6]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scores = {}
client_weights = {}

# Assume split_data_among_clients function is defined elsewhere
clients_data = split_data_among_clients(X, y)

def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=1)
    recall = recall_score(y_test, y_pred, zero_division=1)
    f1 = f1_score(y_test, y_pred, zero_division=1)
    return accuracy, precision, recall, f1

def create_model(input_shape):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=input_shape),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model

# Hyperparameters
learning_rate = 0.0005
epochs = 10
batch_size = 32

for i, client_data in enumerate(clients_data):
    print(f"Client {i+1} Data:")
    X_train, y_train = client_data
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    # Feature scaling
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    ann = create_model((X_train_scaled.shape[1],))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    ann.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    scores[f'client_{i+1}'] = {}
    client_weights[f'client_{i+1}'] = {}

    # Learning rate scheduler
    def lr_schedule(epoch):
        return learning_rate * (0.1 ** (epoch // 10))

    lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

    for epoch in range(epochs):
        # Gradually increase the training data size for each epoch
        train_size = int((epoch + 1) / epochs * len(X_train_scaled))
        X_train_subset = X_train_scaled[:train_size]
        y_train_subset = y_train[:train_size]

        history = ann.fit(
            X_train_subset, y_train_subset,
            epochs=1,
            batch_size=batch_size,
            validation_data=(X_val_scaled, y_val),
            callbacks=[lr_scheduler],
            verbose=0
        )

        accuracy, precision, recall, f1 = test_metrics(ann, X_val_scaled, y_val)
        scores[f'client_{i+1}'][epoch] = (accuracy, precision, recall, f1)
        print(f"Epoch {epoch+1}: Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")
        client_weights[f'client_{i+1}'][epoch] = ann.get_weights()

    # Final evaluation on validation set
    final_accuracy, final_precision, final_recall, final_f1 = test_metrics(ann, X_val_scaled, y_val)
    print(f"\nFinal Validation Metrics for Client {i+1}:")
    print(f"Accuracy: {final_accuracy:.4f}")
    print(f"Precision: {final_precision:.4f}")
    print(f"Recall: {final_recall:.4f}")
    print(f"F1 Score: {final_f1:.4f}\n")

Client 1 Data:
Epoch 1: Accuracy: 0.8798, Precision: 0.9019, Recall: 0.8521, F1: 0.8763
Epoch 2: Accuracy: 0.8856, Precision: 0.8833, Recall: 0.8884, F1: 0.8858
Epoch 3: Accuracy: 0.8877, Precision: 0.8965, Recall: 0.8764, F1: 0.8863
Epoch 4: Accuracy: 0.8950, Precision: 0.8913, Recall: 0.8996, F1: 0.8954
Epoch 5: Accuracy: 0.8967, Precision: 0.8805, Recall: 0.9177, F1: 0.8987
Epoch 6: Accuracy: 0.9000, Precision: 0.8855, Recall: 0.9187, F1: 0.9018
Epoch 7: Accuracy: 0.9011, Precision: 0.8814, Recall: 0.9268, F1: 0.9035
Epoch 8: Accuracy: 0.9026, Precision: 0.8815, Recall: 0.9299, F1: 0.9051
Epoch 9: Accuracy: 0.9028, Precision: 0.8790, Recall: 0.9341, F1: 0.9057
Epoch 10: Accuracy: 0.9034, Precision: 0.8800, Recall: 0.9340, F1: 0.9062

Final Validation Metrics for Client 1:
Accuracy: 0.9034
Precision: 0.8800
Recall: 0.9340
F1 Score: 0.9062

Client 2 Data:
Epoch 1: Accuracy: 0.8859, Precision: 0.8835, Recall: 0.8891, F1: 0.8863
Epoch 2: Accuracy: 0.8864, Precision: 0.8906, Recall: 0.88

KeyboardInterrupt: 

In [4]:
import xgboost as xgb
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scores = {}
client_models = {}

# Assume split_data_among_clients function is defined elsewhere
clients_data = split_data_among_clients(X, y)

def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=1)
    recall = recall_score(y_test, y_pred, zero_division=1)
    f1 = f1_score(y_test, y_pred, zero_division=1)
    return accuracy, precision, recall, f1

# Hyperparameters
params = {
    'objective': 'binary:logistic',
    'learning_rate': 0.0005,
    'max_depth': 6,
    'min_child_weight': 1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'n_estimators': 10
}

for i, client_data in enumerate(clients_data):
    print(f"Client {i+1} Data:")
    X_train, y_train = client_data
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    # Feature scaling
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    dtrain = xgb.DMatrix(X_train_scaled, label=y_train)
    dval = xgb.DMatrix(X_val_scaled, label=y_val)

    client_models[f'client_{i+1}'] = []

    for epoch in range(params['n_estimators']):
        # Train the model
        if epoch == 0:
            booster = xgb.train(params, dtrain, num_boost_round=1)
        else:
            booster = xgb.train(params, dtrain, num_boost_round=1, xgb_model=booster)

        client_models[f'client_{i+1}'].append(booster)

        accuracy, precision, recall, f1 = test_metrics(booster, dval, y_val)
        scores[f'client_{i+1}'] = (accuracy, precision, recall, f1)
        print(f"Epoch {epoch+1}: Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    # Final evaluation on validation set
    final_accuracy, final_precision, final_recall, final_f1 = test_metrics(booster, dval, y_val)
    print(f"\nFinal Validation Metrics for Client {i+1}:")
    print(f"Accuracy: {final_accuracy:.4f}")
    print(f"Precision: {final_precision:.4f}")
    print(f"Recall: {final_recall:.4f}")
    print(f"F1 Score: {final_f1:.4f}\n")

# Aggregation of models (this part can be more sophisticated based on the actual federated learning approach)
# This is a simple averaging of model predictions as an example
def aggregate_models(client_models, X_test):
    predictions = np.zeros((len(X_test), len(client_models)))
    for i, client in enumerate(client_models):
        for model in client:
            dtest = xgb.DMatrix(X_test)
            predictions[:, i] += model.predict(dtest)
    predictions = predictions / len(client_models)
    return predictions.mean(axis=1)

# Assume X_test and y_test are defined elsewhere
X_test_scaled = scaler.transform(X_test)
final_predictions = aggregate_models(client_models, X_test_scaled)
final_predictions = (final_predictions > 0.5).astype(int)
final_accuracy = accuracy_score(y_test, final_predictions)
final_precision = precision_score(y_test, final_predictions, zero_division=1)
final_recall = recall_score(y_test, final_predictions, zero_division=1)
final_f1 = f1_score(y_test, final_predictions, zero_division=1)

print(f"\nFinal Aggregated Metrics:")
print(f"Accuracy: {final_accuracy:.4f}")
print(f"Precision: {final_precision:.4f}")
print(f"Recall: {final_recall:.4f}")
print(f"F1 Score: {final_f1:.4f}\n")


Client 1 Data:


Parameters: { "n_estimators" } are not used.



Epoch 1: Accuracy: 0.7984, Precision: 0.7125, Recall: 1.0000, F1: 0.8321


Parameters: { "n_estimators" } are not used.



Epoch 2: Accuracy: 0.8858, Precision: 0.8771, Recall: 0.8972, F1: 0.8870


Parameters: { "n_estimators" } are not used.



Epoch 3: Accuracy: 0.8858, Precision: 0.8771, Recall: 0.8972, F1: 0.8870


Parameters: { "n_estimators" } are not used.



Epoch 4: Accuracy: 0.8858, Precision: 0.8771, Recall: 0.8972, F1: 0.8870


Parameters: { "n_estimators" } are not used.



Epoch 5: Accuracy: 0.8878, Precision: 0.8870, Recall: 0.8885, F1: 0.8878


Parameters: { "n_estimators" } are not used.



Epoch 6: Accuracy: 0.8887, Precision: 0.8912, Recall: 0.8853, F1: 0.8882


Parameters: { "n_estimators" } are not used.



Epoch 7: Accuracy: 0.8887, Precision: 0.8912, Recall: 0.8853, F1: 0.8882


Parameters: { "n_estimators" } are not used.



Epoch 8: Accuracy: 0.8887, Precision: 0.8912, Recall: 0.8853, F1: 0.8882
Epoch 9: Accuracy: 0.8887, Precision: 0.8912, Recall: 0.8853, F1: 0.8882


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 10: Accuracy: 0.8887, Precision: 0.8912, Recall: 0.8853, F1: 0.8882

Final Validation Metrics for Client 1:
Accuracy: 0.8887
Precision: 0.8912
Recall: 0.8853
F1 Score: 0.8882

Client 2 Data:


Parameters: { "n_estimators" } are not used.



Epoch 1: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863


Parameters: { "n_estimators" } are not used.



Epoch 2: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863
Epoch 3: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863

Parameters: { "n_estimators" } are not used.






Parameters: { "n_estimators" } are not used.



Epoch 4: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863


Parameters: { "n_estimators" } are not used.



Epoch 5: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863


Parameters: { "n_estimators" } are not used.



Epoch 6: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863


Parameters: { "n_estimators" } are not used.



Epoch 7: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863
Epoch 8: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863
Epoch 9: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 10: Accuracy: 0.8854, Precision: 0.8795, Recall: 0.8932, F1: 0.8863

Final Validation Metrics for Client 2:
Accuracy: 0.8854
Precision: 0.8795
Recall: 0.8932
F1 Score: 0.8863

Client 3 Data:


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 1: Accuracy: 0.4983, Precision: 0.4983, Recall: 1.0000, F1: 0.6651
Epoch 2: Accuracy: 0.7958, Precision: 0.7093, Recall: 1.0000, F1: 0.8299
Epoch 3: Accuracy: 0.7958, Precision: 0.7093, Recall: 1.0000, F1: 0.8299
Epoch 4: Accuracy: 0.8829, Precision: 0.8738, Recall: 0.8942, F1: 0.8839


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 5: Accuracy: 0.8829, Precision: 0.8738, Recall: 0.8942, F1: 0.8839
Epoch 6: Accuracy: 0.8829, Precision: 0.8738, Recall: 0.8942, F1: 0.8839
Epoch 7: Accuracy: 0.8829, Precision: 0.8738, Recall: 0.8942, F1: 0.8839


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 8: Accuracy: 0.8829, Precision: 0.8738, Recall: 0.8942, F1: 0.8839
Epoch 9: Accuracy: 0.8829, Precision: 0.8738, Recall: 0.8942, F1: 0.8839
Epoch 10: Accuracy: 0.8829, Precision: 0.8738, Recall: 0.8942, F1: 0.8839

Final Validation Metrics for Client 3:
Accuracy: 0.8829
Precision: 0.8738
Recall: 0.8942
F1 Score: 0.8839

Client 4 Data:


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 1: Accuracy: 0.4973, Precision: 0.4973, Recall: 1.0000, F1: 0.6643
Epoch 2: Accuracy: 0.4973, Precision: 0.4973, Recall: 1.0000, F1: 0.6643
Epoch 3: Accuracy: 0.7941, Precision: 0.7072, Recall: 1.0000, F1: 0.8285
Epoch 4: Accuracy: 0.7941, Precision: 0.7072, Recall: 1.0000, F1: 0.8285


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 5: Accuracy: 0.7941, Precision: 0.7072, Recall: 1.0000, F1: 0.8285
Epoch 6: Accuracy: 0.8809, Precision: 0.8676, Recall: 0.8976, F1: 0.8823
Epoch 7: Accuracy: 0.8809, Precision: 0.8676, Recall: 0.8976, F1: 0.8823
Epoch 8: Accuracy: 0.8817, Precision: 0.8700, Recall: 0.8960, F1: 0.8828


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 9: Accuracy: 0.8817, Precision: 0.8700, Recall: 0.8960, F1: 0.8828
Epoch 10: Accuracy: 0.8821, Precision: 0.8710, Recall: 0.8956, F1: 0.8831

Final Validation Metrics for Client 4:
Accuracy: 0.8821
Precision: 0.8710
Recall: 0.8956
F1 Score: 0.8831

Client 5 Data:


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 1: Accuracy: 0.4986, Precision: 0.4986, Recall: 1.0000, F1: 0.6654
Epoch 2: Accuracy: 0.7974, Precision: 0.7111, Recall: 1.0000, F1: 0.8311
Epoch 3: Accuracy: 0.7974, Precision: 0.7111, Recall: 1.0000, F1: 0.8311
Epoch 4: Accuracy: 0.8848, Precision: 0.8776, Recall: 0.8937, F1: 0.8856


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 5: Accuracy: 0.8848, Precision: 0.8776, Recall: 0.8937, F1: 0.8856
Epoch 6: Accuracy: 0.8848, Precision: 0.8776, Recall: 0.8937, F1: 0.8856
Epoch 7: Accuracy: 0.8848, Precision: 0.8776, Recall: 0.8937, F1: 0.8856


Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.

Parameters: { "n_estimators" } are not used.



Epoch 8: Accuracy: 0.8848, Precision: 0.8776, Recall: 0.8937, F1: 0.8856
Epoch 9: Accuracy: 0.8848, Precision: 0.8776, Recall: 0.8937, F1: 0.8856
Epoch 10: Accuracy: 0.8848, Precision: 0.8776, Recall: 0.8937, F1: 0.8856

Final Validation Metrics for Client 5:
Accuracy: 0.8848
Precision: 0.8776
Recall: 0.8937
F1 Score: 0.8856



NameError: name 'X_test' is not defined