In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [21]:
# Load dataset directly from UCI URL
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data"
data = pd.read_csv(url)

print(data.head())


             name  MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0  phon_R01_S01_1      119.992       157.302        74.997         0.00784   
1  phon_R01_S01_2      122.400       148.650       113.819         0.00968   
2  phon_R01_S01_3      116.682       131.111       111.555         0.01050   
3  phon_R01_S01_4      116.676       137.871       111.366         0.00997   
4  phon_R01_S01_5      116.014       141.781       110.655         0.01284   

   MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  ...  \
0           0.00007   0.00370   0.00554     0.01109       0.04374  ...   
1           0.00008   0.00465   0.00696     0.01394       0.06134  ...   
2           0.00009   0.00544   0.00781     0.01633       0.05233  ...   
3           0.00009   0.00502   0.00698     0.01505       0.05492  ...   
4           0.00011   0.00655   0.00908     0.01966       0.06425  ...   

   Shimmer:DDA      NHR     HNR  status      RPDE       DFA   spread1  \
0      0.0654

In [22]:
if 'name' in data.columns:
    data = data.drop(columns=['name'])

X = data.drop('status', axis=1)
y = data['status']

In [23]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [24]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)



In [25]:
# Split training data into 3 clients
client_data = np.array_split(range(len(X_train)), 3)
clients = []
for idx in client_data:
    clients.append((X_train[idx], y_train.iloc[idx]))

In [26]:
 # Local training on each client

local_models = []
for i, (X_c, y_c) in enumerate(clients):
    model = LogisticRegression(max_iter=1000)
    model.fit(X_c, y_c)
    local_models.append(model)
    print(f"Client {i+1} trained, local accuracy: {accuracy_score(y_c, model.predict(X_c)):.2f}")

Client 1 trained, local accuracy: 0.87
Client 2 trained, local accuracy: 0.85
Client 3 trained, local accuracy: 0.90


In [27]:
# Average coefficients & intercepts
coef_avg = np.mean([m.coef_ for m in local_models], axis=0)
intercept_avg = np.mean([m.intercept_ for m in local_models], axis=0)

In [28]:
# Create global model
global_model = LogisticRegression(max_iter=1000)
global_model.coef_ = coef_avg
global_model.intercept_ = intercept_avg
global_model.classes_ = np.array([0,1])  # set classes

In [30]:
#Evaluate global model

y_pred = global_model.predict(X_test)
print("\nGlobal Model Accuracy on Test Set:", accuracy_score(y_test, y_pred))


Global Model Accuracy on Test Set: 0.8974358974358975


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# -------------------------
# 1. Load dataset
# -------------------------
data = pd.read_csv(r"C:\Users\tulsi\Downloads\parkinsons.data")
if 'name' in data.columns:
    data = data.drop(columns=['name'])

X = data.drop('status', axis=1)
y = data['status']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------
# 2. Simulate 3 clients
# -------------------------
NUM_CLIENTS = 3
client_data = np.array_split(range(len(X_train)), NUM_CLIENTS)
clients = []
for idx in client_data:
    clients.append((X_train[idx], y_train.iloc[idx]))

# -------------------------
# 3. Initialize global model
# -------------------------
global_model = LogisticRegression(max_iter=1000)
# Initialize weights randomly by fitting once on small data
global_model.fit(X_train[:5], y_train.iloc[:5])

# -------------------------
# 4. Federated Learning rounds
# -------------------------
NUM_ROUNDS = 5

for rnd in range(NUM_ROUNDS):
    local_models = []
    print(f"\n--- Round {rnd+1} ---")
    
    # Each client trains locally on global weights
    for i, (X_c, y_c) in enumerate(clients):
        local_model = LogisticRegression(max_iter=1000)
        # Set global weights to local model
        local_model.coef_ = global_model.coef_.copy()
        local_model.intercept_ = global_model.intercept_.copy()
        local_model.classes_ = np.array([0,1])
        # Train locally
        local_model.fit(X_c, y_c)
        local_models.append(local_model)
        print(f"Client {i+1} local accuracy: {accuracy_score(y_c, local_model.predict(X_c)):.2f}")
    
    # Aggregate weights (FedAvg)
    coef_avg = np.mean([m.coef_ for m in local_models], axis=0)
    intercept_avg = np.mean([m.intercept_ for m in local_models], axis=0)
    
    # Update global model
    global_model.coef_ = coef_avg
    global_model.intercept_ = intercept_avg

# -------------------------
# 5. Evaluate global model
# -------------------------
y_pred = global_model.predict(X_test)
print("\nGlobal Model Accuracy on Test Set:", accuracy_score(y_test, y_pred))



--- Round 1 ---
Client 1 local accuracy: 0.87
Client 2 local accuracy: 0.85
Client 3 local accuracy: 0.90

--- Round 2 ---
Client 1 local accuracy: 0.87
Client 2 local accuracy: 0.85
Client 3 local accuracy: 0.90

--- Round 3 ---
Client 1 local accuracy: 0.87
Client 2 local accuracy: 0.85
Client 3 local accuracy: 0.90

--- Round 4 ---
Client 1 local accuracy: 0.87
Client 2 local accuracy: 0.85
Client 3 local accuracy: 0.90

--- Round 5 ---
Client 1 local accuracy: 0.87
Client 2 local accuracy: 0.85
Client 3 local accuracy: 0.90

Global Model Accuracy on Test Set: 0.8974358974358975
