In [1]:
import os
import sys
import time
import torch
import random
import pickle
import argparse
import numpy as np
import pandas as pd
import torch.nn as nn
from pprint import pprint
from tqdm.auto import tqdm, trange
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

sys.path.append("../")
from models import build_model
from data_distributor import *
from dataloaders import *
import aggregation_rules
from aggregation_rules import Aggregator

# Set seed for reproducibility
seed_value = 1
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
from sklearn import svm
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [3]:
def load_dataset_from_disk( path):
    '''
    Load the gradient_dataset from disk as a pickle file (name.pkl)
    '''
    with open(path, 'rb') as f:
        gradient_dataset = pickle.load(f)
    # input_grads = gradient_dataset['input'][0].reshape(-1, 1)
    # target_grads = gradient_dataset['target'][0].reshape(-1, 1)
    # input_grads = np.array([item for sublist in gradient_dataset['input'] for item in sublist])
    # target_grads = np.array([item for sublist in gradient_dataset['target'] for item in sublist])
    input_grads = np.array(gradient_dataset['input'])
    target_grads = np.array(gradient_dataset['target'])
    return input_grads, target_grads

def sample_data(datalist, sample_size=None, percent=None):
    '''
    Sample the data from the datalist
    args:
        - datalist: [data1, data2, ...]
        - data1: numpy array of shape (num_samples, num_features)
    '''
    if sample_size is None and percent is None:
        raise ValueError("Either sample_size or percent must be specified.")
    elif sample_size is None and percent is not None:
        # calculate the sample size
        sample_size = int(datalist[0].shape[0] * percent)
        # print(sample_size)

    indices = np.random.choice(datalist[0].shape[0], sample_size, replace=False)
    return [data[indices] for data in datalist]

def data_standardization(inputs, targets):
    '''
    Standardize the data
    args:
        - datalist: [data1, data2, ...]
        - data1: numpy array of shape (num_samples, num_features)
    '''
    # standardize the dataset
    input_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()
    # input_scaler = Normalizer()

    input_scaler.fit(inputs)
    inputs_std = input_scaler.transform(inputs)

    target_scaler.fit(targets)
    targets_std = target_scaler.transform(targets)

    # return inputs_std, targets, input_scaler#, target_scaler
    return inputs_std, targets_std, input_scaler, target_scaler

def data_destandardization(data, scaler):
    '''
    De-standardize the data
    args:
        - data: numpy array of shape (num_samples, num_features)
        - scaler: sklearn.preprocessing.StandardScaler
    '''
    return scaler.inverse_transform(data.reshape(-1, 1))

def inference(input_grads, model, input_scaler, target_scaler):
    '''
    Inference the model
    args:
        - grad_list: list of gradients
        - model: model
        - input_scaler: sklearn.preprocessing.StandardScaler
        - target_scaler: sklearn.preprocessing.StandardScaler
    '''
    if len(input_grads.shape) == 1:
        input_grads = input_grads.reshape(-1, 1)

    # standardize the data
    input_grads_std = input_scaler.transform(input_grads)
    # inference
    pred_std = model.predict(input_grads_std)
    # de-standardize the data
    # pred_grads = pred_std.reshape(-1, 1)
    pred_grads = target_scaler.inverse_transform(pred_std.reshape(-1, 1))
    return pred_grads

In [4]:
def data_prep(data_path, data_coverage):
    # Load the dataset
    input_grads, target_grads = load_dataset_from_disk(data_path)

    X_train, y_train = input_grads[0].reshape(-1, 1), target_grads[0].reshape(-1, 1)
    X_test, y_test = input_grads[1:], target_grads[1:]

    X_train, y_train = sample_data([X_train, y_train], percent=data_coverage)
    # print(X_train.shape, y_train.shape)

    # Splitting the data into training and testing sets
    # X_train, X_test, y_train, y_test = train_test_split(input_grads, target_grads, test_size=0.3, random_state=seed_value)

    # standardize training data
    X_train_std, y_train_std, in_scaler, tar_scaler = data_standardization(X_train, y_train)
    # X_train_std, y_train_std, in_scaler = data_standardization(X_train, y_train)
    return X_train_std, y_train_std, X_test, y_test, in_scaler, tar_scaler

def test(model, X_test, y_test, in_scaler, tar_scaler):
    mse_list = []
    mape_list = []
    for i, v in enumerate(X_test):
        gt = y_test[i].reshape(-1, 1)
        y_pred = inference(X_test[i], model, in_scaler, tar_scaler)
        mse = mean_squared_error(gt, y_pred)
        mape = mean_absolute_percentage_error(gt+1, y_pred+1)
        mse_list.append(mse)
        mape_list.append(mape)
    return np.mean(mse_list), np.mean(mape_list)

In [5]:
# X_train_std, y_train_std, X_test, y_test, in_scaler, tar_scaler = data_prep(data_path="../data/grad_datasets/determ/cifar100_cusCNN.pkl", data_coverage=0.1)

# metric_dict = {'data_coverage': 0.1, 'fit_time': 0, 'mse': 0, 'mape': 0}
# model = LinearRegression()
# # model = MLPRegressor(hidden_layer_sizes=(100,), max_iter=100)

# # time the training
# start_time = time.time()
# model.fit(X_train_std, y_train_std)
# metric_dict['fit_time'] = time.time() - start_time
# metric_dict['mse'], metric_dict['mape'] = test(model, X_test, y_test, in_scaler, tar_scaler)

# pprint(metric_dict)

In [6]:
# data_path="../data/grad_datasets/determ/cifar100_cusCNN.pkl"

# noise_type = data_path.split('/')[-2]
# dataset, model = data_path[:-4].split('/')[-1].split('_')
# noise_type, dataset, model

In [7]:
data_root = "../data/grad_datasets/"
data_paths = []
for dir in os.listdir(data_root):
    n_type = os.path.join(data_root, dir)
    for data in os.listdir(n_type):
        full_path = os.path.join(n_type, data)
        print(full_path)
        data_paths.append(full_path)

../data/grad_datasets/determ/mnist_cusMLP.pkl
../data/grad_datasets/determ/cifar10_cusCNN.pkl
../data/grad_datasets/determ/cifar10_resnet18.pkl
../data/grad_datasets/determ/cifar100_cusCNN.pkl
../data/grad_datasets/determ/cifar100_resnet18.pkl
../data/grad_datasets/randm_proc/mnist_cusMLP.pkl
../data/grad_datasets/randm_proc/cifar10_cusCNN.pkl
../data/grad_datasets/randm_proc/cifar10_resnet18.pkl
../data/grad_datasets/randm_proc/cifar100_cusCNN.pkl
../data/grad_datasets/randm_proc/cifar100_resnet18.pkl
../data/grad_datasets/randm/cifar10_resnet18.pkl
../data/grad_datasets/randm/cifar100_cusCNN.pkl
../data/grad_datasets/randm/cifar100_resnet18.pkl
../data/grad_datasets/randm/mnist_cusMLP.pkl
../data/grad_datasets/randm/cifar10_cusCNN.pkl


# LinearRegression

In [8]:
data_coverage_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
eval_list = []
for p in data_paths:
    print(p)
    for dc in data_coverage_list:
        noise_type = p.split('/')[-2]
        dataset, model = p[:-4].split('/')[-1].split('_')
        metric_dict = {'correction_model':'LR', 'dataset': dataset, 'model': model, 'noise_type': noise_type,
                       'data_coverage': dc, 'fit_time (s)': 0, 'mse': 0, 'mape': 0, 'sample_size': 0}

        X_train_std, y_train_std, X_test, y_test, in_scaler, tar_scaler = data_prep(data_path=p, data_coverage=dc)
        metric_dict['sample_size'] = X_train_std.shape[0]
        model = LinearRegression()
        # model = MLPRegressor(hidden_layer_sizes=(100,), max_iter=100)
        start_time = time.time()
        model.fit(X_train_std, y_train_std)
        metric_dict['fit_time (s)'] = time.time() - start_time
        metric_dict['mse'], metric_dict['mape'] = test(model, X_test, y_test, in_scaler, tar_scaler)
        eval_list.append(metric_dict)
        # pprint(metric_dict)
    # break

../data/grad_datasets/determ/mnist_cusMLP.pkl
../data/grad_datasets/determ/cifar10_cusCNN.pkl
../data/grad_datasets/determ/cifar10_resnet18.pkl
../data/grad_datasets/determ/cifar100_cusCNN.pkl
../data/grad_datasets/determ/cifar100_resnet18.pkl
../data/grad_datasets/randm_proc/mnist_cusMLP.pkl
../data/grad_datasets/randm_proc/cifar10_cusCNN.pkl
../data/grad_datasets/randm_proc/cifar10_resnet18.pkl
../data/grad_datasets/randm_proc/cifar100_cusCNN.pkl
../data/grad_datasets/randm_proc/cifar100_resnet18.pkl
../data/grad_datasets/randm/cifar10_resnet18.pkl
../data/grad_datasets/randm/cifar100_cusCNN.pkl
../data/grad_datasets/randm/cifar100_resnet18.pkl
../data/grad_datasets/randm/mnist_cusMLP.pkl
../data/grad_datasets/randm/cifar10_cusCNN.pkl


# MLPRegressor

In [9]:
for p in data_paths:
    print(p)
    for dc in data_coverage_list:
        noise_type = p.split('/')[-2]
        dataset, model = p[:-4].split('/')[-1].split('_')
        metric_dict = {'correction_model':'MLP10_10_200iter', 'dataset': dataset, 'model': model, 'noise_type': noise_type,
                       'data_coverage': dc, 'fit_time (s)': 0, 'mse': 0, 'mape': 0, 'sample_size': 0}

        X_train_std, y_train_std, X_test, y_test, in_scaler, tar_scaler = data_prep(data_path=p, data_coverage=dc)
        metric_dict['sample_size'] = X_train_std.shape[0]
        # model = LinearRegression()
        model = MLPRegressor(hidden_layer_sizes=(10,10,), max_iter=200)
        start_time = time.time()
        model.fit(X_train_std, y_train_std)
        metric_dict['fit_time (s)'] = time.time() - start_time
        metric_dict['mse'], metric_dict['mape'] = test(model, X_test, y_test, in_scaler, tar_scaler)
        eval_list.append(metric_dict)
        # pprint(metric_dict)
    # break

../data/grad_datasets/determ/mnist_cusMLP.pkl
../data/grad_datasets/determ/cifar10_cusCNN.pkl
../data/grad_datasets/determ/cifar10_resnet18.pkl
../data/grad_datasets/determ/cifar100_cusCNN.pkl
../data/grad_datasets/determ/cifar100_resnet18.pkl
../data/grad_datasets/randm_proc/mnist_cusMLP.pkl
../data/grad_datasets/randm_proc/cifar10_cusCNN.pkl
../data/grad_datasets/randm_proc/cifar10_resnet18.pkl
../data/grad_datasets/randm_proc/cifar100_cusCNN.pkl
../data/grad_datasets/randm_proc/cifar100_resnet18.pkl
../data/grad_datasets/randm/cifar10_resnet18.pkl
../data/grad_datasets/randm/cifar100_cusCNN.pkl
../data/grad_datasets/randm/cifar100_resnet18.pkl
../data/grad_datasets/randm/mnist_cusMLP.pkl
../data/grad_datasets/randm/cifar10_cusCNN.pkl


In [11]:
# print(eval_list)

df = pd.DataFrame(eval_list)
df

Unnamed: 0,correction_model,dataset,model,noise_type,data_coverage,fit_time (s),mse,mape,sample_size
0,LR,mnist,cusMLP,determ,0.1,0.013259,1.788503e-19,2.633291e-10,46887
1,LR,mnist,cusMLP,determ,0.3,0.001928,2.800072e-18,1.163307e-09,140662
2,LR,mnist,cusMLP,determ,0.5,0.002637,1.464369e-17,3.114732e-09,234437
3,LR,mnist,cusMLP,determ,0.7,0.003462,1.887340e-19,3.024214e-10,328211
4,LR,mnist,cusMLP,determ,0.9,0.004339,5.195344e-18,1.557856e-09,421986
...,...,...,...,...,...,...,...,...,...
175,MLP10_10_200iter,cifar10,cusCNN,randm,0.3,14.441177,2.295606e-04,9.494296e-04,621870
176,MLP10_10_200iter,cifar10,cusCNN,randm,0.5,25.368040,2.295487e-04,9.354309e-04,1036451
177,MLP10_10_200iter,cifar10,cusCNN,randm,0.7,37.491253,2.295606e-04,9.495156e-04,1451031
178,MLP10_10_200iter,cifar10,cusCNN,randm,0.9,47.595510,2.295323e-04,9.167629e-04,1865611


In [12]:
# df.to_csv('../data/correction_performance.csv', index=False)

180

In [None]:
def MSE_MAPELoss(y_pred, y_true):
    mape = torch.mean(torch.abs((y_true - y_pred) / y_true))
    mse = torch.mean(torch.pow(y_true - y_pred, 2))
    loss = mse
    if not torch.isnan(mape) and mape<0.05:
        loss = mse + mape
    return loss

class CorModel(nn.Module):
    def __init__(self):
        super(CorModel, self).__init__()
        self.fc = nn.Linear(1, 1)

    def forward(self, x):
        return self.fc(x)
    
    def fit(self, X_train_std, y_train_std, epochs=10, verbose=False):
        self.train()
        x_data, y_data = torch.tensor(X_train_std, dtype=torch.float32).view(-1, 1), torch.tensor(y_train_std, dtype=torch.float32).view(-1, 1)
        dataset = TensorDataset(x_data, y_data)
        data_loader = DataLoader(dataset, batch_size=512, shuffle=True)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        criterion = MSE_MAPELoss
        for ep in range(epochs):
            with tqdm(data_loader) as tepoch:
                tepoch.set_description(f"EP {ep+1}/{epochs}")
                for x, y in tepoch:
                    # print(x.shape, y.shape)
                    optimizer.zero_grad()
                    y_pred = model(x)
                    loss = criterion(y_pred, y)
                    loss.backward()
                    optimizer.step()
                    tepoch.set_postfix({'train_loss': loss.item()})
    
    def predict(self, x):
        x_data = torch.tensor(x, dtype=torch.float32).view(-1, 1)
        self.eval()
        return self(x_data).detach().numpy()
    

model.fit(X_train_std, y_train_std, epochs=10)

In [None]:
model.eval()

for i, v in enumerate(X_test):
    gt = y_test[i].reshape(-1, 1)
    # std input data
    X_test_std = in_scaler.transform(X_test[i].reshape(-1, 1))
    y_test_std = tar_scaler.transform(y_test[i].reshape(-1, 1))
    # print(X_test_std.shape)
    x_data = torch.tensor(X_test_std, dtype=torch.float32, requires_grad=False)
    y_data = torch.tensor(y_test_std, dtype=torch.float32, requires_grad=False)
    # print(x_data.shape)

    # make test data loader
    dataset = TensorDataset(x_data, y_data)
    data_loader = DataLoader(dataset, batch_size=512, shuffle=False)

    # inference
    y_pred = []
    with tqdm(data_loader) as tepoch:
        for x, y in tepoch:
            batch_preds = model(x)
            y_pred.extend(batch_preds.detach().numpy().tolist())

    break
    mse = mean_squared_error(gt, y_pred)
    print(f"MSE : {mse}")
    mape = mean_absolute_percentage_error(gt, y_pred)
    print(f"MAPE: {mape}")
    print()

In [None]:
y_pred_destd = tar_scaler.inverse_transform(np.array(y_pred).reshape(-1, 1))

In [None]:
mse = mean_squared_error(gt, y_pred)
print(f"MSE : {mse}")
mape = mean_absolute_percentage_error(gt+1, y_pred_destd+1)
print(f"MAPE: {mape}")
print()

In [None]:
y_pred_destd

In [None]:
gt