In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler,Normalizer
from sklearn.model_selection import train_test_split
torch.manual_seed(42)
from itertools import combinations
from itertools import accumulate
import copy, pickle, math
import pandas as pd
import numpy as np

In [3]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import pandas as pd

In [4]:
cal_housing = fetch_california_housing()
X = pd.DataFrame(cal_housing.data,columns=cal_housing.feature_names)
y = cal_housing.target
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=1, test_size=0.2)

In [5]:
sc_X = StandardScaler()
X_trainscaled=sc_X.fit_transform(X_train)
X_testscaled=sc_X.transform(X_test)

In [6]:
y_train

array([2.556, 1.146, 1.375, ..., 1.048, 1.407, 1.26 ])

In [7]:
reg = MLPRegressor(hidden_layer_sizes=(64,64,64),activation="relu" ,random_state=1, max_iter=2000).fit(X_trainscaled, y_train)

In [8]:
reg.predict(X_trainscaled[:5])

array([2.50870572, 1.43059859, 1.40194489, 1.99221932, 4.58279794])

In [9]:
y_train[:5]

array([2.556, 1.146, 1.375, 1.188, 4.227])

In [10]:
# load the dataset CA pricing 
class CADataset(torch.utils.data.Dataset):

  def __init__(self, X, y, scale_data=True):
    if not torch.is_tensor(X) and not torch.is_tensor(y):
      # Apply scaling if necessary
      if scale_data:
          X = StandardScaler().fit_transform(X)
    
      self.X = torch.from_numpy(X)
      self.y = torch.from_numpy(y)

  def __len__(self):
      return len(self.X)

  def __getitem__(self, i):
      return self.X[i], self.y[i]

cal_housing = fetch_california_housing()
X = pd.DataFrame(cal_housing.data,columns=cal_housing.feature_names)
y = cal_housing.target
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=1, test_size=0.2)

dataset_train = CADataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(dataset_train, batch_size=200, shuffle=True, num_workers=1)

dataset_test = CADataset(X_test, y_test)
testloader = torch.utils.data.DataLoader(dataset_test, batch_size=10, shuffle=True, num_workers=1)


In [11]:
# define the model 
# 1) non-label--user; 2) label--party

class user_m(nn.Module): # add customerized user side
  def __init__(self, dim_x=32):
      super().__init__()
      self.layers = nn.Sequential(
      nn.Linear(8, 64),
      nn.ReLU(),
      nn.Linear(64, 64),
      nn.ReLU(),
      nn.Linear(64, dim_x),
      nn.ReLU(),
    )
  def forward(self, x):
    return self.layers(x)


class label_m(nn.Module): # add for customerized label party
  def __init__(self, dim_x=32):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(dim_x, 16),
        nn.ReLU(),
        nn.Linear(16, 1),
    )
    
  def forward(self, x):
    return self.layers(x)

class MLP_r(nn.Module):
  def __init__(self, dim_x=32):
    super().__init__()
    self.user = user_m(dim_x)
    self.label = label_m(dim_x)

  def forward(self, x):
    self.f_int = self.user(x)
    return self.label(self.f_int)

def weights_init(m):
    if hasattr(m, "weight"):
        m.weight.data.uniform_(-0.5, 0.5)
    if hasattr(m, "bias"):
        m.bias.data.uniform_(-0.5, 0.5)



class new_ml(nn.Module): # add for customerized label party
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(8, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, 1),
    )
    
  def forward(self, x):
    return self.layers(x)


In [12]:
'''training and evaluation of a model.'''

def train_model(mlp, trainloader, iteration):
  train_loss_function = nn.MSELoss()
  optimizer = torch.optim.Adam(mlp.parameters())

  for epoch in range(0, iteration):
    current_loss = 0.0
    for i, data in enumerate(trainloader, 0):
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      targets = targets.reshape((targets.shape[0], 1))
      optimizer.zero_grad()
      outputs = mlp(inputs)
      loss = train_loss_function(outputs, targets)   
      loss.backward()
      optimizer.step()
    print ('training epoch: {epoch}\tLoss: {:0.4f}'.format(loss.item(), epoch=epoch+1))
  print('final training loss: {:0.4f}'.format(loss.item()))
  return mlp


def evaluate(model, testloader):
  accumulate_loss = 0
  train_loss_function = nn.L1Loss()
  for i, data in enumerate(testloader, 0):
    inputs, targets = data
    inputs, targets = inputs.float(), targets.float()
    targets = targets.reshape((targets.shape[0], 1))
    outputs = model(inputs)
  
    loss = train_loss_function(outputs, targets)
    accumulate_loss+=loss.item()
  return accumulate_loss/len(testloader)

if False:
  mlp_new = MLP_r(dim_x=16, flag=True)
  mlp_new = train_model(mlp_new, trainloader)

In [13]:
if True:
  
  mlp_new = MLP_r()
  mlp_new = train_model(mlp_new, trainloader, 50)

training epoch: 1	Loss: 0.6792
training epoch: 2	Loss: 0.5951
training epoch: 3	Loss: 0.4185
training epoch: 4	Loss: 0.5632
training epoch: 5	Loss: 0.3613
training epoch: 6	Loss: 0.3211
training epoch: 7	Loss: 0.2284
training epoch: 8	Loss: 0.3100
training epoch: 9	Loss: 0.3411
training epoch: 10	Loss: 0.4236
training epoch: 11	Loss: 0.3226
training epoch: 12	Loss: 0.2663
training epoch: 13	Loss: 0.1433
training epoch: 14	Loss: 0.3735
training epoch: 15	Loss: 0.2288
training epoch: 16	Loss: 0.2021
training epoch: 17	Loss: 0.2685
training epoch: 18	Loss: 0.1419
training epoch: 19	Loss: 0.4444
training epoch: 20	Loss: 0.2399
training epoch: 21	Loss: 0.2376
training epoch: 22	Loss: 0.3521
training epoch: 23	Loss: 0.2229
training epoch: 24	Loss: 0.2399
training epoch: 25	Loss: 0.3438
training epoch: 26	Loss: 0.4552
training epoch: 27	Loss: 0.3111
training epoch: 28	Loss: 0.2900
training epoch: 29	Loss: 0.3895
training epoch: 30	Loss: 0.2259
training epoch: 31	Loss: 0.2409
training epoch: 3

In [14]:
evaluate(mlp_new, testloader)

3.260981149592642

In [15]:
mlp_new(dataset_train.X[:5].float())

tensor([[2.0811],
        [1.5123],
        [1.2371],
        [2.0557],
        [4.7536]], grad_fn=<AddmmBackward0>)

In [16]:
dataset_train.y[:5]

tensor([2.5560, 1.1460, 1.3750, 1.1880, 4.2270], dtype=torch.float64)

In [20]:
def result_summary(original_score, res_dummy):

  res = abs(original_score-torch.tensor(res_dummy))/abs(original_score)
  print ("L1-loss mean", sum(abs(original_score-torch.tensor(res_dummy)))/score.shape[0])
  error = sum(res)/score.shape[0]
  print ("mean error rate (L1)", error)
  import pandas as pd

  res_list = [t.detach().numpy().tolist() for t in res]
  s = pd.Series(res_list)
  print (s.describe())
  return error

In [18]:
# very raw single_data/target and no list things
def square_error(pred, target):
  return torch.sum((pred-target)**2)
loss_function = square_error

def mean_error(pred, target): # do not use L1-error
  return torch.sum(abs(pred-target))

# use a json file to log it
dic = {}
loss_function = square_error
predic_loss_func = square_error

cos = torch.nn.CosineSimilarity(dim=0)

def simple_short_test(model, datas, labels, 
                      label_model, # surrogate model setting
                      known_datas=None, known_labels=None, # known data points
                      iteration=2000, end_threhold=0.001, op='Adam', learning_rate=1,
                      semi_flag=True, lamda1=0.05, lamda3=0.01,
                      R_predict_flag=True, lamda2=0.05, 
                      PRINT_INT=False,
                      PRINT_flg=False):
  
  torch.manual_seed(66)

  original_dy_dx_list = []
  dummy_score_list = []

  for (data, score) in zip(datas, labels):
    data = data.reshape(1, -1)
    score = score.reshape(1, -1)

    # compute the loss to be sent g
    f_embding = model.user(data.float())
    out = model.label(f_embding)
    score = score.reshape((1,-1))
    y = loss_function(out,  score.reshape((1, -1)))
    dy_dx = torch.autograd.grad(y.float(), f_embding) # g
    original_dy_dx = list((_.detach().clone() for _ in dy_dx))
  

    original_dy_dx_list.append(original_dy_dx)

    dummy_score = torch.randn(score.size()).requires_grad_(True)
    dummy_score_list.append(dummy_score)
  


  # if there are less data, select all the data, otherwise randomly select few 3 tuples for penalty
  trip_list = list(combinations(range(datas.shape[0]), 3))
  original_label = list((_.detach().clone() for _ in dummy_score_list))

  torch.manual_seed(42)
  #label_model = label_surrogate(dim_x=dim_x, flag=bias_flag)
  #label_model = label_m(dim_x)
  #label_model = copy.deepcopy(start_label_model)
  
  if PRINT_flg:
    print("initila surrogate model parameters", list(label_model.parameters()))
  
  # step2b: start the reconstruction process

  if op=="Adam":
    optimizer = torch.optim.Adam(list(label_model.parameters())+dummy_score_list, lr=learning_rate)
  elif op=="SGD":
    optimizer = torch.optim.SGD(list(label_model.parameters())+dummy_score_list, lr=0.01, momentum=0.9)
  else:
    optimizer = torch.optim.LBFGS(list(label_model.parameters())+dummy_score_list, lr=learning_rate)

  

  for iters in range(iteration):

    # print ("===========current parameters of iterations", iters, " ============")
    # for param_group in optimizer.param_groups:
    #   print (list(param_group.values())[0]) 

    # print ("current gradient of iterations", iters)
    # print ("model grad", list(label_model.parameters())[0].grad)
    # for i in range(len(dummy_score_list)):
    #   print ("label grad", dummy_score_list[i].grad)
    if PRINT_INT:
      print ("============current gradient of iterations", iters, " ============")
      print ("GT labels", labels)
      print("original score", model(datas.float()).reshape(1, -1))
      print ("dummy labels", torch.tensor(dummy_score_list))
      print("surrogate score ", label_model(model.user(datas.float())).reshape(1, -1))

    # known_data, label list
    known_list = [1, 2]
    def closure():
      optimizer.zero_grad()
      agg_loss = 0

      # all the data point loss     
      for (data, score, dummy_score, original_dy_dx) in zip(datas, labels, dummy_score_list, original_dy_dx_list): 

        f_embding = model.user(data.float())
        pred = label_model(f_embding) # dummy prediction
        dummy_loss = loss_function(pred, dummy_score)
        dummy_dy_dx = torch.autograd.grad(dummy_loss, f_embding, create_graph=True)

        grad_diff = 0
        for gx, gy in zip(dummy_dy_dx, original_dy_dx): 
            grad_diff += ((gx - gy) ** 2).sum()

        # this regularization to make the surrogate model behaves as normal 
        if R_predict_flag: # print the predict_loss scale
          predict_loss = predic_loss_func(pred, dummy_score)
          grad_diff += lamda2*predict_loss
        agg_loss+=grad_diff


      if known_datas!=None:

        known_original_dy_dx_list = []
    
        for (data, score) in zip(known_datas, known_labels):
          data = data.reshape(1, -1)
          score = score.reshape(1, -1)

          # compute the loss to be sent g
          f_embding = model.user(data.float())
          out = model.label(f_embding)
          score = score.reshape((1,-1))
          y = loss_function(out,  score.reshape((1, -1)))
          dy_dx = torch.autograd.grad(y.float(), f_embding) # g
          known_original_dy_dx = list((_.detach().clone() for _ in dy_dx))
        

          known_original_dy_dx_list.append(known_original_dy_dx)

        # this regularization forces the model surrogate to behave like the original model
        # adjust the MODEL
        # use the gradient loss with GT label

        for (data, score, known_original_dy_dx) in zip(known_datas, known_labels, known_original_dy_dx_list): 
          f_embding = model.user(data.float())
          pred = label_model(f_embding) # dummy prediction
          dummy_loss = loss_function(pred, score)
          dummy_dy_dx = torch.autograd.grad(dummy_loss, f_embding, create_graph=True)
          
          #label_loss = loss_function(dummy_score, score)
          grad_diff = 0
          for gx, gy in zip(dummy_dy_dx, known_original_dy_dx): 
              grad_diff += ((gx - gy) ** 2).sum()
          agg_loss+=lamda1 *grad_diff

          predict_loss_known = predic_loss_func(pred, score)
          agg_loss += lamda3*predict_loss_known


      # add semi-supervised data point (the prediction) 

      agg_loss.backward()
      
      return grad_diff

    optimizer.step(closure)

    
    # print (iters, "%.4f" % closure().item())

    if closure().item() < end_threhold:
      break

    if iters % 50 == 0: 
        current_loss = closure()
        error = result_summary(labels, dummy_score_list)
        print(iters, "%.4f" % current_loss.item())
  
  if PRINT_flg:
    print (original_dy_dx)

    print("GT (ground truth) original score", labels)
    print("initial dummy score", original_label)
    print("end dummy score", dummy_score_list)

    print ("ground truth label party model", list(model.label.parameters()))
    print("end surrogate label_model", list(label_model.parameters()))

    print("original prediction score", model(datas.float()))
    print("surrogate prediction score ", label_model(model.user(datas.float())))

    
  return labels, label_model(model.user(data.float())), dummy_score_list, label_model

In [21]:
class label_surrogate(nn.Module): # add for customerized label party
  def __init__(self, dim_x=16, flag=False):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(dim_x, 8, bias=flag),
        nn.Linear(8, 1, bias=flag),
        # nn.Linear(4, 1, bias=flag),
    )
    
  def forward(self, x):
    return self.layers(x)

surrogate = label_surrogate(dim_x=32, flag=True)

# PATH = "/content/drive/MyDrive/epoch_m/"+str(4)
# mlp_new = MLP_r(dim_x=16, flag=True)
# mlp_new.load_state_dict(torch.load(PATH), strict=False)

IND = 0
Step = 10
num = 5
data = dataset_train.X[IND:IND+Step]
score = dataset_train.y[IND:IND+Step]


known_datas = dataset_train.X[IND+Step:IND+num+Step]
known_labels = dataset_train.y[IND+Step:IND+Step+num]
# bias seems a strong factor no matter what we choose
# we may need a regularization conerning the range of score we consider !
original_label, surrogate_score, dummy_score, label_model_p = simple_short_test(mlp_new, data, score, surrogate, 
                                                                                known_datas, known_labels, 
                                                                                op="Adam", iteration=3500, learning_rate=1, end_threhold=0.00001,
                                                                                lamda1=0.1, lamda3=0.1, R_predict_flag=True, lamda2=0.001, PRINT_flg=True)

initila surrogate model parameters [Parameter containing:
tensor([[ 0.1352,  0.1467, -0.0414,  0.1624, -0.0387,  0.0357, -0.0861,  0.1038,
          0.1558, -0.1297,  0.1537,  0.0331,  0.1306,  0.0239,  0.0852, -0.0250,
          0.1363,  0.0261, -0.0825,  0.0451, -0.0814, -0.0207, -0.0718,  0.1173,
         -0.1395, -0.0815, -0.0499, -0.1063,  0.0167, -0.1746,  0.1596, -0.1502],
        [ 0.1365,  0.0294, -0.0574,  0.1092,  0.0276,  0.1428,  0.0193, -0.0558,
          0.0475, -0.0479,  0.0744,  0.1578,  0.1022, -0.0773,  0.1020,  0.0316,
          0.0898, -0.1077, -0.1750, -0.0683, -0.1356,  0.1451,  0.0509,  0.0732,
          0.0559, -0.0031,  0.1383, -0.1256,  0.0111, -0.1207,  0.0545, -0.0609],
        [ 0.0542, -0.0368,  0.1466, -0.1048, -0.1054, -0.1054,  0.1590,  0.0589,
          0.1701, -0.1459, -0.1753, -0.1383, -0.1189,  0.0716,  0.0633,  0.1469,
         -0.0913, -0.1205,  0.0938, -0.0715,  0.1073, -0.0419,  0.1011, -0.1373,
         -0.0892,  0.0539,  0.0374, -0.0451,  0.1

In [None]:
# set a small regularization for the predication score
class label_surrogate(nn.Module): # add for customerized label party
  def __init__(self, dim_x=16, flag=False):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(dim_x, 8, bias=flag),
        nn.Linear(8, 1, bias=flag),
        # nn.Linear(4, 1, bias=flag),
    )
    
  def forward(self, x):
    return self.layers(x)

surrogate = label_surrogate(dim_x=32, flag=True)

# PATH = "/content/drive/MyDrive/epoch_m/"+str(4)
# mlp_new = MLP_r(dim_x=16, flag=True)
# mlp_new.load_state_dict(torch.load(PATH), strict=False)

IND = 0
Step = 10
num = 5
data = dataset_train.X[IND:IND+Step]
score = dataset_train.y[IND:IND+Step]


known_datas = dataset_train.X[IND+Step:IND+num+Step]
known_labels = dataset_train.y[IND+Step:IND+Step+num]
# bias seems a strong factor no matter what we choose
# we may need a regularization conerning the range of score we consider !
original_label, surrogate_score, dummy_score, label_model_p = simple_short_test(mlp_new, data, score, surrogate, 
                                                                                known_datas, known_labels, 
                                                                                op="Adam", iteration=3500, learning_rate=1, end_threhold=0.00001,
                                                                                lamda1=0.1, lamda3=0.05, R_predict_flag=True, lamda2=0.001, PRINT_flg=True)

initila surrogate model parameters [Parameter containing:
tensor([[-0.0387,  0.0357, -0.0861,  0.1038,  0.1558, -0.1297,  0.1537,  0.0331,
          0.1306,  0.0239,  0.0852, -0.0250,  0.1363,  0.0261, -0.0825,  0.0451,
         -0.0814, -0.0207, -0.0718,  0.1173, -0.1395, -0.0815, -0.0499, -0.1063,
          0.0167, -0.1746,  0.1596, -0.1502,  0.1365,  0.0294, -0.0574,  0.1092],
        [ 0.0276,  0.1428,  0.0193, -0.0558,  0.0475, -0.0479,  0.0744,  0.1578,
          0.1022, -0.0773,  0.1020,  0.0316,  0.0898, -0.1077, -0.1750, -0.0683,
         -0.1356,  0.1451,  0.0509,  0.0732,  0.0559, -0.0031,  0.1383, -0.1256,
          0.0111, -0.1207,  0.0545, -0.0609,  0.0542, -0.0368,  0.1466, -0.1048],
        [-0.1054, -0.1054,  0.1590,  0.0589,  0.1701, -0.1459, -0.1753, -0.1383,
         -0.1189,  0.0716,  0.0633,  0.1469, -0.0913, -0.1205,  0.0938, -0.0715,
          0.1073, -0.0419,  0.1011, -0.1373, -0.0892,  0.0539,  0.0374, -0.0451,
          0.1054,  0.1202, -0.1282, -0.0944,  0.1

In [None]:
# set a small regularization for the predication score
class label_surrogate(nn.Module): # add for customerized label party
  def __init__(self, dim_x=16, flag=False):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(dim_x, 8, bias=flag),
        nn.Linear(8, 1, bias=flag),
        # nn.Linear(4, 1, bias=flag),
    )
    
  def forward(self, x):
    return self.layers(x)

surrogate = label_surrogate(dim_x=32, flag=True)

# PATH = "/content/drive/MyDrive/epoch_m/"+str(4)
# mlp_new = MLP_r(dim_x=16, flag=True)
# mlp_new.load_state_dict(torch.load(PATH), strict=False)

IND = 0
Step = 10
num = 5
data = dataset_train.X[IND:IND+Step]
score = dataset_train.y[IND:IND+Step]


known_datas = dataset_train.X[IND+Step:IND+num+Step]
known_labels = dataset_train.y[IND+Step:IND+Step+num]
# bias seems a strong factor no matter what we choose
# we may need a regularization conerning the range of score we consider !
original_label, surrogate_score, dummy_score, label_model_p = simple_short_test(mlp_new, data, score, surrogate, 
                                                                                known_datas, known_labels, 
                                                                                op="Adam", iteration=3500, learning_rate=1, end_threhold=0.00001,
                                                                                lamda1=0.2, lamda3=0.05, R_predict_flag=True, lamda2=0.001, PRINT_flg=True)

initila surrogate model parameters [Parameter containing:
tensor([[ 0.1352,  0.1467, -0.0414,  0.1624, -0.0387,  0.0357, -0.0861,  0.1038,
          0.1558, -0.1297,  0.1537,  0.0331,  0.1306,  0.0239,  0.0852, -0.0250,
          0.1363,  0.0261, -0.0825,  0.0451, -0.0814, -0.0207, -0.0718,  0.1173,
         -0.1395, -0.0815, -0.0499, -0.1063,  0.0167, -0.1746,  0.1596, -0.1502],
        [ 0.1365,  0.0294, -0.0574,  0.1092,  0.0276,  0.1428,  0.0193, -0.0558,
          0.0475, -0.0479,  0.0744,  0.1578,  0.1022, -0.0773,  0.1020,  0.0316,
          0.0898, -0.1077, -0.1750, -0.0683, -0.1356,  0.1451,  0.0509,  0.0732,
          0.0559, -0.0031,  0.1383, -0.1256,  0.0111, -0.1207,  0.0545, -0.0609],
        [ 0.0542, -0.0368,  0.1466, -0.1048, -0.1054, -0.1054,  0.1590,  0.0589,
          0.1701, -0.1459, -0.1753, -0.1383, -0.1189,  0.0716,  0.0633,  0.1469,
         -0.0913, -0.1205,  0.0938, -0.0715,  0.1073, -0.0419,  0.1011, -0.1373,
         -0.0892,  0.0539,  0.0374, -0.0451,  0.1

In [None]:
result_summary(score, dummy_score)

L1-loss mean tensor(2.5921, dtype=torch.float64)
mean error rate (L1) tensor(2.1116, dtype=torch.float64)
count    10.000000
mean      2.111583
std       1.559850
min       0.095273
25%       1.114426
50%       1.859144
75%       2.880481
max       5.442462
dtype: float64


tensor(2.1116, dtype=torch.float64)

In [None]:
result_summary(score, dummy_score)

L1-loss mean tensor(0.6453, dtype=torch.float64)
mean error rate (L1) tensor(0.4520, dtype=torch.float64)
count    10.000000
mean      0.452033
std       0.391075
min       0.092985
25%       0.306210
50%       0.346957
75%       0.397328
max       1.418489
dtype: float64
