In [42]:
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal
from torch.utils.data import DataLoader, Dataset
import torch

In [43]:
########################
####### setting ########
########################
RANDOM_SEED = 100   # intended seed for random number generation
NUM_SAMPLES = 3000  # number of (x, y, z) rows to simulate

# Success probability of the Bernoulli label y, i.e. P(y = 1).
Y_MEAN = 0.6

# Parameters of the 2-D Gaussian that x is drawn from, keyed by the label y.
X_DIST = {
    0: {
        "mean": (-2, -2),
        "cov": np.array([[10, 1],
                         [1, 3]]),
    },
    1: {
        "mean": (2, 2),
        "cov": np.array([[5, 1],
                         [1, 5]]),
    },
}

def X_PRIME(x):
    """
    Rotate the 2-D point x counter-clockwise by pi/4 radians (45 degrees).

    Only x[0] and x[1] are read. The rotated coordinates are returned as a
    tuple (x0', x1').
    """
    angle = np.pi/4
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    first, second = x[0], x[1]
    rotated_first = first*cos_a - second*sin_a
    rotated_second = first*sin_a + second*cos_a
    return (rotated_first, rotated_second)
def Z_MEAN(x, y):
    """
    Probability that z = 1 for a sample with features x.

    x is first rotated with X_PRIME. The result is the density of the rotated
    point under the X_DIST[1] Gaussian, normalised by the sum of its densities
    under the X_DIST[1] and X_DIST[0] Gaussians:  p1 / (p1 + p0).

    Parameters
    ----------
    x : length-2 sequence holding the two feature coordinates.
    y : label of the sample. Accepted so existing callers keep working, but
        not used: the returned probability depends on x only.

    Returns
    -------
    A float in [0, 1].
    """
    x_transform = X_PRIME(x)
    # Evaluate each density once, in log space.
    log_p1 = multivariate_normal.logpdf(x_transform, mean = X_DIST[1]["mean"], cov = X_DIST[1]["cov"])
    log_p0 = multivariate_normal.logpdf(x_transform, mean = X_DIST[0]["mean"], cov = X_DIST[0]["cov"])
    # p1 / (p1 + p0) == exp(log_p1 - log(p1 + p0)). Using logaddexp keeps the
    # ratio finite for points far from both means, where the plain densities
    # underflow to 0 and the direct division would give 0/0 = NaN.
    return np.exp(log_p1 - np.logaddexp(log_p1, log_p0))
########################

In [53]:
# Draw every random quantity from one generator seeded with RANDOM_SEED, so
# re-running this cell (or restarting the kernel) reproduces the same dataset.
rng = np.random.default_rng(RANDOM_SEED)

# Labels: y ~ Bernoulli(Y_MEAN).
ys = rng.binomial(n = 1, p = Y_MEAN, size = NUM_SAMPLES)

xs, zs = [], []

for y in ys:
    # Features: x | y ~ N(X_DIST[y]["mean"], X_DIST[y]["cov"]).
    x = rng.multivariate_normal(mean = X_DIST[y]["mean"], cov = X_DIST[y]["cov"], size = 1)[0]
    # z | x ~ Bernoulli(Z_MEAN(x, y)).
    z = rng.binomial(n = 1, p = Z_MEAN(x,y), size = 1)[0]
    xs.append(x)
    zs.append(z)

# Convert the features once; the explicit (-1, 2) shape keeps column slicing
# valid even when no samples were drawn.
xs_arr = np.array(xs).reshape(-1, 2)
data = pd.DataFrame({"x1": xs_arr[:, 0], "x2": xs_arr[:, 1], "y": ys, "z": zs})

In [54]:
# Show the simulated samples (columns x1, x2, y, z); the notebook renders a truncated view of the frame.
data

Unnamed: 0,x1,x2,y,z
0,4.874080,-2.060968,0,1
1,-5.882534,0.240451,0,0
2,-0.840812,-2.244116,0,1
3,0.704554,0.097267,1,0
4,-0.479125,3.824697,1,1
...,...,...,...,...
2995,2.740700,-2.332828,0,1
2996,5.118027,1.089216,1,1
2997,-0.600641,4.131752,1,0
2998,0.828398,0.202907,1,1


In [55]:
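# 3 clients:
#           client 1: 60% y = 1, 10% y = 0
#           client 2: 60% y = 1, 10% y = 0
#           client 3: 60% y = 1, 10% y = 0
# NOTE(review): the three splits are identical and 60% + 10% does not add up to 100% -- confirm the intended proportions.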