In [11]:
import sys
sys.path.append('..')

In [30]:
from main import data_generator
from main import util
import numpy as np
import pandas as pd
import random
from scipy.special import expit

In [76]:
def sample_LY_auto_g(n_samples, network, burn_in, verbose, prob_v_given_boundary_L, prob_v_given_boundary_Y):
    V_DOMAIN = [1, 0]
    samples = []
    num_vertices = len(network)

    for _ in range(n_samples):
        if verbose:
            print("progress: ", _ / n_samples)
            
        sample = pd.DataFrame(index=network.keys(), columns=['L', 'A', 'Y'])
        
        # Initialize with random values
        sample['L'] = {vertex: random.choice(V_DOMAIN) for vertex in network}
        sample['Y'] = {vertex: random.choice(V_DOMAIN) for vertex in network}
        
        # Fill column 'A' with all ones
        sample['A'] = 1
        
        # Use Gibbs sampler to generate 1 sample
        for i in range(burn_in):
            for subject in network.keys():
                # sample L_i
                boundary_values_L = {
                    'L_neighbors': [sample.loc[neighbor, 'L'] for neighbor in network[subject]]
                }
                p_L = prob_v_given_boundary_L(boundary_values_L)
                sample.loc[subject, 'L'] = np.random.choice(V_DOMAIN, size=1, p=np.array([p_L, 1-p_L]))[0]
                
                # sample Y_i
                boundary_values_Y = {
                    'L_self': sample.loc[subject, 'L'],
                    'L_neighbors': [sample.loc[neighbor, 'L'] for neighbor in network[subject]],
                    'A_self': sample.loc[subject, 'A'],
                    'A_neighbors': [sample.loc[neighbor, 'A'] for neighbor in network[subject]],
                    'Y_neighbors': [sample.loc[neighbor, 'Y'] for neighbor in network[subject]],
                }
                p_Y = prob_v_given_boundary_Y(boundary_values_Y)
                sample.loc[subject, 'Y'] = np.random.choice(V_DOMAIN, size=1, p=np.array([p_Y, 1-p_Y]))[0]

        # Add the current sample DataFrame to the list
        samples.append(sample)

    return samples

def sample_LY_ours(n_samples, network, burn_in, verbose, prob_v_given_boundary_L, prob_v_given_boundary_Y):
    V_DOMAIN = [1, 0]
    samples = []
    num_vertices = len(network)
    
    # Use Gibbs sampler to generate the "observed" sample for L
    L_sample = pd.DataFrame(index=network.keys(), columns=['L'])
    L_sample['L'] = {vertex: random.choice(V_DOMAIN) for vertex in network}
    for i in range(burn_in):
        for subject in network.keys():
            # sample L_i
            boundary_values_L = {
                'L_neighbors': [L_sample.loc[neighbor, 'L'] for neighbor in network[subject]]
            }
            p_L = prob_v_given_boundary_L(boundary_values_L)
            L_sample.loc[subject, 'L'] = np.random.choice(V_DOMAIN, size=1, p=np.array([p_L, 1-p_L]))[0]

    for _ in range(n_samples):
        if verbose:
            print("progress: ", _ / n_samples)
        sample = pd.DataFrame(index=network.keys(), columns=['L', 'A', 'Y'])
        
        # Initialize
        sample['L'] = L_sample
        sample['A'] = 1
        sample['Y'] = {vertex: random.choice(V_DOMAIN) for vertex in network}
        
        # Use Gibbs sampler to generate 1 sample of Y
        for i in range(burn_in):
            for subject in network.keys():
                # sample Y_i
                boundary_values_Y = {
                    'L_self': sample.loc[subject, 'L'],
                    'L_neighbors': [sample.loc[neighbor, 'L'] for neighbor in network[subject]],
                    'A_self': sample.loc[subject, 'A'],
                    'A_neighbors': [sample.loc[neighbor, 'A'] for neighbor in network[subject]],
                    'Y_neighbors': [sample.loc[neighbor, 'Y'] for neighbor in network[subject]],
                }
                p_Y = prob_v_given_boundary_Y(boundary_values_Y)
                sample.loc[subject, 'Y'] = np.random.choice(V_DOMAIN, size=1, p=np.array([p_Y, 1-p_Y]))[0]

        # Add the current sample DataFrame to the list
        samples.append(sample)

    return samples

In [77]:
def prob_v_given_boundary_L(boundary_values):
    weighted_sum = 0
    weights = {
        'L_neighbors': 0.2,
    }
    for key, values in boundary_values.items():
        if values is not None:
            if isinstance(values, list):
                weighted_sum += weights[key] * sum(values)
            else:
                weighted_sum += weights[key] * values
    return expit(weighted_sum)

def prob_v_given_boundary_Y(boundary_values):
    weighted_sum = 0
    weights = {
        'Y_neighbors': 0.4,
        'L_self': -2.0,
        'A_self': -1,
        'L_neighbors': 0.4,
        'A_neighbors': 0.4
    }
    for key, values in boundary_values.items():
        if values is not None:
            if isinstance(values, list):
                weighted_sum += weights[key] * sum(values)
            else:
                weighted_sum += weights[key] * values
    return expit(weighted_sum)

In [107]:
NUM_OF_VERTICES = 300
VERBOSE = True
BURN_IN = 50
N_SAMPLES = 100

network = util.create_random_network(n=NUM_OF_VERTICES, min_neighbors=1, max_neighbors=5)
auto_g_1 = sample_LY_auto_g(N_SAMPLES, network, BURN_IN, VERBOSE, prob_v_given_boundary_L, prob_v_given_boundary_Y)
# auto_g_2 = sample_LY_auto_g(N_SAMPLES, network, BURN_IN, VERBOSE, prob_v_given_boundary_L, prob_v_given_boundary_Y)
ours = sample_LY_ours(N_SAMPLES, network, BURN_IN, VERBOSE, prob_v_given_boundary_L, prob_v_given_boundary_Y)

progress:  0.0
progress:  0.01
progress:  0.02
progress:  0.03
progress:  0.04
progress:  0.05
progress:  0.06
progress:  0.07
progress:  0.08
progress:  0.09
progress:  0.1
progress:  0.11
progress:  0.12
progress:  0.13
progress:  0.14
progress:  0.15
progress:  0.16
progress:  0.17
progress:  0.18
progress:  0.19
progress:  0.2
progress:  0.21
progress:  0.22
progress:  0.23
progress:  0.24
progress:  0.25
progress:  0.26
progress:  0.27
progress:  0.28
progress:  0.29
progress:  0.3
progress:  0.31
progress:  0.32
progress:  0.33
progress:  0.34
progress:  0.35
progress:  0.36
progress:  0.37
progress:  0.38
progress:  0.39
progress:  0.4
progress:  0.41
progress:  0.42
progress:  0.43
progress:  0.44
progress:  0.45
progress:  0.46
progress:  0.47
progress:  0.48
progress:  0.49
progress:  0.5
progress:  0.51
progress:  0.52
progress:  0.53
progress:  0.54
progress:  0.55
progress:  0.56
progress:  0.57
progress:  0.58
progress:  0.59
progress:  0.6
progress:  0.61
progress:  0.62

In [86]:
def test_dist(df1, df2):
    diff = []

    for subject in range(NUM_OF_VERTICES):
        df1_value_of_subject = []
        df2_value_of_subject = []

        for sample in range(N_SAMPLES):
            df1_value_of_subject.append(df1[sample]['Y'][subject])
            df2_value_of_subject.append(df2[sample]['Y'][subject])

        diff.append(sum(df1_value_of_subject) / N_SAMPLES - sum(df2_value_of_subject) / N_SAMPLES)

    return diff

In [112]:
test_dist(auto_g_1, ours)

[0.22999999999999998,
 0.24,
 0.17000000000000004,
 0.10999999999999999,
 0.24,
 0.13,
 -0.21999999999999997,
 -0.13,
 -0.28,
 -0.21,
 0.16,
 0.08000000000000007,
 -0.3,
 -0.12,
 -0.24999999999999994,
 0.21000000000000002,
 0.18000000000000005,
 0.15000000000000002,
 -0.14999999999999997,
 0.33,
 -0.24,
 -0.10999999999999999,
 -0.17,
 -0.10000000000000003,
 0.23,
 0.17000000000000004,
 -0.040000000000000036,
 -0.14,
 0.26,
 -0.21999999999999997,
 -0.29000000000000004,
 0.13999999999999999,
 -0.10999999999999999,
 0.040000000000000036,
 0.15000000000000002,
 0.19000000000000003,
 0.27,
 0.030000000000000027,
 0.29000000000000004,
 0.18999999999999995,
 0.039999999999999925,
 -0.13999999999999996,
 0.0,
 0.22999999999999998,
 -0.29,
 0.28,
 -0.12,
 0.14999999999999997,
 -0.26,
 -0.17999999999999994,
 -0.13999999999999996,
 -0.25,
 -0.19,
 0.12,
 0.21,
 0.18,
 0.29,
 -0.16000000000000003,
 0.26,
 -0.010000000000000009,
 0.06000000000000005,
 -0.15000000000000002,
 -0.13,
 0.19999999999999

In [101]:
test_dist(auto_g_1, auto_g_2)

[-0.014000000000000012,
 -0.01200000000000001,
 -0.0040000000000000036,
 0.008000000000000007,
 -0.02200000000000002,
 -0.0020000000000000018,
 -0.04600000000000004,
 -0.04800000000000004,
 -0.016000000000000014,
 0.06999999999999995,
 0.020000000000000018,
 0.025999999999999968,
 0.0,
 0.008000000000000007,
 0.026000000000000023,
 -0.02200000000000002,
 -0.014000000000000012,
 -0.026000000000000023,
 0.03799999999999998,
 0.006000000000000005,
 -0.04799999999999999,
 -0.03600000000000003,
 0.03200000000000003,
 -0.066,
 0.045999999999999985,
 -0.043999999999999984,
 -0.043999999999999984,
 -0.03400000000000003,
 0.02400000000000002,
 0.014000000000000012]

In [108]:
auto_g_1

[     L  A  Y
 0    1  1  1
 1    0  1  0
 2    0  1  1
 3    1  1  0
 4    1  1  0
 ..  .. .. ..
 295  0  1  1
 296  1  1  0
 297  1  1  0
 298  1  1  1
 299  0  1  0
 
 [300 rows x 3 columns],
      L  A  Y
 0    1  1  0
 1    1  1  1
 2    1  1  0
 3    1  1  0
 4    0  1  1
 ..  .. .. ..
 295  1  1  0
 296  1  1  0
 297  1  1  1
 298  0  1  1
 299  0  1  1
 
 [300 rows x 3 columns],
      L  A  Y
 0    1  1  1
 1    0  1  1
 2    1  1  0
 3    1  1  0
 4    1  1  0
 ..  .. .. ..
 295  1  1  0
 296  1  1  0
 297  1  1  0
 298  1  1  1
 299  1  1  1
 
 [300 rows x 3 columns],
      L  A  Y
 0    0  1  1
 1    0  1  1
 2    1  1  0
 3    1  1  0
 4    0  1  1
 ..  .. .. ..
 295  1  1  1
 296  0  1  1
 297  1  1  1
 298  1  1  0
 299  0  1  0
 
 [300 rows x 3 columns],
      L  A  Y
 0    1  1  1
 1    1  1  0
 2    1  1  0
 3    0  1  1
 4    1  1  1
 ..  .. .. ..
 295  0  1  0
 296  1  1  0
 297  1  1  0
 298  0  1  1
 299  0  1  0
 
 [300 rows x 3 columns],
      L  A  Y
 0    1  1 

In [109]:
averages = []

for sample in ours:
    averages.append(np.mean(sample["Y"]))

print(np.mean(averages))

0.5694333333333333


In [110]:
averages = []

for sample in auto_g_1:
    averages.append(np.mean(sample["Y"]))

print(np.mean(averages))

0.5804


0.5143333333333334
