In [260]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
from scipy.optimize import minimize
import networkx as nx
import random
from main import maximal_independent_set
from scipy.stats import norm

In [287]:
# My Implementation (Full Interference Setting)

def random_network_adjacency_matrix(n, min_neighbors, max_neighbors):
    while True:
        degree_sequence = [random.randint(min_neighbors, max_neighbors) for _ in range(n)]
        if sum(degree_sequence) % 2 == 0:
            break
    g = nx.configuration_model(degree_sequence)
    g = nx.Graph(g)
    g.remove_edges_from(nx.selfloop_edges(g))
    adj_matrix = nx.adjacency_matrix(g).toarray()

    return nx.to_dict_of_lists(g), adj_matrix

# random network:
n_samples = 10000
network_dict, network = random_network_adjacency_matrix(n_samples, 0, 5)

var_U = 2
var_eL = 0.5
beta = 1

covariance = beta * beta * var_U

# Update for variance calculation on diagonals
neighbors = np.sum(network, axis=1)
variance = neighbors * covariance + var_eL

cov_mat = np.full(network.shape, covariance)
cov_mat = np.where(network > 0, cov_mat, 0)
np.fill_diagonal(cov_mat, variance)

L = np.random.multivariate_normal([0]*n_samples, cov_mat, size=1)

  adj_matrix = nx.adjacency_matrix(g).toarray()


KeyboardInterrupt: 

In [None]:
ind_set = maximal_independent_set.maximal_n_apart_independent_set(network_dict, n=1)

In [280]:
def build_dataset(network, ind_set, L):
    df = pd.DataFrame(L.T, columns=["L_i"])
    df_subset = df.iloc[list(ind_set)]
    df_subset['nb_count'] = df_subset.apply(lambda x: len(network[x.name]), axis=1)
    return df_subset

def custom_nll(params, data):
    sigma_multiplier, sigma_constant = params
    mu = 0
    sigma_values = sigma_multiplier * data['nb_count'] + sigma_constant
    ll = -np.sum(0.5 * np.log(2 * np.pi * sigma_values**2) + ((data['L_i'] - mu)**2) / (2 * sigma_values**2))
    return -ll

In [281]:
data = build_dataset(network_dict, ind_set, L)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_subset['nb_count'] = df_subset.apply(lambda x: len(network[x.name]), axis=1)


In [283]:
# Our Custom MLE with variance depending on number of neighbors

print("estimate:", minimize(custom_nll, [0.1, 0.1], args=(data), method="L-BFGS-B").x)
print("true:", covariance, var_eL)

estimate: [ 8.02280749e-01 -2.19617953e-07]
true: 2 0.5


In [286]:
[len(data[data['nb_count'] == i]) for i in range(0, 5)]

[337, 210, 172, 150, 96]

In [34]:
# questions:
# 1. return 0.5 / n * np.linalg.norm(L[:, var_index] - np.dot(Z, params)) ** 2 in least_squares_loss doesnt seem to be following the
# paper's implementation.

# 2. why can't we just directly use the empirical covariance matrix on all variables.


In [69]:
true_cov_mat = np.array([[2, 0.9, 0.3],
                        [0.9, 2, 0.7],
                        [0.3, 0.7, 2]])
L = np.random.multivariate_normal([0,0,0], true_cov_mat, size=10000)
L1 = L[:,0]
L2 = L[:,1]
L3 = L[:,2]
print(np.var(L1), np.var(L2), np.var(L3))

2.014116302093843 1.9611859638356997 2.0133962451971392


In [76]:
n_samples = 5000
U1 = np.random.normal(0, 0.8, n_samples)
U2 = np.random.normal(0, 0.8, n_samples)
U3 = np.random.normal(0, 0.8, n_samples)

noise_L1 = np.random.normal(0, 0.5, n_samples)
noise_L2 = np.random.normal(0, 0.5, n_samples)
noise_L3 = np.random.normal(0, 0.5, n_samples)
beta = 0.5
L1 = beta * U1 + noise_L1
L2 = beta * U1 + beta * U2 + beta * U3 + noise_L2
L3 = beta * U3 + noise_L3

L = np.array([L1, L2, L3])
print(np.cov(L1, L2))

[[0.41890978 0.1637278 ]
 [0.1637278  0.71993856]]


In [73]:
print(1 * np.var(U1) * beta**2 + np.var(noise_L1))
print(3 * np.var(U1) * beta**2 + np.var(noise_L1))

0.4066664483208884
0.719150370899255


In [77]:

# true_cov_mat = np.array([[1, 0.5, 0],
#                         [0.5, 1, 0.7],
#                         [0, 0.7, 1]])
# L = np.random.multivariate_normal([0,0,0], true_cov_mat, size=5000)
# L1 = L[:,0]
# L2 = L[:,1]
# L3 = L[:,2]
eps_L1 = L1 - np.mean(L1)
eps_L2 = L2 - np.mean(L2)
eps_L3 = L3 - np.mean(L3)

def least_squares_loss(params, L, Z, var_index):
    n, d = L.shape
    return 0.5 * n * np.linalg.norm(L[:, var_index] - np.dot(Z, params)) ** 2
    # return 0.5 / n * np.linalg.norm(L[:, var_index] - np.dot(Z, params)) ** 2

d = 3
max_iter = 10


# random guess for cov mat
cov_mat = np.array([[0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0]])

var = np.array([[np.var(eps_L1), 0.0, 0.0],
                [0.0, np.var(eps_L2), 0.0],
                [0.0, 0.0, np.var(eps_L3)]])

for iter in range(max_iter):

    for var_index in range(d):
        zero_param_bidirected = {}

        omega = cov_mat + var
        omega_minusi = np.delete(omega, var_index, axis=0)
        omega_minusii = np.delete(omega_minusi, var_index, axis=1)
        omega_minusii_inv = np.linalg.inv(omega_minusii)

        epsilon = L.copy()
        epsilon_minusi = np.delete(epsilon, var_index, axis=1)

        Z_minusi = epsilon_minusi @ omega_minusii_inv.T
        Z = np.insert(Z_minusi, var_index, 0, axis=1)

        sol = minimize(least_squares_loss,
                    np.zeros(d),
                    args=(L, Z, var_index))

        cov_mat[:, var_index] = sol.x
        cov_mat[var_index, :] = sol.x
        var[var_index, var_index] = np.var(L[:, var_index])
        print()

print("cov mat:", cov_mat)
print("var:", var)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 4999)

In [31]:
np.cov(L[:,0], L[:,2])

array([[ 0.97020581, -0.00972078],
       [-0.00972078,  1.0177522 ]])