In [10]:
import numpy as np
import pandas as pd
import GPy
from scipy.special import expit
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.spatial import distance
import statsmodels.api as sm
import warnings
import numpy as np
from scipy.optimize import differential_evolution
from scipy.stats import rankdata
from sklearn.cross_decomposition import CCA
import math
import copy
from math import log
from collections import Counter
import pickle
from sklearn.linear_model import Lasso
warnings.filterwarnings("ignore")

In [11]:
adjacency_matrix = np.load('adjacency_matrix.npy')

# Read beta back from the file
beta_star = np.load('beta_star.npy')
print("adjacency_matrix:", adjacency_matrix)
print("beta_star:", beta_star)

adjacency_matrix: [[0. 1. 0. 1. 1. 0. 1. 1. 1. 0.]
 [1. 0. 0. 1. 1. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 1. 1. 1. 1. 1.]
 [1. 1. 1. 0. 1. 1. 1. 1. 1. 1.]
 [1. 1. 0. 1. 0. 1. 1. 1. 1. 1.]
 [0. 0. 1. 1. 1. 0. 0. 0. 0. 0.]
 [1. 0. 1. 1. 1. 0. 0. 0. 1. 1.]
 [1. 0. 1. 1. 1. 0. 0. 0. 0. 1.]
 [1. 0. 1. 1. 1. 0. 1. 0. 0. 1.]
 [0. 1. 1. 1. 1. 0. 1. 1. 1. 0.]]
beta_star: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.17457962  0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          2.87637077  0.          0.
  0.          0.          0.         -3.55335018  0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.         12.66519217  0.          0.          0.   

In [12]:
K = 2
error_std = 1

In [13]:
def transform_treatment(adjacency_matrix, A, K):
    # Extract upper triangular part (excluding diagonal)
    upper_triangular = np.triu(adjacency_matrix, k=1)
    
    # Generate B based on the rules
    B = []
    for i in range(len(adjacency_matrix)):
        for j in range(len(adjacency_matrix)):
            if upper_triangular[i, j] == 1:
                B.append(K * (A[i] - 1) + A[j])
    
    B = np.array(B)
    
    # Create X
    # Step 1: Start with 1
    X = [1]
    
    # Step 2: Add one-hot encoding of A (length K for each element in A)
    for a in A:
        one_hot_a = np.eye(K)[a - 1]  # One-hot encoding
        X.extend(one_hot_a)
    
    # Step 3: Add one-hot encoding of B (length K^2 for each element in B)
    for b in B:
        one_hot_b = np.eye(K**2)[b - 1]  # One-hot encoding
        X.extend(one_hot_b)
    
    X = np.array(X)

    return X

In [14]:
from sklearn.linear_model import Lasso

def lasso_stochastic_bandit(phi, T, K, adjacency_matrix, C_max_wave, L, error_std, Y_t, X_t, beta_star):
    
    n_features = X_t.shape[1]  # Number of features

    Y_t_exp = Y_t

    l_2_error_parameter = np.array([])

    for tau in range(len(Y_t), T):
        # Compute lambda for LASSO
        lamada = 2 * error_std * C_max_wave * L * np.sqrt(2 * (2 * np.log(tau) + np.log(n_features)) / tau)

        # Perform LASSO regression with lambda using Y_t_tran and X_t_tran
        lasso = Lasso(alpha=lamada, fit_intercept=False)
        lasso.fit(X_t, Y_t)  # Fit the model to get beta estimates
        beta_esti = lasso.coef_  # Estimated beta values
        #print(f"parameter square error:{np.sqrt(sum((beta_esti - beta_star)**2))}")
        s_esti = np.count_nonzero(beta_esti)  # Number of non-zero coefficients

        # Perform optimization to find X_max
        # Check if expected Y_t has not changed for 20 iterations
        
        A = optimization(beta_esti, s_esti, X_t, K, adjacency_matrix, lamada, phi, tau)
        X_max = transform_treatment(adjacency_matrix, A, K)

        # Generate Y_max based on true beta_star
        Y_max = beta_star @ X_max + np.random.normal(0, error_std, 1)

        # Generate Y_max based on true beta_star
        Y_max_exp = beta_star @ X_max
    
        #print(f"time {tau}: mean reward {beta_star @ X_max}")
        #print(f"time {tau}: action {X_max}")

        l_2_error_parameter = np.append(l_2_error_parameter, np.sqrt(sum((beta_esti - beta_star)**2)))

        # Append X_max and Y_max to X_t and Y_t
        Y_t = np.append(Y_t, Y_max)
        X_t = np.vstack([X_t, X_max])

        Y_t_exp = np.append(Y_t_exp, Y_max_exp)

    return Y_t, X_t, Y_t_exp, l_2_error_parameter


In [15]:
def optimization(beta_esti, s_esti, X_t, K, adjacency_matrix, lamada, phi, tau):
    # Define the objective function
    # Define the objective function
    def objective(X_max):
        X_max = np.array(X_max)
        sqrt_term = np.sqrt(X_max @ np.linalg.pinv((1 / tau) * (X_t.T @ X_t)) @ X_max.reshape(-1, 1))
        constant = (4 * np.sqrt(s_esti) * lamada / phi)
        return -(beta_esti @ X_max + constant * sqrt_term)  # Negate for maximization
    
    # Initialize X_max with all zeros
    num_vars = len(adjacency_matrix)
    A = np.random.randint(1, K + 1, size=len(adjacency_matrix))
    
    X = transform_treatment(adjacency_matrix, A, K)
    
    epochs = 5
    for epoch in range(epochs):
        current_value = objective(X)

        # Greedy optimization within this epoch
        for i in range(num_vars):
            # Flip the value of X_max[i] (0 -> L, L -> 0)
            original_value = A[i]

            
            # only useful for K = 2
            A[i] = 1 if original_value == 2 else 2
            
            X = transform_treatment(adjacency_matrix, A, K)
            new_value = objective(X)

            # If the objective improves, keep the change; otherwise, revert
            if new_value < current_value:
                current_value = new_value
            else:
                A[i] = original_value

    #print(current_value)
    return A


In [16]:
def while_lasso_stochastic_bandit(beta_star,phi):
    error_std = 1
    C_max_wave = 1
    t = 2
    T = 1000 + t
    L = 1
    K = 2

    # Randomly generate two A arrays
    A1 = np.random.randint(1, K + 1, size=len(adjacency_matrix))
    A2 = np.random.randint(1, K + 1, size=len(adjacency_matrix))
    
    # Compute the treatment matrix for each A
    X_max1 = transform_treatment(adjacency_matrix, A1, K)
    X_max2 = transform_treatment(adjacency_matrix, A2, K)
    
    # Combine the results as the first two rows of X_t
    X_t = np.array([X_max1, X_max2])
    
    # Generate random error term
    error_t = np.random.normal(0, error_std, t)
    
    # Compute Y = X @ beta_star + error_t
    Y_t = X_t @ beta_star + error_t
    
    Y_T, X_T, Y_t_exp, l_2_error_parameter = lasso_stochastic_bandit(phi, T, K, adjacency_matrix, C_max_wave, L, error_std, Y_t, X_t, beta_star)

    return Y_t_exp, l_2_error_parameter

In [17]:
for phi in {5, 10, 15, 30, 60}:
    Y_t_exp_times = []
    l_2_error_parameter_times = []

    # Run the experiment 10 times
    for times in range(10):
        print(f"Running experiment {times + 1} for phi={phi}")
        
        # Ensure `beta_star` and `t` are properly initialized before calling the function
        # Replace with actual initialization of beta_star and other parameters if needed
        Y_t_exp, l_2_error_parameter = while_lasso_stochastic_bandit(beta_star, phi)  # Replace with the actual function

        # Append the results of this experiment
        Y_t_exp_times.append(Y_t_exp)
        l_2_error_parameter_times.append(l_2_error_parameter)

    # Convert lists to NumPy arrays for easier computation
    Y_t_exp_times = np.array(Y_t_exp_times)
    l_2_error_parameter_times = np.array(l_2_error_parameter_times)

    # Save the results to .npy files
    np.save(f'Y_t_exp_lasso-stochastic-bandit-under-interference-n10-k2-d141-s10-phi={phi}_10times.npy', Y_t_exp_times)
    np.save(f'l_2_error_parameter_lasso-stochastic-bandit-under-interference-n10-k2-d141-s10-phi={phi}_10times.npy', l_2_error_parameter_times)


Running experiment 1 for phi=5
Running experiment 2 for phi=5
Running experiment 3 for phi=5
Running experiment 4 for phi=5
Running experiment 5 for phi=5
Running experiment 6 for phi=5
Running experiment 7 for phi=5
Running experiment 8 for phi=5
Running experiment 9 for phi=5
Running experiment 10 for phi=5
Running experiment 1 for phi=10
Running experiment 2 for phi=10
Running experiment 3 for phi=10
Running experiment 4 for phi=10
Running experiment 5 for phi=10
Running experiment 6 for phi=10
Running experiment 7 for phi=10
Running experiment 8 for phi=10
Running experiment 9 for phi=10
Running experiment 10 for phi=10
Running experiment 1 for phi=15
Running experiment 2 for phi=15
Running experiment 3 for phi=15
Running experiment 4 for phi=15
Running experiment 5 for phi=15
Running experiment 6 for phi=15
Running experiment 7 for phi=15
Running experiment 8 for phi=15
Running experiment 9 for phi=15
Running experiment 10 for phi=15
Running experiment 1 for phi=60
Running experim