In [1]:
import numpy as np
import pandas as pd
from scipy.special import expit
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from scipy.spatial import distance
import statsmodels.api as sm
import warnings
import numpy as np
from scipy.optimize import differential_evolution
from scipy.stats import rankdata
from sklearn.cross_decomposition import CCA
import math
import copy
from math import log
from collections import Counter
import pickle
from sklearn.linear_model import Lasso
warnings.filterwarnings("ignore")

In [3]:
with open('relation_matrix_dic.pkl', 'rb') as f:
    relation_matrix_dic = pickle.load(f)

# define a new adjacency_matrix_dic: adjacency_matrix_dic[i] = relation_matrix_dic[i].iloc[0:20, 0:20]
adjacency_matrix_dic = {i: relation_matrix_dic[i].iloc[0:20, 0:20] for i in relation_matrix_dic}

# Copy adjacency_matrix_dic and extend its keys
adjacency_matrix_dic_new = adjacency_matrix_dic.copy()

for i in range(100):
    adjacency_matrix_dic_new[i + 100] = adjacency_matrix_dic[i]

# Update the original adjacency_matrix_dic with the extended dictionary
adjacency_matrix_dic = adjacency_matrix_dic_new

# Read beta back from the file
beta = np.load('beta.npy')
print("beta:", beta)

beta: [3.0143364  4.21106128 4.49056789 6.84433191 4.38055757 4.59035439
 5.62650526 4.348927   3.26627828 2.23006741]


In [5]:
len(adjacency_matrix_dic[0])

20

In [7]:
# follow the beta, only 1,2,3 related is non-zero
K = 14

# Compute total length
total_length = 1 + K + K + math.comb(K, 2)
total_length

120

In [9]:
new_beta = np.zeros(total_length)

# Assign values based on the provided instructions
new_beta[0:4] = beta[0:4]            # Copy the first four elements unchanged
new_beta[10:13] = beta[4:7]          # Copy beta[4:6] into new_beta[11:13]
new_beta[20:22] = beta[7:9]          # Copy beta[7:8] into new_beta[20:21]

# Print the new_beta array
print("new_beta:", new_beta)

beta = new_beta

new_beta: [3.0143364  4.21106128 4.49056789 6.84433191 0.         0.
 0.         0.         0.         0.         4.38055757 4.59035439
 5.62650526 0.         0.         0.         0.         0.
 0.         0.         4.348927   3.26627828 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.  

In [11]:
for i in range(10):
    print(f"Position {i}: beta = {beta[i]}")

idx = 10

for i in range(1, 10):
    for j in range(1, 10):
        if i <= j:
            print(f"Position ({i},{j}): beta = {beta[idx]}")
            idx += 1

Position 0: beta = 3.0143363957683738
Position 1: beta = 4.211061281023643
Position 2: beta = 4.490567890019025
Position 3: beta = 6.84433191283132
Position 4: beta = 0.0
Position 5: beta = 0.0
Position 6: beta = 0.0
Position 7: beta = 0.0
Position 8: beta = 0.0
Position 9: beta = 0.0
Position (1,1): beta = 4.3805575746037055
Position (1,2): beta = 4.590354387978943
Position (1,3): beta = 5.626505257258666
Position (1,4): beta = 0.0
Position (1,5): beta = 0.0
Position (1,6): beta = 0.0
Position (1,7): beta = 0.0
Position (1,8): beta = 0.0
Position (1,9): beta = 0.0
Position (2,2): beta = 0.0
Position (2,3): beta = 4.348926998816991
Position (2,4): beta = 3.2662782844053506
Position (2,5): beta = 0.0
Position (2,6): beta = 0.0
Position (2,7): beta = 0.0
Position (2,8): beta = 0.0
Position (2,9): beta = 0.0
Position (3,3): beta = 0.0
Position (3,4): beta = 0.0
Position (3,5): beta = 0.0
Position (3,6): beta = 0.0
Position (3,7): beta = 0.0
Position (3,8): beta = 0.0
Position (3,9): beta 

In [13]:
# define a function named "counting_treatment"
def counting_treatment(adjacency_matrix, A, K):

    # adjacency_matrix is a adjacency matrix dataframe with length 20 * 20 known
    # A is a np.array with length 20, each element 1,2,...,K known corresponding to each node in the network

    # a empty df named counting_treatment with column named: 0,1,...,K, (1,1), ..., (1,K), (2,2), ..., (2,K),...(K,K)

    # add different numbers of different nodes and different edges. For the column 0, directly give 1.

    # Define column names
    basic_columns = list(range(0, K + 1))  # Including column 0
    edge_columns = [(i, j) for i in range(1, K + 1) for j in range(i, K + 1)]
    columns = basic_columns + edge_columns
    
    # Create an empty DataFrame
    counting_treatment_df = pd.DataFrame(columns=columns)
    counting_treatment_df.loc[0] = 0  # Initialize all counts to 0
    
    # Column 0 gets the value of 1 as per the description
    counting_treatment_df.at[0, 0] = 1

    # Count number of nodes with each treatment
    for treatment in range(1, K + 1):
        counting_treatment_df.at[0, treatment] = sum(A == treatment)
    
    # Count the number of edges connecting nodes of different treatments
    for i in range(len(adjacency_matrix_dic[0])):
        for j in range(i + 1, len(adjacency_matrix_dic[0])):  # Iterate only for j > i to avoid double-counting
            if adjacency_matrix.iloc[i, j] != 0:  # If there's an edge between node i and node j
                treatment_i = A[i]
                treatment_j = A[j]
                # Use sorted tuple to represent undirected edge
                edge = tuple(sorted((treatment_i, treatment_j)))
                if edge in counting_treatment_df.columns:
                    counting_treatment_df.at[0, edge] += 1
                
    return counting_treatment_df

# generate_outcome 

In [16]:
def generate_outcome(adjacency_matrix, A, K, beta, error):
    # Generate the treatment matrix using the counting_treatment function
    counting_treatment_df = counting_treatment(adjacency_matrix, A, K)

    # Convert the treatment matrix to a NumPy array for easier calculation
    counting_treatment_array = counting_treatment_df.values.flatten()

    # Ensure that beta and counting_treatment_array have the same length
    if len(beta) != len(counting_treatment_array):
        raise ValueError("Length of beta does not match the number of columns in counting_treatment.")

    # Compute the outcome Y
    Y_exp = np.dot(counting_treatment_array, beta)
    
    Y = np.dot(counting_treatment_array, beta) + error

    # Return the results
    return Y, Y_exp, counting_treatment_df

# lasso regression

In [19]:
def predict_lasso_model(experimented_data, lambda_value):
    # Step 1: Prepare Data (X, Y)
    # X consists of all data except 'Y' from experimented_data
    X = experimented_data.iloc[:, :-1].values  # Take all columns except the last one (assumed 'Y' is the last column)
    Y = experimented_data['Y'].values.reshape(-1, 1)  # Rewards (Y) as a column vector
    
    # Step 2: Fit Lasso Regression Model with lambda (alpha) as the regularization parameter
    lasso_model = Lasso(alpha=lambda_value)  # Lambda corresponds to the alpha parameter in Lasso
    
    # Train the Lasso model
    lasso_model.fit(X, Y.ravel())  # Use ravel to convert Y to a 1D array
    
    return lasso_model

In [21]:
def predict_lasso_reward(A, model, adjacency_matrix, K):

    # Predict for new target treatments
    # Generate new target treatments using counting_treatment function
    counting_treatment_df = counting_treatment(adjacency_matrix, A, K)  # Get DataFrame representation
    
    # Convert the dataframe to a NumPy array for GPy compatibility
    X_target = counting_treatment_df.values  # Convert the DataFrame to a NumPy array

    if X_target.ndim == 1:
        X_target = X_target.reshape(1, -1)  # Reshape to be a 2D array with one row

    # Predict the rewards and the full covariance matrix for the new target treatments
    predicted_reward = model.predict(X_target)

    # Simplify the results to a scalar value if necessary
    # For Lasso, the output of .predict() should be a 1D array, so we can just return the first value
    
    return predicted_reward[0]


In [23]:
# optimization genetic algorithm
def get_children_elite(parent_treatment_dic, parent_reward_predict, model, adjacency_matrix, K, generation_num):
    child_treatment_dic = {}
    child_reward_predict = {}
    
    # Step 2: Generate children from parent treatments using roulette wheel selection
    for i in range(generation_num):
        # Use roulette wheel selection to choose two parents based on UCB values
        A1, A2 = roulette_wheel_selection(parent_reward_predict, parent_treatment_dic)
        
        # Crossover: with 0.5 probability choose genes from A1 or A2
        child_treatment_dic[i + generation_num] = np.where(np.random.rand(len(A1)) < 0.5, A1, A2)
        
        # Predict reward and UCB for the new child
        predicted_rewards = predict_lasso_reward(child_treatment_dic[i + generation_num], model, adjacency_matrix, K)
        child_reward_predict[i + generation_num] = predicted_rewards

    # Step 3: Select the top treatments from both parents and children
    combined_predict = {**parent_reward_predict, **child_reward_predict}
    
    # Sort the combined dictionary by UCB values and select the top N treatments
    sorted_indices = sorted(combined_predict, key=combined_predict.get, reverse=True)[:generation_num]
    child_elite = {idx: combined_predict[idx] for idx in sorted_indices}
    
    child_elite_treatment_dic = {}
    for idx in sorted_indices:
        if idx in parent_reward_predict:
            child_elite_treatment_dic[idx] = parent_treatment_dic[idx]
        else:
            child_elite_treatment_dic[idx] = child_treatment_dic[idx]

    return child_elite_treatment_dic, child_elite


def roulette_wheel_selection(reward_predict, treatment_dic):

    values = np.array(list(reward_predict.values()))

    # Ensure no negative values by shifting if necessary
    if np.min(values) < 0:
        values = values - np.min(values)  # Shift values to make them non-negative
    
    total = np.sum(values)
    
    if total > 0:
        probabilities = values / total  # Normalize to create valid probabilities
    else:
        # If all UCB values are zero, use uniform probability
        probabilities = np.ones_like(values) / len(values)  # Equal probability if all UCB are zero

    # Select two parents based on the computed probabilities
    selected_indices = np.random.choice(len(values), size=2, p=probabilities, replace=False)
    return treatment_dic[selected_indices[0]], treatment_dic[selected_indices[1]]

In [25]:
# get the next experiment

def get_next_experiment(model, adjacency_matrix, K, generation_num, epoch):
    parent_treatment_dic = {i: np.random.randint(1, K+1, size=adjacency_matrix.shape[0]) for i in range(100)}
    parent_reward_predict = {}
    
    # Step 1: Calculate UCB for parent treatments
    for i in range(generation_num):
        predicted_rewards = predict_lasso_reward(parent_treatment_dic[i], model, adjacency_matrix, K)
        # UCB
        parent_reward_predict[i] = predicted_rewards

        
    # Epoch loop
    # Initialize variables
    no_progress_counter = 0  # Counter to track epochs without improvement
    best = -np.inf  # To store the best UCB value so far
    
    
    # Epoch loop
    for k in range(1, epoch + 1):
        # Get the elite children treatments and UCBs
        child_elite_treatment_dic, child_elite = get_children_elite(parent_treatment_dic, parent_reward_predict, model, adjacency_matrix, K, generation_num)
        
        # Reassign parent treatments and UCB predictions with renumbered indices
        parent_treatment_dic = {new_idx: treatment for new_idx, treatment in enumerate(child_elite_treatment_dic.values())}
        parent_reward_predict = {new_idx: sur for new_idx, sur in enumerate(child_elite.values())}
    
        # Get the best UCB in this epoch
        current_best = parent_reward_predict[0]
    
        # Print the UCB of the best treatment after each epoch
        # print(f"Epoch {k}: Best UCB = {current_best}")
    
        # Check if the best UCB has improved
        if current_best > best:
            best = current_best  # Update the best UCB
            no_progress_counter = 0  # Reset the counter
        else:
            no_progress_counter += 1  # No improvement, increment the counter
    
        # If no improvement for 10 epochs, break the loop
        if no_progress_counter >= 10:
            # print(f"No improvement for 10 consecutive epochs. Stopping at epoch {k}.")
            break

    return parent_treatment_dic[0]

# initial experiment 0: for a selected vector treatments, we get rewards

In [28]:
def exploration_then_exploitation(E):

    d = len(beta)
    sigma = 1
    lambda_value = 2 * sigma

    # Initialize empty lists to store A and reward over time
    A_time = []
    reward_time = []
    
    # Create an empty DataFrame for experimented data
    experimented_data = pd.DataFrame()

    # Get the length of adjacency_matrix_dic (number of keys)
    length = len(adjacency_matrix_dic)
    
    # Generate the error array from a normal distribution with the specified length
    mean = 0  # Define the mean
    std_dev = sigma
    
    error_array = mean + std_dev * np.random.randn(length)
    
    for t in range(E):
        # Create an array A , with length matching the adjacency matrix
        A = np.random.randint(1, K + 1, size=adjacency_matrix_dic[t].shape[0])

        Y, Y_exp, counting_treatment_df = generate_outcome(adjacency_matrix_dic[t], A, K, beta, error_array[t])
        
        # Create a DataFrame for Y
        Y_df = pd.DataFrame([Y], columns=['Y'])
        
        # Concatenate treatment and Y_df to form experimented_data (append vertically)
        experiment_data_current = pd.concat([counting_treatment_df, Y_df], axis=1)
        
        # Append to the existing DataFrame (vertically concatenating)
        experimented_data = pd.concat([experimented_data, experiment_data_current], axis=0, ignore_index=True)
        
        # Append A and total reward to the respective lists
        reward_time.append(Y_exp)

    generation_num = 100
    epoch = 100

    T = 200

    model = predict_lasso_model(experimented_data, lambda_value)
        
    # Get the next treatment vector A
    A = get_next_experiment(model, adjacency_matrix_dic[t], K, generation_num, epoch)
    
    # Generate the outcome based on A
    Y, Y_exp, counting_treatment_df = generate_outcome(adjacency_matrix_dic[t], A, K, beta, error_array[t])

    for t in range(E,T):  # Iterate for T time steps

        # Get the next treatment vector A
        A = get_next_experiment(model, adjacency_matrix_dic[t], K, generation_num, epoch)
        
        # Generate the outcome based on A
        Y, Y_exp, counting_treatment_df = generate_outcome(adjacency_matrix_dic[t], A, K, beta, error_array[t])

        
        reward_time.append(Y_exp)
    
        # Print the reward for the current time step
        print(f"Time {t} with reward: {Y_exp}")
        # print(f"Time {t} with treatment: {A}")

    # Convert reward_time to a NumPy array
    reward_time = np.array(reward_time)

    return reward_time

    

In [30]:
Y_t_exp_times = []
# Run the experiment 10 times
for times in range(10):
    print(f"Running experiment {times + 1} for ete")

    Y_t_exp = exploration_then_exploitation(130)

    # Append the results of this experiment
    Y_t_exp_times.append(Y_t_exp)
    

# Convert lists to NumPy arrays for easier computation
Y_t_exp_times = np.array(Y_t_exp_times)
# Save the results to .npy files
np.save(f'Y_t_exp_time-ete-under-interference-n20-k14-d120-s10_10times.npy', Y_t_exp_times)

Running experiment 1 for ete
Time 130 with reward: 358.1098274030349
Time 131 with reward: 346.86152688879577
Time 132 with reward: 314.12603585396755
Time 133 with reward: 301.43591070062064
Time 134 with reward: 330.0071658402734
Time 135 with reward: 257.8637714181686
Time 136 with reward: 349.5865034055351
Time 137 with reward: 279.0405562866745
Time 138 with reward: 298.6382259931066
Time 139 with reward: 295.23408199090784
Time 140 with reward: 281.20585371549777
Time 141 with reward: 264.98445514049354
Time 142 with reward: 338.7050198379074
Time 143 with reward: 296.29839856414907
Time 144 with reward: 313.73077084175793
Time 145 with reward: 234.66149972539756
Time 146 with reward: 286.07981774353584
Time 147 with reward: 330.33996110209677
Time 148 with reward: 253.92844157273166
Time 149 with reward: 304.2280669680244
Time 150 with reward: 289.92203985430217
Time 151 with reward: 301.29804498282726
Time 152 with reward: 345.0630464065839
Time 153 with reward: 316.96038484382