In [8]:
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

In [2]:
# define partition rules
def partition(condition):
    if condition == 1:
        # 单超平面分割，4种情况
        return [(i,) for i in range(4)]
    else:
        # 两个超平面分割，6种情况
        two_planes = list(itertools.combinations(range(4), 2))
        
        # 三个超平面分割，24种情况
        three_planes = []
        for m in range(4):
            other_dims = [i for i in range(4) if i != m]
            for n1, n2 in itertools.combinations(other_dims, 2):
                three_planes.append((m, n1, n2))
                three_planes.append((m, n2, n1))
        
        return two_planes + three_planes

# generate centers
def generate_centers():
    centers = {}
    
    # 条件1的中心点
    for rule in partition(1):
        dim = rule[0]
        centers[rule] = ([0.25 if i == dim else 0.5 for i in range(4)],
                         [0.75 if i == dim else 0.5 for i in range(4)])
    
    # 条件2的中心点
    for rule in partition(2):
        if len(rule) == 2:  # 两个超平面
            dim1, dim2 = rule
            centers[rule] = tuple([0.25 + 0.5*i if d in rule else 0.5 for d in range(4)]
                                  for i in range(2) for j in range(2))
        else:  # 三个超平面
            m, n1, n2 = rule
            centers[rule] = (
                [0.25 if d == m else (0.25 if d == n1 else 0.5) for d in range(4)],
                [0.25 if d == m else (0.75 if d == n1 else 0.5) for d in range(4)],
                [0.75 if d == m else (0.25 if d == n2 else 0.5) for d in range(4)],
                [0.75 if d == m else (0.75 if d == n2 else 0.5) for d in range(4)]
            )
    
    return centers

# generate all possible centers
all_centers = generate_centers()

In [10]:
# define model parameters
class ModelParams:
    def __init__(self, k, beta):
        self.k = k
        self.beta = beta

# get conters depending on k and condition
def get_centers(k, condition):
    rules = partition(condition)
    if 1 <= k <= len(rules):
        return all_centers[rules[k-1]]
    else:
        raise ValueError(f"Invalid k for condition {condition}. Must be between 1 and {len(rules)}.")

# define likelihood
def likelihood(params, data, condition):
    k, beta = params.k, params.beta
    
    x = data[['feature1', 'feature2', 'feature3', 'feature4']].values
    c = data['choice'].values
    r = data['feedback'].values

    # calculate distances between x and centers
    centers = get_centers(k, condition)
    distances = np.array([np.linalg.norm(x - np.array(center), axis=1) for center in centers])

    # calculate choosing probablity
    probs = np.exp(-beta * distances)
    probs /= np.sum(probs, axis=0, keepdims=True)
    p_c = probs[c - 1, np.arange(len(c))]
    
    return np.where(r == 1, p_c, 1 - p_c)

# define prior
def prior(params, condition):
    max_k = len(partition(condition))
    k_prior = 1/max_k if 1 <= params.k <= max_k else 0
    beta_prior = np.exp(-params.beta) if params.beta > 0 else 0
    return k_prior * beta_prior

# define posterior
def posterior(params, data, condition):
    log_prior = np.log(prior(params, condition))
    log_likelihood = np.sum(np.log(likelihood(params, data, condition)))
    return log_prior + log_likelihood  # log posterior

# fit model
def fit_model(data):
    condition = data['condition'].iloc[0]
    max_k = len(partition(condition))
    beta_values = [1, 5, 9, 13, 17, 21, 25, 29, 33, 37]
    
    # calculate posterior probabilities for all possible combinations of k and beta values
    posteriors = {(k, beta): posterior(ModelParams(k, beta), data, condition) 
                  for k in range(1, max_k+1) 
                  for beta in beta_values}
    
    # find the k and beta values corresponding to the maximum a posteriori probability
    best_k, best_beta = max(posteriors, key=posteriors.get)
    
    return ModelParams(k=best_k, beta=best_beta), posteriors

# fit model trial by trial
def fit_model_for_steps(data):
    num_trials = len(data)
    step_results = []
    
    for step in range(1, num_trials + 1):
        trial_data = data.iloc[:step]
        fitted_params, posteriors = fit_model(trial_data)
        step_results.append({
            'k': fitted_params.k,
            'beta': fitted_params.beta,
            'posteriors': posteriors
        })
    return step_results

In [11]:
# extract data
data = pd.read_csv('Task2.csv')

# fit model by subjects
results = {}
for iSub, subject_data in data.groupby('iSub'):
    try:
        step_results = fit_model_for_steps(subject_data)
        condition = subject_data['condition'].iloc[0]
        results[iSub] = {'step_results': step_results, 'condition': condition}
    except Exception as e:
        print(f"Error fitting model for subject {iSub}: {str(e)}")
        continue

# print final fitted parameters
for iSub, subject_info in results.items():
    step_results = subject_info['step_results']
    final_result = step_results[-1]
    print(f"Subject {iSub}:")
    print(f"  Final Fitted k: {final_result['k']}")
    print(f"  Final Fitted beta: {final_result['beta']}")
    print(f"  Final log posterior: {max(final_result['posteriors'].values())}")
    print()

Subject 1:
  Final Fitted k: 1
  Final Fitted beta: 13
  Final log posterior: -30.022110374756757

Subject 4:
  Final Fitted k: 1
  Final Fitted beta: 17
  Final log posterior: -30.30927039150715

Subject 6:
  Final Fitted k: 7
  Final Fitted beta: 21
  Final log posterior: -54.15618557108044

Subject 11:
  Final Fitted k: 7
  Final Fitted beta: 21
  Final log posterior: -47.65785298729752

Subject 21:
  Final Fitted k: 7
  Final Fitted beta: 25
  Final log posterior: -93.79995426401935

Subject 26:
  Final Fitted k: 7
  Final Fitted beta: 25
  Final log posterior: -46.1757302603434

Subject 27:
  Final Fitted k: 7
  Final Fitted beta: 29
  Final log posterior: -71.57839839713279



In [15]:
# plot parameters over trials
def plot_params_over_trials(step_results, iSub):
    num_steps = len(step_results)
    k_values = [result['k'] for result in step_results]
    beta_values = [result['beta'] for result in step_results]
    
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
    
    # Plot k values
    ax1.plot(range(1, num_steps + 1), k_values, marker='o')
    ax1.set_title(f'Subject {iSub} - k value over trials')
    ax1.set_xlabel('Number of trials')
    ax1.set_ylabel('k value')
    ax1.grid(True)
    
    # Plot beta values
    ax2.plot(range(1, num_steps + 1), beta_values, marker='o')
    ax2.set_title(f'Subject {iSub} - beta value over trials')
    ax2.set_xlabel('Number of trials')
    ax2.set_ylabel('beta value')
    ax2.grid(True)
    
    plt.tight_layout()
    plt.savefig(f'params_over_trials_subject_{iSub}_.png')
    plt.close()

In [13]:
def plot_posterior_probabilities(step_results, condition, iSub):
    num_steps = len(step_results)
    max_k = max(k for result in step_results for k, _ in result['posteriors'].keys())
    beta_values = sorted(set(beta for result in step_results for _, beta in result['posteriors'].keys()))

    # Prepare data for plotting
    k_posteriors = {k: np.zeros(num_steps) for k in range(1, max_k + 1)}
    beta_posteriors = {beta: np.zeros(num_steps) for beta in beta_values}

    for step, result in enumerate(step_results):
        posteriors = result['posteriors']
        # Marginalize over beta for k, and over k for beta
        for k in range(1, max_k + 1):
            k_posteriors[k][step] = np.log(np.sum(np.exp([posteriors.get((k, beta), float('-inf')) for beta in beta_values])))
        for beta in beta_values:
            beta_posteriors[beta][step] = np.log(np.sum(np.exp([posteriors.get((k, beta), float('-inf')) for k in range(1, max_k + 1)])))

    # Normalize log probabilities
    for step in range(num_steps):
        k_max = max(k_posteriors[k][step] for k in range(1, max_k + 1))
        beta_max = max(beta_posteriors[beta][step] for beta in beta_values)
        for k in range(1, max_k + 1):
            k_posteriors[k][step] = np.exp(k_posteriors[k][step] - k_max)
        for beta in beta_values:
            beta_posteriors[beta][step] = np.exp(beta_posteriors[beta][step] - beta_max)

    # Create a figure with two subplots
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
    fig.suptitle(f'Posterior Probabilities (Subject {iSub}, Condition {condition})', fontsize=16)

    # Plot k posteriors
    for k in range(1, max_k + 1):
        if (condition == 1 and k == 1) or (condition != 1 and k == 7):
            ax1.plot(range(1, num_steps + 1), k_posteriors[k], label=f'k={k}', linewidth=3, color='red')
        else:
            ax1.plot(range(1, num_steps + 1), k_posteriors[k], label=f'k={k}')
    ax1.set_xlabel('Trial')
    ax1.set_ylabel('Posterior Probability')
    ax1.set_title('Posterior Probabilities for k')
    ax1.legend()

    # Create a custom colormap for beta
    n_betas = len(beta_values)
    colors = plt.cm.Blues(np.linspace(0.2, 1, n_betas))
    custom_cmap = LinearSegmentedColormap.from_list("custom_blues", colors)

    # Plot beta posteriors
    for i, beta in enumerate(beta_values):
        ax2.plot(range(1, num_steps + 1), beta_posteriors[beta], label=f'beta={beta}', 
                 color=custom_cmap(i / (n_betas - 1)))
    ax2.set_xlabel('Trial')
    ax2.set_ylabel('Posterior Probability')
    ax2.set_title('Posterior Probabilities for beta')
    ax2.legend()

    plt.tight_layout()
    plt.savefig(f'posteriors_subject_{iSub}.png')
    plt.close()

In [17]:
for iSub, subject_info in results.items():
    step_results = subject_info['step_results']
    condition = subject_info['condition']
    plot_params_over_trials(step_results, iSub)
    plot_posterior_probabilities(step_results, condition, iSub)

  k_posteriors[k][step] = np.log(np.sum(np.exp([posteriors.get((k, beta), float('-inf')) for beta in beta_values])))
