# For Free Rider Page

In [37]:
# base code
import numpy as np

class Agent:
    """Softmax Q-learning agent with a count-based model of the next state.

    The agent holds a ``(num_states, num_actions)`` Q-table and a vector of
    per-state counts. Each round it samples (or picks the most frequent)
    predicted state from the normalised counts and then draws an action from
    a softmax over that state's Q-values.

    Args:
        num_states: Number of discrete states (rows of the Q-table).
        num_actions: Number of discrete actions; action ``k`` corresponds to
            the ``k``-th value of ``np.linspace(0, endowment, num_actions)``.
        temperature: Softmax temperature. It is also used as the initial
            pseudo-count of every state and scales the spread of the initial
            Q-values. ``temperature <= 0`` makes action selection greedy.
        lr: Learning rate used to blend a new reward into the Q-table.
        endowment: Largest possible contribution (upper end of the action grid).
        MPCR: Per-unit return on a contribution used to seed the Q-table.
        incentive: Fractional bonus applied to ``MPCR`` in the mean (but not
            the spread) of the initial Q-values.
    """

    def __init__(self, num_states, num_actions, temperature, lr, endowment, MPCR, incentive):
        actions = np.linspace(0, endowment, num=num_actions)
        # Prior belief about each action's payoff: keep what is not
        # contributed plus the (incentivised) return on what is contributed.
        prior_means = [endowment - a + (MPCR * (1 + incentive)) * a for a in actions]
        prior_stds = [(endowment - a + (MPCR) * a) * temperature / 100 for a in actions]
        # Draw one value per (state, action) in row-major order so that seeded
        # runs consume the global RNG in a fixed, reproducible order.
        self.Q = np.array([
            [np.random.normal(mean, std) for mean, std in zip(prior_means, prior_stds)]
            for _ in range(num_states)
        ])
        self.temperature = temperature
        self.num_actions = num_actions
        self.lr = lr
        self.num_states = num_states
        # Index of the most recently chosen action; None until the first choice.
        self.decision = None
        self.state_counts = np.full(num_states, temperature)
        self.state_frequencies = self._state_distribution()

    def _state_distribution(self):
        """Return the state counts normalised to a probability vector.

        With ``temperature=0`` every initial count is zero; dividing by the
        zero total would yield NaNs that ``np.random.choice`` rejects, so a
        uniform distribution is used until a state has been observed.
        """
        total = np.sum(self.state_counts)
        if total > 0:
            return self.state_counts / total
        return np.full(self.num_states, 1.0 / max(self.num_states, 1))

    def update_state_counts(self, observed_state):
        """Record one observation of ``observed_state`` and refresh the frequencies."""
        self.state_counts[observed_state] += 1
        self.state_frequencies = self._state_distribution()

    def predict_state(self):
        """Sample a state index in proportion to the current state frequencies."""
        predicted_state = np.random.choice(self.num_states, p=self.state_frequencies)
        return predicted_state

    def softmax(self, q_values):
        """Turn Q-values into action probabilities.

        For a positive temperature this is a softmax (shifted by the maximum
        for numerical stability); otherwise all probability mass goes to the
        first maximal Q-value.
        """
        if self.temperature > 0:
            max_q = np.max(q_values)
            exp_q = np.exp((q_values - max_q) / self.temperature)
            probabilities = exp_q / np.sum(exp_q)
        else:
            probabilities = np.zeros_like(q_values)
            probabilities[np.argmax(q_values)] = 1.0
        return probabilities

    def choose_action(self, use_most_likely_state=False):
        """Pick an action for this round.

        Args:
            use_most_likely_state: When true, condition on the most frequent
                state instead of sampling a state from the frequencies.

        Returns:
            ``(action, predicted_state)`` as integer indices. The action is
            also stored in ``self.decision``.
        """
        if use_most_likely_state:
            predicted_state = np.argmax(self.state_frequencies)
        else:
            predicted_state = self.predict_state()
        q_values = self.Q[predicted_state]
        probabilities = self.softmax(q_values)
        action = np.random.choice(len(q_values), p=probabilities)
        self.decision = action
        return action, predicted_state

    def update_Q(self, state, action, reward):
        """Move ``Q[state][action]`` toward ``reward`` by a factor of ``lr``."""
        self.Q[state][action] = self.lr * reward + (1 - self.lr) * self.Q[state][action]


def discretize_total_contribution(total_contribution, num_states, N, E):
    """Map a total contribution onto a state index in ``[0, num_states - 1]``.

    The total is expressed as a fraction of the maximum possible total
    ``N * E``, scaled to ``num_states - 1`` and rounded with the built-in
    ``round``.

    Args:
        total_contribution: Sum of all contributions in the round.
        num_states: Number of discrete states.
        N: Number of agents.
        E: Endowment (maximum contribution) per agent.

    Returns:
        The integer state index, clamped to the valid range.
    """
    max_total = N * E
    if max_total <= 0:
        # Degenerate game (no agents or nothing to contribute): the only
        # meaningful level is the lowest one, and dividing would fail.
        return 0
    state = int(round((total_contribution / max_total) * (num_states - 1)))
    # Clamp on both sides: a negative index would silently wrap around when
    # used to index a numpy array.
    return max(0, min(state, num_states - 1))


class Environment:
    """Repeated public goods game played by ``N`` learning agents.

    Each round every agent picks a contribution from an evenly spaced grid
    on ``[0, E]``. The total is multiplied by ``MR * (1 + incentive)`` and
    split equally among the agents; an agent's payoff is what it kept plus
    that share.

    Args:
        N: Number of agents.
        E: Endowment available to each agent every round.
        MR: Multiplier applied to the total contribution before it is shared.
        num_rounds: Number of exploratory (softmax) rounds to play.
        temperature: Initial softmax temperature handed to every agent.
        lr: Learning rate handed to every agent.
        num_states: Number of discretised total-contribution levels.
        num_actions: Number of discrete contribution levels per agent.
        incentive: Fractional bonus on the multiplier (0 means none).

    After ``run_simulation``:
        result: Mean total contribution over the final greedy round(s).
        record: Total contribution of every round played, final round(s) last.
    """

    def __init__(self, N, E, MR, num_rounds, temperature=10, lr=0.5, num_states=11, num_actions=11, incentive=0):
        self.N = N
        self.E = E
        self.MR = MR
        self.num_rounds = num_rounds
        self.initial_temperature = temperature
        self.lr = lr
        self.num_states = num_states
        self.num_actions = num_actions
        self.incentive = incentive
        self.agents = [
            Agent(
                num_states=self.num_states,
                num_actions=self.num_actions,
                temperature=self.initial_temperature,
                lr=self.lr,
                endowment=self.E,
                MPCR=self.MR / self.N,
                incentive=self.incentive,
            )
            for _ in range(N)
        ]
        self.action_values = np.linspace(0, self.E, num=self.num_actions)
        # Populated by run_simulation(); defined here so they always exist.
        self.result = None
        self.record = []

    def _play_round(self, use_most_likely_state=False):
        """Play one round, let every agent learn from it, and return the total contribution."""
        # All agents commit to an action before any payoff is revealed.
        choices = [
            agent.choose_action(use_most_likely_state=use_most_likely_state)
            for agent in self.agents
        ]
        contributions = [self.action_values[action] for action, _ in choices]
        total_contribution = sum(contributions)

        # Proportional incentive: the bonus scales the shared return.
        payoff_per_agent = (self.MR * (1 + self.incentive) * total_contribution) / self.N
        observed_state = discretize_total_contribution(total_contribution, self.num_states, self.N, self.E)

        for agent, (action, predicted_state), contribution in zip(self.agents, choices, contributions):
            reward = self.E - contribution + payoff_per_agent
            # The reward is credited to the state the agent predicted, while
            # the state actually observed updates its state counts.
            agent.update_Q(predicted_state, action, reward)
            agent.update_state_counts(observed_state)

        return total_contribution

    def run_simulation(self):
        """Run the exploratory rounds followed by one greedy evaluation round.

        Stores the outcome in ``self.result`` and ``self.record``. Every
        agent's temperature is left at 0 afterwards.
        """
        total_contributions_over_time = [self._play_round() for _ in range(self.num_rounds)]

        # Switch every agent to greedy action selection for the evaluation.
        for agent in self.agents:
            agent.temperature = 0

        final_rounds = 1
        final_contributions = [
            self._play_round(use_most_likely_state=True) for _ in range(final_rounds)
        ]
        total_contributions_over_time.extend(final_contributions)

        self.result = np.mean(final_contributions)
        self.record = total_contributions_over_time
        

In [43]:
# Free-rider experiment: game size, endowment and pool multiplier.
N, E, MR = 10, 100, 5
num_rounds = 1000
temperature = 10
# With lr = 1 each Q update replaces the stored value with the latest reward.
lr = 1

incentive = 0.1

env = Environment(
    N=N,
    E=E,
    MR=MR,
    num_rounds=num_rounds,
    temperature=temperature,
    lr=lr,
    incentive=incentive,
)
env.run_simulation()

# Show the contribution behind each agent's most recent decision.
for agent in env.agents:
    print(env.action_values[agent.decision])

In [44]:
# Show the contribution behind each agent's most recent decision.
for agent in env.agents:
    print(env.action_values[agent.decision])

20.0
10.0
20.0
30.0
0.0
30.0
0.0
40.0
0.0
20.0


# For Transparency Page

In [45]:
import numpy as np

class Agent:
    """Softmax Q-learning agent with a count-based model of the next state.

    The agent holds a ``(num_states, num_actions)`` Q-table and a vector of
    per-state counts. Each round it samples (or picks the most frequent)
    predicted state from the normalised counts and then draws an action from
    a softmax over that state's Q-values.

    Args:
        num_states: Number of discrete states (rows of the Q-table).
        num_actions: Number of discrete actions; action ``k`` corresponds to
            the ``k``-th value of ``np.linspace(0, endowment, num_actions)``.
        temperature: Softmax temperature. It is also used as the initial
            pseudo-count of every state and scales the spread of the initial
            Q-values. ``temperature <= 0`` makes action selection greedy.
        lr: Learning rate used to blend a new reward into the Q-table.
        endowment: Largest possible contribution (upper end of the action grid).
        MPCR: Per-unit return on a contribution used to seed the Q-table.
        transparency: Accepted so callers can pass it, but not used by the
            agent itself.
    """

    def __init__(self, num_states, num_actions, temperature, lr, endowment, MPCR, transparency):
        actions = np.linspace(0, endowment, num=num_actions)
        # Prior belief about each action's payoff: keep what is not
        # contributed plus the return on what is contributed.
        prior_means = [endowment - a + (MPCR) * a for a in actions]
        prior_stds = [(endowment - a + (MPCR) * a) * temperature / 100 for a in actions]
        # Draw one value per (state, action) in row-major order so that seeded
        # runs consume the global RNG in a fixed, reproducible order.
        self.Q = np.array([
            [np.random.normal(mean, std) for mean, std in zip(prior_means, prior_stds)]
            for _ in range(num_states)
        ])
        self.temperature = temperature
        self.num_actions = num_actions
        self.lr = lr
        self.num_states = num_states
        # Index of the most recently chosen action; None until the first choice.
        self.decision = None
        self.state_counts = np.full(num_states, temperature)
        self.state_frequencies = self._state_distribution()

    def _state_distribution(self):
        """Return the state counts normalised to a probability vector.

        With ``temperature=0`` every initial count is zero; dividing by the
        zero total would yield NaNs that ``np.random.choice`` rejects, so a
        uniform distribution is used until a state has been observed.
        """
        total = np.sum(self.state_counts)
        if total > 0:
            return self.state_counts / total
        return np.full(self.num_states, 1.0 / max(self.num_states, 1))

    def update_state_counts(self, observed_state):
        """Record one observation of ``observed_state`` and refresh the frequencies."""
        self.state_counts[observed_state] += 1
        self.state_frequencies = self._state_distribution()

    def predict_state(self):
        """Sample a state index in proportion to the current state frequencies."""
        predicted_state = np.random.choice(self.num_states, p=self.state_frequencies)
        return predicted_state

    def softmax(self, q_values):
        """Turn Q-values into action probabilities.

        For a positive temperature this is a softmax (shifted by the maximum
        for numerical stability); otherwise all probability mass goes to the
        first maximal Q-value.
        """
        if self.temperature > 0:
            max_q = np.max(q_values)
            exp_q = np.exp((q_values - max_q) / self.temperature)
            probabilities = exp_q / np.sum(exp_q)
        else:
            probabilities = np.zeros_like(q_values)
            probabilities[np.argmax(q_values)] = 1.0
        return probabilities

    def choose_action(self, use_most_likely_state=False):
        """Pick an action for this round.

        Args:
            use_most_likely_state: When true, condition on the most frequent
                state instead of sampling a state from the frequencies.

        Returns:
            ``(action, predicted_state)`` as integer indices. The action is
            also stored in ``self.decision``.
        """
        if use_most_likely_state:
            predicted_state = np.argmax(self.state_frequencies)
        else:
            predicted_state = self.predict_state()
        q_values = self.Q[predicted_state]
        probabilities = self.softmax(q_values)
        action = np.random.choice(len(q_values), p=probabilities)
        self.decision = action
        return action, predicted_state

    def update_Q(self, state, action, reward):
        """Move ``Q[state][action]`` toward ``reward`` by a factor of ``lr``."""
        self.Q[state][action] = self.lr * reward + (1 - self.lr) * self.Q[state][action]


def discretize_total_contribution(total_contribution, num_states, N, E):
    """Map a total contribution onto a state index in ``[0, num_states - 1]``.

    The total is expressed as a fraction of the maximum possible total
    ``N * E``, scaled to ``num_states - 1`` and rounded with the built-in
    ``round``.

    Args:
        total_contribution: Sum of all contributions in the round.
        num_states: Number of discrete states.
        N: Number of agents.
        E: Endowment (maximum contribution) per agent.

    Returns:
        The integer state index, clamped to the valid range.
    """
    max_total = N * E
    if max_total <= 0:
        # Degenerate game (no agents or nothing to contribute): the only
        # meaningful level is the lowest one, and dividing would fail.
        return 0
    state = int(round((total_contribution / max_total) * (num_states - 1)))
    # Clamp on both sides: a negative index would silently wrap around when
    # used to index a numpy array.
    return max(0, min(state, num_states - 1))


class Environment:
    """Repeated public goods game in which only part of the pool is returned.

    Each round every agent picks a contribution from an evenly spaced grid
    on ``[0, E]``. The total is multiplied by ``MR * transparency`` and
    split equally among the agents; an agent's payoff is what it kept plus
    that share.

    Args:
        N: Number of agents.
        E: Endowment available to each agent every round.
        MR: Multiplier applied to the total contribution before it is shared.
        num_rounds: Number of exploratory (softmax) rounds to play.
        temperature: Initial softmax temperature handed to every agent.
        lr: Learning rate handed to every agent.
        num_states: Number of discretised total-contribution levels.
        num_actions: Number of discrete contribution levels per agent.
        transparency: Factor applied to the multiplied pool before it is
            shared (1 leaves the pool unchanged).

    After ``run_simulation``:
        result: Mean total contribution over the final greedy round(s).
        record: Total contribution of every round played, final round(s) last.
    """

    def __init__(self, N, E, MR, num_rounds, temperature=10, lr=0.5, num_states=11, num_actions=11, transparency=1):
        self.N = N
        self.E = E
        self.MR = MR
        self.num_rounds = num_rounds
        self.initial_temperature = temperature
        self.lr = lr
        self.num_states = num_states
        self.num_actions = num_actions
        self.transparency = transparency
        self.agents = [
            Agent(
                num_states=self.num_states,
                num_actions=self.num_actions,
                temperature=self.initial_temperature,
                lr=self.lr,
                endowment=self.E,
                MPCR=self.MR / self.N,
                transparency=self.transparency,
            )
            for _ in range(N)
        ]
        self.action_values = np.linspace(0, self.E, num=self.num_actions)
        # Populated by run_simulation(); defined here so they always exist.
        self.result = None
        self.record = []

    def _play_round(self, use_most_likely_state=False):
        """Play one round, let every agent learn from it, and return the total contribution."""
        # All agents commit to an action before any payoff is revealed.
        choices = [
            agent.choose_action(use_most_likely_state=use_most_likely_state)
            for agent in self.agents
        ]
        contributions = [self.action_values[action] for action, _ in choices]
        total_contribution = sum(contributions)

        payoff_per_agent = (self.MR * self.transparency * total_contribution) / self.N
        observed_state = discretize_total_contribution(total_contribution, self.num_states, self.N, self.E)

        for agent, (action, predicted_state), contribution in zip(self.agents, choices, contributions):
            reward = self.E - contribution + payoff_per_agent
            # The reward is credited to the state the agent predicted, while
            # the state actually observed updates its state counts.
            agent.update_Q(predicted_state, action, reward)
            agent.update_state_counts(observed_state)

        return total_contribution

    def run_simulation(self):
        """Run the exploratory rounds followed by one greedy evaluation round.

        Stores the outcome in ``self.result`` and ``self.record``. Every
        agent's temperature is left at 0 afterwards.
        """
        total_contributions_over_time = [self._play_round() for _ in range(self.num_rounds)]

        # Switch every agent to greedy action selection for the evaluation.
        for agent in self.agents:
            agent.temperature = 0

        final_rounds = 1
        final_contributions = [
            self._play_round(use_most_likely_state=True) for _ in range(final_rounds)
        ]
        total_contributions_over_time.extend(final_contributions)

        self.result = np.mean(final_contributions)
        self.record = total_contributions_over_time
 

In [46]:
# Transparency experiment: game size, endowment and pool multiplier.
N, E, MR = 10, 100, 10
num_rounds = 1000
temperature = 10
# With lr = 1 each Q update replaces the stored value with the latest reward.
lr = 1

transparency = 0.6

env = Environment(
    N=N,
    E=E,
    MR=MR,
    num_rounds=num_rounds,
    temperature=temperature,
    lr=lr,
    transparency=transparency,
)
env.run_simulation()

In [47]:
# Show the contribution behind each agent's most recent decision.
for agent in env.agents:
    print(env.action_values[agent.decision])

0.0
80.0
30.0
30.0
40.0
80.0
90.0
0.0
70.0
0.0
