In [17]:
import numpy as np
from numpy import random
import pandas as pd

# Library for multi-threading
from multiprocessing import Process, current_process

# Defining Functions

In [18]:
"""
Parameters:
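_prob_low_ (float): Probability that the asset pays _payoff_low_
_prob_high_ (float): Probability that the asset pays _payoff_high_
_payoff_low_ (float): Asset payoff in the low state
_payoff_high_ (float): Asset payoff in the high state
_episodes_ (int): Number of episodes in experiment
_seed_ (int): Seed for random generation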

Result: An array with length (_episodes_) filled with either _payoff_low_/_payoff_high_
"""

def generate_asset_payoff(_prob_low_, _prob_high_, _payoff_low_, _payoff_high_, _episodes_, _seed_):
    """
    Draw the realized asset payoff for every episode.

    Parameters:
    _prob_low_ (float): Probability that the payoff equals _payoff_low_
    _prob_high_ (float): Probability that the payoff equals _payoff_high_ (must be 1 - _prob_low_)
    _payoff_low_ (float): Payoff in the low state
    _payoff_high_ (float): Payoff in the high state
    _episodes_ (int): Number of episodes in experiment
    _seed_ (int): Seed for random generation (this reseeds NumPy's global random state)

    Result: A float array with length (_episodes_) filled with either _payoff_low_/_payoff_high_

    Raises: ValueError if _prob_low_ and _prob_high_ do not sum to 1
    """

    if not np.isclose(_prob_low_ + _prob_high_, 1.0):
        raise ValueError(
            f"_prob_low_ + _prob_high_ must equal 1, got {_prob_low_} + {_prob_high_}"
        )

    # One uniform draw per episode decides the state of that episode
    random.seed(_seed_)
    uniform_draws = random.uniform(0, 1, _episodes_)

    # Classify from the untouched draws. Overwriting the draws in place and then
    # thresholding the same array again would re-classify low payoffs that happen
    # to be >= _prob_high_ (e.g. _payoff_low_ = 1) as high payoffs.
    asset_payoff = np.where(uniform_draws < _prob_low_, _payoff_low_, _payoff_high_)

    return asset_payoff.astype(float)

In [19]:
"""
Parameters:
_asset_payoff_ (array): Array of realized asset payoff values
_private_valuation_mean_ (float): Mean of private valuation of traders
_private_valuation_sd_ (float): Standard deviation of private valuation of traders
_seed_ (int): Seed for random generation

Result: An array with the same length as the (_asset_payoff_) parameter, with trader's private valuation
"""

def generate_trader_valuation(_asset_payoff_, _private_valuation_mean_, _private_valuation_sd_, _seed_):
    """
    Build each trader's valuation as the realized payoff plus a normally distributed private component.

    Parameters:
    _asset_payoff_ (array): Realized asset payoff per episode
    _private_valuation_mean_ (float): Mean of the private component
    _private_valuation_sd_ (float): Standard deviation of the private component
    _seed_ (int): Seed for random generation (this reseeds NumPy's global random state)

    Result: An array with one valuation per entry of _asset_payoff_
    """

    n_traders = len(_asset_payoff_)

    # Private component of the valuation, one draw per episode
    random.seed(_seed_)
    private_component = random.normal(
        loc=_private_valuation_mean_,
        scale=_private_valuation_sd_,
        size=n_traders,
    )

    return _asset_payoff_ + private_component

In [20]:
"""
Parameters:
_beta_ (float): Decay rate of the exploration probability exp(-_beta_*t)
_episodes_ (int): Number of episodes in experiment
_seed_ (int): Seed for random generation

Result: An array of length (_episodes_) filled with "explore"/"exploit"
"""

def generate_dealer_action(_beta_, _episodes_, _seed_):
    """
    Draw the dealer's "explore"/"exploit" decision for every episode.

    At episode t (starting from t = 0) the dealer explores with probability exp(-_beta_*t).

    Parameters:
    _beta_ (float): Decay rate of the exploration probability
    _episodes_ (int): Number of episodes in experiment (0 yields an empty array)
    _seed_ (int): Seed for random generation (this reseeds NumPy's global random state)

    Result: A string array of length (_episodes_) filled with "explore"/"exploit"
    """

    # Exploration probability per episode. np.exp broadcasts over the episode index directly;
    # np.vectorize would loop in Python and raises on zero-length input.
    explore_prob = np.exp(-_beta_ * np.arange(0, _episodes_, 1))

    random.seed(_seed_)
    uniform_draws = random.uniform(0, 1, _episodes_)

    # Explore whenever the draw falls at or below the exploration probability
    return np.where(explore_prob >= uniform_draws, "explore", "exploit")

In [21]:
"""
Parameters:
_lower_q_ (int): Lower limit of the uniform distribution to select initial values from
_upper_q_ (int): Upper limit of the uniform distribution to select initial values from
_no_possible_prices_ (int): Number of possible ask prices the dealer can choose from
_no_dealers_ (int): Number of dealers in the environment
_seed_ (int): Seed for random generation

Result: Matrix of dimensions (_no_possible_prices_ x _no_dealers_)
"""

def generate_q_matrix(_lower_q_, _upper_q_, _no_possible_prices_, _no_dealers_, _seed_):
    """
    Create the initial Q-matrix: one row per possible ask price, one column per dealer.

    Parameters:
    _lower_q_ (float): Lower limit of the uniform distribution the initial values are drawn from
    _upper_q_ (float): Upper limit of the uniform distribution the initial values are drawn from
    _no_possible_prices_ (int): Number of possible ask prices (rows)
    _no_dealers_ (int): Number of dealers (columns, named "dealer_1", "dealer_2", ...)
    _seed_ (int): Seed for random generation (this reseeds NumPy's global random state)

    Result: DataFrame of dimensions (_no_possible_prices_ x _no_dealers_)
    """

    # Column labels are 1-based: dealer_1 ... dealer_N
    dealer_labels = [f"dealer_{dealer_no}" for dealer_no in range(1, _no_dealers_ + 1)]

    # Initial expected payoff of every dealer for every ask price
    random.seed(_seed_)
    initial_values = np.random.uniform(
        low=_lower_q_,
        high=_upper_q_,
        size=(_no_possible_prices_, _no_dealers_),
    )

    return pd.DataFrame(initial_values, columns=dealer_labels)

# Testing Functions

In [22]:
# --- Asset: pays payoff_low or payoff_high with the given probabilities ---
payoff_low, payoff_high = 0, 4
prob_low, prob_high = 0.5, 0.5
expected_payoff = prob_high*payoff_high + prob_low*payoff_low

# --- Dealer: number of dealers and the grid of ask prices they can quote ---
no_of_dealer = 1
lowest_ask_price, highest_ask_price = 1, 15
possible_ask_price = np.arange(lowest_ask_price, highest_ask_price + 1)

# --- Trader: distribution of the private component of the valuation ---
private_valuation_mean = 0
private_valuation_sd = 0.5

# --- Learning: learning rate, exploration decay, and bounds of the initial Q-values ---
alpha = 0.01
beta = 0.0008
lower_q, upper_q = 3, 6

# --- Size of the study: K experiments, at most T episodes each ---
K = 100
T = 200_000

We first try the "generate_asset_payoff" function with the parameters above. It should return an array of length T (one entry per episode) whose values are 0 or 4, each with probability 1/2.

In [23]:
# One payoff draw per episode, seed 1
generate_asset_payoff(
    prob_low, prob_high, payoff_low, payoff_high, T, 1
)

array([0., 4., 0., ..., 0., 4., 4.])

The "generate_trader_valuation" function returns an array of the informed traders' valuations of the asset: each trader's valuation is the realized asset payoff plus a private component drawn from a normal distribution with a user-specified mean and standard deviation.

In [24]:
# Valuations built on top of the seed-1 payoff draws
generate_trader_valuation(
    generate_asset_payoff(prob_low, prob_high, payoff_low, payoff_high, T, 1),
    private_valuation_mean,
    private_valuation_sd,
    1,
)

array([ 0.81217268,  3.69412179, -0.26408588, ...,  0.41681861,
        4.00186687,  4.38609922])

With the "generate_dealer_action" function, we generate an array of either "explore" or "exploit", with the probability of "explore" decreasing along the array. The probability of "explore" at episode t (starting from t = 0) is exp(-beta*t). With beta = 0.0008 and 200,000 episodes, the expected number of "explore" actions is 1/(1 - exp(-beta)) ≈ 1250.5.

In [25]:
# Count how many episodes are "explore" versus "exploit"
np.unique(
    generate_dealer_action(beta, T, 1),
    return_counts=True,
)

(array(['exploit', 'explore'], dtype='<U21'), array([198777,   1223]))

The "generate_q_matrix" function returns a DataFrame with one row per possible ask price and one column per dealer, with every entry sampled from the uniform distribution U(lower_q, upper_q). With a single dealer this is a single column whose length is the number of ask prices the dealer can choose from. This is the initial Q-matrix of the dealer.

In [26]:
# Initial Q-matrix for the single dealer, seed 1
generate_q_matrix(
    lower_q, upper_q, len(possible_ask_price), no_of_dealer, 1
)

Unnamed: 0,dealer_1
0,4.251066
1,5.160973
2,3.000343
3,3.906998
4,3.440268
5,3.277016
6,3.558781
7,4.036682
8,4.190302
9,4.61645


# Experiment Functions

Here, we will write the functions for a single monopoly experiment.

In [27]:
"""
Make sure to have all variables predefined before running the "monopoly_experiment" function

Parameter:
payoff_high (int): Value of asset when payoff is high
prob_high (float): Probability the value of asset is high
payoff_low (int): Value of asset when payoff is low
prob_low (float): Probability the value of asset is low
lowest_ask_price (int): Lowest ask price dealer can offer
highest_ask_price (int): Highest ask price dealer can offer
private_valuation_mean (float): Mean of private valuation of traders
private_valuation_sd (float): Standard deviation of private valuation of traders
alpha (float): Parameter determines how fast dealer learns from recent trades (higher means learn faster)
beta (float): Parameter determines how often dealer chooses "explore" action (higher means less often)
lower_q (int): Lower bound of initial Q_matrix
upper_q (int): Upper bound of initial Q_matrix
T (int): Maximum number of episode in each experiment
k (int): Random seed for the experiment

Results:
1) An array of historical dealer price
2) Summary of comparative statistics (trading volume, quoted spread, realized spread)
"""

def monopoly_experiment(payoff_high, prob_high, payoff_low, prob_low, # Asset parameters
                        lowest_ask_price, highest_ask_price, # Dealer parameter
                        private_valuation_mean, private_valuation_sd, # Trader parameters
                        alpha, beta, lower_q, upper_q, T, # Learning parameters
                        k, # Random seed for experiment
                        convergence_window=10000 # Stopping rule
                        ):
    """
    Run one Q-learning experiment with a single dealer.

    In every episode the dealer quotes an ask price (a random one when exploring, the one with the
    highest Q-value when exploiting), the trader buys if their valuation is at least the ask price,
    and the dealer's Q-value of the quoted price is updated with the realized profit (0 if no trade).

    Parameters:
    payoff_high, prob_high, payoff_low, prob_low: Asset payoffs and their probabilities
    lowest_ask_price (int): Lowest ask price dealer can offer
    highest_ask_price (int): Highest ask price dealer can offer
    private_valuation_mean (float): Mean of private valuation of traders
    private_valuation_sd (float): Standard deviation of private valuation of traders
    alpha (float): Learning rate of the Q-value update
    beta (float): Decay rate of the exploration probability
    lower_q, upper_q: Bounds of the uniform distribution of the initial Q-values
    T (int): Maximum number of episodes
    k (int): Random seed for the experiment
    convergence_window (int): Stop once the greedy price has been identical for this many
        consecutive exploit episodes (and more than this many greedy prices were recorded)

    Results:
    1) Float array of the greedy price of every exploit episode
    2) Mean trading volume, mean quoted spread, mean realized spread
       (a mean is nan when nothing was recorded for it, e.g. no trade ever occurred)
    """

    # Grid of ask prices; row i of the Q-values belongs to possible_ask_price[i]
    possible_ask_price = np.arange(lowest_ask_price, highest_ask_price+1, 1)

    # Initiate experiment with initial variables (Asset Payoff, Trader Valuation, Dealer Action, Q-values)
    # NOTE(review): generate_asset_payoff and generate_dealer_action both seed with k and then draw
    # uniform(0, 1, T), so they work from identical draws and exploration is correlated with the
    # realized payoff. Left unchanged to keep results reproducible; consider separate seeds.
    asset_payoff = generate_asset_payoff(prob_low, prob_high, payoff_low, payoff_high, T, k)
    trader_valuation = generate_trader_valuation(asset_payoff, private_valuation_mean, private_valuation_sd, k)
    dealer_action = generate_dealer_action(beta, T, k)
    # Plain float array copy of the single Q-matrix column: scalar reads/writes are much cheaper than .iloc
    q_values = np.array(generate_q_matrix(lower_q, upper_q, len(possible_ask_price), 1, k).iloc[:, 0], dtype=float)

    # payoff_cumsum[t-1]/t is the mean payoff of episodes 0..t-1, without re-summing every episode
    payoff_cumsum = np.cumsum(asset_payoff)

    # Histories are collected in lists (appending to arrays copies the whole array every time)
    historical_greedy_price = []
    historical_trading_volume = []
    historical_quoted_spread = []
    historical_realized_spread = []
    # Number of consecutive identical greedy prices at the end of historical_greedy_price
    greedy_streak = 0

    # Set random seed for ask_price randomisation during "explore", so that different experiments explore different prices
    random.seed(k)

    # Loop over each episode
    for t in range(T):

        if dealer_action[t] == "explore":
            # Explore: quote a random price from the grid
            ask_price = random.choice(possible_ask_price)
        else:
            # Exploit: quote the price with the highest Q-value
            ask_price = possible_ask_price[np.argmax(q_values)]

            if historical_greedy_price and historical_greedy_price[-1] == ask_price:
                greedy_streak += 1
            else:
                greedy_streak = 1
            historical_greedy_price.append(ask_price)

            # Stop if the greedy price didn't change for convergence_window exploit episodes
            if len(historical_greedy_price) > convergence_window and greedy_streak >= convergence_window:
                break

        # Quoted spread relative to the average payoff of all previous episodes
        if t >= 1:
            historical_quoted_spread.append(ask_price - payoff_cumsum[t-1]/t)

        # Q-value row of the quoted price (valid for any lowest_ask_price, not only 1)
        price_row = int(ask_price - lowest_ask_price)

        if trader_valuation[t] >= ask_price:
            # Trade: move the Q-value towards the realized profit
            profit = ask_price - asset_payoff[t]
            q_values[price_row] = alpha*profit + (1-alpha)*q_values[price_row]
            historical_trading_volume.append(1)
            historical_realized_spread.append(profit)
        else:
            # No trade: profit is 0, so the Q-value only decays
            q_values[price_row] = (1-alpha)*q_values[price_row]
            historical_trading_volume.append(0)

    # For tracking of progress, prints every 100 iteration of the experiment
    if k%100 == 0: print(f" Processor{current_process().name} is processing k={k}")

    # Return historical_greedy_price and comparative statistics
    return (np.array(historical_greedy_price, dtype=float),
            np.mean(historical_trading_volume),
            np.mean(historical_quoted_spread),
            np.mean(historical_realized_spread))


Here, we will write the functions for a duopoly experiment.

In [28]:
"""
Make sure to have all variables predefined before running the "duopoly_experiment" function

Parameter:
payoff_high (int): Value of asset when payoff is high
prob_high (float): Probability the value of asset is high
payoff_low (int): Value of asset when payoff is low
prob_low (float): Probability the value of asset is low
lowest_ask_price (int): Lowest ask price dealer can offer
highest_ask_price (int): Highest ask price dealer can offer
private_valuation_mean (float): Mean of private valuation of traders
private_valuation_sd (float): Standard deviation of private valuation of traders
alpha (float): Parameter determines how fast dealer learns from recent trades (higher means learn faster)
beta (float): Parameter determines how often dealer chooses "explore" action (higher means less often)
lower_q (int): Lower bound of initial Q_matrix
upper_q (int): Upper bound of initial Q_matrix
T (int): Maximum number of episode in each experiment
k (int): Random seed for the experiment

Results:
1) An array of historical dealer price
2) Summary of comparative statistics (trading volume, quoted spread, realized spread)
"""

def duopoly_experiment(payoff_high, prob_high, payoff_low, prob_low, # Asset parameters
                       lowest_ask_price, highest_ask_price, # Dealer parameter
                       private_valuation_mean, private_valuation_sd, # Trader parameters
                       alpha, beta, lower_q, upper_q, T, # Learning parameters
                       k, # Random seed for experiment
                       convergence_window=10000, # Stopping rule
                       no_experiments=None # Seed offset of dealer 2
                       ):
    """
    Run one Q-learning experiment with two competing dealers.

    In every episode both dealers quote an ask price. The trader buys at the lower price if their
    valuation is at least that price; on a tie the profit is split equally. Each dealer's Q-value of
    its quoted price is updated with its own profit (0 if it did not trade).

    Parameters:
    payoff_high, prob_high, payoff_low, prob_low: Asset payoffs and their probabilities
    lowest_ask_price (int): Lowest ask price dealer can offer
    highest_ask_price (int): Highest ask price dealer can offer
    private_valuation_mean (float): Mean of private valuation of traders
    private_valuation_sd (float): Standard deviation of private valuation of traders
    alpha (float): Learning rate of the Q-value update
    beta (float): Decay rate of the exploration probability
    lower_q, upper_q: Bounds of the uniform distribution of the initial Q-values
    T (int): Maximum number of episodes
    k (int): Random seed for the experiment
    convergence_window (int): Stop once both greedy prices have been identical for this many
        consecutive exploit episodes (and more than this many greedy prices were recorded for each)
    no_experiments (int, optional): Offset added to k for dealer 2's action seed. When omitted,
        the module-level K is used, so K must be defined before calling

    Results:
    1) Float arrays of the greedy prices of dealer 1 and of dealer 2
    2) Mean trading volume, mean quoted spread, mean realized spread
       (a mean is nan when nothing was recorded for it, e.g. no trade ever occurred)
    """

    # Grid of ask prices; row i of the Q-values belongs to possible_ask_price[i]
    possible_ask_price = np.arange(lowest_ask_price, highest_ask_price+1, 1)

    # Dealer 2's actions are seeded with offset+k so they differ from dealer 1's in every experiment
    seed_offset = K if no_experiments is None else no_experiments

    # Initiate experiment with initial variables (Asset Payoff, Trader Valuation, Dealer Action, Q-values)
    # NOTE(review): generate_asset_payoff and dealer 1's generate_dealer_action both seed with k and
    # then draw uniform(0, 1, T), so dealer 1's exploration is correlated with the realized payoff.
    # Left unchanged to keep results reproducible; consider separate seeds.
    asset_payoff = generate_asset_payoff(prob_low, prob_high, payoff_low, payoff_high, T, k)
    trader_valuation = generate_trader_valuation(asset_payoff, private_valuation_mean, private_valuation_sd, k)
    dealer_actions = (generate_dealer_action(beta, T, k),
                      generate_dealer_action(beta, T, seed_offset+k))
    # Plain float array copy (prices x 2 dealers): scalar reads/writes are much cheaper than .iloc
    q_values = np.array(generate_q_matrix(lower_q, upper_q, len(possible_ask_price), 2, k), dtype=float)

    # payoff_cumsum[t-1]/t is the mean payoff of episodes 0..t-1, without re-summing every episode
    payoff_cumsum = np.cumsum(asset_payoff)

    # Histories are collected in lists (appending to arrays copies the whole array every time)
    historical_greedy_price = ([], [])
    historical_trading_volume = []
    historical_quoted_spread = []
    historical_realized_spread = []
    # Per dealer: number of consecutive identical greedy prices at the end of its history
    greedy_streak = [0, 0]

    def greedy_prices_converged():
        # Both dealers kept the same greedy price for the whole window
        return all(len(historical_greedy_price[d]) > convergence_window
                   and greedy_streak[d] >= convergence_window
                   for d in range(2))

    # Set random seed for ask_price randomisation during "explore", so that different experiments explore different prices
    random.seed(k)

    # Loop over each episode
    for t in range(T):

        # Dealer 1 quotes first, then dealer 2; convergence is checked right after each greedy quote
        ask_prices = [None, None]
        converged = False
        for d in range(2):
            if dealer_actions[d][t] == "explore":
                # Explore: quote a random price from the grid
                ask_prices[d] = random.choice(possible_ask_price)
            else:
                # Exploit: quote the price with the highest Q-value of this dealer
                greedy_price = possible_ask_price[np.argmax(q_values[:, d])]
                ask_prices[d] = greedy_price

                history = historical_greedy_price[d]
                greedy_streak[d] = greedy_streak[d] + 1 if history and history[-1] == greedy_price else 1
                history.append(greedy_price)

                if greedy_prices_converged():
                    converged = True
                    break
        if converged:
            break

        # The trader only considers the best (lowest) quote
        lower_ask_price = min(ask_prices)

        # Quoted spread relative to the average payoff of all previous episodes
        if t >= 1:
            historical_quoted_spread.append(lower_ask_price - payoff_cumsum[t-1]/t)

        # Q-value row of each dealer's quoted price (valid for any lowest_ask_price, not only 1)
        price_rows = [int(price - lowest_ask_price) for price in ask_prices]

        if trader_valuation[t] >= lower_ask_price:
            profit = lower_ask_price - asset_payoff[t]

            if ask_prices[0] == ask_prices[1]:
                # Case 1) Same quote: the trade, and hence the profit, is split between both dealers
                for d in range(2):
                    q_values[price_rows[d], d] = alpha*profit/2 + (1-alpha)*q_values[price_rows[d], d]
            else:
                # Case 2) Only the dealer with the lower quote trades; the other one earns 0
                winner = int(np.argmin(ask_prices))
                loser = 1 - winner
                q_values[price_rows[winner], winner] = alpha*profit + (1-alpha)*q_values[price_rows[winner], winner]
                q_values[price_rows[loser], loser] = (1-alpha)*q_values[price_rows[loser], loser]

            historical_trading_volume.append(1)
            historical_realized_spread.append(profit)
        else:
            # Case 3) No trade: both dealers earn 0, so their Q-values only decay
            for d in range(2):
                q_values[price_rows[d], d] = (1-alpha)*q_values[price_rows[d], d]
            historical_trading_volume.append(0)

    # For tracking of progress, prints every 100 iteration of the experiment
    if k%100 == 0: print(f" Processor{current_process().name} is processing k={k}")

    # Return historical_greedy_price and comparative statistics
    return (np.array(historical_greedy_price[0], dtype=float),
            np.array(historical_greedy_price[1], dtype=float),
            np.mean(historical_trading_volume),
            np.mean(historical_quoted_spread),
            np.mean(historical_realized_spread))

Here, we will write the functions for a multi-agent experiment.

In [29]:
"""
Make sure to have all variables predefined before running the "multiagent_experiment" function

Parameter:
payoff_high (int): Value of asset when payoff is high
prob_high (float): Probability the value of asset is high
payoff_low (int): Value of asset when payoff is low
prob_low (float): Probability the value of asset is low
no_dealers (int): Number of dealer in the experiment
lowest_ask_price (int): Lowest ask price dealer can offer
highest_ask_price (int): Highest ask price dealer can offer
private_valuation_mean (float): Mean of private valuation of traders
private_valuation_sd (float): Standard deviation of private valuation of traders
alpha (float): Parameter determines how fast dealer learns from recent trades (higher means learn faster)
beta (float): Parameter determines how often dealer chooses "explore" action (higher means less often)
lower_q (int): Lower bound of initial Q_matrix
upper_q (int): Upper bound of initial Q_matrix
T (int): Maximum number of episode in each experiment
k (int): Random seed for the experiment

Results:
1) An array of historical dealer price
2) Summary of comparative statistics (trading volume, quoted spread, realized spread)
"""

def multiagent_experiment(payoff_high, prob_high, payoff_low, prob_low, # Asset parameters
                          no_dealers, lowest_ask_price, highest_ask_price, # Dealer parameter
                          private_valuation_mean, private_valuation_sd, # Trader parameters
                          alpha, beta, lower_q, upper_q, T, # Learning parameters
                          k, # Random seed for experiment
                          convergence_window=10000, # Stopping rule
                          no_experiments=None # Seed spacing between dealers
                          ):
    """
    Run one Q-learning experiment with no_dealers competing dealers.

    In every episode all dealers quote an ask price. The trader buys at the lowest price if their
    valuation is at least that price; the profit is split equally between all dealers quoting it.
    Each dealer's Q-value of its quoted price is updated with its own profit (0 if it did not trade).

    Parameters:
    payoff_high, prob_high, payoff_low, prob_low: Asset payoffs and their probabilities
    no_dealers (int): Number of dealers in the experiment
    lowest_ask_price (int): Lowest ask price dealer can offer
    highest_ask_price (int): Highest ask price dealer can offer
    private_valuation_mean (float): Mean of private valuation of traders
    private_valuation_sd (float): Standard deviation of private valuation of traders
    alpha (float): Learning rate of the Q-value update
    beta (float): Decay rate of the exploration probability
    lower_q, upper_q: Bounds of the uniform distribution of the initial Q-values
    T (int): Maximum number of episodes
    k (int): Random seed for the experiment
    convergence_window (int): Stop once every dealer's greedy price has been identical for this many
        consecutive exploit episodes (and more than this many greedy prices were recorded for each)
    no_experiments (int, optional): Spacing between the action seeds of different dealers. When
        omitted, the module-level K is used, so K must be defined before calling

    Results:
    1) List with one float array of greedy prices per dealer
    2) Mean trading volume, mean quoted spread, mean realized spread
       (a mean is nan when nothing was recorded for it, e.g. no trade ever occurred)
    """

    # Grid of ask prices; row i of the Q-values belongs to possible_ask_price[i]
    possible_ask_price = np.arange(lowest_ask_price, highest_ask_price+1, 1)

    seed_offset = K if no_experiments is None else no_experiments

    # Initiate experiment with initial variables (Asset Payoff, Trader Valuation, Dealer Action, Q-values)
    asset_payoff = generate_asset_payoff(prob_low, prob_high, payoff_low, payoff_high, T, k)
    trader_valuation = generate_trader_valuation(asset_payoff, private_valuation_mean, private_valuation_sd, k)
    # Dealer n is seeded with k + n*offset so that the explore/exploit schedule differs between
    # dealers AND between experiments (seeding with 1 + n*offset ignored k, so every experiment
    # replayed the same schedules)
    dealer_action = [generate_dealer_action(beta, T, k + n*seed_offset) for n in range(1, no_dealers+1)]
    # Plain float array copy (prices x dealers): scalar reads/writes are much cheaper than .iloc
    q_values = np.array(generate_q_matrix(lower_q, upper_q, len(possible_ask_price), no_dealers, k), dtype=float)

    # payoff_cumsum[t-1]/t is the mean payoff of episodes 0..t-1, without re-summing every episode
    payoff_cumsum = np.cumsum(asset_payoff)

    # Histories are collected in lists (appending to arrays copies the whole array every time)
    historical_greedy_price = [[] for _ in range(no_dealers)]
    historical_trading_volume = []
    historical_quoted_spread = []
    historical_realized_spread = []
    # Per dealer: number of consecutive identical greedy prices at the end of its history
    greedy_streak = [0] * no_dealers

    # Set random seed for ask_price randomisation during "explore", so that different experiments explore different prices
    random.seed(k)

    # Loop over each episode
    for t in range(T):

        # --- Getting ask prices ---
        ask_prices = []
        for n in range(no_dealers):
            if dealer_action[n][t] == "explore":
                # Explore: quote a random price from the grid
                ask_prices.append(random.choice(possible_ask_price))
            else:
                # Exploit: quote the price with the highest Q-value of dealer n
                greedy_price = possible_ask_price[np.argmax(q_values[:, n])]
                ask_prices.append(greedy_price)

                history = historical_greedy_price[n]
                greedy_streak[n] = greedy_streak[n] + 1 if history and history[-1] == greedy_price else 1
                history.append(greedy_price)

        # Stop if the greedy price of every dealer didn't change for convergence_window exploit episodes
        if all(len(history) > convergence_window for history in historical_greedy_price) and \
           all(streak >= convergence_window for streak in greedy_streak):
            break

        # --- Saving comparative stats ---
        # The trader only considers the best (lowest) quote
        lower_ask_price = min(ask_prices)
        trade_occurs = trader_valuation[t] >= lower_ask_price

        # Quoted spread relative to the average payoff of all previous episodes
        if t >= 1:
            historical_quoted_spread.append(lower_ask_price - payoff_cumsum[t-1]/t)

        if trade_occurs:
            historical_trading_volume.append(1)
            historical_realized_spread.append(lower_ask_price - asset_payoff[t])
        else:
            historical_trading_volume.append(0)

        # --- Updating Q-values ---
        # All dealers quoting the lowest price share the trade equally
        num_of_dealer_to_trade = ask_prices.count(lower_ask_price)
        for n, quoted_price in enumerate(ask_prices):
            # Q-value row of the quoted price (valid for any lowest_ask_price, not only 1)
            price_row = int(quoted_price - lowest_ask_price)

            if trade_occurs and quoted_price == lower_ask_price:
                # Case 1) Dealer n gets (a share of) the trade
                q_values[price_row, n] = alpha*(lower_ask_price-asset_payoff[t])/num_of_dealer_to_trade + (1-alpha)*q_values[price_row, n]
            else:
                # Case 2) Dealer n does not get the trade: profit is 0, so the Q-value only decays
                q_values[price_row, n] = (1-alpha)*q_values[price_row, n]

    # For tracking of progress, prints every 10th experiment
    if k%10 == 0: print(f" Processor{current_process().name} is processing k={k}")

    # Return historical_greedy_price and comparative statistics
    return ([np.array(history, dtype=float) for history in historical_greedy_price],
            np.mean(historical_trading_volume),
            np.mean(historical_quoted_spread),
            np.mean(historical_realized_spread))

