In [174]:
import numpy as np
import matplotlib.pyplot as plt

# Display arrays with 3 decimals and without scientific notation for small values.
np.set_printoptions(suppress=True, precision=3)

In [205]:
# vanilla RSA adapted from seminar's RSA model
## My changes:
## Changed constructor to include truth_table
## Removed L0_truth_table() (now implemented in constructor)
## Allowed for different objects and messages as well as different numbers of each
## Allowed for different truth tables

## note to self: barebones for now -- docstrings, more exceptions would be nice

class RSAModel():
    """Vanilla RSA model over a finite set of objects and messages.

    The model exposes three layers, each returned as a row-normalized matrix:

    * ``L0()`` -- literal listener, shape ``(n_mes, n_obj)``
    * ``S1()`` -- speaker, shape ``(n_obj, n_mes)``
    * ``L1()`` -- pragmatic listener, shape ``(n_mes, n_obj)``

    Parameters
    ----------
    objects : sequence
        The objects; only its length and order are used.
    messages : sequence
        The messages; only its length and order are used.
    truth_table : array-like, shape ``(len(messages), len(objects))``
        Literal meaning of each message for each object.
    alpha : float
        Multiplier applied to the utility before exponentiation in ``S1``.
    prior_o : sequence of float, length ``len(objects)``
        Prior over objects.
    cost_function : callable
        Called with no arguments; must return one cost per message.

    Raises
    ------
    ValueError
        If ``truth_table`` does not have shape ``(len(messages), len(objects))``.
    """

    def __init__(self, objects, messages, truth_table, alpha, prior_o, cost_function):
        if np.shape(truth_table) != (len(messages), len(objects)):
            # Message text (including its embedded line break) is kept verbatim.
            raise ValueError("""Truth matrix must be m x n, 
          where m is number of messages and n is the number of objects""")
        self.objects = objects
        self.messages = messages
        self.truth_table = truth_table
        self.alpha = alpha
        self.prior_o = prior_o
        self.cost_function = cost_function

        # the following are for convenience
        self.n_obj = len(self.objects)
        self.n_mes = len(self.messages)

    def normalize(self, arr):
        """Divide every row of the 2-D array ``arr`` by that row's sum.

        A row that sums to zero is divided by zero; no guard is applied here.
        """
        return arr / arr.sum(axis=1)[:, np.newaxis]

    def L0(self):
        """Return the literal listener matrix, shape ``(n_mes, n_obj)``.

        Each row is the truth-table row of a message weighted by the object
        prior and normalized over objects.
        """
        # compute [[m]](o): literal meaning
        literal_meaning = self.truth_table

        # P(o) -- create a matrix of priors by repeating the priors array for each row
        prior_o = np.tile(np.array(self.prior_o), (self.n_mes, 1))

        # multiply the two L0 terms together
        unnorm = literal_meaning * prior_o

        # normalize to obtain a probability distribution
        return self.normalize(unnorm)

    def S1(self):
        """Return the speaker matrix, shape ``(n_obj, n_mes)``.

        Each row is proportional to ``exp(alpha * (log L0(o|m) - cost(m)))``
        and normalized over messages.

        Raises
        ------
        ValueError
            If ``cost_function()`` does not return exactly one cost per message.
        """
        # utility without costs
        epsilon = 1e-10 # to prevent log(0)
        raw_utility = np.log(self.L0().T + epsilon)

        # Cost(m) -- one row of message costs per object. Tiling by n_obj
        # (instead of a hard-coded 2) keeps this valid for any number of objects.
        costs = np.asarray(self.cost_function()).reshape(-1)
        if costs.shape[0] != self.n_mes:
            raise ValueError("cost_function must return one cost per message")
        cost_matrix = np.tile(costs, (self.n_obj, 1))

        # compute utility by subtracting costs from informativity
        utility = np.subtract(raw_utility, cost_matrix)

        # compute S1(m|o) by taking the exponent of utility times the temperature parameter alpha
        unnorm = np.exp(self.alpha * utility)

        # normalize to obtain a probability distribution
        return self.normalize(unnorm)

    def L1(self):
        """Return the pragmatic listener matrix, shape ``(n_mes, n_obj)``.

        Each row is the transposed speaker matrix weighted by the object prior
        and normalized over objects.
        """
        # S1(m|o)
        S1 = self.S1()

        # P(o) -- create a matrix of priors by repeating the priors array for each row
        prior_o = np.tile(np.array(self.prior_o), (self.n_mes, 1))

        # multiply S1(m|o) (transposed, since we need a distribution over objects,
        # not over messages) and P(o) together
        unnorm = S1.T * prior_o

        # normalize to obtain a probability distribution
        return self.normalize(unnorm)

# Vanilla model
### In this example, we create a model using a zero cost function (i.e. all utterances have zero cost), boolean truth matrices (i.e. unweighted matrices) and uniform priors over objects

In [206]:
# Example of vanilla model
# Objects the speaker may refer to and the messages available to do so.
objects = ["greyhound", "dalmatian"]
messages = ["dog", "greyhound", "dalmatian"]
# Written one row per object, then transposed so that rows index messages
# and columns index objects.
truth_table = np.transpose(np.array([[1, 1, 0], [1, 0, 1]]))
# Uniform prior over the two objects.
prior_o = [1 / 2, 1 / 2]
# Multiplier applied to the speaker's utility.
alpha = 0.8

def cost_function():
    """Return the cost of each of the three messages (currently all zero)."""
    # TODO: placeholder -- the actual cost scheme still needs to be figured out.
    return [0] * 3

# Build the vanilla model from the example parameters defined in this cell.
vanilla = RSAModel(
    objects=objects,
    messages=messages,
    truth_table=truth_table,
    alpha=alpha,
    prior_o=prior_o,
    cost_function=cost_function,
)

In [207]:
# vanilla L0 (literal listener): one row per message, one column per object;
# each row is normalized over objects.
vanilla.L0()

array([[0.5, 0.5],
       [1. , 0. ],
       [0. , 1. ]])

In [208]:
# vanilla S1 (speaker): one row per object, one column per message;
# each row is normalized over messages.
# NOTE(review): the saved output of this cell also shows two printed intermediate
# matrices, but S1 as defined in this notebook prints nothing -- the output looks
# stale; re-run the cell to refresh it.
vanilla.S1()

[[ -0.693   0.    -23.026]
 [ -0.693 -23.026   0.   ]]
[[0.574 1.    0.   ]
 [0.574 0.    1.   ]]


array([[0.365, 0.635, 0.   ],
       [0.365, 0.   , 0.635]])

In [191]:
# vanilla L1 (pragmatic listener): one row per message, one column per object;
# each row is normalized over objects.
vanilla.L1()

array([[0.5, 0.5],
       [1. , 0. ],
       [0. , 1. ]])

# Option 1: Modify S1 by adding a prior probability of utterances
### Here, we modify the S1 equation by adding a prior probability over messages $P(m)$, resulting in the following equation: <br>$$ S_{1}(m \mid o) \propto \exp(\alpha \cdot U(m;o)) \, P(m)$$ <br> This is a flexible tool, but in the context of this project its main use is that we can choose a different prior over messages for native and non-native speakers, i.e. $P(m \mid l)$, where $l = 1$ if the speaker is native and $l = 0$ otherwise. A more fine-grained approach could make $l$ a continuous variable representing the proficiency level of the non-native speaker. <br> The motivation is that the pragmatic listener can now calculate the probability of the speaker saying message $m$ based on whether the speaker is a native speaker. For example, if the pragmatic listener thinks that a non-native speaker probably does not know certain less common words, we can make those words less probable for that speaker.

In [214]:
class RSAModelWithPriorOverUtterances(RSAModel):
    """RSA model whose speaker layer is additionally weighted by a prior over messages.

    Identical to ``RSAModel`` except that ``S1`` multiplies the unnormalized
    speaker scores by ``prior_m`` before normalizing, i.e.
    ``S1(m|o)`` is proportional to ``exp(alpha * U(m;o)) * P(m)``.

    Parameters
    ----------
    objects, messages, truth_table, alpha, prior_o, cost_function
        Passed unchanged to ``RSAModel.__init__``.
    prior_m : sequence of float
        Prior over messages, in the same order as ``messages``.
    """

    def __init__(self, objects, messages, truth_table, alpha, prior_o, prior_m, cost_function):
        super().__init__(objects, messages, truth_table, alpha, prior_o, cost_function)
        self.prior_m = prior_m

    def S1(self):
        """Return the prior-weighted speaker matrix, shape ``(n_obj, n_mes)``.

        Raises
        ------
        ValueError
            If ``cost_function()`` does not return exactly one cost per message.
        """
        # utility without costs
        epsilon = 1e-10 # to prevent log(0)
        raw_utility = np.log(self.L0().T + epsilon)

        # Cost(m) -- one row of message costs per object. Tiling by n_obj
        # (instead of a hard-coded 2) keeps this valid for any number of objects.
        costs = np.asarray(self.cost_function()).reshape(-1)
        if costs.shape[0] != self.n_mes:
            raise ValueError("cost_function must return one cost per message")
        cost_matrix = np.tile(costs, (self.n_obj, 1))

        # compute utility by subtracting costs from informativity
        utility = np.subtract(raw_utility, cost_matrix)

        # unnormalized speaker scores: exponent of utility times the temperature parameter alpha
        unnorm = np.exp(self.alpha * utility)

        # P(m): prior over messages, repeated once per object row
        prior_m = np.tile(np.array(self.prior_m), (self.n_obj, 1))

        # weigh each message's score by its prior probability P(m)
        weighted = unnorm * prior_m

        # normalize to obtain a probability distribution over messages
        return self.normalize(weighted)

In [215]:
# Two objects and three messages; "animal" is true of both objects.
objects = ["dog", "elephant"]
messages = ["dog", "elephant", "animal"]
# Written one row per object, then transposed so that rows index messages
# and columns index objects.
truth_table = np.transpose(np.array([[1, 0, 1], [0, 1, 1]]))
# Uniform prior over the two objects.
prior_o = [1 / 2, 1 / 2]
# Multiplier applied to the speaker's utility.
alpha = 0.8

def cost_function():
    """Return a zero cost for each of the three messages."""
    # NOTE(review): same body as the cost_function defined in the vanilla example cell.
    return [0 for _ in range(3)]

## Native speaker
### Below we can see L0, S1 and L1 when the speaker is native. For the prior over messages, I choose a uniform distribution for the native speaker, on the assumption that the native speaker is equally likely to know all the messages. Consequently, the results are, of course, the same as in the vanilla model, because a uniform $P(m)$ cancels out when S1 is normalized.

In [216]:
# Uniform prior over the three messages.
prior_m_native = np.ones(3) / 3
RSAOption1_native = RSAModelWithPriorOverUtterances(
    objects=objects,
    messages=messages,
    truth_table=truth_table,
    alpha=alpha,
    prior_o=prior_o,
    prior_m=prior_m_native,
    cost_function=cost_function,
)
# Print L0, S1 and L1, one matrix after the other.
print(
    RSAOption1_native.L0(),
    RSAOption1_native.S1(),
    RSAOption1_native.L1(),
    sep="\n",
)

[[1.  0. ]
 [0.  1. ]
 [0.5 0.5]]
[[0.635 0.    0.365]
 [0.    0.635 0.365]]
[[1.  0. ]
 [0.  1. ]
 [0.5 0.5]]


## Non-native speaker
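### Below we can see the same quantities (L0, S1 and L1) when the speaker is non-native. Here the prior over messages gives 'elephant' a much lower probability (1/201) than 'dog' and 'animal' (100/201 each), modelling a speaker who probably does not know that word.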

In [217]:
# Message weights 100 : 1 : 100, each divided by 201 so that they sum to one.
prior_m_nonnative = [weight / 201 for weight in (100, 1, 100)]
RSAOption1_nonnative = RSAModelWithPriorOverUtterances(
    objects=objects,
    messages=messages,
    truth_table=truth_table,
    alpha=alpha,
    prior_o=prior_o,
    prior_m=prior_m_nonnative,
    cost_function=cost_function,
)


In [218]:
# Print L0, S1 and L1 of the non-native model, one matrix after the other.
print(
    RSAOption1_nonnative.L0(),
    RSAOption1_nonnative.S1(),
    RSAOption1_nonnative.L1(),
    sep="\n",
)

[[1.  0. ]
 [0.  1. ]
 [0.5 0.5]]
[[0.635 0.    0.365]
 [0.    0.017 0.983]]
[[1.    0.   ]
 [0.    1.   ]
 [0.271 0.729]]
