In [3]:
import math
import pandas as pd
from functools import reduce
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as statsf

In [4]:

# World states. The order matters: LiteralListener and getSoftmax build and index
# per-world lists by position in W. (Presumably the candidate referents of the pronoun -- TODO confirm.)
W = ['Non-local NP', 'Local NP','Others']
# Messages. The order matters: RationalListener indexes per-message lists by position in M.
# (Presumably the pronoun forms under comparison -- TODO confirm.)
M = ['ta', 'taziji', 'ziji']

# Helper Functions

In [5]:
def makeDictionary(lst):
    '''
    # input: lst, an iterable of hashable keys
    # output: a new dict with one entry per key, every value set to None
    '''
    return dict.fromkeys(lst)


In [6]:
def assignProb(dic, key, value):
    '''
    # input: dic (dict), key, value
    # effect: stores value under key in dic, in place; returns nothing
    '''
    dic.update({key: value})
    

In [7]:
def normalize(dic):
    '''
    # input: dic, a dict mapping each key to a list of numbers
    # output: a new dict with the same keys; each list is divided by its own sum
    #         so that it adds up to 1. A list whose sum is 0 becomes all zeros.
    # The input dict and its lists are left unmodified.
    '''
    normalized = dic.copy()
    for key, val in dic.items():
        # Compute the denominator once per list instead of once per element.
        total = sum(val)
        if total != 0:
            normalized[key] = [float(i) / total for i in val]
        else:
            normalized[key] = [0 for _ in val]
    return normalized

In [8]:
def getPairs(senID, data):

    '''
    # input: senID (compared against data.senID), data (pandas dataframe with the
    #        columns experiment, senID, condition and selection)
    # output: a dict with one key per message in M; each value is the list of
    #         'selection' entries from the "posterior" rows of that sentence whose
    #         'condition' equals the message,
    #         e.g. {'ziji': [...], 'taziji': [...], 'ta': [...]}
    '''

    posterior_rows = data[(data.experiment == "posterior") & (data.senID == senID)]

    return {
        message: list(posterior_rows.loc[posterior_rows["condition"] == message, 'selection'])
        for message in M
    }


#### Message Cost
$Cost(m) = \log(P(m))$

In [9]:
def getCost(P_ziji, P_taziji, P_ta):

    '''
    # input: the three message probabilities P(ziji), P(taziji), P(ta) as floats
    # output: a tuple (P_M, cost)
    #         P_M:  a dict, {'ziji': (float), 'taziji': (float), 'ta': (float)}, keyed in the order of M
    #         cost: a dict with the same keys holding math.log of each probability
    # math.log raises ValueError for a probability of 0.
    '''

    P_M = dict.fromkeys(M)
    P_M.update({"ziji": P_ziji, "taziji": P_taziji, "ta": P_ta})

    cost = {}
    for message, prob in P_M.items():
        cost[message] = math.log(prob)

    return P_M, cost


In [10]:
def printCost(P_M, Cost):
    '''
    # input: P_M, a dict message -> probability; Cost, a dict message -> cost
    # effect: prints a left-aligned table with one row per message in P_M,
    #         both numbers rounded to 5 decimals
    '''
    row_template = "{:<8} {:<10} {:<10}"
    print("Cost")
    print(row_template.format('M', 'P(m)', 'Cost'))
    for message in P_M:
        probability = round(P_M[message], 5)
        print(row_template.format(message, probability, round(Cost[message], 5)))
        

#### Prior probability for each world state
$P(w)$

In [11]:
def getPrior(senID, data):

    '''
    # input: senID (compared against data.senID), data (pandas dataframe with the
    #        columns experiment, senID, selection and Prob)
    # output: a dict with one key per world state in W, in the order of W; each value
    #         is the 'Prob' entry (as float) of the "prior" row of that sentence whose
    #         'selection' equals the world state
    # raises: ValueError when a world state does not have exactly one matching prior row
    '''

    prior_rows = data[(data.experiment == "prior") & (data.senID == senID)]

    P_W = {}
    for w in W:
        matches = prior_rows.loc[prior_rows['selection'] == w, 'Prob']
        # float(Series) is deprecated in recent pandas and fails opaquely on 0 or
        # 2+ rows; check the row count explicitly and read the scalar by position.
        if len(matches) != 1:
            raise ValueError(
                "expected exactly one prior row for senID={!r}, selection={!r}; found {}".format(
                    senID, w, len(matches)))
        P_W[w] = float(matches.iloc[0])

    return P_W


In [12]:
def printPrior(P_W):
    '''
    # input: P_W, a dict world state -> prior probability
    # effect: prints a left-aligned two-column table, probabilities rounded to 5 decimals
    '''
    print("P_W")
    print("{:<14} {:<10}".format('M', 'P(w)'))
    for world, probability in P_W.items():
        print(f"{world:<14} {round(probability, 5):<10}")
        

#### Literal Listener (normalize)
$P_{LL}(w|[m]) = \frac{P(w,[m])}{P([m])}$

In [10]:
def LiteralListener(P_W, Pairs):

    '''
    # input1: P_W, a dict world state -> prior probability
    # input2: Pairs, a dict message -> list of the world states that message may refer to
    # output: a dict with one key per message in M; each value is a list aligned with W
    #         holding the prior renormalized over the message's licensed world states,
    #         and 0 for world states the message does not license
    # If the licensed world states of a message have a total prior of 0, the whole
    # list is 0 (same convention as normalize) instead of raising ZeroDivisionError.
    '''

    literal = {}
    for m in M:
        worlds = Pairs[m]
        total = sum(P_W[w] for w in worlds)
        if total == 0:
            # No probability mass to redistribute: nothing is licensed with positive weight.
            literal[m] = [0 for _ in W]
        else:
            literal[m] = [P_W[w] / total if w in worlds else 0 for w in W]
    return literal


In [11]:
def printLL(literal):
    '''
    # input: literal, a dict message -> list of exactly three probabilities
    # effect: prints a table with one row per message, values rounded to 5 decimals
    # NOTE(review): the column headers are hard-coded and use narrower widths than the rows.
    '''
    print("P_LL(w|m)")

    print("{:<7} {:<8} {:<10} {:<13}".format('M', 'Speaker', 'Clause Subject', 'Others'))
    for message, probabilities in literal.items():
        first, second, third = probabilities
        print(f"{message:<8} {round(first, 5):<10} {round(second, 5):<12} {round(third, 5):<13}")
        

#### Speaker Utility
$U_s(m,w) = \ln(P_{LL}(w|m)) + Cost(m)$

In [12]:
def getUtility(Cost, literal):
    '''
    # input1: Cost, a dict message -> cost (float)
    # input2: literal, a dict message -> list of literal-listener probabilities
    # output: a dict with the same keys and list lengths as literal; each entry is
    #         math.log(probability) + Cost[message] when the probability is positive,
    #         and float('-inf') otherwise
    '''

    Utility = {}
    for message, probabilities in literal.items():
        scores = []
        for p in probabilities:
            if p > 0:
                scores.append(math.log(p) + Cost[message])
            else:
                # A world state the message cannot describe has unbounded negative utility.
                scores.append(float('-inf'))
        Utility[message] = scores
    return Utility



In [13]:
def printUtility(Utility):
    '''
    # input: Utility, a dict message -> list of exactly three utilities
    # effect: prints a table with one row per message, values rounded to 5 decimals
    # NOTE(review): the column headers are hard-coded and use narrower widths than the rows.
    '''
    header_template = "{:<7} {:<8} {:<10} {:<13}"
    row_template = "{:<8} {:<10} {:<12} {:<13}"

    print("U_s(m,w)")

    print(header_template.format('M', 'Speaker', 'Clause Subject', 'Others'))
    for message, utilities in Utility.items():
        u1, u2, u3 = utilities
        print(row_template.format(message, round(u1, 5), round(u2, 5), round(u3, 5)))
        

#### Speaker Probability (Likelihood)
$P_s(m|w) \propto \exp(\alpha \times U_s(m,w))$

In [14]:
def getSoftmax(alpha, Utility):
    '''
    # input1: alpha, a float
    # input2: Utility, a dict message -> list of utilities aligned with W
    #         (entries are floats, possibly float('-inf'))
    # output: a dict with one key per world state in W; each value lists
    #         exp(alpha * utility) for every message, in the key order of Utility.
    #         A utility of -inf always contributes 0.0.
    # The values are unnormalized; pass the result through normalize() to get probabilities.
    '''

    # alpha: ex:0-10; search between the range with step(0.1), do by-item correlation among all values, find the alpha that has the highest correlation

    neg_inf = float('-inf')
    per_message = list(Utility.values())

    Softmax = {}
    for i, w in enumerate(W):
        # Test for -inf explicitly: 0 * -inf is NaN, so alpha == 0 would otherwise
        # yield NaN, and a negative alpha would overflow in math.exp.
        Softmax[w] = [0.0 if utilities[i] == neg_inf else math.exp(alpha * utilities[i])
                      for utilities in per_message]

    return Softmax


In [2]:
def getLikelihood(senID, data, alpha, P_ziji, P_taziji, P_ta):
    '''
    # input: senID and data (pandas dataframe with at least the columns senID and type,
    #        plus whatever getPrior reads), alpha (float), and the three message probabilities
    # output: the dict returned by getSoftmax for this sentence (unnormalized)
    # Steps: message costs -> grammar (Pairs) chosen by sentence type -> prior ->
    #        literal listener -> speaker utility -> getSoftmax
    '''

    _, Cost = getCost(P_ziji, P_taziji, P_ta)

    # Hard-coded grammar: 'ta' may additionally pick the local NP in 'possessor' sentences.
    # (Alternative: take the grammar from the experiment via getPairs(senID, data).)
    sentence_type = data[(data.senID == senID)]['type'].to_list()[0]
    if sentence_type == 'possessor':
        ta_worlds = ['Non-local NP', 'Local NP', 'Others']
    else:
        ta_worlds = ['Non-local NP', 'Others']
    Pairs = {'ziji': ['Non-local NP', 'Local NP'], 'taziji': ['Local NP'], 'ta': ta_worlds}

    P_W = getPrior(senID, data)
    # try to take the serious grammar instead of the pairs, how strongly people consult the grammar rule
    literal = LiteralListener(P_W, Pairs)
    Utility = getUtility(Cost, literal)

    return getSoftmax(alpha, Utility)
    

In [16]:
def printLikelihood(Softmax):
    '''
    # input: Softmax, a dict world state -> list of exactly three likelihoods
    # effect: prints a table with one row per world state, values rounded to 5 decimals
    # NOTE(review): the column headers ziji/taziji/ta are hard-coded; they label the
    # values correctly only if each list is ordered ziji, taziji, ta -- verify against
    # the order of M used to build Softmax.
    '''
    layout = "{:<14} {:<12} {:<12} {:<12}"
    print("P_s(m|w) - likelihood")
    print(layout.format('W', 'ziji', 'taziji', 'ta'))
    for world, likelihoods in Softmax.items():
        first, second, third = likelihoods
        print(layout.format(world, round(first, 5), round(second, 5), round(third, 5)))

#### Rational Listener Interpretation
$P_L(w|m) \propto P(w) \times P_s(m|w)$

In [17]:
def RationalListener(P_W, Softmax):
    '''
    # input1: P_W, a dict world state -> prior probability
    # input2: Softmax, a dict world state -> list of speaker likelihoods aligned with M
    # output: a dict with one key per message in M; each value lists
    #         prior * likelihood for every world state, in the key order of Softmax
    # The result is unnormalized.
    '''

    # Weight every likelihood by the prior of its world state.
    weighted = [[likelihood * P_W[world] for likelihood in likelihoods]
                for world, likelihoods in Softmax.items()]

    # Transpose from one list per world state to one list per message.
    rational_new = {}
    for m in M:
        position = M.index(m)
        rational_new[m] = [row[position] for row in weighted]

    return rational_new



In [18]:
def printRational(Rational):
    '''
    # input: Rational, a dict message -> list of exactly three values
    # effect: prints a table with one row per message, values rounded to 5 decimals
    # NOTE(review): the column headers are hard-coded and use narrower widths than the rows.
    '''
    print("P_L(w|m)")
    print("{:<14} {:<8} {:<10} {:<13}".format('M', 'Speaker', 'Clause Subject', 'Others'))
    for message, values in Rational.items():
        a, b, c = values
        print(f"{message:<14} {round(a, 5):<10} {round(b, 5):<12} {round(c, 5):<13}")

In [19]:
def RationalListener_Normalized(P_W, Softmax):

    '''
    # input1: P_W, a dict world state -> prior probability
    # input2: Softmax, a dict world state -> list of speaker likelihoods
    # output: the result of RationalListener(P_W, Softmax) passed through normalize,
    #         i.e. a dict message -> list that sums to 1 (or all zeros if its sum was 0)
    '''
    return normalize(RationalListener(P_W, Softmax))


In [20]:
def printNormalizedRational(Normalized):
    '''
    # input: Normalized, a dict message -> list of exactly three values
    # effect: prints a table with one row per message, values rounded to 5 decimals
    # NOTE(review): the column headers are hard-coded and use narrower widths than the rows.
    '''
    header_template = "{:<14} {:<8} {:<10} {:<13}"
    row_template = "{:<14} {:<10} {:<12} {:<13}"

    print("Normalized P_L(w|m)")
    print(header_template.format('M', 'Speaker', 'Clause Subject', 'Others'))
    for message, values in Normalized.items():
        p1, p2, p3 = values
        print(row_template.format(message, round(p1, 5), round(p2, 5), round(p3, 5)))

In [21]:
# data = pd.read_csv("final_data_bySenID.csv", index_col=[0])
# W = ["Speaker","Clause Subject","Others"]
# M = ["ziji","taziji", "ta"]

# Pairs = getPairs(16,data)
# P_ziji = 309053/1046136
# P_taziji = 1503/1046136
# P_ta = 735580/1046136
# P_M, Cost = getCost(P_ziji, P_taziji, P_ta)
# P_W = getPrior(16,data)
# literal = LiteralListener(P_W, Pairs)
# Utility = getUtility(Cost, literal)
# Softmax = normalize(getSoftmax(0.93, Utility))
# Rational = RationalListener(P_W,Softmax)
# Normalized = RationalListener_Normalized(Rational)

In [100]:
#printRational(Rational)

In [101]:
#printNormalizedRational(Normalized)

In [38]:
# S_pr = 0.383
# CS_pr = 0.202
# O_pr = 0.414
# P_ziji = 309053/1046136
# P_taziji = 1503/1046136
# P_ta = 735580/1046136
# W = ["Speaker","Clause Subject","Others"]
# M = ["ziji","taziji", "ta"]
# P_M, Cost = getCost(P_ziji, P_taziji, P_ta)

# P_W = {'Speaker': S_pr, 'Clause Subject': CS_pr, 'Others':  O_pr}
# Pairs = {'ziji': ['Speaker', 'Clause Subject','Others'],'taziji': ['Speaker','Clause Subject','Others'], 'ta': ['Speaker', 'Clause Subject','Others']}

# literal = LiteralListener(P_W, Pairs)
# Utility = getUtility(Cost, literal)
# Softmax = normalize(getSoftmax(0.93, Utility))
# #Rational = RationalListener(P_W,Softmax)
# Normalized = RationalListener_Normalized(P_W, Softmax)
# Normalized
# printRational(Normalized)
