In [1]:
#Load libraries

import nltk
from nltk.corpus import treebank 
from nltk import load_parser
from nltk.chunk import tree2conlltags
from nltk.corpus import names
import random
parserdrt = load_parser('grammar.fcfg', logic_parser=nltk.sem.drt.DrtParser())
expr_read = nltk.sem.DrtExpression.fromstring
from IPython.core.display import display, HTML

In [2]:
#Pronoun Lists

male_sin_sub = ['he','i','you']
male_sin_obj = ['him','me','you']
nongender = ["it"]
female_sin_sub =['she','i','you']
female_sin_obj =['her','me','you']

In [3]:
# coNll tagging for tokens

def sentTokenize(sentence):
    #print(tree2conlltags(nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence[0])), binary=False)))
    return tree2conlltags(nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence)), binary=False))

In [4]:
#ML method to identify gender

def genderFeature(name):
    return {'last(1)' : name[-1]}

males = [(name, 'male') for name in names.words('male.txt')]
females = [(name, 'female') for name in names.words('female.txt')]
combined_gender = males + females
random.shuffle(combined_gender)
training = [(genderFeature(name), gender) for (name, gender) in combined_gender]
classifier = nltk.NaiveBayesClassifier.train(training)


def genderClassifier(name):
    return classifier.classify(genderFeature(name))


In [5]:
#calculates distance between the discourses and pronouns

def findNdist(sentence,src):
    prpind = []
    nound = None
    try :
        for i,item in enumerate(sentTokenize(sentence)):
            #print(i,item)
            if src.lower() == item[0].lower() and item[1] in ['NNP','NN']:
                nound = i
            if item[1] in ['PRP','PRP$'] :
                prpind.append((item[0],i))
        return list(map(lambda prp: "distfrom-{}-{}".format(prp[0],abs(prp[1]-nound)),prpind))
        #return {'distance':list(map(lambda prp: (prp[0],abs(prp[1]-nound)),prpind))}
    except:
        return []
        #return {'distance':[]}


In [6]:
# Fetch Gender from DRS

def getGender(drsEntityDict,text,src):
    for ky in drsEntityDict.keys():
        for ft in sentTokenize(text):
            if drsEntityDict[ky][0].lower() == ft[0].lower() == src :
                return drsEntityDict[ky][1].lower()
            

In [7]:
# Convert from DRS to String to extract features

def drstoStr(drstext):
    return str(drstext.simplify().resolve_anaphora())


In [8]:
# extract discoureses from string converted DRS

def drsExtract(fdrsextract):
    drsvariables = fdrsextract[fdrsextract.find("[")+1:fdrsextract.find("]")]
    drsfunctionslist = fdrsextract[fdrsextract.find("[",2)+1:fdrsextract.rfind("]")]
    return (drsvariables,drsfunctionslist)



In [9]:
#Extract entities from DRS

def getEntities(drsfunctionslist):
    
    pronounequality = list(filter(lambda _: "=" in _,drsfunctionslist.split(", ")))
    def pronouneqExtract(ent):
        return ent[ent.find("[")+1:ent.find("]")].split(",")

    def pronounsEx(pronounEnt):
        return pronounEnt[pronounEnt.find("(")+1:pronounEnt.find("=")-1]
    
    matchedEntities = [pronouneqExtract(ent) for ent in pronounequality]
    pronounEntity = [pronounsEx(pronounEnt) for pronounEnt in pronounequality]
    
    return (matchedEntities,pronounEntity)




In [10]:
# Create dictionary to store DRS features 

def getDRSEntityDict(matchedEntities,pronounEntity,drsfunctionslist):
    combinedElements = matchedEntities + [pronounEntity]
    flatten = list(set([elements for subElements in combinedElements for elements in subElements])) 
    drsEntityDict = {}
    #for i in range(len(matchedEntities)):
    for en in flatten:
        drsEntityDict[en] = [x[0:x.find("(")] for x in drsfunctionslist.split(", ") \
                             if en == x[x.find("(")+1:x.find(")")] or en == x[x.find("(")+1:x.find(",")]]
    
    return drsEntityDict

In [11]:
#remove empty dictinary elements

def remove_empty_keys(drsEntityDict):
    for k in drsEntityDict.keys():
        if not drsEntityDict[k]:
            del drsEntityDict[k]

In [12]:
# append the feature to dictionary

def appendToDRSDict(sentence):
    stack = []
    for item in sentTokenize(sentence):
        if item[1] == 'NNP' and (item[2] == 'B-PERSON' or item[2] == 'O' or item[2] == 'B-GPE'):
            stack.append([item[0],[item[1],item[2]]+findNdist(sentence,item[0])])
        elif item[1] == 'NN':
            stack.append([item[0],[item[1],item[2]]+findNdist(sentence,item[0])])
        elif item[1] == 'CC':
            stack.append([item[0],[item[1],item[2]]])
        elif item[1] == 'PRP':
            stack.append([item[0],[item[1],item[2]]])
        
    return stack

In [13]:
# Add more features to dictionary


def extendToDRSDict(stack,drsEntityDict):
    tempEntityDict = drsEntityDict
    #print(tempEntityDict)
    for key in tempEntityDict.keys():        
        for stackElement in stack:
            if tempEntityDict[key][0].lower() == stackElement[0].lower() and len(tempEntityDict[key])>=1:
                tempEntityDict[key].extend(stackElement[1])
        """        
        tempEntityDict[key].extend([x[1] for x in stack if tempEntityDict[key][0].lower() == x[0].\
                                   lower() and len(tempEntityDict[key])>1][0])
        """
    return tempEntityDict


In [14]:
# method to resolve anaphora

def resolveDRSAnaphora(drsEntityDict,text):
    resolvestack = []
    for k in drsEntityDict.keys():
        properties = [elements.lower() for elements in drsEntityDict[k]]
        if set(['nn','nnp','b-gpe','b-person']).intersection(set(properties)) \
                    and set(['male','female']).intersection(set(properties)):
            
            distanceNNP = list(filter(lambda x : x.startswith('distfrom'),drsEntityDict[k]))
            for dist in distanceNNP:
                if dist.split("-")[1].lower() in male_sin_sub:
                    resolvestack.append((properties[0],dist.split("-")[1].lower()))
                    
                elif dist.split("-")[1].lower() in male_sin_obj:
                    resolvestack.append((properties[0],dist.split("-")[1].lower()))
            
                elif dist.split("-")[1].lower() in female_sin_sub:
                    resolvestack.append((properties[0],dist.split("-")[1].lower()))
             
                elif dist.split("-")[1].lower() in female_sin_obj:
                    resolvestack.append((properties[0],dist.split("-")[1].lower()))
                    
        
        elif set(['nn','o']).intersection(set(properties)) \
                    and set(['nongender']).intersection(set(properties)):
            distanceNNP = list(filter(lambda x : x.startswith('distfrom'),drsEntityDict[k]))
            for dist in distanceNNP:
                if dist.split("-")[1].lower() in nongender:
                    resolvestack.append((properties[0],dist.split("-")[1].lower()))
                    
    #print(resolvestack) 
    newresolveStack =[]
    for x in resolvestack:
        if getGender(drsEntityDict,text,x[0]) == 'male' and (x[1] in male_sin_sub or x[1] in male_sin_obj):
            newresolveStack.append(x)
        elif getGender(drsEntityDict,text,x[0]) == 'female' and (x[1] in female_sin_sub or x[1] in female_sin_obj):
            newresolveStack.append(x)
        elif getGender(drsEntityDict,text,x[0]) == 'nongender' and (x[1] in nongender):
            newresolveStack.append(x)
            
    return newresolveStack   
    

In [15]:
#Graphical output method

def outputUI(resolveStack,text):
    colorStack = ['#158467','#d54062','#56556e','#318fb5','#cdb30c','#f6ab6c','#84a9ac','#99b898','#fa7d09','#96bb7c','#ff9c71']
    dictOutput = {}
    for tup in resolveStack:
        if len(dictOutput)==0 or tup[1] not in dictOutput.keys():
            dictOutput[tup[1]] = [tup[0]]
        elif tup[1] in dictOutput.keys():
            dictOutput[tup[1]].append(tup[0])

    for i,key in enumerate(dictOutput.keys()):
        dictOutput[key].append(colorStack[i])

    htmlStack = text.split()
    for i,tkn in enumerate(htmlStack):
        for key in dictOutput.keys():
            if tkn == key or tkn in dictOutput[key]:
                htmlStack[i] = '<span style="background-color:'+dictOutput[key][-1]+'">'+tkn+"</span>"
    

    return display(HTML('<h3>'+" ".join(htmlStack)+'</h3>'))

In [16]:
def main(drstext):
    fdrsextract = drstoStr(drstext)
    #print(fdrsextract)

    drsvariables,drsfunctionslist = drsExtract(fdrsextract)

    #print(drsvariables)
    #print(drsfunctionslist)

    matchedEntities,pronounEntity = getEntities(drsfunctionslist)
    #print(matchedEntities)
    #print(pronounEntity)
    
    drsEntityDict = getDRSEntityDict(matchedEntities,pronounEntity,drsfunctionslist)
    #print(drsEntityDict)
    #print()
    
    stack=appendToDRSDict(text)
    #print(stack)
    #print()

    drsEntityDict = extendToDRSDict(stack,drsEntityDict)
    #print(drsEntityDict)

    resolveStack = resolveDRSAnaphora(drsEntityDict,text)
     

    #print(resolveStack) 
    print(text)
    print()
    print("Resolved = {}".format(resolveStack))
    outputui = outputUI(resolveStack,text)
    #print(outputui)

In [17]:
text = "John met elisa in Barista. She asked him to order a Pizza".lower()
drstxt = '([x,y,z1,u,v],[John(x),MALE(x),Elisa(y),FEMALE(y),met(x,y),SHE(u),HIM(v),PRO(u),PRO(v),ask(u,v),order(u,v),pizza(z1)])'
drstext = expr_read(drstxt)
#print(drstext.simplify().resolve_anaphora())
drstext.draw()
main(drstext)

john met elisa in barista. she asked him to order a pizza

Resolved = [('elisa', 'she'), ('john', 'him')]


In [18]:
#Example 1
text = "Jason built Jack a robot , and he gave it to him .".lower()
drstxt = '([x,y,z1,u,v,z22],[Jason(x),MALE(x),Jack(y),MALE(y),built(x,z1),robot(z1,y),NONGENDER(z1),HE(u),HIM(v),PRO(u),PRO(v),gave(u,v),It(z22),PRO(z22)])'
drstext = expr_read(drstxt)
main(drstext)
drstext.resolve_anaphora().draw()

jason built jack a robot , and he gave it to him .

Resolved = [('jack', 'he'), ('jack', 'him'), ('jason', 'he'), ('jason', 'him'), ('robot', 'it')]


In [19]:
#Example 2
text = "John met Adam in Barista. He asked him to order a Pizza".lower()
drstxt = '([x,y,z1,u,v],[John(x),MALE(x),Adam(y),MALE(y),met(x,y),HE(u),HIM(v),PRO(u),PRO(v),ask(u,v),order(u,v),pizza(z1)])'
drstext = expr_read(drstxt)
print(drstext.simplify().resolve_anaphora())
main(drstext)
drstext.resolve_anaphora().draw()

([u,v,x,y,z1],[John(x), MALE(x), Adam(y), MALE(y), met(x,y), HE(u), HIM(v), (u = [x,y,z1,v]), (v = [x,y,z1,u]), ask(u,v), order(u,v), pizza(z1)])
john met adam in barista. he asked him to order a pizza

Resolved = [('adam', 'he'), ('adam', 'him'), ('john', 'he'), ('john', 'him')]


In [20]:
#Example 3
text = "John is a man. He walks bravely".lower()
drstxt = '([x,y,u],[John(x),MALE(x),man(y),is(x,y),HE(u),PRO(u),walks(u)])'
drstext = expr_read(drstxt)
print(drstext.simplify().resolve_anaphora())
main(drstext)
drstext.resolve_anaphora().draw()

([u,x,y],[John(x), MALE(x), man(y), is(x,y), HE(u), (u = [x,y]), walks(u)])
john is a man. he walks bravely

Resolved = [('john', 'he')]


In [None]:
#Example 4
text = "John met elisa in Barista. She asked him to order a Pizza".lower()
drstxt = '([x,y,z1,u,v],[John(x),MALE(x),Elisa(y),FEMALE(y),met(x,y),SHE(u),HIM(v),PRO(u),PRO(v),ask(u,v),order(u,v),pizza(z1)])'
drstext = expr_read(drstxt)
print(drstext.simplify().resolve_anaphora())
main(drstext)
drstext.resolve_anaphora().draw()

([u,v,x,y,z1],[John(x), MALE(x), Elisa(y), FEMALE(y), met(x,y), SHE(u), HIM(v), (u = [x,y,z1,v]), (v = [x,y,z1,u]), ask(u,v), order(u,v), pizza(z1)])
john met elisa in barista. she asked him to order a pizza

Resolved = [('elisa', 'she'), ('john', 'him')]


In [None]:
#Example 5
text="The sniper shot the terrorist because he was dangerous".lower()
drstxt = '([x,y,u],[Sniper(x),MALE(x),Terrorist(y),MALE(y),shot(x,y),HE(u),PRO(u),dangerous(u)])'
drstext = expr_read(drstxt)
print(drstext.simplify().resolve_anaphora())
main(drstext)
drstext.resolve_anaphora().draw()

In [None]:
#Example 6
text="The sniper shot the terrorist because he was ordered to".lower()
drstxt = '([x,y,u],[Sniper(x),MALE(x),Terrorist(y),MALE(y),shot(x,y),HE(u),PRO(u),order(u)])'
drstext = expr_read(drstxt)
print(drstext.simplify().resolve_anaphora())
main(drstext)
drstext.resolve_anaphora().draw()

In [None]:
#Example 7
text = "Angus owns a dog . It bit Mia".lower()
drstxt = '([x,z33,z36,z37],[Angus(x),MALE(x),dog(z33),NONGENDER(z33), own(x,z33),It(z37), PRO(z37), Mia(z36),FEMALE(z36) bite(z37,z36)])'
drstext = expr_read(drstxt)
print(drstext.simplify().resolve_anaphora())
main(drstext)
drstext.resolve_anaphora().draw()

In [None]:
# def resolveDRSAnaphora(drsEntityDict,text):
#     resolveStack = []
#     for k in drsEntityDict.keys():
#         properties = [elements.lower() for elements in drsEntityDict[k]]
#         if set(['nn','nnp','b-gpe','b-person']).intersection(set(properties)) \
#             and set(['male','female']).intersection(set(properties)):
            
            
#             for key in drsEntityDict.keys():
#                 distanceNNP = list(filter(lambda x : x.startswith('distfrom'),drsEntityDict[k]))
#                 prop = [ele.lower() for ele in drsEntityDict[key]]
#                 if 'prp' in prop:
#                     for dist in distanceNNP:
#                         if prop[0] in male_sin_sub and prop[0] in dist:
#                             #resolveStack.append((properties[0],prop[0],dist))
#                             resolveStack.append((properties[0],prop[0]))
                            
#                         elif prop[0] in male_sin_obj and prop[0] in dist:
#                             #resolveStack.append((properties[0],prop[0],dist))
#                             resolveStack.append((properties[0],prop[0]))
                            
#                         elif prop[0] in female_sin_obj and prop[0] in dist:
#                             #resolveStack.append((properties[0],prop[0],dist))
#                             resolveStack.append((properties[0],prop[0]))
                            
#                         elif prop[0] in female_sin_sub and prop[0] in dist:
#                             #resolveStack.append((properties[0],prop[0],dist))
#                             resolveStack.append((properties[0],prop[0]))                            
            
#         elif set(['nn','o']).intersection(set(properties)) and set(['nongender']).intersection(set(properties)):
#             for key in drsEntityDict.keys():
#                 distanceNNP = list(filter(lambda x:x.startswith('distfrom'),drsEntityDict[k]))
#                 prop = [ele.lower() for ele in drsEntityDict[key]]
#                 if 'prp' in prop:
#                     for dist in distanceNNP:
#                         if prop[0] in nongender and prop[0] in dist:
#                             #resolveStack.append((properties[0],prop[0],dist))
#                             resolveStack.append((properties[0],prop[0]))
                            
    
#     newresolveStack =[]
#     for x in resolveStack:
#         if getGender(drsEntityDict,text,x[0]) == 'male' and (x[1] in male_sin_sub or x[1] in male_sin_obj):
#             newresolveStack.append(x)
#         elif getGender(drsEntityDict,text,x[0]) == 'female' and (x[1] in female_sin_sub or x[1] in female_sin_obj):
#             newresolveStack.append(x)
#         elif getGender(drsEntityDict,text,x[0]) == 'nongender' and (x[1] in nongender):
#             newresolveStack.append(x)
            
#     return newresolveStack
