In [1]:
from stanza.server import CoreNLPClient

In [59]:
def gender_processing(sentence_groups):
    def gender(tag):
        gender_sum = sentence_groups[tag]['gender']
        number = sentence_groups[tag]['number']
        if number == 0:
            return "UNKNOWN"
        print(tag, gender_sum, number)
        gender = gender_sum/number
        if gender<-(1/3):
            return "FEMALE"
        elif gender>(1/3):
            return "MALE"
        else:
            return "UNKNOWN"
    for tag in sentence_groups.keys():
        sentence_groups[tag]['gender'] = gender(tag)

In [61]:
def sentence_selection_mod(text, annotations):
    """Groups the sentences based on animate entities
    
    Input:
        text : the story
        annotations : annotators of text
        
    Output:
        sentence_groups : dictionary containing sentences belonging to different animate entities    
    """    
    
    sentence_groups = dict()
    tag_dict = dict()
    for chain in ann.corefChain:
        sentences = set()
        rep_mention = chain.mention[chain.representative]
        if rep_mention.animacy == "INANIMATE" or rep_mention.mentionType == "PRONOMINAL":
            continue
        sI = rep_mention.sentenceIndex
        hI = rep_mention.headIndex
        if (sI, hI) not in tag_dict:
            tag_dict[(sI, hI)] = ann.sentence[sI].token[hI].word
        tag = tag_dict[(sI, hI)].lower()
        print(tag, rep_mention.mentionType, chain.chainID)
        if tag not in sentence_groups:
            sentence_groups[tag] = dict()
            sentence_groups[tag]['gender'] = 0
            sentence_groups[tag]['number'] = 0
            sentence_groups[tag]['sentences'] = set()
        for mention in chain.mention:
            if mention.mentionType == "PRONOMINAL" and mention.number == "SINGULAR":
                sentence_groups[tag]['gender'] += (0 if mention.gender == "UNKNOWN" else (1 if mention.gender == "MALE" else -1))
                sentence_groups[tag]['number'] +=1
            sentence_groups[tag]['sentences'].add(mention.sentenceIndex)
#         sentence_groups[tag]['sentences'] = sentence_groups[tag]['sentences'].union(sentences.copy())
    gender_processing(sentence_groups)
    return sentence_groups

In [49]:
file = open(r'C:\Users\Sourav\BTP Code\Panchatantra\the_monkey_the_wedge.txt', errors='ignore')
text = file.read()
file.close()

In [34]:
import time
t1 = time.time()
with CoreNLPClient(annotators = ['tokenize','ssplit','pos','lemma','ner', 'parse', 'depparse','coref'],
    properties={'annotators': 'coref', 'coref.algorithm' : 'neural'},
    memory='5G', be_quiet=True, outputFormat = 'json', max_char_length=500000, timeout=36000000) as client:
    ann = client.annotate(text)
t2 = time.time()
print(t2-t1)

2021-02-10 00:09:20 INFO: Writing properties to tmp file: corenlp_server-b6f88ff190f54392.props
2021-02-10 00:09:20 INFO: Starting server with command: java -Xmx5G -cp C:\Users\Sourav\stanza_corenlp\* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 36000000 -threads 5 -maxCharLength 500000 -quiet True -serverProperties corenlp_server-b6f88ff190f54392.props -annotators tokenize,ssplit,pos,lemma,ner,parse,depparse,coref -preload -outputFormat serialized


33.27117896080017


In [62]:
grps = sentence_selection_mod(text, ann)

merchant NOMINAL 5
monkey NOMINAL 56
workers NOMINAL 29
MALE merchant
MALE monkey


ZeroDivisionError: division by zero

In [56]:
ann.corefChain

[chainID: 32
mention {
  mentionID: 32
  mentionType: "NOMINAL"
  number: "SINGULAR"
  gender: "NEUTRAL"
  animacy: "INANIMATE"
  beginIndex: 18
  endIndex: 20
  headIndex: 19
  sentenceIndex: 6
  position: 4
}
mention {
  mentionID: 12
  mentionType: "NOMINAL"
  number: "SINGULAR"
  gender: "NEUTRAL"
  animacy: "INANIMATE"
  beginIndex: 9
  endIndex: 14
  headIndex: 10
  sentenceIndex: 2
  position: 3
}
representative: 1
, chainID: 48
mention {
  mentionID: 45
  mentionType: "NOMINAL"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "INANIMATE"
  beginIndex: 22
  endIndex: 24
  headIndex: 23
  sentenceIndex: 8
  position: 7
}
mention {
  mentionID: 48
  mentionType: "NOMINAL"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "INANIMATE"
  beginIndex: 5
  endIndex: 7
  headIndex: 6
  sentenceIndex: 9
  position: 2
}
mention {
  mentionID: 46
  mentionType: "PRONOMINAL"
  number: "SINGULAR"
  gender: "NEUTRAL"
  animacy: "INANIMATE"
  beginIndex: 28
  endIndex: 29
  headIndex: 28
  sente

In [58]:
print(grps)

{'merchant': {'gender': 'UNKNOWN', 'number': 3, 'sentences': {0, 1}}, 'monkey': {'gender': 'MALE', 'number': 6, 'sentences': {8, 10, 11, 7}}, 'workers': {'gender': 'UNKNOWN', 'number': 3, 'sentences': {2, 6}}}


In [53]:
#metrics
def metrics(obv, data):
    stp = sprec_den = sfn = 0
    gtp = 0
    keys = set(obv.keys()).intersection(set(data.keys()))
    for char in keys:
        stp += len(obv[char]["sentences"].intersection(data[char]["sentences"]))
        gtp += 1 if (obv[char]["gender"]==data[char]["gender"]) else 0
        sprec_den += len(obv[char]["sentences"])
        sfn += len(data[char]["sentences"]) - len(obv[char]["sentences"].intersection(data[char]["sentences"]))
    sprecision = stp/sprec_den
    srecall = stp/(stp+sfn)
    sF1_score = 2*sprecision*srecall/(sprecision+srecall)
    gaccuracy = gtp/len(keys)
    caccuracy = len(keys)/len(data.keys())
    return (caccuracy, gaccuracy, sprecision, srecall, sF1_score)

In [43]:
import pickle
file = open(r'C:\Users\Sourav\BTP Code\Panchatantra\the_monkey_the_wedge.gpickle', 'rb')
data = pickle.load(file)
file.close()
print(data)

{'merchant': {'gender': 'MALE', 'sentences': [0]}, 'carpenters': {'gender': 'UNKNOWN', 'sentences': [0, 1]}, 'masons': {'gender': 'UNKNOWN', 'sentences': [0, 1]}, 'workers': {'gender': 'UNKNOWN', 'sentences': [2, 6]}, 'carpenter': {'gender': 'MALE', 'sentences': [3, 4, 5]}, 'monkeys': {'gender': 'UNKNOWN', 'sentences': [2, 6]}, 'monkey': {'gender': 'MALE', 'sentences': [7, 8, 10, 11]}}


In [27]:
#the elephant and the sparrow
metrics(grps, data)

(0.3333333333333333,
 0.14285714285714285,
 1.0,
 0.5333333333333333,
 0.6956521739130436)

In [12]:
#the monkey and the crocodile
metrics(grps, data)

(1.0, 0.1, 0.9836065573770492, 0.5405405405405406, 0.6976744186046512)

In [19]:
#the_story_of_the_merchant_son
metrics(grps, data)

(0.9090909090909091,
 0.3888888888888889,
 0.7317073170731707,
 0.5504587155963303,
 0.6282722513089005)

In [31]:
#the_thief_and_the_brahmins
metrics(grps, data)

(0.8333333333333334,
 0.8333333333333334,
 0.9047619047619048,
 0.7808219178082192,
 0.8382352941176471)

In [47]:
#the_monkey_the_wedge
metrics(grps, data)

(0.42857142857142855, 0.6666666666666666, 0.875, 1.0, 0.9333333333333333)

In [50]:
!pip install jupyterthemes

Collecting jupyterthemes
  Downloading jupyterthemes-0.20.0-py2.py3-none-any.whl (7.0 MB)
Collecting lesscpy>=0.11.2
  Downloading lesscpy-0.14.0-py2.py3-none-any.whl (46 kB)
Collecting ply
  Downloading ply-3.11-py2.py3-none-any.whl (49 kB)
Installing collected packages: ply, lesscpy, jupyterthemes
Successfully installed jupyterthemes-0.20.0 lesscpy-0.14.0 ply-3.11


In [5]:
!jt -t onedork -fs 115 -tfs 115 -ofs 10 -nfs 130 -cellw 88% -T