In [15]:
%load_ext autoreload
%autoreload 2

#standard library imports
import re
import nltk
import numpy as np
import pandas as pd
import os

#modeling functions & utilities
from pronounResolution import pronResolution_base, pronResolution_nnMod, pronResolution_nn, pronEval
from relationExtract import simpleRE, REEval

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
files = [x for x in os.listdir('prep_scripts') if '_gapi' in x]
df = pd.read_csv('prep_scripts/' + files[0])[['speaker', 'dialogue', 'sentences', 'sentiment', 'entities', 'tokens']]
df['tokens'] = df['tokens'].apply(lambda x: eval(x))
df['sentiment'] = df['sentiment'].apply(lambda x: eval(x))
df.head(10)

Unnamed: 0,speaker,dialogue,sentences,sentiment,entities,tokens
0,narrator,1989 – Hank Pym enters a SHIELD facility,[{'content': u'1989 \u2013 Hank Pym enters a S...,"{'score': 0.3, 'magnitude': 0.3}","[{'salience': 0.5188706, 'meta': {}, 'type': u...","[{'content': '1989', 'label': 'ROOT', 'begin':..."
1,Dr. Hank Pym,Stark.,"[{'content': u'Stark.', 'begin': 0, 'score': 0...","{'score': 0.1, 'magnitude': 0.1}","[{'salience': 1, 'meta': {}, 'type': u'WORK_OF...","[{'content': 'Stark', 'label': 'ROOT', 'begin'..."
2,Mitchell Carson,He doesn't seem happy.,"[{'content': u""He doesn't seem happy."", 'begin...","{'score': -0.6, 'magnitude': 0.6}",[],"[{'content': 'He', 'label': 'NSUBJ', 'begin': ..."
3,Howard Stark,"Hello, Hank. You're supposed to be in Moscow.","[{'content': u'Hello, Hank.', 'begin': 0, 'sco...","{'score': -0.1, 'magnitude': 1}","[{'salience': 0.80661523, 'meta': {}, 'type': ...","[{'content': 'Hello', 'label': 'DISCOURSE', 'b..."
4,Dr. Hank Pym,I took a detour.[he places a vial containing a...,[{'content': u'I took a detour.[he places a vi...,"{'score': 0.4, 'magnitude': 0.4}","[{'salience': 0.34197578, 'meta': {}, 'type': ...","[{'content': 'I', 'label': 'NSUBJ', 'begin': 0..."
5,Peggy Carter,Tell me that isn't what I think it is.,"[{'content': u""Tell me that isn't what I think...","{'score': -0.6, 'magnitude': 0.6}",[],"[{'content': 'Tell', 'label': 'ROOT', 'begin':..."
6,Dr. Hank Pym,"It depends, if you think it's a poor attempt t...","[{'content': u""It depends, if you think it's a...","{'score': -0.5, 'magnitude': 1}","[{'salience': 0.88545346, 'meta': {}, 'type': ...","[{'content': 'It', 'label': 'NSUBJ', 'begin': ..."
7,Mitchell Carson,You were instructed to go to Russia. May I rem...,[{'content': u'You were instructed to go to Ru...,"{'score': 0, 'magnitude': 0.7}","[{'salience': 0.87446207, 'meta': {}, 'type': ...","[{'content': 'You', 'label': 'NSUBJPASS', 'beg..."
8,Dr. Hank Pym,I'm a scientist.,"[{'content': u""I'm a scientist."", 'begin': 0, ...","{'score': 0.3, 'magnitude': 0.3}","[{'salience': 1, 'meta': {}, 'type': u'PERSON'...","[{'content': 'I', 'label': 'NSUBJ', 'begin': 0..."
9,Howard Stark,Then act like one. The Pym Particle is the mos...,"[{'content': u'Then act like one.', 'begin': 0...","{'score': 0.1, 'magnitude': 1.2}","[{'salience': 0.6887065, 'meta': {}, 'type': u...","[{'content': 'Then', 'label': 'ADVMOD', 'begin..."


In [32]:
cList = list(df.speaker.unique())
df['total_sent'] = df['sentiment'].apply(lambda x: x['score'] * x['magnitude'])
cDict = dict(df.groupby('speaker').total_sent.sum())

# number of pronouns for each line
df['num_pron'] = df['tokens'].apply(lambda x: sum([int(t['pos'] == 'PRON') for t in x]))

# total sentiment score for each line
df['total_sent'] = df['sentiment'].apply(lambda x: x['score'] * x['magnitude'])

#set nearby speakers
charRange = 10
nearbyList = np.dstack((df.shift(i).speaker.values for i in range(-charRange, charRange+1)))[0]
df['nearbyChars'] = None
for i, nearbyChars in enumerate(nearbyList):
    df.set_value(i, 'nearbyChars', nearbyChars)

df.head()
    

Unnamed: 0,speaker,dialogue,sentences,sentiment,entities,tokens,total_sent,num_pron,nearbyChars
0,narrator,1989 – Hank Pym enters a SHIELD facility,[{'content': u'1989 \u2013 Hank Pym enters a S...,"{'score': 0.3, 'magnitude': 0.3}","[{'salience': 0.5188706, 'meta': {}, 'type': u...","[{'content': '1989', 'label': 'ROOT', 'begin':...",0.09,0,"[Dr. Hank Pym, Howard Stark, Dr. Hank Pym, Mit..."
1,Dr. Hank Pym,Stark.,"[{'content': u'Stark.', 'begin': 0, 'score': 0...","{'score': 0.1, 'magnitude': 0.1}","[{'salience': 1, 'meta': {}, 'type': u'WORK_OF...","[{'content': 'Stark', 'label': 'ROOT', 'begin'...",0.01,0,"[Mitchell Carson, Dr. Hank Pym, Howard Stark, ..."
2,Mitchell Carson,He doesn't seem happy.,"[{'content': u""He doesn't seem happy."", 'begin...","{'score': -0.6, 'magnitude': 0.6}",[],"[{'content': 'He', 'label': 'NSUBJ', 'begin': ...",-0.36,1,"[Dr. Hank Pym, Mitchell Carson, Dr. Hank Pym, ..."
3,Howard Stark,"Hello, Hank. You're supposed to be in Moscow.","[{'content': u'Hello, Hank.', 'begin': 0, 'sco...","{'score': -0.1, 'magnitude': 1}","[{'salience': 0.80661523, 'meta': {}, 'type': ...","[{'content': 'Hello', 'label': 'DISCOURSE', 'b...",-0.1,1,"[Peggy Carter, Dr. Hank Pym, Mitchell Carson, ..."
4,Dr. Hank Pym,I took a detour.[he places a vial containing a...,[{'content': u'I took a detour.[he places a vi...,"{'score': 0.4, 'magnitude': 0.4}","[{'salience': 0.34197578, 'meta': {}, 'type': ...","[{'content': 'I', 'label': 'NSUBJ', 'begin': 0...",0.16,3,"[Dr. Hank Pym, Peggy Carter, Dr. Hank Pym, Mit..."


In [35]:
 df.head().apply(lambda x:pronResolution_nnMod(cList, x), axis=1)

0    [{'content': '1989', 'label': 'ROOT', 'begin':...
1    [{'content': 'Stark', 'label': 'ROOT', 'begin'...
2    [{'content': 'He', 'label': 'NSUBJ', 'char': [...
3    [{'content': 'Hello', 'label': 'DISCOURSE', 'b...
4    [{'content': 'I', 'label': 'NSUBJ', 'char': 'D...
dtype: object

In [33]:
df['tokens'] = df.apply(lambda x:pronResolution_nnMod(cList, x), axis=1)
df.head()

Unnamed: 0,speaker,dialogue,sentences,sentiment,entities,tokens,total_sent,num_pron,nearbyChars
0,narrator,1989 – Hank Pym enters a SHIELD facility,[{'content': u'1989 \u2013 Hank Pym enters a S...,"{'score': 0.3, 'magnitude': 0.3}","[{'salience': 0.5188706, 'meta': {}, 'type': u...","[{'content': '1989', 'label': 'ROOT', 'begin':...",0.09,0,"[Dr. Hank Pym, Howard Stark, Dr. Hank Pym, Mit..."
1,Dr. Hank Pym,Stark.,"[{'content': u'Stark.', 'begin': 0, 'score': 0...","{'score': 0.1, 'magnitude': 0.1}","[{'salience': 1, 'meta': {}, 'type': u'WORK_OF...","[{'content': 'Stark', 'label': 'ROOT', 'begin'...",0.01,0,"[Mitchell Carson, Dr. Hank Pym, Howard Stark, ..."
2,Mitchell Carson,He doesn't seem happy.,"[{'content': u""He doesn't seem happy."", 'begin...","{'score': -0.6, 'magnitude': 0.6}",[],"[{'content': 'He', 'label': 'NSUBJ', 'char': [...",-0.36,1,"[Dr. Hank Pym, Mitchell Carson, Dr. Hank Pym, ..."
3,Howard Stark,"Hello, Hank. You're supposed to be in Moscow.","[{'content': u'Hello, Hank.', 'begin': 0, 'sco...","{'score': -0.1, 'magnitude': 1}","[{'salience': 0.80661523, 'meta': {}, 'type': ...","[{'content': 'Hello', 'label': 'DISCOURSE', 'b...",-0.1,1,"[Peggy Carter, Dr. Hank Pym, Mitchell Carson, ..."
4,Dr. Hank Pym,I took a detour.[he places a vial containing a...,[{'content': u'I took a detour.[he places a vi...,"{'score': 0.4, 'magnitude': 0.4}","[{'salience': 0.34197578, 'meta': {}, 'type': ...","[{'content': 'I', 'label': 'NSUBJ', 'char': 'D...",0.16,3,"[Dr. Hank Pym, Peggy Carter, Dr. Hank Pym, Mit..."


In [26]:
x=[1,2,3,4,5]
x[2:]

[3, 4, 5]

In [6]:
pronEval([df, df], numExamples=2)


******** line 766 ********
764. Kurt:
Oh, no.

765. narrator:
back with Scott and the ants

=> 766. Scott Lang:
=> I'm employing the bullet ants. Hapanera-clamda-mana-merna. I don't remember what it's called but I feel bad for this guy. [using the ants Scott takes down one of the security guards with Luis also punching him]

767. Luis:
See, that's what I'm talkin’ bout. That's what I call it, an unfortunate casualty, in a very serious operation, you know? [Hope then comes along and enters the room and places the signal decoy]

768. Kurt:
Signal decoy in place. Mean pretty lady did good, Scott.

******** test model 1: line 766 ********
6 pronouns resolved
1. I => ['Peggy Carter']
2. I => ['Pym Tech Employee']
3. what => ['Hope van Dyne']
4. it => ['Cop on Speaker']
5. I => ['Hideous Rabbit']
6. him => ['Pym Tech Employee']

how many are correctly identified? 2

******** line 766 ********
764. Kurt:
Oh, no.

765. narrator:
back with Scott and the ants

=> 766. Scott Lang:
=> I'm employi

In [7]:
df['relations'] = df.apply(lambda x:simpleRE(x), axis=1)
df.head()

Unnamed: 0,speaker,dialogue,sentences,sentiment,entities,tokens,total_sent,chars,hasChar,relations
0,narrator,1989 – Hank Pym enters a SHIELD facility,[{'content': u'1989 \u2013 Hank Pym enters a S...,"{'magnitude': 0.3, 'score': 0.3}","[{'salience': 0.5188706, 'meta': {}, 'type': u...","[{'pos': 'NUM', 'index': 0, 'begin': 0, 'label...",0.09,"[Mitchell Carson, Dr. Hank Pym, narrator, nan,...",False,"[{'line': 0, 'ent1': 'Pym', 'class': 0, 'relat..."
1,Dr. Hank Pym,Stark.,"[{'content': u'Stark.', 'begin': 0, 'score': 0...","{'magnitude': 0.1, 'score': 0.1}","[{'salience': 1, 'meta': {}, 'type': u'WORK_OF...","[{'pos': 'NOUN', 'index': 0, 'begin': 0, 'labe...",0.01,"[Howard Stark, Mitchell Carson, Dr. Hank Pym, ...",False,
2,Mitchell Carson,He doesn't seem happy.,"[{'content': u""He doesn't seem happy."", 'begin...","{'magnitude': 0.6, 'score': -0.6}",[],"[{'pos': 'PRON', 'index': 3, 'begin': 0, 'labe...",-0.36,"[Dr. Hank Pym, Howard Stark, Mitchell Carson, ...",True,
3,Howard Stark,"Hello, Hank. You're supposed to be in Moscow.","[{'content': u'Hello, Hank.', 'begin': 0, 'sco...","{'magnitude': 1, 'score': -0.1}","[{'salience': 0.80661523, 'meta': {}, 'type': ...","[{'pos': 'X', 'index': 2, 'begin': 0, 'label':...",-0.1,"[Peggy Carter, Dr. Hank Pym, Howard Stark, Mit...",True,
4,Dr. Hank Pym,I took a detour.[he places a vial containing a...,[{'content': u'I took a detour.[he places a vi...,"{'magnitude': 0.4, 'score': 0.4}","[{'salience': 0.34197578, 'meta': {}, 'type': ...","[{'pos': 'PRON', 'index': 1, 'begin': 0, 'labe...",0.16,"[Dr. Hank Pym, Peggy Carter, Dr. Hank Pym, How...",True,"[{'line': 4, 'ent1': ['Hope van Dyne'], 'class..."


In [8]:
REEval([df, df], 2)


******** line 657 ********
655. Dr. Hank Pym:
Darren. How the hell did you get in here? [Pym closes the door]

656. Darren Cross:
You left the front door open, Hank. It's official. You're old. [Hope and Scott hear them from the kitchen]

=> 657. Hope van Dyne:
=> The plans! He will kill him. [back with Cross and Pym]

658. Dr. Hank Pym:
Well to what do I owe this pleasure?

659. Darren Cross:
I have good news.

******** test model 1: line 657 ********
1 relations identified
entities: ['Detective'] => ['Mitchell Carson']
relation: kill
category: 0

how many are correctly identified? 1

******** line 657 ********
655. Dr. Hank Pym:
Darren. How the hell did you get in here? [Pym closes the door]

656. Darren Cross:
You left the front door open, Hank. It's official. You're old. [Hope and Scott hear them from the kitchen]

=> 657. Hope van Dyne:
=> The plans! He will kill him. [back with Cross and Pym]

658. Dr. Hank Pym:
Well to what do I owe this pleasure?

659. Darren Cross:
I have good

In [18]:
def checkQuery(relationList, ent1, ent2, relationClass):
    for relation in relationList:
        if ent1 in relation['ent1'] and ent2 in relation['ent2'] and relationClass == relation['class']:
            return True
    return False

def printAnswer(row):
    print('Movie: {}, Line {}'.format(row.movie, row.lineNum))
    print(row.dialogue)
    print()

#Simple Query System

print('Select the movies of your interest:')
print('***Enter all to use all movies')
print('***Enter n, m, x, y (numbers separated by commas) for specific selections')
print('***Enter random, n for n random selections\n')

files = [x for x in os.listdir('prep_scripts') if '_gapi' in x]
for i, fileName in enumerate(files):
    print('{}. {}'.format(i+1, re.split(r'_tw_|_imsdb_', fileName)[0]))


x = input()


#random selection
try:
    if 'random' in x:
        queryFiles = np.random.choice(files, int(x.split(',')[-1]), replace=False)
    elif x != 'all':
        queryFiles = np.array(files)[[int(select) - 1 for select in x.split(',')]]
    #use all files
    else:
        queryFiles = files    
        
except:
    print('\nunexpected input, will use all movie files\n')
    queryFiles = files    

#print(queryFiles)
df_data = None

for i, fileName in enumerate(queryFiles):    
    df = pd.read_csv('prep_scripts/'+fileName)[['speaker', 'dialogue', 'sentences', 'sentiment', 'entities', 'tokens']]
    df['tokens'] = df['tokens'].apply(lambda x: eval(x))
    df['sentiment'] = df['sentiment'].apply(lambda x: eval(x))
    df['total_sent'] = df['sentiment'].apply(lambda x: x['score'] * x['magnitude'])
    df['movie'] = re.split(r'_tw_|_imsdb_', fileName)[0]
    df['lineNum'] = df.index + 1
    
    cList = list(df.speaker.unique())
    cDict = dict(df.groupby('speaker').total_sent.sum())
    
    #resolve entities
    df['tokens'], df['hasChar'] = zip(*df.apply(lambda x:pronResolution_base(cList, x), axis=1).values)
    
    #extract relations
    df['relations'] = df.apply(lambda x:simpleRE(x), axis=1)
    
    if i == 0:
        df_data = df[df.relations.notnull()]
        
    else:
        df_data = pd.concat((df_data, df[df.hasRelation == True]))

    
print('Type end to finish at any time')

#relationList = df_data[df_data.hasRelation == True]['relations'].values

while True:
    print('What relation are you looking for?')
    ent1 = input('Entity 1:')
    if ent1 == 'end':
        break
    ent2 = input('Entity 2:')
    if ent2 == 'end':
        break
    relationClass = int(input('Relation category: '))
    
    qMatch = df_data.relations.apply(lambda x: checkQuery(x, ent1, ent2, relationClass))
    if sum(qMatch) == 0:
        print('nothing found\n')
    else:
        df_data[qMatch].apply(lambda x: printAnswer(x), axis=1)
    

Select the movies of your interest:
***Enter all to use all movies
***Enter n, m, x, y (numbers separated by commas) for specific selections
***Enter random, n for n random selections

1. ant-man
2. avengers_age_of_ultron
3. captain_america_civil_war
4. captain_america_the_first_avenger
5. captain_america_the_winter_soldier
6. fantastic_four
7. iron_man_3
8. lego_marvel_super_heroes
9. spider-man
10. the_amazing_spider-man_2
11. the_amazing_spider-man
12. the_avengers
13. the_wolverine
14. thor_the_dark_world
15. thor
16. x-men_apocalypse
17. x-men_days_of_future_past
18. x-men
19. x-men_the_last_stand
15
Type end to finish at any time
What relation are you looking for?
Entity 1:Thor
Entity 2:Loki
Relation category: 0
nothing found
What relation are you looking for?
Entity 1:Loki
Entity 2:Thor
Relation category: 0
Movie: thor, Line 265
It all makes sense now. Why you favored Thor, all these years! Because no matter how much you claim to love me, you could never have a Frost Giant sitti

In [143]:
df_data.tail().apply(lambda x: printAnswer(x), axis=1)

Movie: x-men_the_last_stand, Line 334
Help me? What's wrong with me?
Movie: x-men_the_last_stand, Line 335
Absolutely nothing.
Movie: x-men_the_last_stand, Line 336
Eric, stop.
Movie: x-men_the_last_stand, Line 337
No, Charles, not this time. You've always held her back.
Movie: x-men_the_last_stand, Line 338
For your own good, Jean.


333    None
334    None
335    None
336    None
337    None
dtype: object

In [154]:
df_data.relations.values[0]

[{'class': 0,
  'ent1': ['Admission Nurse'],
  'ent2': ['Volstagg'],
  'line': 7,
  'relation': 'telling'},
 {'class': 0,
  'ent1': ['Frigga'],
  'ent2': ['Frigga'],
  'line': 7,
  'relation': 'ask'}]