In [1]:
# Release tarball of the neuralcoref English model (en_coref_md 3.0.0).
MODEL_URL = (
    "https://github.com/huggingface/neuralcoref-models/releases/"
    "download/en_coref_md-3.0.0/en_coref_md-3.0.0.tar.gz"
)

In [2]:
# Pin spaCy to 2.0.12.
# NOTE(review): presumably the release the en_coref_md 3.0.0 model was built against — confirm.
!pip install spacy==2.0.12



In [3]:
# Install the coreference model package directly from the release tarball;
# {MODEL_URL} is interpolated from the Python variable of the same name.
!pip install {MODEL_URL}

Collecting https://github.com/huggingface/neuralcoref-models/releases/download/en_coref_md-3.0.0/en_coref_md-3.0.0.tar.gz
  Downloading https://github.com/huggingface/neuralcoref-models/releases/download/en_coref_md-3.0.0/en_coref_md-3.0.0.tar.gz (161.3 MB)
     |████████████████████████████████| 161.3 MB 56 kB/s              
[?25h  Preparing metadata (setup.py) ... [?25ldone


In [4]:
# Download and link spaCy's en_core_web_md model.
# NOTE(review): the cells in this notebook only load en_coref_md, so this download may be unnecessary — confirm.
!python -m spacy download en_core_web_md

Collecting en_core_web_md==2.0.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz (120.8 MB)
     |████████████████████████████████| 120.8 MB 2.8 MB/s            
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25h
[93m    Linking successful[0m
    /opt/conda/lib/python3.7/site-packages/en_core_web_md -->
    /opt/conda/lib/python3.7/site-packages/spacy/data/en_core_web_md

    You can now load the model via spacy.load('en_core_web_md')



In [5]:
# en_coref_md is the model package installed from MODEL_URL.
import en_coref_md

# Load the pipeline once; every later cell reuses `nlp`.
nlp = en_coref_md.load()

## Simple test

In [32]:
# Four-sentence sample with two people and several pronouns referring back to them.
test_sent = (
    "Paul discussed the approach with Elizabeth. "
    "He thought she had great ideas. "
    "However, she disagreed with his overall plan and decided not to talk to him again. "
    "On the other hand, he proceeded to put together with the presentation."
)

In [33]:
# Run the loaded en_coref_md pipeline over the sample text.
doc = nlp(test_sent)

In [34]:
doc._.has_coref
#True

True

In [35]:
len(doc._.coref_clusters)
#2

2

In [36]:
# Each cluster is displayed as "main mention: [all mentions]".
doc._.coref_clusters
#[Paul: [Paul, He, his, him, he], Elizabeth: [Elizabeth, she, she]]

[Paul: [Paul, He, his, him, he], Elizabeth: [Elizabeth, she, she]]

In [11]:
# Main mention of the second cluster (index 1).
doc._.coref_clusters[1].main
#Elizabeth

Elizabeth

In [12]:
# Every mention grouped into the second cluster (index 1).
doc._.coref_clusters[1].mentions
#[Elizabeth, she, she]

[Elizabeth, she, she]

In [13]:
# Text with each mention replaced by its cluster's main mention
# (e.g. "He" -> "Paul", "she" -> "Elizabeth").
resolved_text = doc._.coref_resolved
# Re-parse the resolved text to split it into sentences. Span.text is used
# instead of the legacy Span.string attribute (removed in spaCy v3); after
# strip() the resulting strings are the same.
sentences = [sent.text.strip() for sent in nlp(resolved_text).sents]
sentences

['Paul discussed the approach with Elizabeth.',
 'Paul thought Elizabeth had great ideas.',
 'However, Elizabeth disagreed with Paul overall plan and decided not to talk to Paul again.',
 'On the other hand, Paul proceeded to put together with the presentation.']

['Paul discussed the approach with Elizabeth.',
 'Paul thought Elizabeth had great ideas.',
 'However, Elizabeth disagreed with Paul overall plan and decided not to talk to Paul again.',
 'On the other hand, Paul proceeded to put together with the presentation.']

In [14]:
# Keep only the resolved sentences whose space-joined, lower-cased lemmas
# contain the substring 'elizabeth'.
output = [
    sentence
    for sentence in sentences
    if 'elizabeth' in ' '.join(tok.lemma_.lower() for tok in nlp(sentence))
]

In [15]:
# Sentences whose lemmas contain 'elizabeth'.
output

['Paul discussed the approach with Elizabeth.',
 'Paul thought Elizabeth had great ideas.',
 'However, Elizabeth disagreed with Paul overall plan and decided not to talk to Paul again.']

In [16]:
# Concatenate the kept sentences into one string. Every sentence, including
# the last, is followed by a single space, so the result ends with a trailing space.
outstring = ''.join(sentence + ' ' for sentence in output)
outstring

'Paul discussed the approach with Elizabeth. Paul thought Elizabeth had great ideas. However, Elizabeth disagreed with Paul overall plan and decided not to talk to Paul again. '

## Even larger test

In [17]:
# Seven-sentence sample paragraph about using a phone while driving.
test_sent = (
    'Being on your device and driving could be an overly dangerous choice in life. '
    'Many people around the world are injured by this situation every day. '
    'It could lead to accidents and altercations. '
    'In addition it would even cost you your licences. '
    'The most detrimental outcome is death. '
    'There are far more outcomes to operating a motor vehicle while being on a cell phone. '
    'Drivers should not be able to use cell phones in any capacity while operating a motor vehicle.'
)

In [18]:
# Re-run the pipeline on the longer paragraph; this rebinds `doc`.
doc = nlp(test_sent)

In [19]:
doc._.has_coref
#True

True

In [20]:
len(doc._.coref_clusters)
#1

1

In [21]:
# The only cluster groups "It" with "it"; no noun phrase is picked as the main mention.
doc._.coref_clusters
#[It: [It, it]]

[It: [It, it]]

In [22]:
# Text with each mention replaced by its cluster's main mention; here the
# only substitution is "it" -> "It".
resolved_text = doc._.coref_resolved
# Re-parse the resolved text to split it into sentences. Span.text is used
# instead of the legacy Span.string attribute (removed in spaCy v3); after
# strip() the resulting strings are the same.
sentences = [sent.text.strip() for sent in nlp(resolved_text).sents]
sentences

['Being on your device and driving could be an overly dangerous choice in life.',
 'Many people around the world are injured by this situation every day.',
 'It could lead to accidents and altercations.',
 'In addition It would even cost you your licences.',
 'The most detrimental outcome is death.',
 'There are far more outcomes to operating a motor vehicle while being on a cell phone.',
 'Drivers should not be able to use cell phones in any capacity while operating a motor vehicle.']

## Talking about People

In [23]:
# Two-sentence sample where a plural pronoun refers back to a noun phrase.
test_sent = (
    'Many people believe in UFOs. '
    'They often look for evidence in media.'
)
# Run the pipeline on the sample text; this rebinds `doc`.
doc = nlp(test_sent)

In [24]:
doc._.has_coref
#True

True

In [25]:
# "They" is grouped with "Many people", which is the cluster's main mention.
doc._.coref_clusters
#[Many people: [Many people, They]]

[Many people: [Many people, They]]

In [26]:
# Text with each mention replaced by its cluster's main mention
# ("They" -> "Many people").
resolved_text = doc._.coref_resolved
# Re-parse the resolved text to split it into sentences. Span.text is used
# instead of the legacy Span.string attribute (removed in spaCy v3); after
# strip() the resulting strings are the same.
sentences = [sent.text.strip() for sent in nlp(resolved_text).sents]
sentences

['Many people believe in UFOs.',
 'Many people often look for evidence in media.']

## Talking about 'You' or 'your'

In [27]:
# Sample text written in the second person ("you" / "your").
test_sent = (
    'The amount of time you sleep at night determines the your energy levels will have the next day. '
    'studies have determined that a person who sleeps for 8 hours a day has a higher level of seratonin than those who do not.'
)
# Run the pipeline on the sample text; this rebinds `doc`.
doc = nlp(test_sent)

In [28]:
doc._.has_coref
# False — no coreference was detected in this text

False

In [29]:
doc._.coref_clusters
# NOTE(review): this cell shows no output — presumably coref_clusters is None when has_coref is False; confirm.

## Talking about the impersonal pronoun 'It'

In [30]:
# Sample text whose first two sentences both open with "It is ...".
test_sent = (
    'It is important to get adequate sleep every day. '
    'It is also important to  eat a balanced diet. '
    'Studies have determined that a person who sleeps for 8 hours a day has a higher level of seratonin than those who do not.'
)
# Run the pipeline on the sample text; this rebinds `doc`.
doc = nlp(test_sent)

In [31]:
doc._.has_coref
# False — the two sentence-initial "It"s are not reported as coreferent

False