<a href="https://colab.research.google.com/github/zphelps/beta-pearl-python/blob/main/coref.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# !pip install -U spacy-experimental https://github.com/explosion/spacy-experimental/releases/download/v0.6.1/en_coreference_web_trf-3.4.0a2-py3-none-any.whl

In [4]:
# !pip install flask



In [5]:
import spacy
from spacy.tokens import Doc
import pandas as pd
from flask import Flask

# Name of the spaCy pipeline package to load; it must already be installed in
# the kernel's environment (the commented-out pip cell at the top of the
# notebook references a wheel with this name).
COREF_MODEL_NAME = "en_coreference_web_trf"
nlp = spacy.load(COREF_MODEL_NAME)

# Flask application object that the route decorator below registers against.
app = Flask(__name__)

def resolve_references(doc: Doc) -> str:
    """Rewrite a coref-annotated Doc so later mentions read as their antecedent.

    Every span group in ``doc.spans`` whose key starts with ``"coref_cluster"``
    is treated as one cluster. The first span of a cluster is used as the
    antecedent, and every other span of that cluster is replaced by the
    antecedent's text. Tokens outside any replaced span are emitted unchanged.

    doc (Doc): The Doc object processed by the coref pipeline
    RETURNS (str): The Doc string with resolved references
    """
    # token.idx (character offset of the token) -> text to emit for that token
    token_mention_mapper = {}
    clusters = [
        val for key, val in doc.spans.items() if key.startswith("coref_cluster")
    ]

    # Iterate through every found cluster
    for cluster in clusters:
        # A cluster with no spans has no antecedent to substitute.
        if len(cluster) == 0:
            continue
        first_mention = cluster[0]
        # Iterate through every other span in the cluster
        for mention_span in list(cluster)[1:]:
            # The first token of the mention carries the whole replacement.
            # The trailing whitespace must come from the LAST token of the
            # replaced mention: that is what separates the mention from the
            # text that follows it. Using the first token's whitespace would
            # yield "Rex , and" for a multi-token mention followed by a comma.
            token_mention_mapper[mention_span[0].idx] = (
                first_mention.text + mention_span[-1].whitespace_
            )

            for token in mention_span[1:]:
                # The remaining tokens of the mention are swallowed.
                token_mention_mapper[token.idx] = ""

    # Emit the replacement where one exists, else the original token text.
    return "".join(
        token_mention_mapper[token.idx]
        if token.idx in token_mention_mapper
        else token.text + token.whitespace_
        for token in doc
    )


# The route is registered on a thin wrapper rather than on resolve_references
# itself: Flask invokes a view with URL variables only, and this rule has none,
# so a view that requires a positional ``doc`` argument fails on every request.
# The endpoint name is pinned so it stays "resolve_references".
@app.route('/api/resolve', methods=['POST'], endpoint='resolve_references')
def _resolve_endpoint():
    """HTTP view for POST /api/resolve.

    Reads the input text from the request, runs it through the module-level
    ``nlp`` pipeline and returns the resolved text.

    NOTE(review): the expected body, JSON of the form ``{"text": "<string>"}``,
    is chosen here; confirm it against the intended client contract.

    RETURNS: the resolved string on success, or a JSON error with status 400
    when the body is not JSON or has no string ``"text"`` field.
    """
    from flask import jsonify, request

    payload = request.get_json(silent=True)
    text = payload.get("text") if isinstance(payload, dict) else None
    if not isinstance(text, str):
        return jsonify(error='expected a JSON body of the form {"text": "<string>"}'), 400
    return resolve_references(nlp(text))

In [6]:
# Sample input: a short exchange in which "them" refers back to an earlier
# phrase.
text = ["Noted! Would you like any assistance with preparations or arrangements for the Claypooles' visit? \n no. David is not very excited for them to be coming though. do you know which david I'm talking about?"]

# One row per input string.
df = pd.DataFrame({'text': text})

# Stream the texts through the pipeline and resolve each resulting Doc.
df['text-coref'] = list(map(resolve_references, nlp.pipe(df['text'])))

# Print each resolved text on its own line.
for txt in df['text-coref']:
    print(txt)

Noted! Would you like any assistance with preparations or arrangements for the Claypooles' visit? 
 no. David is not very excited for the Claypooles' to be coming though. do you know which david I'm talking about?
