In [None]:
pip install flask


In [None]:
pip install --upgrade pip setuptools


In [None]:
pip install scipy==1.7.3


In [None]:
pip install -r requirements.txt


In [None]:
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
# `setup.py` is not a pip subcommand; install the project in the current directory instead.
%pip install .


In [None]:
# `setup.py` is not a pip subcommand; an editable install is pip's equivalent of `setup.py develop`.
%pip install -e .

In [None]:

import json
import urllib
import opennre

In [None]:
import spacy
from typing import List
from spacy.tokens import Doc, Span
from fastcoref import FCoref
#from fastcoref import LingMessCoref
# from flask import Flask, render_template, request, redirect, session, jsonify

# spaCy pipeline 'en_core_web_sm'; coreference() uses it to tokenize the input,
# and the helpers below read each token's tag_ / pos_ from the resulting Doc.
nlp = spacy.load('en_core_web_sm')
# fastcoref model, created once when this cell runs and shared by get_fastcoref_clusters().
model = FCoref()
#model = LingMessCoref(device='cuda:0')
# app = Flask(__name__)

def core_logic_part(document: Doc, coref: List[int], resolved: List[str], mention_span: Span):
    """Overwrite one mention in ``resolved`` with the text of its cluster head.

    Args:
        document: Indexable sequence of tokens exposing ``tag_`` and ``whitespace_``.
        coref: Inclusive ``[start, end]`` token indices of the mention to replace.
        resolved: Per-token output strings; modified in place.
        mention_span: Head mention whose ``text`` is substituted in.

    Returns:
        The same ``resolved`` list that was passed in.
    """
    last_token = document[coref[1]]
    # A mention ending in a possessive pronoun or possessive marker keeps its "'s".
    suffix = "'s" if last_token.tag_ in ("PRP$", "POS") else ""
    resolved[coref[0]] = mention_span.text + suffix + last_token.whitespace_
    # The head text now lives in the first slot; blank out the rest of the mention.
    for position in range(coref[0] + 1, coref[1] + 1):
        resolved[position] = ""
    return resolved

def get_span_noun_indices(doc: Doc, cluster: List[List[int]]) -> List[int]:
    """Return the positions within ``cluster`` of mentions that contain a noun.

    Each entry of ``cluster`` is an inclusive ``[start, end]`` pair of token
    indices into ``doc``. A mention qualifies when at least one of its tokens
    has ``pos_`` equal to ``'NOUN'`` or ``'PROPN'``.
    """
    noun_tags = ('NOUN', 'PROPN')
    matching_positions = []
    for position, bounds in enumerate(cluster):
        mention_tokens = doc[bounds[0]:bounds[1] + 1]
        mention_pos = [tok.pos_ for tok in mention_tokens]
        if any(tag in mention_pos for tag in noun_tags):
            matching_positions.append(position)
    return matching_positions

def get_cluster_head(doc: Doc, cluster: List[List[int]], noun_indices: List[int]):
    """Pick the cluster's head mention: the first one listed in ``noun_indices``.

    Returns:
        A pair ``(head_span, [start, end])`` where ``head_span`` is the slice
        of ``doc`` covering the head and ``[start, end]`` are its inclusive
        token indices.
    """
    first_noun_position = noun_indices[0]
    start, end = cluster[first_noun_position]
    return doc[start:end + 1], [start, end]

def is_containing_other_spans(span: List[int], all_spans: List[List[int]]):
    """Tell whether some other span in ``all_spans`` lies entirely inside ``span``.

    Spans are inclusive ``[start, end]`` pairs; a span equal to ``span`` itself
    is not counted.
    """
    for other in all_spans:
        if other == span:
            continue
        if other[0] >= span[0] and other[1] <= span[1]:
            return True
    return False

def improved_replace_corefs(document, clusters):
    """Rebuild the document text with mentions replaced by their cluster head.

    For every cluster that has at least one noun-bearing mention, each other
    mention is rewritten to the head's text, except mentions that enclose
    another span from any cluster. Clusters without a noun-bearing mention are
    left untouched.

    Args:
        document: Iterable of tokens exposing ``text_with_ws``.
        clusters: List of clusters, each a list of inclusive ``[start, end]``
            token-index pairs.

    Returns:
        The resolved text as a single string.
    """
    pieces = [token.text_with_ws for token in document]

    # Flatten every mention of every cluster for the nesting check below.
    every_span = []
    for group in clusters:
        every_span.extend(group)

    for group in clusters:
        noun_positions = get_span_noun_indices(document, group)
        if not noun_positions:
            continue

        head_span, head_bounds = get_cluster_head(document, group, noun_positions)
        for candidate in group:
            if candidate == head_bounds or is_containing_other_spans(candidate, every_span):
                continue
            core_logic_part(document, candidate, pieces, head_span)

    return "".join(pieces)

def get_fast_cluster_spans(doc, clusters):
    """Convert character-offset clusters into inclusive token-index clusters.

    Args:
        doc: Object exposing ``char_span(start, end, alignment_mode=...)``
            (a spaCy ``Doc`` in this notebook).
        clusters: List of clusters, each a list of ``(start_char, end_char)``
            pairs.

    Returns:
        One list per input cluster, holding ``[first_token, last_token]``
        (inclusive) for every mention that could be mapped onto tokens. A
        mention that cannot be mapped is skipped, so a cluster may come back
        shorter than it went in, or empty.
    """
    fast_clusters = []
    for cluster in clusters:
        token_spans = []
        for start_char, end_char in cluster:
            # With the default strict alignment, char_span returns None when the
            # offsets do not fall on token boundaries; "expand" snaps outward to
            # the covering tokens and gives the same span when they already align.
            span = doc.char_span(start_char, end_char, alignment_mode="expand")
            if span is None:
                continue
            # span.end is exclusive; the rest of the pipeline uses inclusive ends.
            token_spans.append([span.start, span.end - 1])
        fast_clusters.append(token_spans)
    return fast_clusters

def get_fastcoref_clusters(doc, text):
    """Run the module-level coreference ``model`` on ``text`` and map its clusters onto ``doc``.

    The model is asked for clusters as offsets rather than strings
    (``as_strings=False``); ``get_fast_cluster_spans`` then converts them to
    token-index spans of ``doc``.
    """
    prediction = model.predict(texts=[text])[0]
    char_clusters = prediction.get_clusters(as_strings=False)
    return get_fast_cluster_spans(doc, char_clusters)

# @app.route('/coreference', methods=['POST'])
def coreference(text):
    """Return ``text`` with coreferent mentions replaced by their cluster heads.

    The text is parsed with the module-level ``nlp`` pipeline, clustered by
    ``get_fastcoref_clusters`` and rewritten by ``improved_replace_corefs``.
    The resolved text is also printed.
    """
    parsed = nlp(text)
    mention_clusters = get_fastcoref_clusters(parsed, text)
    resolved_text = improved_replace_corefs(parsed, mention_clusters)
    print('coref_text', resolved_text)
    return resolved_text


In [None]:
import json
import urllib
from string import punctuation
import nltk

# Rest of your code...


# Wikidata class labels (compared against each annotation's `enLabel`) that
# wikifier() keeps; annotations carrying none of these classes are dropped.
ENTITY_TYPES = ["human", "person", "company", "enterprise", "business", "geographic region",
                "human settlement", "geographic entity", "territorial entity type", "organization"]

def wikifier(text, lang="en", threshold=0.8):
    """Function that fetches entity linking results from wikifier.com API

    Args:
        text: Text to annotate.
        lang: Language code sent to the service.
        threshold: Value sent as ``pageRankSqThreshold``.

    Returns:
        A list of dicts with keys ``title``, ``wikiId``, ``label`` and
        ``characters``. Only annotations having at least one Wikidata class
        listed in ``ENTITY_TYPES`` are returned. ``label`` is ``'Person'``,
        ``'Organization'``, ``'Location'`` or ``None``; ``wikiId`` is ``None``
        when the annotation has no ``wikiDataItemId``; ``characters`` is a
        list of ``(chFrom, chTo)`` pairs taken from the annotation's support.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
    """
    # Import the submodules explicitly: a bare `import urllib` does not
    # guarantee that `urllib.parse` / `urllib.request` are loaded.
    import os
    import urllib.parse
    import urllib.request

    # NOTE(review): the key should come from the environment. The literal
    # fallback is kept only so existing runs keep working; rotate and remove it.
    user_key = os.environ.get("WIKIFIER_USER_KEY", "tgbdmkpmkluegqfbawcwjywieevmza")

    # Prepare the URL.
    data = urllib.parse.urlencode([
        ("text", text), ("lang", lang),
        ("userKey", user_key),
        ("pageRankSqThreshold", "%g" %
         threshold), ("applyPageRankSqThreshold", "true"),
        ("nTopDfValuesToIgnore", "100"), ("nWordsToIgnoreFromList", "100"),
        ("wikiDataClasses", "true"), ("wikiDataClassIds", "false"),
        ("support", "true"), ("ranges", "false"), ("minLinkFrequency", "2"),
        ("includeCosines", "false"), ("maxMentionEntropy", "3")
    ])
    url = "http://www.wikifier.org/annotate-article"
    # Call the Wikifier and read the response.
    req = urllib.request.Request(url, data=data.encode("utf8"), method="POST")
    with urllib.request.urlopen(req, timeout=60) as f:
        response = json.loads(f.read().decode("utf8"))

    # Checked in order; the first group sharing a class with the annotation wins.
    label_groups = (
        ('Person', ("human", "person")),
        ('Organization', ("company", "enterprise", "business", "organization")),
        ('Location', ("geographic region", "human settlement",
                      "geographic entity", "territorial entity type")),
    )

    results = []
    # Tolerate a response that carries no "annotations" key.
    for annotation in response.get("annotations") or []:
        class_names = {el.get('enLabel') for el in annotation.get('wikiDataClasses') or []}
        # Filter out everything but the desired entity classes.
        if not any(name in ENTITY_TYPES for name in class_names):
            continue

        label = next(
            (name for name, members in label_groups if class_names.intersection(members)),
            None,
        )
        results.append({
            'title': annotation['title'],
            'wikiId': annotation.get('wikiDataItemId'),
            'label': label,
            'characters': [(el['chFrom'], el['chTo']) for el in annotation.get('support') or []],
        })
    return results

In [None]:

from flask import Flask, request


In [None]:
from opennre.pretrain import get_model
import itertools
import csv
import nltk
# Tokenizer data for nltk.sent_tokenize. Recent NLTK releases load the
# 'punkt_tab' resource instead of the pickled 'punkt' models, so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')
# OpenNRE relation-extraction model used by final().
relation_model = get_model('wiki80_cnn_softmax')
# Wikidata class labels that wikifier() keeps (same list as the one defined next to wikifier()).
ENTITY_TYPES = ["human", "person", "company", "enterprise", "business", "geographic region",
                "human settlement", "geographic entity", "territorial entity type", "organization"]

def strip_punctuation(s):
    """Return ``s`` with every character found in ``string.punctuation`` removed."""
    kept = filter(lambda ch: ch not in punctuation, s)
    return ''.join(kept)


def deduplicate_dict(d):
    """Return the distinct dicts of ``d`` in first-seen order.

    Two dicts are duplicates when they hold the same key/value pairs,
    regardless of key insertion order. All values must be hashable.

    Args:
        d: Iterable of dicts.

    Returns:
        A new list of shallow copies, one per distinct dict.
    """
    seen = set()
    unique = []
    for item in d:
        # frozenset ignores key order, so {'a': 1, 'b': 2} and {'b': 2, 'a': 1} collapse.
        key = frozenset(item.items())
        if key not in seen:
            seen.add(key)
            unique.append(dict(item))
    return unique


def final(text, relation_threshold=0.9, entities_threshold=0.8, coref=True):
    """Extract entities and relations from ``text``, save them to CSV and print them.

    The text is optionally coreference-resolved, split into sentences, stripped
    of punctuation and entity-linked with ``wikifier``. Every ordered pair of
    linked entities in a sentence is then scored with ``relation_model``.

    Args:
        text: Input text.
        relation_threshold: A relation is kept when the score returned by
            ``relation_model.infer`` is strictly greater than this value.
        entities_threshold: Passed to ``wikifier`` as ``threshold``.
        coref: When true, run ``coreference`` on the text first.

    Returns:
        ``{'entities': [...], 'relations': [...]}`` with duplicates removed, or
        an error-message string when ``text`` is empty or a threshold cannot
        be converted to ``float``.

    Side effects:
        Overwrites ``entities.csv`` and ``relations.csv`` in the working
        directory and prints the processed text, entities and relations.
    """
    if not text:
        return 'Missing text parameter'

    try:
        relation_threshold = float(relation_threshold)
        entities_threshold = float(entities_threshold)
    except (TypeError, ValueError):
        return 'Invalid value for relation or entity threshold parameter'

    if coref:
        text = coreference(text)

    print(text)

    relations_list = []
    entities_list = []

    for sentence in nltk.sent_tokenize(text):
        sentence = strip_punctuation(sentence)
        if not sentence.strip():
            # Nothing left after stripping punctuation; skip the remote call.
            continue
        entities = wikifier(sentence, threshold=entities_threshold)
        entities_list.extend(
            {'title': el['title'], 'wikiId': el['wikiId'], 'label': el['label']} for el in entities)
        # Iterate over every ordered pair of entities
        for source_entity, target_entity in itertools.permutations(entities, 2):
            for source in source_entity['characters']:
                for target in target_entity['characters']:
                    # Relationship extraction with OpenNRE. The end offsets
                    # from wikifier are bumped by one before being passed on.
                    data = relation_model.infer({
                        'text': sentence,
                        'h': {'pos': [source[0], source[1] + 1]},
                        't': {'pos': [target[0], target[1] + 1]},
                    })
                    if data[1] > relation_threshold:
                        relations_list.append({
                            'source': source_entity['title'],
                            'target': target_entity['title'],
                            'type': data[0],
                        })

    # Each entity or relation is reported once, even when it occurs in several sentences.
    result = {'entities': deduplicate_dict(entities_list),
              'relations': deduplicate_dict(relations_list)}

    # Write entities to CSV
    with open('entities.csv', 'w', newline='', encoding='utf-8') as entities_file:
        entities_writer = csv.DictWriter(entities_file, fieldnames=['title', 'wikiId', 'label'])
        entities_writer.writeheader()
        entities_writer.writerows(result['entities'])

    # Write relations to CSV
    with open('relations.csv', 'w', newline='', encoding='utf-8') as relations_file:
        relations_writer = csv.DictWriter(relations_file, fieldnames=['source', 'target', 'type'])
        relations_writer.writeheader()
        relations_writer.writerows(result['relations'])

    print("Entities:")
    for entity in result['entities']:
        print(f"{entity['title']}: {entity['label']}")

    # Print the relations found across the whole text
    print("\nRelations:")
    for relation in result['relations']:
        print(f"{relation['source']} -- {relation['type']} --> {relation['target']}")

    return result
    

# Sample passage for the pipeline. The date range uses a real en dash; it was
# previously stored as a mis-decoded byte sequence.
text = "Akbar (Abu'l-Fath Jalal ud-b din Muhammad Akbar, 25 October 1542 – 27 October 1605), also known as Akbar the Great was the 3rd Mughal Emperor.[1] He was born in Lahore (now Pakistan). He was the son of 2nd Mughal Emperor Humayun. Akbar became the de jure king in 1556 at the age of 13 when his father died. Akbar was too young to rule, so Bairam Khan was appointed as Akbar regent and chief army commander. Soon after coming to power Akbar defeated Himu, the general of the Afghan forces, in the Second Battle of Panipat. After a few years, he ended the regency of Bairam Khan and took charge of the kingdom. He initially offered friendship to the Rajputs. However, he had to fight against some Rajputs who opposed him. In 1576 he defeated Maharana Pratap of Mewar in the Battle of Haldighati. Akbar wars made the Mughal empire more than twice as big as it had been before, covering most of the Indian subcontinent except the south (excluding the Deccan Plateau)"
# ctext=coreference(text)
final(text)


    

: 