In [1]:
# Output directory
import spacy
from pathlib import Path
from spacy.training import Example
from spacy import displacy
from IPython.core.display import display, HTML
from IPython.display import clear_output
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import requests

sia = SentimentIntensityAnalyzer()

In [2]:
output_dir=Path('/content/')

# Loading the model from the directory
print("Loading from", output_dir)
nlp = spacy.load(output_dir)
nlp.add_pipe('sentencizer')

Loading from \content


<spacy.pipeline.sentencizer.Sentencizer at 0x1eb5122c200>

In [3]:
import json

with open('data.json') as f:
    data = json.load(f)

In [None]:
print(data[0])

In [4]:
test_indexes = [150,406,513,101,535,700,814,809,395,31,196,484,390,676,465,642,285,60,77,299,685,14,267,145,666,55,868,344,816,474,872,770,876,432,790,622,308,181,337,318,877,538,279,630,97,304,836,706,626,692,8,144,483,580,270,542,230,363,266,301,215,306,520,391,34,609,668,764,27,30,582,732,760,384,271,878,372,283,293,202,317,806,526,758,742,824,527,672,568,586,695,874,236,720,787,382,590,475,204,506,253,456,331,379,716,358,258,252,428,638,5,717,457,103,434,247,808,342,826,272,142,403,796,686,519,776,113,727,249,40,338,214,158,175,567,563,839,447,718,141,667,511,62,79,491,351,840,263,345,530,356,546,477,505,85,604,75,239,772,316,364,829,1,413,873,689,828,794,710,813,140,319,362,367,264,320,466,222,200,162,587,632,460,726,416,54,45,172,255,193,471,409,39,614,533,18,557,156,231,641,50,843,748,640,122,576,167,784,627,859,10,105,68,240,310,698,261,412,602,545]

In [6]:
examples = []
i = 0
for text, annots in data:
    if i in test_indexes:
        examples.append(Example.from_dict(nlp.make_doc(text), annots))
    i += 1



In [None]:
results = nlp.evaluate(examples)
print(results)

In [None]:
z = 0
for d in data:
    if z in test_indexes:
        doc = nlp(d[0])
        print("#######################")
        print("Annotated:")
        print(d[1]['entities'])
        print("\n\n\n#######################")
        print("Found from model:")
        print(doc.ents)
        for entity in doc.ents:
            print("\n\n#######################")
            print('Entity extracted : ', entity.text)
            print("\n\n#######################")
            print('Sentence extracted from : ', entity.sent)
            print("\n\n#######################")
            print('SIA :')
            print(sia.polarity_scores(entity.sent.text))
            print("\n\n#######################")
            print('Textblob :')
            testimonial = TextBlob(entity.sent.text)
            print(testimonial.sentiment)
        html = displacy.render(doc, style="ent")
        input('Press ENTER to continue...')
        clear_output(wait=True)
    z += 1

#######################
Annotated:
[[1125, 1150, 'PATHWAY']]



#######################
Found from model:
(oxidative phosphorylation,)


#######################
Entity extracted :  oxidative phosphorylation


#######################
Sentence extracted from :  synergism analysis showed that celecoxib, dmc, and casiopeinaii-gly at sub-ic<sub>50</sub> doses increased the potency of cisplatin, paclitaxel, and doxorubicin to hinder hela cell proliferation through a significant abolishment of oxidative phosphorylation in bidimensional cultures, with no apparent effect on non-cancer cells (therapeutic index >3.6).


#######################
SIA :
{'neg': 0.048, 'neu': 0.868, 'pos': 0.085, 'compound': 0.1779}


#######################
Textblob :
Sentiment(polarity=0.175, subjectivity=0.6125)


In [5]:
import os
import django
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gproject.settings')
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
from articles.models import Article, Pathway
from django.db.models import Exists

In [6]:
articles = Article.objects.filter(pathways__isnull=True)
[entry for entry in articles]

(293.253) SELECT "articles_article"."created_at", "articles_article"."updated_at", "articles_article"."is_active", "articles_article"."id", "articles_article"."abstract_text", "articles_article"."pub_date", "articles_article"."name", "articles_article"."doi" FROM "articles_article" LEFT OUTER JOIN "articles_article_pathways" ON ("articles_article"."id" = "articles_article_pathways"."article_id") WHERE "articles_article_pathways"."pathway_id" IS NULL ORDER BY "articles_article"."id" DESC; args=()


[<Article: Article object (464954)>,
 <Article: Article object (464953)>,
 <Article: Article object (464952)>,
 <Article: Article object (464951)>,
 <Article: Article object (464950)>,
 <Article: Article object (464949)>,
 <Article: Article object (464948)>,
 <Article: Article object (464947)>,
 <Article: Article object (464946)>,
 <Article: Article object (464945)>,
 <Article: Article object (464944)>,
 <Article: Article object (464943)>,
 <Article: Article object (464942)>,
 <Article: Article object (464941)>,
 <Article: Article object (464940)>,
 <Article: Article object (464939)>,
 <Article: Article object (464938)>,
 <Article: Article object (464937)>,
 <Article: Article object (464936)>,
 <Article: Article object (464935)>,
 <Article: Article object (464934)>,
 <Article: Article object (464933)>,
 <Article: Article object (464932)>,
 <Article: Article object (464931)>,
 <Article: Article object (464930)>,
 <Article: Article object (464929)>,
 <Article: Article object (464928)>,
 

In [5]:
print(len(articles))

447277


In [5]:
i = 0
z = 0
for ar in articles:
    abstractText = ar.abstract_text.lower()
    doc = nlp(abstractText)
    if len(doc.ents) > 0:
        print("#######################")
        print("Found from model:")
        print(doc.ents)
        for entity in doc.ents:
            print("\n\n#######################")
            print('Entity extracted : ', entity.text)
            print("\n\n#######################")
            print('Sentence extracted from : ', entity.sent)
            print("\n\n#######################")
            print('SIA :')
            print(sia.polarity_scores(entity.sent.text))
            print("\n\n#######################")
            print('Textblob :')
            testimonial = TextBlob(entity.sent.text)
            print(testimonial.sentiment)
        html = displacy.render(doc, style="ent")
        input('Press ENTER to continue...')
        clear_output(wait=True)
        z += 1
    i += 1

NameError: name 'articles' is not defined

In [8]:
articles_p = Article.objects.exclude(pathways=None)
[entry for entry in articles_p]

[<Article: Article object (120)>,
 <Article: Article object (661)>,
 <Article: Article object (934)>,
 <Article: Article object (766)>,
 <Article: Article object (861)>,
 <Article: Article object (866)>,
 <Article: Article object (2154)>,
 <Article: Article object (1541)>,
 <Article: Article object (1677)>,
 <Article: Article object (2025)>,
 <Article: Article object (2704)>,
 <Article: Article object (531)>,
 <Article: Article object (23606)>,
 <Article: Article object (3431)>,
 <Article: Article object (14184)>,
 <Article: Article object (14197)>,
 <Article: Article object (14211)>,
 <Article: Article object (14527)>,
 <Article: Article object (14553)>,
 <Article: Article object (15117)>,
 <Article: Article object (15931)>,
 <Article: Article object (16040)>,
 <Article: Article object (16072)>,
 <Article: Article object (16550)>,
 <Article: Article object (17025)>,
 <Article: Article object (17487)>,
 <Article: Article object (17545)>,
 <Article: Article object (17880)>,
 <Article: A

In [7]:
from textblob import TextBlob
import requests
import nltk
nltk.download('punkt')
import spacy
from spacy import displacy
nlp = spacy.load("en_ner_bc5cdr_md")
nlp_bio = spacy.load("en_ner_bionlp13cg_md")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\omer.davarci\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
i = 0
z = 0
for ar in articles_p:
    abstractText = ar.abstract_text.lower()
    doc = nlp_bio(abstractText)
    for p in ar.pathways.all():
        print(p.name)
    if len(doc.ents) > 0:
        print("#######################")
        print("Found from model:")
        print(doc.ents)
        html = displacy.render(doc, style="ent")
        input('Press ENTER to continue...')
        clear_output(wait=True)
        z += 1
    i += 1

Pyruvate metabolism
#######################
Found from model:
(cancer, cancer, cancer, cancer, cell, metastatic tumors, tumors, cancer cells, breast cancer lung, cancer, cancer cells, cell, cancer, pyruvate dehydrogenase, pdh, pyruvate, lactate, pdha, pdh, mitochondrial matrix, pdha, ser295, ser314, ser295, ser314, pdha, lung, pdh, pdha ser314, pdha, pdhks, s293, pdh, s295, pyruvate, pdh)


KeyboardInterrupt: Interrupted by user