In [None]:
import flair.datasets
from pathlib import Path
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import TokenEmbeddings, BertEmbeddings,  ELMoEmbeddings, FlairEmbeddings, WordEmbeddings, PooledFlairEmbeddings, StackedEmbeddings, CharacterEmbeddings, CharLMEmbeddings
from flair.models import LanguageModel
from gensim.models.keyedvectors import KeyedVectors
from typing import List
import os

In [None]:
# Folder holding the EBI NER corpus files (train/dev/test) read by the next cell.
EBI_data_folder = '/nfs/gns/literature/Santosh_Tirunagari/NER_Datasets/EBI_standard-IOB'

# Column layout of each corpus file: column 0 is the token text, column 1 its NER tag.
columns = {
    0: 'text',
    1: 'ner',
}

In [None]:
# Load the three splits found in EBI_data_folder as a column-formatted corpus.
# NOTE(review): in_memory=False presumably keeps the sentences on disk rather
# than in RAM - confirm against the installed flair version.
EBI = ColumnCorpus(
    EBI_data_folder,
    columns,
    train_file='train.csv',
    dev_file='dev.csv',
    test_file='test.csv',
    in_memory=False,
)



In [None]:
# Name of the label layer to predict; same name as the tag column declared in `columns`.
tag_type = 'ner'

# Build the tag dictionary for that layer from the corpus and print its
# index -> tag list so the label set can be checked by eye.
tag_dictionary = EBI.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

In [None]:
# Individual token embeddings that are later stacked into a single representation.
# The list order is kept as-is because it is passed straight to StackedEmbeddings.
embedding_types: List[TokenEmbeddings] = [
    # pre-trained word vectors
    WordEmbeddings('glove'),
    # contextual string embeddings, forward and backward ('news-*' models)
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
    # character-level embeddings with default settings
    CharacterEmbeddings(),
    # pooled contextual embeddings from the 'pubmed-*' models, max-pooled
    PooledFlairEmbeddings('pubmed-backward', pooling='max'),
    PooledFlairEmbeddings('pubmed-forward', pooling='max'),
]

In [None]:
# Combine every entry of `embedding_types` into the single embedding object handed to the tagger.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

In [None]:
# Create the sequence tagger on top of the stacked embeddings; it predicts the
# `tag_type` layer using the labels in `tag_dictionary`, with use_crf enabled.
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)

In [None]:
# Create the trainer that pairs the `tagger` model with the `EBI` corpus.
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, EBI)

In [None]:
# Run training. The first argument is the output directory; the inference cell
# further down loads 'best-model.pt' from this same directory.
trainer.train(
    '/nfs/gns/literature/Santosh_Tirunagari/GitHub/flair_models/ner/multi_bio_ner_model/EBI',
    learning_rate=0.1,
    mini_batch_size=32,
    max_epochs=150,
)

In [1]:
from flair.data import Sentence, Token
from flair.models import SequenceTagger

# Load the trained tagger for inference. The path is 'best-model.pt' inside the
# output directory that was passed to trainer.train(...) in the training cell.
flair_model = SequenceTagger.load('/nfs/gns/literature/Santosh_Tirunagari/GitHub/flair_models/ner/multi_bio_ner_model/EBI/best-model.pt')


2019-10-14 15:57:21,093 loading file /nfs/gns/literature/Santosh_Tirunagari/GitHub/flair_models/ner/multi_bio_ner_model/EBI/best-model.pt


In [8]:
from flask import Flask, jsonify, request

import re

# Entity labels for which a CSS class is emitted; terms carrying any other
# label are left unmarked in the output.
_HIGHLIGHT_CLASSES = frozenset(('GP', 'DS', 'OG'))


def term_highlighter(text: str = None, terms: list = None) -> str:
    """Wrap entity mentions found in ``text`` in ``<span class='LABEL'>`` tags.

    Args:
        text: The (already tokenised, space-joined) text to annotate.
        terms: A list of entity records, each indexable as
            ``[start_pos, end_pos, label, entity_text]``. Only ``label``
            (index 2) and ``entity_text`` (index 3) are read here.

    Returns:
        ``text`` with every occurrence of each selected surface string wrapped
        as ``<span class='LABEL'>surface</span>`` for labels ``GP``, ``DS`` and
        ``OG``. Terms with other labels, non-string or empty entity text, and
        repeated entity strings are skipped.

    Raises:
        ValueError: If ``text`` or ``terms`` is empty or ``None``.

    Notes:
        * The surface string for a term is the term itself when it is a token
          of ``text``; otherwise the first token (in reading order) that
          contains it; otherwise the term unchanged (e.g. multi-word entities).
          This makes the result deterministic.
        * All surfaces are substituted in a single left-to-right pass over the
          original text, longest surface first, so inserted markup is never
          re-scanned (an entity called "GP" or "span" cannot corrupt earlier
          tags) and a short entity cannot break up a longer one.
        * NOTE(review): ``text`` is inserted into HTML without escaping. If it
          can contain untrusted markup, escape it before calling this function.
    """
    if not text or not terms:
        raise ValueError('Either the supplied text or list of terms and scores is empty or of type None')

    # Tokenise once, on the untouched input, so the lookup below never sees
    # tokens that come from markup.
    tokens = wordpunct_tokenize(text)
    token_set = set(tokens)

    surface_to_label = {}
    seen_terms = set()

    for term in terms:
        label, term_str = term[2], term[3]

        if not isinstance(term_str, str) or not term_str:
            continue

        # prevent double highlighting of an entity that was reported more than once
        if term_str in seen_terms:
            continue
        seen_terms.add(term_str)

        if term_str in token_set:
            surface = term_str
        else:
            # Expand a partial match to the whole token; fall back to the raw
            # term when no single token contains it.
            surface = next((tok for tok in tokens if term_str in tok), term_str)

        if label in _HIGHLIGHT_CLASSES:
            # First label seen for a surface wins.
            surface_to_label.setdefault(surface, label)

    if not surface_to_label:
        return text

    # Longest alternatives first so "ovarian cancer" is preferred over "cancer"
    # at the same position.
    ordered_surfaces = sorted(surface_to_label, key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(surface) for surface in ordered_surfaces))

    def _wrap(match):
        surface = match.group(0)
        return '<span class=\'' + surface_to_label[surface] + '\'>' + surface + '</span>'

    return pattern.sub(_wrap, text)


def pcse_ner_predictor(text_sentence):
    """Tag ``text_sentence`` with the loaded NER model and build highlighted HTML.

    Args:
        text_sentence: Raw text to analyse.

    Returns:
        A plain ``dict`` in every case:

        * empty/``None`` input: ``{'error': 'No parameters supplied',
          'status': 400, 'service': 'pcse_ner_predictor'}``. This is returned
          as a dict (not via ``jsonify``) so the function also works outside a
          Flask application context and has one consistent return type.
        * success: ``'tagged'`` (``sentence.to_dict(tag_type='ner')``),
          ``'highlighted_text'`` and ``'status': 200``. When the model finds
          no entities, ``'highlighted_text'`` is the tokenised text unchanged.
        * failure while post-processing: ``'status': 400`` plus an ``'error'``
          string naming the exception; ``'tagged'`` may be present if it was
          produced before the failure.

    Exceptions raised by tokenisation or by ``flair_model.predict`` are not
    caught here and propagate to the caller.
    """
    if not text_sentence:
        return {
            'error': 'No parameters supplied',
            'status': 400,
            'service': 'pcse_ner_predictor',
        }

    data_dict = {}

    # Pre-tokenise and re-join with single spaces before building the Sentence.
    # NOTE(review): presumably this mirrors the tokenisation of the training
    # data - confirm.
    sentence = Sentence(' '.join(wordpunct_tokenize(text_sentence)))
    flair_model.predict(sentence)

    try:
        tagged = sentence.to_dict(tag_type='ner')
        data_dict['tagged'] = tagged

        text_input = tagged['text']
        terms_entities = [
            [entity['start_pos'], entity['end_pos'], entity['type'], entity['text']]
            for entity in tagged['entities']
        ]

        # term_highlighter rejects an empty term list; text without entities
        # is a valid result, not an error, so return it unmarked.
        if terms_entities:
            data_dict['highlighted_text'] = term_highlighter(text_input, terms_entities)
        else:
            data_dict['highlighted_text'] = text_input

        data_dict['status'] = 200
    except Exception as exc:
        # Keep the best-effort contract (report 400 instead of raising) but
        # record what went wrong instead of discarding it.
        data_dict['status'] = 400
        data_dict['error'] = f'{type(exc).__name__}: {exc}'

    return data_dict

In [9]:
from nltk.tokenize import wordpunct_tokenize

# Sample abstract used to exercise the predictor end to end.
text_sentence = 'AS1411 Aptamer-functionalized liposome was successfully formulated and found to be nanosized. Flow cytometer and CLSM results demonstrated that Aptamer enhanced the targeting of carrier in the cancer cells via nucleolin-mediated transmembrane endocytosis pathway. The lipofectaminebased miR-29b showed a typical concentration-dependent cytotoxic effect in the cancer cells. LP-miR induced a significant reduction in the cell viability of A2780 cells compared to that of nontreated control, while LP-Mut (mutant loaded) did not have any effect on the cell viability indicating the importance of the specific gene sequencing. LP-miR induced a significant decrease in the green fluorescence which is indicative of the decrease in the cell viability. Simultaneously, higher PI positive cells were observed for LP-miR treated cancer cells in Live/Dead assay. Cells treated with LP-miR exhibited the brightest fluorescence indicating the presence of apoptotic cells. Significant increase in the Annexin-V+ cells and PI+ cells were observed for cell treated with LP-miR compared to that of non-treated control indicating the potential of miR-29b. This novel miR-29b-loaded Aptamer-directed liposome could potential serve as a new platform to improve the therapeutic outcome in ovarian cancers.'

# Tag the sample and print the resulting dict (tagged sentence, highlighted text, status).
print(pcse_ner_predictor(text_sentence))

    
    

{'tagged': {'text': 'AS1411 Aptamer - functionalized liposome was successfully formulated and found to be nanosized . Flow cytometer and CLSM results demonstrated that Aptamer enhanced the targeting of carrier in the cancer cells via nucleolin - mediated transmembrane endocytosis pathway . The lipofectaminebased miR - 29b showed a typical concentration - dependent cytotoxic effect in the cancer cells . LP - miR induced a significant reduction in the cell viability of A2780 cells compared to that of nontreated control , while LP - Mut ( mutant loaded ) did not have any effect on the cell viability indicating the importance of the specific gene sequencing . LP - miR induced a significant decrease in the green fluorescence which is indicative of the decrease in the cell viability . Simultaneously , higher PI positive cells were observed for LP - miR treated cancer cells in Live / Dead assay . Cells treated with LP - miR exhibited the brightest fluorescence indicating the presence of apopt