In [1]:
import sys
import random
import os
from pathlib import Path
import shutil
import json

import argparse
import tqdm
import spacy
from spacy.gold import minibatch
from spacy.language import Language
from spacy import util

In [2]:
from scispacy.data_util import read_full_med_mentions, read_ner_from_tsv
from scispacy.per_class_scorer import PerClassScorer
from scispacy.train_utils import evaluate_ner

In [None]:
def train_ner(output_dir: str,
              train_data_path: str,
              dev_data_path: str,
              test_data_path: str,
              run_test: bool = None,
              model: str = None,
              n_iter: int = 10,
              meta_overrides: str = None):
    """Train an NER model, or evaluate an already trained one.

    Args:
        output_dir: directory that receives models and metric files; it is
            created if it does not exist.
        train_data_path: TSV file with the training annotations (only read
            when training).
        dev_data_path: TSV file with the development annotations.
        test_data_path: TSV file with the test annotations.
        run_test: when truthy, skip training and only evaluate ``model`` on
            the dev and test data.
        model: name or path passed to ``spacy.load``; ``None`` starts
            training from a blank English pipeline.
        n_iter: number of training epochs.
        meta_overrides: optional path to a JSON file whose content is merged
            into the model's ``meta``.

    Raises:
        ValueError: if ``run_test`` is truthy but no ``model`` was given.
    """
    # Fail fast: evaluation-only mode has nothing to evaluate without a model,
    # and spacy.load(None) would otherwise fail with a far less helpful error.
    if run_test and model is None:
        raise ValueError("run_test=True requires `model` to name an existing model")

    util.fix_random_seed(util.env_opt("seed", 0))
    dev_data = read_ner_from_tsv(dev_data_path)
    test_data = read_ner_from_tsv(test_data_path)
    os.makedirs(output_dir, exist_ok=True)

    if run_test:
        nlp = spacy.load(model)
        print("Loaded model '%s'" % model)
        evaluate_ner(nlp, dev_data, dump_path=os.path.join(output_dir, "dev_metrics.json"))
        evaluate_ner(nlp, test_data, dump_path=os.path.join(output_dir, "test_metrics.json"))
    else:
        # The training set is only needed on this branch, so it is parsed here
        # rather than up front.
        train_data = read_ner_from_tsv(train_data_path)
        train(model, train_data, dev_data, test_data, output_dir, n_iter, meta_overrides)

In [None]:
def train(model, train_data, dev_data, test_data, output_dir, n_iter, meta_overrides):
    """Load the model, set up the pipeline and train the entity recognizer.

    After every epoch the model is written to ``<output_dir>/<epoch>``,
    reloaded from disk and scored on ``dev_data``. The epoch with the highest
    ``"f1-measure-overall"`` is copied to ``<output_dir>/best`` and evaluated
    on the dev and test data.

    Args:
        model: name or path for ``spacy.load``; ``None`` creates a blank
            English pipeline.
        train_data: list of ``(text_or_doc, annotations)`` pairs, where
            ``annotations['entities']`` holds items whose third element is the
            entity label. The list is shuffled in place every epoch.
        dev_data: data handed to ``evaluate_ner`` for model selection.
        test_data: data handed to ``evaluate_ner`` for the final report.
        output_dir: root directory for per-epoch models, ``best`` and the
            metric JSON files.
        n_iter: number of epochs; must be at least 1.
        meta_overrides: optional path to a JSON file merged into ``nlp.meta``.

    Raises:
        ValueError: if ``n_iter`` is smaller than 1 (no epoch directory would
            exist to copy to ``best``).
    """
    if n_iter < 1:
        raise ValueError("n_iter must be at least 1, got %r" % (n_iter,))

    # Function-scope import: later notebook cells run `from tqdm import tqdm`,
    # which rebinds the global name `tqdm` from the module to the class and
    # would break a global `tqdm.tqdm(...)` lookup when training is re-run.
    from tqdm import tqdm as progress_bar

    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")

    if meta_overrides is not None:
        # Context manager so the file handle is closed deterministically.
        with open(meta_overrides) as meta_file:
            metadata = json.load(meta_file)
        nlp.meta.update(metadata)

    original_tokenizer = nlp.tokenizer

    # Reuse an existing 'ner' pipe, otherwise create one and place it after
    # the parser, else after the tagger, else at the end of the pipeline.
    if 'ner' in nlp.pipe_names:
        ner = nlp.get_pipe('ner')
    else:
        ner = nlp.create_pipe('ner')
        if "parser" in nlp.pipe_names:
            nlp.add_pipe(ner, after="parser")
        elif "tagger" in nlp.pipe_names:
            nlp.add_pipe(ner, after="tagger")
        else:
            nlp.add_pipe(ner, last=True)

    # Register every label that occurs in the training data.
    for _, annotations in train_data:
        for ent in annotations.get('entities') or ():
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

    dropout_rates = util.decaying(util.env_opt('dropout_from', 0.2),
                                  util.env_opt('dropout_to', 0.2),
                                  util.env_opt('dropout_decay', 0.005))
    batch_sizes = util.compounding(util.env_opt('batch_from', 1),
                                   util.env_opt('batch_to', 32),
                                   util.env_opt('batch_compound', 1.001))

    output_root = Path(output_dir)
    optimizer = nlp.begin_training()
    best_epoch = 0
    best_f1 = 0
    for i in range(n_iter):
        print(str(i)+'--'+str(n_iter))
        random.shuffle(train_data)
        count = 0
        losses = {}
        total = len(train_data)

        with nlp.disable_pipes(*other_pipes):  # only train NER
            with progress_bar(total=total, leave=True) as pbar:
                for batch in minibatch(train_data, size=batch_sizes):
                    docs, golds = zip(*batch)
                    nlp.update(docs, golds, sgd=optimizer,
                               losses=losses, drop=next(dropout_rates))
                    pbar.update(len(batch))
                    # Report the accumulated loss every 100 batches.
                    if count % 100 == 0 and count > 0:
                        print('sum loss: %s' % losses['ner'])
                    count += 1

        # save model to output directory; parents=True also covers callers
        # that did not create output_dir beforehand
        output_dir_path = output_root / str(i)
        output_dir_path.mkdir(parents=True, exist_ok=True)

        with nlp.use_params(optimizer.averages):
            # Nothing in this function replaces nlp.tokenizer, so this
            # re-assignment is defensive only.
            nlp.tokenizer = original_tokenizer
            nlp.to_disk(output_dir_path)
            print("Saved model to", output_dir_path)

        # test the saved model
        print("Loading from", output_dir_path)
        nlp2 = util.load_model_from_path(output_dir_path)

        metrics = evaluate_ner(nlp2, dev_data)
        if metrics["f1-measure-overall"] > best_f1:
            best_f1 = metrics["f1-measure-overall"]
            best_epoch = i

    # copy the best epoch to <output_dir>/best, replacing any previous copy
    best_model_path = output_root / "best"
    print(f"Best Epoch: {best_epoch} of {n_iter}")
    if best_model_path.exists():
        shutil.rmtree(best_model_path)
    shutil.copytree(os.path.join(output_dir, str(best_epoch)),
                    best_model_path)

    # test the saved model
    print("Loading from", best_model_path)
    nlp2 = util.load_model_from_path(best_model_path)

    evaluate_ner(nlp2, dev_data, dump_path=os.path.join(output_dir, "dev_metrics.json"))
    evaluate_ner(nlp2, test_data, dump_path=os.path.join(output_dir, "test_metrics.json"))

In [None]:
##### Genes ####
# Settings for the gene NER run on the BC2GM IOB data set; these module-level
# names are the arguments handed to train_ner.
# Directory that receives the per-epoch models, the "best" copy and the metric files.
model_output_dir = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/spacy_models/public/public-BC2GM-en-pubmed-pmc-lg/'
# IOB-tagged TSV files for the three data splits.
train_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/BC2GM-IOB/train.tsv'
dev_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/BC2GM-IOB/devel.tsv'
test_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/BC2GM-IOB/test.tsv'
# False -> train; True -> only evaluate the model at model_path.
run_test = False
# Model passed to spacy.load as the starting point for training.
model_path = '/nfs/gns/literature/Santosh_Tirunagari/pretrained_word_embeddings/models/pubmed-pmc/' # None #'en_core_sci_md'
# Number of training epochs.
iterations = 7
# JSON file whose content is merged into the model's meta.
meta_overrides = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/scispacy/data/EPMC_ner.json'

In [None]:
# Run gene NER training with the settings defined in the previous cell.
# Keyword arguments make the mapping from notebook variables to parameters explicit.
train_ner(
    output_dir=model_output_dir,
    train_data_path=train_data_path,
    dev_data_path=dev_data_path,
    test_data_path=test_data_path,
    run_test=run_test,
    model=model_path,
    n_iter=iterations,
    meta_overrides=meta_overrides,
)

In [None]:
##### Diseases ####
# Settings for the disease NER run on the NCBI-disease IOB data set,
# followed by the training call itself.

model_output_dir = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/spacy_models/public/public-NCBI-en-pubmed-pmc-lg/'
train_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/NCBI-disease-IOB/train.tsv'
dev_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/NCBI-disease-IOB/devel.tsv'
test_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/NCBI-disease-IOB/test.tsv'
run_test = False  # False -> train; True -> only evaluate model_path
model_path = '/nfs/gns/literature/Santosh_Tirunagari/pretrained_word_embeddings/models/pubmed-pmc/' # None #'en_core_sci_md'
iterations = 7  # number of training epochs
meta_overrides = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/scispacy/data/EPMC_ner.json'

train_ner(
    output_dir=model_output_dir,
    train_data_path=train_data_path,
    dev_data_path=dev_data_path,
    test_data_path=test_data_path,
    run_test=run_test,
    model=model_path,
    n_iter=iterations,
    meta_overrides=meta_overrides,
)



In [None]:
##### Organisms ####
# Settings for the organism NER run on the linnaeus IOB data set,
# followed by the training call itself.

model_output_dir = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/spacy_models/public/public-linnaeus-en-pubmed-pmc-lg/'
train_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/linnaeus-IOB/train.tsv'
dev_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/linnaeus-IOB/devel.tsv'
test_data_path = '/nfs/gns/literature/machine-learning/Datasets/NER_Datasets/linnaeus-IOB/test.tsv'
run_test = False  # False -> train; True -> only evaluate model_path
model_path = '/nfs/gns/literature/Santosh_Tirunagari/pretrained_word_embeddings/models/pubmed-pmc/' # None #'en_core_sci_md'
iterations = 7  # number of training epochs
meta_overrides = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/scispacy/data/EPMC_ner.json'

train_ner(
    output_dir=model_output_dir,
    train_data_path=train_data_path,
    dev_data_path=dev_data_path,
    test_data_path=test_data_path,
    run_test=run_test,
    model=model_path,
    n_iter=iterations,
    meta_overrides=meta_overrides,
)


In [3]:
from nltk.tokenize import WordPunctTokenizer, wordpunct_tokenize


def convert2IOB(text_data, ner_tags):
    """Convert character-span entity annotations into per-token IOB tags.

    The text is split with NLTK's ``WordPunctTokenizer``. Every token starts
    out as ``'O'``; tokens whose start offset lies inside an annotated span
    receive ``B-<label>`` (first token of the entity) or ``I-<label>``
    (following tokens).

    Args:
        text_data: the sentence to tokenize.
        ner_tags: annotations as a sequence of ``[start, end, text, label]``
            items, or a string holding the Python literal of such a sequence.
            ``None``, any float (for example the NaN pandas yields for a
            missing cell) and empty values mean "no entities".

    Returns:
        A ``zip`` iterator over ``(token, tag)`` pairs, one per token.

    Raises:
        ValueError: if ``ner_tags`` is a non-empty string that is not a valid
            Python literal.
    """
    # Function-scope import so the function does not depend on a later
    # notebook cell having imported literal_eval first.
    from ast import literal_eval

    tokenizer = WordPunctTokenizer()
    split_text = tokenizer.tokenize(text_data)
    span_text = list(tokenizer.span_tokenize(text_data))
    # for each word token append 'O'
    arr = ['O'] * len(split_text)

    # Floats must be rejected before any truthiness test: NaN is truthy, so a
    # plain `if ner_tags:` lets it through and the loop below would crash.
    if ner_tags is None or isinstance(ner_tags, float):
        return zip(split_text, arr)

    # Only strings need parsing; lists produced in-process are used as they are.
    if isinstance(ner_tags, str):
        if not ner_tags.strip():
            return zip(split_text, arr)
        try:
            ner_tags = literal_eval(ner_tags)
        except (ValueError, SyntaxError) as err:
            raise ValueError("ner_tags is not a valid Python literal: %r" % (ner_tags,)) from err

    if not ner_tags:
        return zip(split_text, arr)

    ners = []
    spans = []
    for each_tag in ner_tags:
        # each_tag looks like [144, 150, 'GM-CSF', 'GP']
        token_list = wordpunct_tokenize(each_tag[2])
        ner_list = wordpunct_tokenize(each_tag[3])

        # Repeat the label so that there is one entry per entity token.
        if len(token_list) > len(ner_list):
            ner_list = len(token_list) * ner_list
        # The first entry becomes the B- tag, every following one an I- tag.
        ner_list = ['B-' + label if position == 0 else 'I-' + label
                    for position, label in enumerate(ner_list)]

        ners.append(ner_list)
        spans.append((each_tag[0], each_tag[1]))

    # Map each token span that starts inside an entity span to its tag.
    # Later entities overwrite earlier ones for the same token, which is the
    # outcome the original nested comparison loop produced as well.
    tag_by_token_span = {}
    for entity_span, entity_tags in zip(spans, ners):
        # count tracks the position of the B/I tag inside entity_tags
        count = 0
        for token_span in span_text:
            if find_sub_span(token_span, entity_span):
                tag_by_token_span[token_span] = entity_tags[count]
                count = count + 1

    for i, token_span in enumerate(span_text):
        if token_span in tag_by_token_span:
            arr[i] = tag_by_token_span[token_span]

    return zip(split_text, arr)


In [4]:
import pandas as pd
from tqdm import tqdm
from ast import literal_eval
import csv


def find_sub_span(sub_span_range, full_spans_range):
    """Return ``sub_span_range`` when it starts inside ``full_spans_range``.

    Only the start offset of the sub span is tested, against the half-open
    interval ``[full_spans_range[0], full_spans_range[1])``. ``None`` is
    returned when the start lies outside that interval.
    """
    sub_start = sub_span_range[0]
    covered_offsets = range(full_spans_range[0], full_spans_range[1])
    if sub_start not in covered_offsets:
        return None
    return sub_span_range
    
    



In [5]:
# Directory prefix for the IOB prediction files; file names are appended to it
# with plain string concatenation, so the trailing slash is required.
result_path = '/nfs/gns/literature/machine-learning/evaluation/300articles/ML-NER/en-pubmed-pmc-lg/public/'

from nltk.tokenize import wordpunct_tokenize, WordPunctTokenizer
from tqdm import tqdm
import csv
import pandas as pd

# The split files are tab-separated and read without a header row, so the
# column names are supplied explicitly; each call gets its own copy of the list.
_SPLIT_COLUMNS = ['pmc_id','sentence', 'gt']

test_df = pd.read_csv(
    '/nfs/gns/literature/machine-learning/evaluation/300articles/CSV formats/test.csv',
    sep='\t',
    names=list(_SPLIT_COLUMNS),
)
dev_df = pd.read_csv(
    '/nfs/gns/literature/machine-learning/evaluation/300articles/CSV formats/dev.csv',
    sep='\t',
    names=list(_SPLIT_COLUMNS),
)
train_df = pd.read_csv(
    '/nfs/gns/literature/machine-learning/evaluation/300articles/CSV formats/train.csv',
    sep='\t',
    names=list(_SPLIT_COLUMNS),
)

In [6]:
# Paths of the "best" model directories written by the three training runs.
gene_path= '/nfs/gns/literature/Santosh_Tirunagari/GitHub/spacy_models/public/public-BC2GM-en-pubmed-pmc-lg/best'
disease_path = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/spacy_models/public/public-NCBI-en-pubmed-pmc-lg/best'
organism_path = '/nfs/gns/literature/Santosh_Tirunagari/GitHub/spacy_models/public/public-linnaeus-en-pubmed-pmc-lg/best'

# Load one pipeline per entity type (`util` is spacy.util, imported in the first cell).
GP_model = util.load_model_from_path(gene_path)
print('GP model loaded')

DS_model = util.load_model_from_path(disease_path)
print('DS model loaded')

OG_model = util.load_model_from_path(organism_path)
print('OG model loaded')

GP model loaded
DS model loaded
OG model loaded


In [28]:
####### Organisms ###################
# Tag every sentence of the train/dev/test frames with OG_model and write one
# tab-separated "token<TAB>IOB-tag" row per token, with an empty row after
# each sentence. One output file is produced per split.

train_result_file_name = 'spaCy_public_train_OG_IOB.csv'
dev_result_file_name = 'spaCy_public_dev_OG_IOB.csv'
test_result_file_name = 'spaCy_public_test_OG_IOB.csv'

# Same order as before: train, then dev, then test.
for split_df, split_file_name in ((train_df, train_result_file_name),
                                  (dev_df, dev_result_file_name),
                                  (test_df, test_result_file_name)):
    with open(result_path + split_file_name, 'w', newline='\n') as f1:
        ml_writer = csv.writer(f1, delimiter='\t', lineterminator='\n')

        for index_, each_annotation in tqdm(split_df.iterrows(), total=split_df.shape[0]):
            text = each_annotation['sentence']
            sentence = OG_model(text)

            # Predicted entities as [start, end, text, label] items.
            ml_ner = [[ent.start_char, ent.end_char, ent.text, ent.label_]
                      for ent in sentence.ents]

            tagged_tokens = convert2IOB(text, ml_ner)

            ml_writer.writerows(list(each_word) for each_word in tagged_tokens)
            # An empty row separates consecutive sentences.
            ml_writer.writerow('')



  0%|          | 0/80014 [00:00<?, ?it/s][A[A

  0%|          | 29/80014 [00:00<04:36, 289.51it/s][A[A

  0%|          | 59/80014 [00:00<04:34, 290.85it/s][A[A

  0%|          | 100/80014 [00:00<04:11, 317.49it/s][A[A

  0%|          | 141/80014 [00:00<03:55, 339.36it/s][A[A

  0%|          | 181/80014 [00:00<03:45, 354.01it/s][A[A

  0%|          | 213/80014 [00:00<03:55, 338.16it/s][A[A

  0%|          | 247/80014 [00:00<03:57, 335.36it/s][A[A

  0%|          | 279/80014 [00:00<04:18, 308.75it/s][A[A

  0%|          | 309/80014 [00:00<04:25, 300.09it/s][A[A

  0%|          | 339/80014 [00:01<04:34, 290.60it/s][A[A

  0%|          | 368/80014 [00:01<04:35, 289.10it/s][A[A

  0%|          | 398/80014 [00:01<04:34, 290.04it/s][A[A

  1%|          | 427/80014 [00:01<04:41, 282.83it/s][A[A

  1%|          | 456/80014 [00:01<04:41, 283.03it/s][A[A

  1%|          | 485/80014 [00:01<04:57, 267.15it/s][A[A

  1%|          | 512/80014 [00:01<04:56, 267.75it/s

  6%|▌         | 4574/80014 [00:14<03:51, 325.22it/s][A[A

  6%|▌         | 4608/80014 [00:14<04:05, 307.15it/s][A[A

  6%|▌         | 4640/80014 [00:14<04:13, 297.74it/s][A[A

  6%|▌         | 4671/80014 [00:14<04:16, 293.52it/s][A[A

  6%|▌         | 4701/80014 [00:14<04:20, 289.37it/s][A[A

  6%|▌         | 4731/80014 [00:14<04:22, 286.55it/s][A[A

  6%|▌         | 4760/80014 [00:14<04:23, 285.88it/s][A[A

  6%|▌         | 4791/80014 [00:15<04:18, 291.49it/s][A[A

  6%|▌         | 4831/80014 [00:15<03:57, 316.73it/s][A[A

  6%|▌         | 4872/80014 [00:15<03:41, 339.40it/s][A[A

  6%|▌         | 4913/80014 [00:15<03:31, 354.87it/s][A[A

  6%|▌         | 4950/80014 [00:15<03:48, 327.86it/s][A[A

  6%|▌         | 4984/80014 [00:15<04:00, 311.81it/s][A[A

  6%|▋         | 5017/80014 [00:15<04:08, 302.34it/s][A[A

  6%|▋         | 5048/80014 [00:15<04:24, 283.86it/s][A[A

  6%|▋         | 5078/80014 [00:15<04:20, 287.42it/s][A[A

  6%|▋         | 5108/80

 17%|█▋        | 13823/80014 [00:42<02:43, 404.72it/s][A[A

 17%|█▋        | 13864/80014 [00:42<02:43, 405.46it/s][A[A

 17%|█▋        | 13905/80014 [00:42<02:48, 393.02it/s][A[A

 17%|█▋        | 13946/80014 [00:42<02:46, 396.04it/s][A[A

 17%|█▋        | 13986/80014 [00:43<02:47, 394.78it/s][A[A

 18%|█▊        | 14026/80014 [00:43<02:48, 392.14it/s][A[A

 18%|█▊        | 14066/80014 [00:43<02:47, 393.19it/s][A[A

 18%|█▊        | 14108/80014 [00:43<02:45, 399.18it/s][A[A

 18%|█▊        | 14149/80014 [00:43<02:43, 401.97it/s][A[A

 18%|█▊        | 14190/80014 [00:43<02:43, 403.50it/s][A[A

 18%|█▊        | 14231/80014 [00:43<02:42, 404.50it/s][A[A

 18%|█▊        | 14272/80014 [00:43<02:42, 405.43it/s][A[A

 18%|█▊        | 14313/80014 [00:43<02:45, 396.82it/s][A[A

 18%|█▊        | 14353/80014 [00:44<03:07, 349.92it/s][A[A

 18%|█▊        | 14390/80014 [00:44<03:14, 336.89it/s][A[A

 18%|█▊        | 14425/80014 [00:44<03:31, 309.89it/s][A[A

 18%|█▊ 

 29%|██▊       | 22961/80014 [01:14<02:53, 329.61it/s][A[A

 29%|██▊       | 22995/80014 [01:15<03:04, 309.84it/s][A[A

 29%|██▉       | 23027/80014 [01:15<03:15, 291.98it/s][A[A

 29%|██▉       | 23057/80014 [01:15<03:17, 288.67it/s][A[A

 29%|██▉       | 23088/80014 [01:15<03:14, 293.33it/s][A[A

 29%|██▉       | 23121/80014 [01:15<03:09, 300.39it/s][A[A

 29%|██▉       | 23152/80014 [01:15<03:14, 292.22it/s][A[A

 29%|██▉       | 23182/80014 [01:15<03:17, 287.61it/s][A[A

 29%|██▉       | 23211/80014 [01:15<03:22, 279.85it/s][A[A

 29%|██▉       | 23250/80014 [01:15<03:06, 304.26it/s][A[A

 29%|██▉       | 23292/80014 [01:16<02:51, 331.52it/s][A[A

 29%|██▉       | 23334/80014 [01:16<02:40, 353.00it/s][A[A

 29%|██▉       | 23377/80014 [01:16<02:32, 372.31it/s][A[A

 29%|██▉       | 23418/80014 [01:16<02:28, 381.68it/s][A[A

 29%|██▉       | 23459/80014 [01:16<02:26, 387.35it/s][A[A

 29%|██▉       | 23500/80014 [01:16<02:24, 391.58it/s][A[A

 29%|██▉

 40%|████      | 32120/80014 [01:43<02:14, 355.20it/s][A[A

 40%|████      | 32163/80014 [01:43<02:08, 373.29it/s][A[A

 40%|████      | 32206/80014 [01:43<02:03, 387.17it/s][A[A

 40%|████      | 32249/80014 [01:43<02:00, 397.80it/s][A[A

 40%|████      | 32291/80014 [01:43<01:58, 402.84it/s][A[A

 40%|████      | 32332/80014 [01:44<02:00, 395.24it/s][A[A

 40%|████      | 32372/80014 [01:44<02:14, 353.35it/s][A[A

 41%|████      | 32409/80014 [01:44<02:24, 328.92it/s][A[A

 41%|████      | 32444/80014 [01:44<02:30, 315.22it/s][A[A

 41%|████      | 32477/80014 [01:44<02:38, 300.74it/s][A[A

 41%|████      | 32508/80014 [01:44<02:39, 297.98it/s][A[A

 41%|████      | 32540/80014 [01:44<02:37, 301.77it/s][A[A

 41%|████      | 32571/80014 [01:44<02:42, 292.08it/s][A[A

 41%|████      | 32601/80014 [01:44<02:42, 292.51it/s][A[A

 41%|████      | 32632/80014 [01:45<02:40, 294.78it/s][A[A

 41%|████      | 32662/80014 [01:45<02:40, 294.74it/s][A[A

 41%|███

 51%|█████▏    | 41036/80014 [02:11<02:08, 303.35it/s][A[A

 51%|█████▏    | 41067/80014 [02:12<02:11, 295.91it/s][A[A

 51%|█████▏    | 41100/80014 [02:12<02:08, 303.24it/s][A[A

 51%|█████▏    | 41133/80014 [02:12<02:06, 307.85it/s][A[A

 51%|█████▏    | 41164/80014 [02:12<02:06, 306.28it/s][A[A

 51%|█████▏    | 41195/80014 [02:12<02:06, 306.95it/s][A[A

 52%|█████▏    | 41226/80014 [02:12<02:07, 305.36it/s][A[A

 52%|█████▏    | 41257/80014 [02:12<02:15, 285.75it/s][A[A

 52%|█████▏    | 41286/80014 [02:12<02:19, 277.65it/s][A[A

 52%|█████▏    | 41315/80014 [02:12<02:20, 276.26it/s][A[A

 52%|█████▏    | 41343/80014 [02:13<02:21, 273.37it/s][A[A

 52%|█████▏    | 41372/80014 [02:13<02:18, 278.09it/s][A[A

 52%|█████▏    | 41414/80014 [02:13<02:04, 309.12it/s][A[A

 52%|█████▏    | 41447/80014 [02:13<02:05, 306.57it/s][A[A

 52%|█████▏    | 41489/80014 [02:13<01:55, 332.83it/s][A[A

 52%|█████▏    | 41526/80014 [02:13<01:53, 339.67it/s][A[A

 52%|███

 62%|██████▏   | 49537/80014 [02:40<01:45, 287.59it/s][A[A

 62%|██████▏   | 49566/80014 [02:40<01:47, 283.73it/s][A[A

 62%|██████▏   | 49596/80014 [02:40<01:46, 286.45it/s][A[A

 62%|██████▏   | 49625/80014 [02:40<01:46, 285.42it/s][A[A

 62%|██████▏   | 49666/80014 [02:40<01:37, 312.52it/s][A[A

 62%|██████▏   | 49708/80014 [02:40<01:29, 337.82it/s][A[A

 62%|██████▏   | 49749/80014 [02:41<01:25, 355.26it/s][A[A

 62%|██████▏   | 49787/80014 [02:41<01:23, 362.30it/s][A[A

 62%|██████▏   | 49828/80014 [02:41<01:20, 373.18it/s][A[A

 62%|██████▏   | 49870/80014 [02:41<01:18, 383.54it/s][A[A

 62%|██████▏   | 49911/80014 [02:41<01:17, 388.61it/s][A[A

 62%|██████▏   | 49951/80014 [02:41<01:17, 385.81it/s][A[A

 62%|██████▏   | 49992/80014 [02:41<01:16, 391.46it/s][A[A

 63%|██████▎   | 50033/80014 [02:41<01:15, 394.91it/s][A[A

 63%|██████▎   | 50073/80014 [02:41<01:18, 383.01it/s][A[A

 63%|██████▎   | 50114/80014 [02:41<01:16, 389.99it/s][A[A

 63%|███

 73%|███████▎  | 58126/80014 [03:08<01:14, 293.17it/s][A[A

 73%|███████▎  | 58156/80014 [03:08<01:17, 281.37it/s][A[A

 73%|███████▎  | 58185/80014 [03:09<01:19, 274.54it/s][A[A

 73%|███████▎  | 58221/80014 [03:09<01:13, 294.60it/s][A[A

 73%|███████▎  | 58262/80014 [03:09<01:07, 320.85it/s][A[A

 73%|███████▎  | 58304/80014 [03:09<01:03, 343.61it/s][A[A

 73%|███████▎  | 58345/80014 [03:09<01:00, 360.60it/s][A[A

 73%|███████▎  | 58387/80014 [03:09<00:57, 374.21it/s][A[A

 73%|███████▎  | 58428/80014 [03:09<00:56, 382.28it/s][A[A

 73%|███████▎  | 58469/80014 [03:09<00:55, 385.67it/s][A[A

 73%|███████▎  | 58509/80014 [03:09<00:56, 381.41it/s][A[A

 73%|███████▎  | 58548/80014 [03:10<01:02, 343.01it/s][A[A

 73%|███████▎  | 58584/80014 [03:10<01:10, 304.72it/s][A[A

 73%|███████▎  | 58616/80014 [03:10<01:10, 304.53it/s][A[A

 73%|███████▎  | 58648/80014 [03:10<01:11, 297.32it/s][A[A

 73%|███████▎  | 58679/80014 [03:10<01:12, 295.42it/s][A[A

 73%|███

 84%|████████▍ | 67021/80014 [03:37<00:44, 291.17it/s][A[A

 84%|████████▍ | 67051/80014 [03:37<00:46, 279.72it/s][A[A

 84%|████████▍ | 67081/80014 [03:37<00:45, 282.71it/s][A[A

 84%|████████▍ | 67110/80014 [03:37<00:45, 283.06it/s][A[A

 84%|████████▍ | 67139/80014 [03:37<00:45, 282.01it/s][A[A

 84%|████████▍ | 67171/80014 [03:38<00:44, 291.84it/s][A[A

 84%|████████▍ | 67209/80014 [03:38<00:41, 312.10it/s][A[A

 84%|████████▍ | 67248/80014 [03:38<00:38, 330.24it/s][A[A

 84%|████████▍ | 67288/80014 [03:38<00:36, 347.29it/s][A[A

 84%|████████▍ | 67328/80014 [03:38<00:35, 360.85it/s][A[A

 84%|████████▍ | 67365/80014 [03:38<00:37, 335.70it/s][A[A

 84%|████████▍ | 67405/80014 [03:38<00:35, 352.36it/s][A[A

 84%|████████▍ | 67446/80014 [03:38<00:34, 366.31it/s][A[A

 84%|████████▍ | 67484/80014 [03:38<00:34, 360.12it/s][A[A

 84%|████████▍ | 67521/80014 [03:38<00:35, 349.54it/s][A[A

 84%|████████▍ | 67557/80014 [03:39<00:36, 342.51it/s][A[A

 84%|███

 95%|█████████▌| 76062/80014 [04:06<00:09, 396.53it/s][A[A

 95%|█████████▌| 76103/80014 [04:06<00:10, 372.00it/s][A[A

 95%|█████████▌| 76144/80014 [04:06<00:10, 381.60it/s][A[A

 95%|█████████▌| 76184/80014 [04:06<00:09, 385.14it/s][A[A

 95%|█████████▌| 76226/80014 [04:06<00:09, 393.94it/s][A[A

 95%|█████████▌| 76266/80014 [04:06<00:09, 394.68it/s][A[A

 95%|█████████▌| 76306/80014 [04:06<00:09, 384.05it/s][A[A

 95%|█████████▌| 76345/80014 [04:06<00:10, 354.62it/s][A[A

 95%|█████████▌| 76382/80014 [04:07<00:10, 331.67it/s][A[A

 96%|█████████▌| 76416/80014 [04:07<00:11, 318.04it/s][A[A

 96%|█████████▌| 76449/80014 [04:07<00:11, 314.13it/s][A[A

 96%|█████████▌| 76481/80014 [04:07<00:11, 301.05it/s][A[A

 96%|█████████▌| 76512/80014 [04:07<00:11, 300.12it/s][A[A

 96%|█████████▌| 76543/80014 [04:07<00:12, 288.98it/s][A[A

 96%|█████████▌| 76581/80014 [04:07<00:11, 311.09it/s][A[A

 96%|█████████▌| 76613/80014 [04:07<00:11, 300.02it/s][A[A

 96%|███

 31%|███       | 4953/16108 [00:16<00:35, 312.15it/s][A[A

 31%|███       | 4985/16108 [00:16<00:36, 307.77it/s][A[A

 31%|███       | 5016/16108 [00:16<00:36, 305.55it/s][A[A

 31%|███▏      | 5047/16108 [00:16<00:36, 305.72it/s][A[A

 32%|███▏      | 5078/16108 [00:16<00:36, 303.37it/s][A[A

 32%|███▏      | 5109/16108 [00:16<00:36, 301.70it/s][A[A

 32%|███▏      | 5140/16108 [00:16<00:36, 297.77it/s][A[A

 32%|███▏      | 5170/16108 [00:16<00:38, 285.58it/s][A[A

 32%|███▏      | 5199/16108 [00:17<00:38, 284.20it/s][A[A

 32%|███▏      | 5229/16108 [00:17<00:37, 287.84it/s][A[A

 33%|███▎      | 5258/16108 [00:17<00:37, 288.14it/s][A[A

 33%|███▎      | 5287/16108 [00:17<00:38, 283.18it/s][A[A

 33%|███▎      | 5316/16108 [00:17<00:38, 283.52it/s][A[A

 33%|███▎      | 5345/16108 [00:17<00:38, 279.28it/s][A[A

 33%|███▎      | 5379/16108 [00:17<00:36, 293.31it/s][A[A

 34%|███▎      | 5420/16108 [00:17<00:33, 319.63it/s][A[A

 34%|███▍      | 5461/16

 86%|████████▌ | 13818/16108 [00:44<00:07, 303.21it/s][A[A

 86%|████████▌ | 13850/16108 [00:44<00:07, 306.31it/s][A[A

 86%|████████▌ | 13882/16108 [00:44<00:07, 308.65it/s][A[A

 86%|████████▋ | 13913/16108 [00:45<00:07, 292.11it/s][A[A

 87%|████████▋ | 13943/16108 [00:45<00:07, 289.04it/s][A[A

 87%|████████▋ | 13976/16108 [00:45<00:07, 297.81it/s][A[A

 87%|████████▋ | 14012/16108 [00:45<00:06, 313.68it/s][A[A

 87%|████████▋ | 14052/16108 [00:45<00:06, 333.96it/s][A[A

 87%|████████▋ | 14090/16108 [00:45<00:05, 345.61it/s][A[A

 88%|████████▊ | 14128/16108 [00:45<00:05, 352.92it/s][A[A

 88%|████████▊ | 14164/16108 [00:45<00:05, 331.95it/s][A[A

 88%|████████▊ | 14198/16108 [00:45<00:06, 311.90it/s][A[A

 88%|████████▊ | 14230/16108 [00:46<00:06, 302.02it/s][A[A

 89%|████████▊ | 14261/16108 [00:46<00:06, 304.01it/s][A[A

 89%|████████▊ | 14292/16108 [00:46<00:05, 305.42it/s][A[A

 89%|████████▉ | 14323/16108 [00:46<00:06, 285.89it/s][A[A

 89%|███

 35%|███▍      | 6226/17957 [00:20<00:40, 289.71it/s][A[A

 35%|███▍      | 6256/17957 [00:20<00:40, 288.69it/s][A[A

 35%|███▌      | 6285/17957 [00:20<00:40, 286.68it/s][A[A

 35%|███▌      | 6314/17957 [00:20<00:41, 279.80it/s][A[A

 35%|███▌      | 6343/17957 [00:21<00:42, 275.51it/s][A[A

 35%|███▌      | 6371/17957 [00:21<00:42, 272.05it/s][A[A

 36%|███▌      | 6402/17957 [00:21<00:41, 281.46it/s][A[A

 36%|███▌      | 6431/17957 [00:21<00:40, 282.03it/s][A[A

 36%|███▌      | 6461/17957 [00:21<00:40, 286.46it/s][A[A

 36%|███▌      | 6491/17957 [00:21<00:39, 289.84it/s][A[A

 36%|███▋      | 6521/17957 [00:21<00:40, 279.30it/s][A[A

 36%|███▋      | 6550/17957 [00:21<00:41, 272.92it/s][A[A

 37%|███▋      | 6578/17957 [00:21<00:42, 268.08it/s][A[A

 37%|███▋      | 6605/17957 [00:22<00:43, 259.40it/s][A[A

 37%|███▋      | 6632/17957 [00:22<00:44, 254.36it/s][A[A

 37%|███▋      | 6659/17957 [00:22<00:43, 258.72it/s][A[A

 37%|███▋      | 6686/17

 84%|████████▍ | 15043/17957 [00:49<00:10, 265.21it/s][A[A

 84%|████████▍ | 15075/17957 [00:49<00:10, 278.35it/s][A[A

 84%|████████▍ | 15118/17957 [00:49<00:09, 310.21it/s][A[A

 84%|████████▍ | 15160/17957 [00:49<00:08, 335.23it/s][A[A

 85%|████████▍ | 15203/17957 [00:49<00:07, 357.09it/s][A[A

 85%|████████▍ | 15244/17957 [00:50<00:07, 370.74it/s][A[A

 85%|████████▌ | 15283/17957 [00:50<00:08, 328.98it/s][A[A

 85%|████████▌ | 15318/17957 [00:50<00:08, 306.96it/s][A[A

 85%|████████▌ | 15351/17957 [00:50<00:08, 295.77it/s][A[A

 86%|████████▌ | 15382/17957 [00:50<00:08, 290.84it/s][A[A

 86%|████████▌ | 15412/17957 [00:50<00:09, 280.69it/s][A[A

 86%|████████▌ | 15441/17957 [00:50<00:09, 274.75it/s][A[A

 86%|████████▌ | 15469/17957 [00:50<00:09, 272.23it/s][A[A

 86%|████████▋ | 15498/17957 [00:50<00:08, 275.79it/s][A[A

 86%|████████▋ | 15527/17957 [00:51<00:08, 276.19it/s][A[A

 87%|████████▋ | 15562/17957 [00:51<00:08, 293.54it/s][A[A

 87%|███

In [29]:
####### Disease ###################
# Tag every sentence of the train/dev/test frames with DS_model and write one
# tab-separated "token<TAB>IOB-tag" row per token, with an empty row after
# each sentence. One output file is produced per split.

train_result_file_name = 'spaCy_public_train_DS_IOB.csv'
dev_result_file_name = 'spaCy_public_dev_DS_IOB.csv'
test_result_file_name = 'spaCy_public_test_DS_IOB.csv'

# Same order as before: train, then dev, then test.
for split_df, split_file_name in ((train_df, train_result_file_name),
                                  (dev_df, dev_result_file_name),
                                  (test_df, test_result_file_name)):
    with open(result_path + split_file_name, 'w', newline='\n') as f1:
        ml_writer = csv.writer(f1, delimiter='\t', lineterminator='\n')

        for index_, each_annotation in tqdm(split_df.iterrows(), total=split_df.shape[0]):
            text = each_annotation['sentence']
            sentence = DS_model(text)

            # Predicted entities as [start, end, text, label] items.
            ml_ner = [[ent.start_char, ent.end_char, ent.text, ent.label_]
                      for ent in sentence.ents]

            tagged_tokens = convert2IOB(text, ml_ner)

            ml_writer.writerows(list(each_word) for each_word in tagged_tokens)
            # An empty row separates consecutive sentences.
            ml_writer.writerow('')



  0%|          | 0/80014 [00:00<?, ?it/s][A[A

  0%|          | 27/80014 [00:00<05:06, 261.31it/s][A[A

  0%|          | 53/80014 [00:00<05:09, 258.66it/s][A[A

  0%|          | 90/80014 [00:00<04:41, 283.67it/s][A[A

  0%|          | 129/80014 [00:00<04:18, 308.60it/s][A[A

  0%|          | 169/80014 [00:00<04:02, 329.45it/s][A[A

  0%|          | 200/80014 [00:00<04:07, 322.25it/s][A[A

  0%|          | 231/80014 [00:00<04:16, 311.42it/s][A[A

  0%|          | 261/80014 [00:00<04:29, 295.58it/s][A[A

  0%|          | 290/80014 [00:00<04:36, 287.92it/s][A[A

  0%|          | 319/80014 [00:01<04:40, 284.29it/s][A[A

  0%|          | 348/80014 [00:01<04:43, 280.98it/s][A[A

  0%|          | 376/80014 [00:01<04:49, 275.48it/s][A[A

  1%|          | 404/80014 [00:01<04:52, 271.79it/s][A[A

  1%|          | 432/80014 [00:01<04:57, 267.67it/s][A[A

  1%|          | 459/80014 [00:01<05:11, 255.26it/s][A[A

  1%|          | 485/80014 [00:01<05:15, 252.02it/s]

 11%|█         | 8608/80014 [00:28<04:19, 275.48it/s][A[A

 11%|█         | 8640/80014 [00:28<04:10, 285.47it/s][A[A

 11%|█         | 8670/80014 [00:28<04:08, 287.38it/s][A[A

 11%|█         | 8699/80014 [00:29<04:29, 264.42it/s][A[A

 11%|█         | 8726/80014 [00:29<04:30, 263.58it/s][A[A

 11%|█         | 8755/80014 [00:29<04:23, 270.07it/s][A[A

 11%|█         | 8786/80014 [00:29<04:15, 278.92it/s][A[A

 11%|█         | 8815/80014 [00:29<04:14, 279.69it/s][A[A

 11%|█         | 8844/80014 [00:29<04:13, 281.17it/s][A[A

 11%|█         | 8873/80014 [00:29<04:24, 269.06it/s][A[A

 11%|█         | 8901/80014 [00:29<04:22, 270.66it/s][A[A

 11%|█         | 8930/80014 [00:29<04:18, 275.23it/s][A[A

 11%|█         | 8958/80014 [00:29<04:21, 272.03it/s][A[A

 11%|█         | 8987/80014 [00:30<04:17, 275.41it/s][A[A

 11%|█▏        | 9016/80014 [00:30<04:16, 277.09it/s][A[A

 11%|█▏        | 9048/80014 [00:30<04:08, 286.07it/s][A[A

 11%|█▏        | 9077/80

 22%|██▏       | 17843/80014 [00:57<02:53, 358.95it/s][A[A

 22%|██▏       | 17886/80014 [00:57<02:45, 375.74it/s][A[A

 22%|██▏       | 17925/80014 [00:57<02:51, 362.39it/s][A[A

 22%|██▏       | 17965/80014 [00:57<02:46, 372.63it/s][A[A

 23%|██▎       | 18006/80014 [00:57<02:42, 381.86it/s][A[A

 23%|██▎       | 18045/80014 [00:57<02:50, 364.33it/s][A[A

 23%|██▎       | 18082/80014 [00:57<02:56, 351.56it/s][A[A

 23%|██▎       | 18118/80014 [00:57<03:04, 335.22it/s][A[A

 23%|██▎       | 18152/80014 [00:57<03:09, 326.33it/s][A[A

 23%|██▎       | 18185/80014 [00:58<03:10, 325.39it/s][A[A

 23%|██▎       | 18218/80014 [00:58<03:15, 316.65it/s][A[A

 23%|██▎       | 18250/80014 [00:58<03:20, 308.47it/s][A[A

 23%|██▎       | 18282/80014 [00:58<03:27, 297.61it/s][A[A

 23%|██▎       | 18312/80014 [00:58<03:30, 292.44it/s][A[A

 23%|██▎       | 18342/80014 [00:58<03:30, 292.97it/s][A[A

 23%|██▎       | 18372/80014 [00:58<03:40, 279.30it/s][A[A

 23%|██▎

 34%|███▎      | 26974/80014 [01:25<02:16, 389.66it/s][A[A

 34%|███▍      | 27014/80014 [01:25<02:25, 364.79it/s][A[A

 34%|███▍      | 27052/80014 [01:25<02:34, 342.13it/s][A[A

 34%|███▍      | 27087/80014 [01:26<02:48, 313.52it/s][A[A

 34%|███▍      | 27120/80014 [01:26<02:57, 297.21it/s][A[A

 34%|███▍      | 27155/80014 [01:26<02:50, 309.85it/s][A[A

 34%|███▍      | 27192/80014 [01:26<02:43, 323.40it/s][A[A

 34%|███▍      | 27225/80014 [01:26<02:48, 313.84it/s][A[A

 34%|███▍      | 27257/80014 [01:26<02:48, 313.42it/s][A[A

 34%|███▍      | 27289/80014 [01:26<02:51, 307.84it/s][A[A

 34%|███▍      | 27321/80014 [01:26<02:54, 302.54it/s][A[A

 34%|███▍      | 27352/80014 [01:26<02:56, 298.89it/s][A[A

 34%|███▍      | 27383/80014 [01:27<02:56, 297.67it/s][A[A

 34%|███▍      | 27414/80014 [01:27<02:55, 299.51it/s][A[A

 34%|███▍      | 27447/80014 [01:27<02:51, 306.38it/s][A[A

 34%|███▍      | 27478/80014 [01:27<02:51, 305.83it/s][A[A

 34%|███

 45%|████▌     | 36155/80014 [01:54<03:01, 241.99it/s][A[A

 45%|████▌     | 36183/80014 [01:54<02:54, 251.84it/s][A[A

 45%|████▌     | 36216/80014 [01:54<02:41, 270.76it/s][A[A

 45%|████▌     | 36246/80014 [01:54<02:37, 277.57it/s][A[A

 45%|████▌     | 36275/80014 [01:54<02:36, 280.13it/s][A[A

 45%|████▌     | 36308/80014 [01:54<02:29, 291.50it/s][A[A

 45%|████▌     | 36348/80014 [01:55<02:18, 315.71it/s][A[A

 45%|████▌     | 36389/80014 [01:55<02:08, 338.57it/s][A[A

 46%|████▌     | 36424/80014 [01:55<02:14, 325.04it/s][A[A

 46%|████▌     | 36458/80014 [01:55<02:18, 314.70it/s][A[A

 46%|████▌     | 36500/80014 [01:55<02:08, 339.06it/s][A[A

 46%|████▌     | 36542/80014 [01:55<02:01, 358.05it/s][A[A

 46%|████▌     | 36584/80014 [01:55<01:56, 373.38it/s][A[A

 46%|████▌     | 36623/80014 [01:55<01:57, 368.72it/s][A[A

 46%|████▌     | 36661/80014 [01:55<02:04, 348.47it/s][A[A

 46%|████▌     | 36697/80014 [01:56<02:12, 327.03it/s][A[A

 46%|███

 56%|█████▌    | 44684/80014 [02:22<01:29, 396.76it/s][A[A

 56%|█████▌    | 44725/80014 [02:22<01:32, 381.71it/s][A[A

 56%|█████▌    | 44764/80014 [02:22<01:41, 347.16it/s][A[A

 56%|█████▌    | 44800/80014 [02:23<01:47, 328.00it/s][A[A

 56%|█████▌    | 44834/80014 [02:23<01:50, 319.00it/s][A[A

 56%|█████▌    | 44867/80014 [02:23<01:56, 302.58it/s][A[A

 56%|█████▌    | 44898/80014 [02:23<01:58, 296.66it/s][A[A

 56%|█████▌    | 44929/80014 [02:23<01:58, 295.88it/s][A[A

 56%|█████▌    | 44959/80014 [02:23<02:04, 282.25it/s][A[A

 56%|█████▌    | 44988/80014 [02:23<02:04, 281.23it/s][A[A

 56%|█████▋    | 45019/80014 [02:23<02:01, 287.82it/s][A[A

 56%|█████▋    | 45053/80014 [02:23<01:56, 300.28it/s][A[A

 56%|█████▋    | 45095/80014 [02:24<01:46, 328.30it/s][A[A

 56%|█████▋    | 45137/80014 [02:24<01:40, 348.41it/s][A[A

 56%|█████▋    | 45174/80014 [02:24<01:38, 353.43it/s][A[A

 57%|█████▋    | 45217/80014 [02:24<01:33, 372.34it/s][A[A

 57%|███

 67%|██████▋   | 53519/80014 [02:51<01:29, 296.50it/s][A[A

 67%|██████▋   | 53550/80014 [02:51<01:28, 299.75it/s][A[A

 67%|██████▋   | 53581/80014 [02:51<01:28, 300.11it/s][A[A

 67%|██████▋   | 53612/80014 [02:51<01:29, 295.87it/s][A[A

 67%|██████▋   | 53642/80014 [02:51<01:29, 294.78it/s][A[A

 67%|██████▋   | 53672/80014 [02:51<01:30, 290.31it/s][A[A

 67%|██████▋   | 53702/80014 [02:51<01:30, 290.75it/s][A[A

 67%|██████▋   | 53732/80014 [02:51<01:30, 291.22it/s][A[A

 67%|██████▋   | 53763/80014 [02:52<01:29, 294.00it/s][A[A

 67%|██████▋   | 53793/80014 [02:52<01:29, 293.69it/s][A[A

 67%|██████▋   | 53823/80014 [02:52<01:32, 284.63it/s][A[A

 67%|██████▋   | 53853/80014 [02:52<01:31, 287.05it/s][A[A

 67%|██████▋   | 53882/80014 [02:52<01:31, 285.83it/s][A[A

 67%|██████▋   | 53912/80014 [02:52<01:30, 289.61it/s][A[A

 67%|██████▋   | 53944/80014 [02:52<01:27, 296.33it/s][A[A

 67%|██████▋   | 53974/80014 [02:52<01:29, 292.38it/s][A[A

 67%|███

 78%|███████▊  | 62047/80014 [03:19<01:03, 285.05it/s][A[A

 78%|███████▊  | 62078/80014 [03:19<01:01, 289.64it/s][A[A

 78%|███████▊  | 62108/80014 [03:19<01:03, 283.16it/s][A[A

 78%|███████▊  | 62137/80014 [03:19<01:03, 280.88it/s][A[A

 78%|███████▊  | 62166/80014 [03:19<01:03, 280.85it/s][A[A

 78%|███████▊  | 62195/80014 [03:20<01:04, 274.32it/s][A[A

 78%|███████▊  | 62223/80014 [03:20<01:04, 275.90it/s][A[A

 78%|███████▊  | 62251/80014 [03:20<01:04, 274.29it/s][A[A

 78%|███████▊  | 62279/80014 [03:20<01:06, 268.65it/s][A[A

 78%|███████▊  | 62307/80014 [03:20<01:05, 270.20it/s][A[A

 78%|███████▊  | 62335/80014 [03:20<01:07, 262.91it/s][A[A

 78%|███████▊  | 62362/80014 [03:20<01:06, 263.61it/s][A[A

 78%|███████▊  | 62391/80014 [03:20<01:05, 270.18it/s][A[A

 78%|███████▊  | 62419/80014 [03:20<01:04, 272.51it/s][A[A

 78%|███████▊  | 62460/80014 [03:21<00:58, 302.50it/s][A[A

 78%|███████▊  | 62501/80014 [03:21<00:53, 326.71it/s][A[A

 78%|███

 89%|████████▉ | 71135/80014 [03:48<00:27, 319.97it/s][A[A

 89%|████████▉ | 71171/80014 [03:48<00:26, 328.02it/s][A[A

 89%|████████▉ | 71212/80014 [03:48<00:25, 348.02it/s][A[A

 89%|████████▉ | 71248/80014 [03:48<00:25, 343.98it/s][A[A

 89%|████████▉ | 71283/80014 [03:48<00:25, 343.77it/s][A[A

 89%|████████▉ | 71325/80014 [03:48<00:23, 362.98it/s][A[A

 89%|████████▉ | 71365/80014 [03:48<00:23, 373.28it/s][A[A

 89%|████████▉ | 71407/80014 [03:48<00:22, 383.80it/s][A[A

 89%|████████▉ | 71448/80014 [03:49<00:21, 390.14it/s][A[A

 89%|████████▉ | 71489/80014 [03:49<00:21, 393.72it/s][A[A

 89%|████████▉ | 71529/80014 [03:49<00:21, 388.00it/s][A[A

 89%|████████▉ | 71568/80014 [03:49<00:23, 366.36it/s][A[A

 89%|████████▉ | 71605/80014 [03:49<00:23, 354.30it/s][A[A

 90%|████████▉ | 71641/80014 [03:49<00:25, 325.08it/s][A[A

 90%|████████▉ | 71675/80014 [03:49<00:26, 314.11it/s][A[A

 90%|████████▉ | 71707/80014 [03:49<00:27, 306.64it/s][A[A

 90%|███

  0%|          | 31/16108 [00:00<00:52, 308.27it/s][A[A

  0%|          | 60/16108 [00:00<00:53, 300.66it/s][A[A

  1%|          | 91/16108 [00:00<00:53, 301.41it/s][A[A

  1%|          | 123/16108 [00:00<00:52, 304.45it/s][A[A

  1%|          | 153/16108 [00:00<00:53, 300.97it/s][A[A

  1%|          | 182/16108 [00:00<00:53, 296.73it/s][A[A

  1%|▏         | 216/16108 [00:00<00:51, 306.83it/s][A[A

  2%|▏         | 249/16108 [00:00<00:50, 313.22it/s][A[A

  2%|▏         | 279/16108 [00:00<00:52, 302.66it/s][A[A

  2%|▏         | 310/16108 [00:01<00:51, 304.79it/s][A[A

  2%|▏         | 340/16108 [00:01<00:52, 302.46it/s][A[A

  2%|▏         | 370/16108 [00:01<00:52, 300.22it/s][A[A

  2%|▏         | 402/16108 [00:01<00:51, 305.72it/s][A[A

  3%|▎         | 438/16108 [00:01<00:49, 319.46it/s][A[A

  3%|▎         | 476/16108 [00:01<00:46, 333.42it/s][A[A

  3%|▎         | 517/16108 [00:01<00:44, 349.91it/s][A[A

  3%|▎         | 553/16108 [00:01<00:45, 34

 54%|█████▍    | 8683/16108 [00:28<00:26, 281.12it/s][A[A

 54%|█████▍    | 8713/16108 [00:29<00:25, 284.78it/s][A[A

 54%|█████▍    | 8744/16108 [00:29<00:25, 289.19it/s][A[A

 54%|█████▍    | 8773/16108 [00:29<00:25, 289.28it/s][A[A

 55%|█████▍    | 8813/16108 [00:29<00:23, 313.79it/s][A[A

 55%|█████▍    | 8850/16108 [00:29<00:22, 327.22it/s][A[A

 55%|█████▌    | 8884/16108 [00:29<00:22, 314.70it/s][A[A

 55%|█████▌    | 8916/16108 [00:29<00:23, 306.45it/s][A[A

 56%|█████▌    | 8948/16108 [00:29<00:24, 297.33it/s][A[A

 56%|█████▌    | 8979/16108 [00:29<00:24, 288.13it/s][A[A

 56%|█████▌    | 9009/16108 [00:30<00:24, 287.97it/s][A[A

 56%|█████▌    | 9039/16108 [00:30<00:25, 279.67it/s][A[A

 56%|█████▋    | 9080/16108 [00:30<00:22, 308.53it/s][A[A

 57%|█████▋    | 9121/16108 [00:30<00:21, 332.57it/s][A[A

 57%|█████▋    | 9161/16108 [00:30<00:19, 349.24it/s][A[A

 57%|█████▋    | 9202/16108 [00:30<00:18, 364.83it/s][A[A

 57%|█████▋    | 9241/16

  7%|▋         | 1291/17957 [00:04<00:59, 281.91it/s][A[A

  7%|▋         | 1320/17957 [00:04<00:59, 278.27it/s][A[A

  8%|▊         | 1351/17957 [00:04<00:58, 285.97it/s][A[A

  8%|▊         | 1384/17957 [00:04<00:55, 297.81it/s][A[A

  8%|▊         | 1414/17957 [00:04<00:56, 295.08it/s][A[A

  8%|▊         | 1444/17957 [00:04<01:02, 264.05it/s][A[A

  8%|▊         | 1473/17957 [00:05<01:00, 270.60it/s][A[A

  8%|▊         | 1501/17957 [00:05<01:01, 268.52it/s][A[A

  9%|▊         | 1529/17957 [00:05<01:01, 265.92it/s][A[A

  9%|▊         | 1559/17957 [00:05<00:59, 274.81it/s][A[A

  9%|▉         | 1589/17957 [00:05<00:58, 277.52it/s][A[A

  9%|▉         | 1617/17957 [00:05<01:02, 262.70it/s][A[A

  9%|▉         | 1645/17957 [00:05<01:01, 267.13it/s][A[A

  9%|▉         | 1673/17957 [00:05<01:00, 268.83it/s][A[A

  9%|▉         | 1702/17957 [00:05<00:59, 274.24it/s][A[A

 10%|▉         | 1739/17957 [00:06<00:54, 295.99it/s][A[A

 10%|▉         | 1779/17

 57%|█████▋    | 10317/17957 [00:33<00:19, 398.55it/s][A[A

 58%|█████▊    | 10358/17957 [00:33<00:19, 396.79it/s][A[A

 58%|█████▊    | 10398/17957 [00:33<00:20, 367.46it/s][A[A

 58%|█████▊    | 10436/17957 [00:33<00:22, 331.49it/s][A[A

 58%|█████▊    | 10471/17957 [00:33<00:26, 285.37it/s][A[A

 58%|█████▊    | 10502/17957 [00:33<00:27, 272.51it/s][A[A

 59%|█████▊    | 10531/17957 [00:33<00:27, 272.67it/s][A[A

 59%|█████▉    | 10563/17957 [00:34<00:26, 283.84it/s][A[A

 59%|█████▉    | 10593/17957 [00:34<00:26, 281.53it/s][A[A

 59%|█████▉    | 10622/17957 [00:34<00:26, 276.39it/s][A[A

 59%|█████▉    | 10652/17957 [00:34<00:25, 283.01it/s][A[A

 59%|█████▉    | 10681/17957 [00:34<00:26, 279.29it/s][A[A

 60%|█████▉    | 10710/17957 [00:34<00:25, 281.44it/s][A[A

 60%|█████▉    | 10739/17957 [00:34<00:25, 281.39it/s][A[A

 60%|█████▉    | 10771/17957 [00:34<00:24, 290.51it/s][A[A

 60%|██████    | 10801/17957 [00:34<00:24, 289.29it/s][A[A

 60%|███

In [30]:
####### Genes ###################
# Run GP_model over every sentence of the train / dev / test frames and dump
# the predicted entities as tab-separated IOB rows, one output file per split.
# NOTE(review): "GP" presumably stands for gene/protein -- confirm against the
# cell that loads GP_model.
#
# Relies on names defined in earlier cells: result_path, train_df, dev_df,
# test_df, GP_model, convert2IOB, csv and tqdm.

train_result_file_name = 'spaCy_public_train_GP_IOB.csv'
dev_result_file_name = 'spaCy_public_dev_GP_IOB.csv'
test_result_file_name = 'spaCy_public_test_GP_IOB.csv'

# (output file name, source DataFrame) for each split; the splits are
# processed in this order by the single loop below instead of three
# copy-pasted stanzas.
gp_splits = [
    (train_result_file_name, train_df),
    (dev_result_file_name, dev_df),
    (test_result_file_name, test_df),
]

for result_file_name, split_df in gp_splits:
    # result_path is concatenated as-is, so it must already end with a path
    # separator (same contract as before).
    with open(result_path + result_file_name, 'w', newline='\n') as f1:
        ml_writer = csv.writer(f1, delimiter='\t', lineterminator='\n')

        for index_, each_annotation in tqdm(split_df.iterrows(), total=split_df.shape[0]):
            text = each_annotation['sentence']
            sentence = GP_model(text)

            # One [start_char, end_char, surface text, label] record per
            # predicted entity, in the order the model reports them.
            ml_ner = [
                [ent.start_char, ent.end_char, ent.text, ent.label_]
                for ent in sentence.ents
            ]

            tagged_tokens = convert2IOB(text, ml_ner)

            # Every item returned by convert2IOB becomes one output row.
            ml_writer.writerows(list(each_word) for each_word in tagged_tokens)
            # An empty row (just the line terminator) separates sentences.
            ml_writer.writerow([])



  0%|          | 0/80014 [00:00<?, ?it/s][A[A

  0%|          | 27/80014 [00:00<05:03, 263.78it/s][A[A

  0%|          | 54/80014 [00:00<05:04, 262.92it/s][A[A

  0%|          | 92/80014 [00:00<04:36, 288.79it/s][A[A

  0%|          | 131/80014 [00:00<04:16, 311.47it/s][A[A

  0%|          | 170/80014 [00:00<04:01, 329.97it/s][A[A

  0%|          | 201/80014 [00:00<04:06, 323.21it/s][A[A

  0%|          | 231/80014 [00:00<04:12, 315.49it/s][A[A

  0%|          | 261/80014 [00:00<04:28, 297.01it/s][A[A

  0%|          | 290/80014 [00:00<04:35, 289.71it/s][A[A

  0%|          | 319/80014 [00:01<04:38, 285.93it/s][A[A

  0%|          | 348/80014 [00:01<04:41, 283.07it/s][A[A

  0%|          | 377/80014 [00:01<04:45, 279.22it/s][A[A

  1%|          | 405/80014 [00:01<04:49, 274.56it/s][A[A

  1%|          | 433/80014 [00:01<04:54, 270.11it/s][A[A

  1%|          | 460/80014 [00:01<05:21, 247.65it/s][A[A

  1%|          | 486/80014 [00:01<05:23, 245.64it/s]

 11%|█         | 8704/80014 [00:28<04:19, 274.57it/s][A[A

 11%|█         | 8732/80014 [00:28<04:19, 274.60it/s][A[A

 11%|█         | 8762/80014 [00:28<04:15, 278.93it/s][A[A

 11%|█         | 8792/80014 [00:29<04:11, 282.79it/s][A[A

 11%|█         | 8821/80014 [00:29<04:13, 280.99it/s][A[A

 11%|█         | 8850/80014 [00:29<04:14, 279.59it/s][A[A

 11%|█         | 8879/80014 [00:29<04:13, 280.87it/s][A[A

 11%|█         | 8908/80014 [00:29<04:12, 281.15it/s][A[A

 11%|█         | 8937/80014 [00:29<04:16, 277.39it/s][A[A

 11%|█         | 8965/80014 [00:29<04:23, 269.40it/s][A[A

 11%|█         | 8995/80014 [00:29<04:16, 276.74it/s][A[A

 11%|█▏        | 9023/80014 [00:29<04:16, 276.61it/s][A[A

 11%|█▏        | 9053/80014 [00:29<04:11, 282.65it/s][A[A

 11%|█▏        | 9082/80014 [00:30<04:09, 284.68it/s][A[A

 11%|█▏        | 9111/80014 [00:30<04:16, 275.96it/s][A[A

 11%|█▏        | 9141/80014 [00:30<04:11, 281.75it/s][A[A

 11%|█▏        | 9171/80

 22%|██▏       | 17972/80014 [00:57<02:45, 375.00it/s][A[A

 23%|██▎       | 18012/80014 [00:57<02:43, 378.51it/s][A[A

 23%|██▎       | 18051/80014 [00:57<02:50, 363.80it/s][A[A

 23%|██▎       | 18091/80014 [00:57<02:46, 371.63it/s][A[A

 23%|██▎       | 18129/80014 [00:57<02:56, 349.98it/s][A[A

 23%|██▎       | 18165/80014 [00:57<03:02, 338.98it/s][A[A

 23%|██▎       | 18200/80014 [00:57<03:08, 328.60it/s][A[A

 23%|██▎       | 18234/80014 [00:57<03:29, 295.07it/s][A[A

 23%|██▎       | 18265/80014 [00:58<03:31, 292.21it/s][A[A

 23%|██▎       | 18295/80014 [00:58<03:37, 284.13it/s][A[A

 23%|██▎       | 18324/80014 [00:58<03:38, 281.77it/s][A[A

 23%|██▎       | 18355/80014 [00:58<03:34, 287.17it/s][A[A

 23%|██▎       | 18386/80014 [00:58<03:31, 291.82it/s][A[A

 23%|██▎       | 18418/80014 [00:58<03:27, 297.55it/s][A[A

 23%|██▎       | 18460/80014 [00:58<03:09, 325.11it/s][A[A

 23%|██▎       | 18502/80014 [00:58<02:56, 348.18it/s][A[A

 23%|██▎

 34%|███▍      | 27049/80014 [01:25<02:38, 334.33it/s][A[A

 34%|███▍      | 27084/80014 [01:25<02:55, 302.42it/s][A[A

 34%|███▍      | 27116/80014 [01:26<03:05, 284.67it/s][A[A

 34%|███▍      | 27150/80014 [01:26<02:57, 297.46it/s][A[A

 34%|███▍      | 27187/80014 [01:26<02:47, 315.79it/s][A[A

 34%|███▍      | 27220/80014 [01:26<02:54, 301.91it/s][A[A

 34%|███▍      | 27251/80014 [01:26<02:54, 303.11it/s][A[A

 34%|███▍      | 27282/80014 [01:26<02:56, 299.17it/s][A[A

 34%|███▍      | 27313/80014 [01:26<02:57, 296.59it/s][A[A

 34%|███▍      | 27343/80014 [01:26<02:58, 294.47it/s][A[A

 34%|███▍      | 27373/80014 [01:26<03:00, 291.09it/s][A[A

 34%|███▍      | 27403/80014 [01:26<03:00, 291.88it/s][A[A

 34%|███▍      | 27435/80014 [01:27<02:55, 298.93it/s][A[A

 34%|███▍      | 27466/80014 [01:27<02:55, 298.86it/s][A[A

 34%|███▍      | 27498/80014 [01:27<02:52, 303.64it/s][A[A

 34%|███▍      | 27531/80014 [01:27<02:49, 309.02it/s][A[A

 34%|███

 45%|████▌     | 36102/80014 [01:54<03:42, 197.48it/s][A[A

 45%|████▌     | 36128/80014 [01:54<03:26, 212.74it/s][A[A

 45%|████▌     | 36156/80014 [01:54<03:12, 227.45it/s][A[A

 45%|████▌     | 36184/80014 [01:54<03:02, 240.48it/s][A[A

 45%|████▌     | 36217/80014 [01:54<02:47, 261.00it/s][A[A

 45%|████▌     | 36247/80014 [01:55<02:42, 270.16it/s][A[A

 45%|████▌     | 36276/80014 [01:55<02:39, 274.29it/s][A[A

 45%|████▌     | 36310/80014 [01:55<02:30, 290.72it/s][A[A

 45%|████▌     | 36352/80014 [01:55<02:16, 319.97it/s][A[A

 45%|████▌     | 36395/80014 [01:55<02:06, 344.96it/s][A[A

 46%|████▌     | 36432/80014 [01:55<02:15, 322.57it/s][A[A

 46%|████▌     | 36466/80014 [01:55<02:15, 320.34it/s][A[A

 46%|████▌     | 36507/80014 [01:55<02:07, 342.17it/s][A[A

 46%|████▌     | 36549/80014 [01:55<02:00, 359.96it/s][A[A

 46%|████▌     | 36591/80014 [01:56<01:56, 374.12it/s][A[A

 46%|████▌     | 36630/80014 [01:56<02:00, 358.59it/s][A[A

 46%|███

 56%|█████▌    | 44462/80014 [02:22<01:55, 307.04it/s][A[A

 56%|█████▌    | 44504/80014 [02:23<01:46, 333.25it/s][A[A

 56%|█████▌    | 44546/80014 [02:23<01:40, 354.49it/s][A[A

 56%|█████▌    | 44588/80014 [02:23<01:35, 370.78it/s][A[A

 56%|█████▌    | 44630/80014 [02:23<01:32, 383.07it/s][A[A

 56%|█████▌    | 44672/80014 [02:23<01:30, 392.32it/s][A[A

 56%|█████▌    | 44713/80014 [02:23<01:29, 394.35it/s][A[A

 56%|█████▌    | 44753/80014 [02:23<01:40, 350.57it/s][A[A

 56%|█████▌    | 44790/80014 [02:23<01:50, 320.12it/s][A[A

 56%|█████▌    | 44824/80014 [02:23<01:50, 318.97it/s][A[A

 56%|█████▌    | 44857/80014 [02:24<01:56, 301.56it/s][A[A

 56%|█████▌    | 44889/80014 [02:24<01:59, 293.33it/s][A[A

 56%|█████▌    | 44919/80014 [02:24<01:59, 294.66it/s][A[A

 56%|█████▌    | 44949/80014 [02:24<02:04, 281.29it/s][A[A

 56%|█████▌    | 44978/80014 [02:24<02:09, 270.11it/s][A[A

 56%|█████▌    | 45007/80014 [02:24<02:07, 274.97it/s][A[A

 56%|███

 67%|██████▋   | 53258/80014 [02:51<01:30, 296.24it/s][A[A

 67%|██████▋   | 53298/80014 [02:51<01:23, 320.42it/s][A[A

 67%|██████▋   | 53338/80014 [02:51<01:18, 339.62it/s][A[A

 67%|██████▋   | 53377/80014 [02:51<01:15, 351.20it/s][A[A

 67%|██████▋   | 53413/80014 [02:51<01:23, 319.20it/s][A[A

 67%|██████▋   | 53446/80014 [02:52<01:26, 306.75it/s][A[A

 67%|██████▋   | 53478/80014 [02:52<01:29, 297.45it/s][A[A

 67%|██████▋   | 53509/80014 [02:52<01:29, 296.59it/s][A[A

 67%|██████▋   | 53540/80014 [02:52<01:30, 293.68it/s][A[A

 67%|██████▋   | 53573/80014 [02:52<01:27, 300.55it/s][A[A

 67%|██████▋   | 53604/80014 [02:52<01:30, 290.51it/s][A[A

 67%|██████▋   | 53634/80014 [02:52<01:31, 288.64it/s][A[A

 67%|██████▋   | 53664/80014 [02:52<01:32, 284.57it/s][A[A

 67%|██████▋   | 53693/80014 [02:52<01:32, 283.26it/s][A[A

 67%|██████▋   | 53722/80014 [02:53<01:33, 281.33it/s][A[A

 67%|██████▋   | 53751/80014 [02:53<01:33, 281.45it/s][A[A

 67%|███

 77%|███████▋  | 61816/80014 [03:20<01:04, 282.91it/s][A[A

 77%|███████▋  | 61845/80014 [03:20<01:06, 271.98it/s][A[A

 77%|███████▋  | 61873/80014 [03:20<01:10, 258.57it/s][A[A

 77%|███████▋  | 61900/80014 [03:20<01:11, 252.82it/s][A[A

 77%|███████▋  | 61926/80014 [03:20<01:11, 252.72it/s][A[A

 77%|███████▋  | 61958/80014 [03:20<01:07, 266.93it/s][A[A

 77%|███████▋  | 61988/80014 [03:20<01:05, 275.26it/s][A[A

 78%|███████▊  | 62018/80014 [03:20<01:04, 281.13it/s][A[A

 78%|███████▊  | 62047/80014 [03:20<01:03, 282.35it/s][A[A

 78%|███████▊  | 62077/80014 [03:21<01:02, 286.95it/s][A[A

 78%|███████▊  | 62106/80014 [03:21<01:03, 281.14it/s][A[A

 78%|███████▊  | 62135/80014 [03:21<01:03, 281.73it/s][A[A

 78%|███████▊  | 62164/80014 [03:21<01:03, 280.14it/s][A[A

 78%|███████▊  | 62193/80014 [03:21<01:05, 272.41it/s][A[A

 78%|███████▊  | 62221/80014 [03:21<01:05, 271.87it/s][A[A

 78%|███████▊  | 62249/80014 [03:21<01:05, 270.12it/s][A[A

 78%|███

 89%|████████▊ | 70842/80014 [03:48<00:30, 305.19it/s][A[A

 89%|████████▊ | 70874/80014 [03:49<00:31, 287.80it/s][A[A

 89%|████████▊ | 70904/80014 [03:49<00:33, 274.43it/s][A[A

 89%|████████▊ | 70934/80014 [03:49<00:32, 281.53it/s][A[A

 89%|████████▊ | 70963/80014 [03:49<00:31, 283.04it/s][A[A

 89%|████████▊ | 70992/80014 [03:49<00:31, 282.10it/s][A[A

 89%|████████▉ | 71021/80014 [03:49<00:31, 283.76it/s][A[A

 89%|████████▉ | 71053/80014 [03:49<00:30, 291.94it/s][A[A

 89%|████████▉ | 71090/80014 [03:49<00:28, 308.63it/s][A[A

 89%|████████▉ | 71122/80014 [03:49<00:28, 308.84it/s][A[A

 89%|████████▉ | 71162/80014 [03:49<00:26, 330.18it/s][A[A

 89%|████████▉ | 71199/80014 [03:50<00:26, 338.99it/s][A[A

 89%|████████▉ | 71240/80014 [03:50<00:24, 356.07it/s][A[A

 89%|████████▉ | 71277/80014 [03:50<00:24, 352.48it/s][A[A

 89%|████████▉ | 71318/80014 [03:50<00:23, 366.37it/s][A[A

 89%|████████▉ | 71357/80014 [03:50<00:23, 371.10it/s][A[A

 89%|███

100%|█████████▉| 79622/80014 [04:17<00:01, 351.89it/s][A[A

100%|█████████▉| 79658/80014 [04:17<00:01, 328.42it/s][A[A

100%|█████████▉| 79692/80014 [04:17<00:00, 322.02it/s][A[A

100%|█████████▉| 79725/80014 [04:17<00:00, 319.08it/s][A[A

100%|█████████▉| 79758/80014 [04:17<00:00, 299.89it/s][A[A

100%|█████████▉| 79789/80014 [04:18<00:00, 292.64it/s][A[A

100%|█████████▉| 79819/80014 [04:18<00:00, 285.85it/s][A[A

100%|█████████▉| 79848/80014 [04:18<00:00, 278.34it/s][A[A

100%|█████████▉| 79877/80014 [04:18<00:00, 279.48it/s][A[A

100%|█████████▉| 79906/80014 [04:18<00:00, 280.64it/s][A[A

100%|█████████▉| 79937/80014 [04:18<00:00, 287.19it/s][A[A

100%|█████████▉| 79966/80014 [04:18<00:00, 273.82it/s][A[A

100%|██████████| 80014/80014 [04:18<00:00, 309.12it/s][A[A


  0%|          | 0/16108 [00:00<?, ?it/s][A[A

  0%|          | 32/16108 [00:00<00:51, 310.07it/s][A[A

  0%|          | 61/16108 [00:00<00:53, 302.56it/s][A[A

  1%|          | 93/16108 

 50%|█████     | 8130/16108 [00:27<00:28, 278.49it/s][A[A

 51%|█████     | 8159/16108 [00:27<00:30, 259.84it/s][A[A

 51%|█████     | 8188/16108 [00:27<00:29, 265.95it/s][A[A

 51%|█████     | 8217/16108 [00:27<00:29, 270.90it/s][A[A

 51%|█████     | 8246/16108 [00:28<00:28, 276.24it/s][A[A

 51%|█████▏    | 8276/16108 [00:28<00:27, 282.24it/s][A[A

 52%|█████▏    | 8306/16108 [00:28<00:27, 286.82it/s][A[A

 52%|█████▏    | 8336/16108 [00:28<00:27, 287.34it/s][A[A

 52%|█████▏    | 8365/16108 [00:28<00:26, 287.43it/s][A[A

 52%|█████▏    | 8396/16108 [00:28<00:26, 293.76it/s][A[A

 52%|█████▏    | 8429/16108 [00:28<00:25, 302.52it/s][A[A

 53%|█████▎    | 8460/16108 [00:28<00:26, 291.41it/s][A[A

 53%|█████▎    | 8490/16108 [00:28<00:26, 286.38it/s][A[A

 53%|█████▎    | 8519/16108 [00:29<00:26, 284.65it/s][A[A

 53%|█████▎    | 8548/16108 [00:29<00:26, 285.31it/s][A[A

 53%|█████▎    | 8579/16108 [00:29<00:25, 291.83it/s][A[A

 53%|█████▎    | 8609/16

  4%|▍         | 700/17957 [00:02<00:52, 330.72it/s][A[A

  4%|▍         | 734/17957 [00:02<00:53, 321.10it/s][A[A

  4%|▍         | 767/17957 [00:02<00:54, 313.69it/s][A[A

  4%|▍         | 799/17957 [00:02<00:56, 302.17it/s][A[A

  5%|▍         | 830/17957 [00:02<00:56, 304.17it/s][A[A

  5%|▍         | 862/17957 [00:02<00:55, 306.37it/s][A[A

  5%|▍         | 893/17957 [00:03<00:56, 302.69it/s][A[A

  5%|▌         | 926/17957 [00:03<00:55, 307.95it/s][A[A

  5%|▌         | 957/17957 [00:03<00:57, 296.70it/s][A[A

  5%|▌         | 987/17957 [00:03<00:58, 290.70it/s][A[A

  6%|▌         | 1019/17957 [00:03<00:57, 294.91it/s][A[A

  6%|▌         | 1049/17957 [00:03<00:57, 292.67it/s][A[A

  6%|▌         | 1079/17957 [00:03<00:59, 285.66it/s][A[A

  6%|▌         | 1108/17957 [00:03<01:00, 280.61it/s][A[A

  6%|▋         | 1137/17957 [00:03<00:59, 281.61it/s][A[A

  6%|▋         | 1167/17957 [00:03<00:58, 285.91it/s][A[A

  7%|▋         | 1196/17957 [00:04

 53%|█████▎    | 9509/17957 [00:31<00:29, 289.07it/s][A[A

 53%|█████▎    | 9551/17957 [00:31<00:26, 317.64it/s][A[A

 53%|█████▎    | 9592/17957 [00:31<00:24, 340.48it/s][A[A

 54%|█████▎    | 9632/17957 [00:31<00:24, 340.47it/s][A[A

 54%|█████▍    | 9674/17957 [00:31<00:23, 359.78it/s][A[A

 54%|█████▍    | 9716/17957 [00:31<00:21, 375.00it/s][A[A

 54%|█████▍    | 9758/17957 [00:31<00:21, 385.99it/s][A[A

 55%|█████▍    | 9799/17957 [00:32<00:20, 392.80it/s][A[A

 55%|█████▍    | 9839/17957 [00:32<00:22, 363.81it/s][A[A

 55%|█████▌    | 9877/17957 [00:32<00:23, 342.18it/s][A[A

 55%|█████▌    | 9913/17957 [00:32<00:25, 310.61it/s][A[A

 55%|█████▌    | 9946/17957 [00:32<00:26, 302.02it/s][A[A

 56%|█████▌    | 9978/17957 [00:32<00:27, 293.22it/s][A[A

 56%|█████▌    | 10009/17957 [00:32<00:26, 296.93it/s][A[A

 56%|█████▌    | 10051/17957 [00:32<00:24, 325.47it/s][A[A

 56%|█████▌    | 10093/17957 [00:33<00:22, 346.98it/s][A[A

 56%|█████▋    | 1013