In [1]:
# Here I am using gensim for learning the word embeddings from the EUADR corpus
# This is just a rough first attempt.
import gensim
import logging
import os
import pandas as pd

In [2]:
# Start logging: the root logger is set to INFO and every record is printed as
# "<timestamp> : <level> : <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(levelname)s : %(message)s')

In [3]:
# Directory holding the datasets. Note that os.path.dirname() is given the
# *string* '__file__' (not the variable), so it evaluates to '' and the result
# is '../Datasets' resolved against the current working directory.
data_path = os.path.abspath(os.path.join(os.path.dirname( '__file__' ), '..', 'Datasets'))+'/'

# Although the file names say csv, the files are tab delimited. In addition,
# they have encoding problems, so they are opened with an explicit codec
# before being handed to pandas. Error seen when decoding strictly:
# UnicodeDecodeError: 'utf-8' codec can't decode byte 0xfc in position 2: invalid start byte

import codecs  # no longer used in this cell; kept in case later cells rely on the name


def _read_euadr_table(name, label, errors='strict'):
    """Read one EUADR corpus table and label every row with its relation class.

    Parameters
    ----------
    name : str
        File stem inside 'EUADR_Corpus_IBIgroup/'; the '.csv' suffix is added here.
    label : str
        Value stored in the new 'CLASS' column of the returned frame.
    errors : str, default 'strict'
        Decoding error policy passed to ``open`` ('ignore' drops undecodable bytes).

    Returns
    -------
    pandas.DataFrame
        The parsed table with the extra 'CLASS' column.

    Raises
    ------
    OSError
        If the file cannot be opened.
    UnicodeDecodeError
        If the file is not valid UTF-8 and ``errors`` is 'strict'.
    """
    path = os.path.join(data_path, 'EUADR_Corpus_IBIgroup', name + '.csv')
    # The mode is plain 'r': the old 'U' flag is rejected by Python 3.11+.
    # newline='' passes line endings through untouched so pandas does the
    # splitting, and the with-block closes the handle once parsing is done.
    with open(path, 'r', encoding='utf-8', errors=errors, newline='') as handle:
        table = pd.read_csv(handle, sep='\t', na_filter=False)
    table['CLASS'] = label
    return table


EUADR_drug_target = _read_euadr_table('EUADR_drug_target', 'drug_gene')

# Only this file is read with errors='ignore' -- presumably it is the one that
# raised the UnicodeDecodeError quoted above (TODO confirm).
EUADR_target_disease = _read_euadr_table('EUADR_target_disease', 'gene_disease', errors='ignore')

EUADR_drug_disease = _read_euadr_table('EUADR_drug_disease', 'drug_disease')

logging.info("Done reading data files")

2019-07-04 16:53:29,542 : INFO : Done reading data files


In [4]:
# Stack the three relation tables into one frame, in the same order as before.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0. As with
# append, the original row labels are kept, so index values repeat across the
# three source tables.
dataset = pd.concat([EUADR_drug_target, EUADR_target_disease, EUADR_drug_disease])


In [5]:
# Preview the first ten rows of the combined frame.
dataset.head(n=10)

Unnamed: 0,ASSOCIATION_TYPE,PMID,NUM_SENTENCE,ENTITY1_TEXT,ENTITY1_INI,ENTITY1_END,ENTITY1_TYPE,ENTITY2_TEXT,ENTITY2_INI,ENTITY2_END,ENTITY2_TYPE,SENTENCE,CLASS
0,PA,17938326,0,ABCG2,71,76,Genes & Molecular Sequences,doxorubicin,43,54,Chemicals & Drugs,Gefitinib inhibition of drug resistance to dox...,drug_gene
1,PA,17938326,5,EGFR,16,20,Genes & Molecular Sequences,gefitinib,31,40,Chemicals & Drugs,Inactivation of EGFR kinase by gefitinib was a...,drug_gene
2,PA,17938326,0,ABCG2,71,76,Genes & Molecular Sequences,Gefitinib,0,9,Chemicals & Drugs,Gefitinib inhibition of drug resistance to dox...,drug_gene
3,PA,17938326,2,ABCG2 drug transporter,161,183,Genes & Molecular Sequences,doxorubicin,140,151,Chemicals & Drugs,Extrusion assays using flow cytometry analysis...,drug_gene
4,PA,17938326,12,EGFR,20,24,Genes & Molecular Sequences,gefitinib,35,44,Chemicals & Drugs,Inactivation of the EGFR kinase by gefitinib p...,drug_gene
5,PA,17938326,8,ABCG2 drug transporter,80,102,Genes & Molecular Sequences,doxorubicin,173,184,Chemicals & Drugs,Inhibition of EGFR kinase activity by gefitini...,drug_gene
6,PA,17938326,6,ABCG2,128,133,Genes & Molecular Sequences,doxorubicin,172,183,Chemicals & Drugs,A terminal deoxynucleotidyl transferase-mediat...,drug_gene
7,PA,17938326,9,ABCG2,126,131,Genes & Molecular Sequences,doxorubicin,93,104,Chemicals & Drugs,Both ARO and WRO demonstrated differential ABC...,drug_gene
8,PA,17938326,12,EGFR,20,24,Genes & Molecular Sequences,doxorubicin,173,184,Chemicals & Drugs,Inactivation of the EGFR kinase by gefitinib p...,drug_gene
9,PA,17938326,8,EGFR,14,18,Genes & Molecular Sequences,doxorubicin,173,184,Chemicals & Drugs,Inhibition of EGFR kinase activity by gefitini...,drug_gene


In [6]:
import nltk

from nltk.tokenize import word_tokenize

def tokenise_text(text):
    """Split ``text`` into a list of tokens with NLTK's ``word_tokenize``.

    Parameters
    ----------
    text : str
        The raw sentence to tokenise.

    Returns
    -------
    list of str
        The tokens, exactly as produced by ``word_tokenize``.
    """
    return word_tokenize(text)

In [7]:
# Tokenise every sentence; the resulting token lists are stored positionally
# in a new column next to the raw text.
dataset['SENTENCE_normalised'] = list(map(tokenise_text, dataset['SENTENCE']))

In [8]:
# convert each row into doc2vec input format, i.e. a TaggedDocument holding the row's list of tokens and its class label as the tag
from gensim.models.doc2vec import TaggedDocument

def doc_tagger(data, tokens_col_name, class_col_name):
    """Wrap every row of ``data`` in a ``TaggedDocument``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with one document per row.
    tokens_col_name : str
        Column whose value is used as the document's ``words``.
    class_col_name : str
        Column whose value becomes the document's single tag.

    Returns
    -------
    The result of ``data.apply(..., axis=1)``: one ``TaggedDocument`` per row.
    """
    def _row_to_tagged(row):
        # The tag list holds exactly one entry: the row's class value.
        return TaggedDocument(words=row[tokens_col_name], tags=[row[class_col_name]])

    return data.apply(_row_to_tagged, axis=1)


# Specify parameters for doc2vec: DBOW with word2vec training
from gensim.models.doc2vec import Doc2Vec

def train_doc2vec_model(corpus, vec_size, window, n_epochs, n_cpu):
    """Build and train a Doc2Vec model (DBOW with word-vector training).

    Parameters
    ----------
    corpus
        Object whose ``.values`` holds the tagged documents (e.g. the output
        of ``doc_tagger``).
    vec_size : int
        Passed to Doc2Vec as ``vector_size``.
    window : int
        Passed to Doc2Vec as ``window``.
    n_epochs : int
        Passed to Doc2Vec as ``epochs``.
    n_cpu : int
        Passed to Doc2Vec as ``workers``.

    Returns
    -------
    Doc2Vec
        The model returned by the ``Doc2Vec`` constructor.
    """
    return Doc2Vec(
        corpus.values,
        vector_size=vec_size,
        window=window,
        epochs=n_epochs,
        dm=0,
        dbow_words=1,
        workers=n_cpu,
    )

def learn_vectors(doc2vec_model, corpus, infer_epochs=20):
    """Infer a vector for every document in ``corpus`` and pair it with its tag.

    Parameters
    ----------
    doc2vec_model
        Model exposing ``infer_vector(words, epochs=...)``.
    corpus
        Object whose ``.values`` holds documents with ``words`` and ``tags``
        attributes (e.g. the output of ``doc_tagger``).
    infer_epochs : int, default 20
        Number of inference passes per document, forwarded to ``infer_vector``.

    Returns
    -------
    (feats, labels) : tuple of two tuples
        ``feats[i]`` is the inferred vector of document ``i`` and ``labels[i]``
        is its first tag. Both are empty tuples for an empty corpus.
    """
    docs = corpus.values

    # zip(*[]) yields nothing, so the two-name unpacking below would raise
    # ValueError on an empty corpus; return empty results instead.
    if len(docs) == 0:
        return (), ()

    # `epochs` is the current keyword for the number of inference passes; the
    # older `steps` spelling is rejected by gensim 4.
    labels, feats = zip(*(
        (doc.tags[0], doc2vec_model.infer_vector(doc.words, epochs=infer_epochs))
        for doc in docs
    ))

    return feats, labels

In [9]:
# Turn every row into a TaggedDocument: tokens as words, class label as tag.
train_tagged = doc_tagger(
    dataset,
    tokens_col_name='SENTENCE_normalised',
    class_col_name='CLASS',
)

In [10]:
import multiprocessing

# Train on the tagged sentences, using one worker per CPU reported by
# multiprocessing.
doc2vec_model = train_doc2vec_model(
    train_tagged,
    vec_size=300,
    window=10,
    n_epochs=200,
    n_cpu=multiprocessing.cpu_count(),
)

2019-07-04 16:53:52,631 : INFO : collecting all words and their counts
2019-07-04 16:53:52,632 : INFO : PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
2019-07-04 16:53:52,647 : INFO : collected 3311 word types and 3 unique tags from a corpus of 846 examples and 27842 words
2019-07-04 16:53:52,649 : INFO : Loading a fresh vocabulary
2019-07-04 16:53:52,655 : INFO : min_count=5 retains 1048 unique words (31% of original 3311, drops 2263)
2019-07-04 16:53:52,656 : INFO : min_count=5 leaves 23239 word corpus (83% of original 27842, drops 4603)
2019-07-04 16:53:52,661 : INFO : deleting the raw counts dictionary of 3311 items
2019-07-04 16:53:52,664 : INFO : sample=0.001 downsamples 45 most-common words
2019-07-04 16:53:52,665 : INFO : downsampling leaves estimated 15972 word corpus (68.7% of prior 23239)
2019-07-04 16:53:52,669 : INFO : estimated required memory for 1048 words and 300 dimensions: 3043400 bytes
2019-07-04 16:53:52,672 : INFO : resetting layer weights


2019-07-04 16:53:53,348 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:53:53,349 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:53:53,350 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:53:53,353 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:53:53,355 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:53:53,450 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:53:53,466 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:53:53,478 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:53:53,479 : INFO : EPOCH - 5 : training on 27842 raw words (16802 effective words) took 0.2s, 109232 effective words/s
2019-07-04 16:53:53,493 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:53:53,496 : INFO : worker thread 

2019-07-04 16:53:54,215 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:53:54,217 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:53:54,296 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:53:54,317 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:53:54,322 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:53:54,323 : INFO : EPOCH - 10 : training on 27842 raw words (16834 effective words) took 0.1s, 112468 effective words/s
2019-07-04 16:53:54,334 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:53:54,337 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:53:54,340 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:53:54,342 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:53:54,345 : INFO : worker thr

2019-07-04 16:53:55,080 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:53:55,085 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:53:55,086 : INFO : EPOCH - 15 : training on 27842 raw words (16849 effective words) took 0.2s, 109800 effective words/s
2019-07-04 16:53:55,092 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:53:55,096 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:53:55,099 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:53:55,102 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:53:55,107 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:53:55,108 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:53:55,112 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:53:55,116 : INFO : worker t

2019-07-04 16:53:55,852 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:53:55,855 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:53:55,857 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:53:55,860 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:53:55,863 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:53:55,869 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:53:55,871 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:53:55,873 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:53:55,876 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:53:55,879 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:53:55,880 : INFO : worker thread finished; awaiting finish of 5 more t

2019-07-04 16:53:56,637 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:53:56,640 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:53:56,642 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:53:56,643 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:53:56,646 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:53:56,649 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:53:56,650 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:53:56,653 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:53:56,656 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:53:56,658 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:53:56,740 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-07-04 16:53:57,449 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:53:57,453 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:53:57,455 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:53:57,456 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:53:57,457 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:53:57,460 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:53:57,463 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:53:57,538 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:53:57,559 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:53:57,575 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:53:57,576 : INFO : EPOCH - 31 : training on 27842 raw words (16771 effective

2019-07-04 16:53:58,163 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:53:58,166 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:53:58,168 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:53:58,169 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:53:58,249 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:53:58,270 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:53:58,281 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:53:58,283 : INFO : EPOCH - 36 : training on 27842 raw words (16806 effective words) took 0.1s, 120051 effective words/s
2019-07-04 16:53:58,292 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:53:58,296 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:53:58,299 : INFO : worker threa

2019-07-04 16:53:58,921 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:53:59,009 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:53:59,019 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:53:59,028 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:53:59,029 : INFO : EPOCH - 41 : training on 27842 raw words (16828 effective words) took 0.1s, 120613 effective words/s
2019-07-04 16:53:59,041 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:53:59,045 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:53:59,047 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:53:59,048 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:53:59,051 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:53:59,055 : INFO : worker th

2019-07-04 16:53:59,771 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:53:59,772 : INFO : EPOCH - 46 : training on 27842 raw words (16827 effective words) took 0.1s, 122700 effective words/s
2019-07-04 16:53:59,781 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:53:59,783 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:53:59,786 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:53:59,788 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:53:59,789 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:53:59,794 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:53:59,797 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:53:59,799 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:53:59,800 : INFO : worker t

2019-07-04 16:54:00,505 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:00,509 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:00,512 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:00,514 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:00,517 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:00,520 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:00,522 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:00,523 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:00,525 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:00,526 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:00,532 : INFO : worker thread finished; awaiting finish of 5 more t

2019-07-04 16:54:01,236 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:01,238 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:01,240 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:01,241 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:01,243 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:01,246 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:01,247 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:01,249 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:01,251 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:01,252 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:01,323 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-07-04 16:54:01,992 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:01,995 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:01,997 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:01,999 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:02,001 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:02,004 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:02,007 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:02,086 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:02,104 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:02,114 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:02,116 : INFO : EPOCH - 62 : training on 27842 raw words (16740 effective

2019-07-04 16:54:02,732 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:02,735 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:02,736 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:02,737 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:02,819 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:02,827 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:02,834 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:02,836 : INFO : EPOCH - 67 : training on 27842 raw words (16794 effective words) took 0.1s, 135562 effective words/s
2019-07-04 16:54:02,843 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:02,846 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:02,847 : INFO : worker threa

2019-07-04 16:54:03,438 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:03,516 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:03,532 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:03,540 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:03,541 : INFO : EPOCH - 72 : training on 27842 raw words (16757 effective words) took 0.1s, 127643 effective words/s
2019-07-04 16:54:03,555 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:03,559 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:03,568 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:03,572 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:03,576 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:03,579 : INFO : worker th

2019-07-04 16:54:04,289 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:04,291 : INFO : EPOCH - 77 : training on 27842 raw words (16775 effective words) took 0.1s, 115531 effective words/s
2019-07-04 16:54:04,301 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:04,302 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:04,304 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:04,307 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:04,309 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:04,310 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:04,312 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:04,315 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:04,317 : INFO : worker t

2019-07-04 16:54:05,052 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:05,057 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:05,061 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:05,065 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:05,068 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:05,070 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:05,072 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:05,075 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:05,077 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:05,081 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:05,085 : INFO : worker thread finished; awaiting finish of 5 more t

2019-07-04 16:54:05,802 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:05,805 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:05,807 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:05,808 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:05,809 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:05,812 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:05,815 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:05,817 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:05,819 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:05,821 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:05,897 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-07-04 16:54:06,552 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:06,553 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:06,557 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:06,561 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:06,564 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:06,565 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:06,569 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:06,640 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:06,670 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:06,673 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:06,675 : INFO : EPOCH - 93 : training on 27842 raw words (16839 effective

2019-07-04 16:54:07,291 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:07,295 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:07,297 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:07,300 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:07,370 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:07,394 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:07,396 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:07,398 : INFO : EPOCH - 98 : training on 27842 raw words (16853 effective words) took 0.1s, 123078 effective words/s
2019-07-04 16:54:07,408 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:07,411 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:07,415 : INFO : worker threa

2019-07-04 16:54:08,020 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:08,095 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:08,117 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:08,119 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:08,120 : INFO : EPOCH - 103 : training on 27842 raw words (16803 effective words) took 0.1s, 126154 effective words/s
2019-07-04 16:54:08,129 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:08,133 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:08,136 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:08,139 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:08,141 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:08,142 : INFO : worker t

2019-07-04 16:54:08,858 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:08,859 : INFO : EPOCH - 108 : training on 27842 raw words (16797 effective words) took 0.1s, 118080 effective words/s
2019-07-04 16:54:08,869 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:08,873 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:08,875 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:08,877 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:08,879 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:08,881 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:08,883 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:08,884 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:08,887 : INFO : worker 

2019-07-04 16:54:09,567 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:09,571 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:09,575 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:09,576 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:09,579 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:09,581 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:09,582 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:09,584 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:09,586 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:09,589 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:09,591 : INFO : worker thread finished; awaiting finish of 5 more t

2019-07-04 16:54:10,301 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:10,304 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:10,307 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:10,309 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:10,312 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:10,313 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:10,315 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:10,318 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:10,320 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:10,322 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:10,395 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-07-04 16:54:11,035 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:11,037 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:11,040 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:11,042 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:11,044 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:11,045 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:11,048 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:11,122 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:11,140 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:11,149 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:11,150 : INFO : EPOCH - 124 : training on 27842 raw words (16730 effectiv

2019-07-04 16:54:11,747 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:11,749 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:11,750 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:11,751 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:11,821 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:11,851 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:11,856 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:11,857 : INFO : EPOCH - 129 : training on 27842 raw words (16807 effective words) took 0.1s, 132316 effective words/s
2019-07-04 16:54:11,866 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:11,868 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:11,870 : INFO : worker thre

2019-07-04 16:54:12,467 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:12,543 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:12,571 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:12,574 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:12,576 : INFO : EPOCH - 134 : training on 27842 raw words (16812 effective words) took 0.1s, 119441 effective words/s
2019-07-04 16:54:12,586 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:12,588 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:12,592 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:12,593 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:12,597 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:12,599 : INFO : worker t

2019-07-04 16:54:13,297 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:13,298 : INFO : EPOCH - 139 : training on 27842 raw words (16801 effective words) took 0.1s, 125432 effective words/s
2019-07-04 16:54:13,306 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:13,308 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:13,309 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:13,314 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:13,316 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:13,319 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:13,321 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:13,323 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:13,325 : INFO : worker 

2019-07-04 16:54:14,054 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:14,057 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:14,059 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:14,060 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:14,063 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:14,064 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:14,067 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:14,069 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:14,073 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:14,074 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:14,077 : INFO : worker thread finished; awaiting finish of 5 more t

2019-07-04 16:54:14,788 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:14,790 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:14,792 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:14,794 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:14,796 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:14,799 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:14,801 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:14,802 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:14,803 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:14,806 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:14,879 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-07-04 16:54:15,505 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:15,508 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:15,511 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:15,513 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:15,515 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:15,517 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:15,519 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:15,610 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:15,623 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:15,626 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:15,627 : INFO : EPOCH - 155 : training on 27842 raw words (16792 effectiv

2019-07-04 16:54:16,224 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:16,225 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:16,228 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:16,231 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:16,299 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:16,317 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:16,337 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:16,338 : INFO : EPOCH - 160 : training on 27842 raw words (16800 effective words) took 0.1s, 122297 effective words/s
2019-07-04 16:54:16,348 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:16,350 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:16,352 : INFO : worker thre

2019-07-04 16:54:16,951 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:17,026 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:17,043 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:17,048 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:17,049 : INFO : EPOCH - 165 : training on 27842 raw words (16743 effective words) took 0.1s, 129018 effective words/s
2019-07-04 16:54:17,060 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:17,064 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:17,065 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:17,068 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:17,071 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:17,073 : INFO : worker t

2019-07-04 16:54:17,735 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:17,736 : INFO : EPOCH - 170 : training on 27842 raw words (16786 effective words) took 0.1s, 129827 effective words/s
2019-07-04 16:54:17,749 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:17,751 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:17,753 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:17,755 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:17,759 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:17,762 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:17,765 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:17,768 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:17,768 : INFO : worker 

2019-07-04 16:54:18,513 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:18,516 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:18,516 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:18,518 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:18,520 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:18,522 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:18,524 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:18,524 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:18,527 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:18,530 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:18,533 : INFO : worker thread finished; awaiting finish of 5 more t

2019-07-04 16:54:19,304 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:19,307 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:19,309 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-07-04 16:54:19,313 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:19,316 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:19,318 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:19,322 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:19,327 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:19,329 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:19,333 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:19,405 : INFO : worker thread finished; awaiting finish of 2 more thre

2019-07-04 16:54:20,083 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-07-04 16:54:20,085 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-07-04 16:54:20,088 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-07-04 16:54:20,092 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:20,096 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:20,101 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:20,103 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:20,177 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:20,211 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:20,223 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:20,224 : INFO : EPOCH - 186 : training on 27842 raw words (16789 effectiv

2019-07-04 16:54:20,904 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-07-04 16:54:20,907 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-07-04 16:54:20,909 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-04 16:54:20,913 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:20,999 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:21,012 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:21,020 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:21,023 : INFO : EPOCH - 191 : training on 27842 raw words (16838 effective words) took 0.2s, 112072 effective words/s
2019-07-04 16:54:21,036 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:21,040 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:21,043 : INFO : worker thre

2019-07-04 16:54:21,641 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-04 16:54:21,722 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-04 16:54:21,744 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-04 16:54:21,746 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-04 16:54:21,747 : INFO : EPOCH - 196 : training on 27842 raw words (16909 effective words) took 0.1s, 121717 effective words/s
2019-07-04 16:54:21,758 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-07-04 16:54:21,761 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-07-04 16:54:21,764 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-07-04 16:54:21,765 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-07-04 16:54:21,767 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-07-04 16:54:21,769 : INFO : worker t

In [11]:
# Ten vocabulary words whose learned vectors are closest (cosine similarity)
# to the token 'MDR1'; the query word itself is excluded from the result.
doc2vec_model.wv.most_similar(positive=['MDR1'], topn=10)

2019-07-04 16:54:27,327 : INFO : precomputing L2-norms of word weight vectors


[('ulcerative', 0.5465207099914551),
 ('observed', 0.45839160680770874),
 ('bowel', 0.44729456305503845),
 ('factor', 0.40653979778289795),
 ('found', 0.4034286141395569),
 ('Crohn', 0.4004455804824829),
 ('COL1A2', 0.388619601726532),
 ('genome-wide', 0.3844340145587921),
 ('inflammatory', 0.3740016222000122),
 ('GM-CSF', 0.3734869062900543)]

In [27]:
# Infer an embedding for a sentence and rank every document tag known to the
# model by its similarity to that embedding.
sentence = 'However, the majority of colon cancer cells have deregulation of the Wnt/beta-catenin pathway.'

# Plain whitespace tokenisation: punctuation stays attached to the words
# (e.g. 'However,' and 'pathway.').
# NOTE(review): confirm this matches the tokenisation used for training.
inferred_vector = doc2vec_model.infer_vector(sentence.split())

# topn equals the number of document tags, so the full ranking is returned
# (most similar first) rather than a truncated top-k list.
sims = doc2vec_model.docvecs.most_similar(
    positive=[inferred_vector],
    topn=len(doc2vec_model.docvecs),
)
sims

[('drug_gene', 0.2862572968006134),
 ('drug_disease', 0.2578011751174927),
 ('gene_disease', 0.19048158824443817)]