In [1]:
import os
import json
import nltk 
import numpy as np
import string
import time

from IPython.display import clear_output
from nltk.corpus import stopwords 
# reason for using snowball: https://stackoverflow.com/questions/10554052/what-are-the-major-differences-and-benefits-of-porter-and-lancaster-stemming-alg
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import word_tokenize
from tqdm import tqdm

nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /home/gustaw/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/gustaw/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
questions_data_path = '../../data/medqa/questions/US_qbank.jsonl'
dev_questions_data_path = '../../data/medqa/questions/dev.jsonl'
textbooks_data_dir = '../../data/medqa/textbooks/'

questions_dev_medqa_path = '../../data/medqa/questions/metamap_extracted_phrases/dev.jsonl'
questions_train_medqa_path ='../../data/medqa/questions/metamap_extracted_phrases/train.jsonl'
questions_test_medqa_path ='../../data/medqa/questions/metamap_extracted_phrases/train.jsonl'

In [24]:
# all_questions_data = []
# with open(questions_data_path, 'r') as file:
#     for line in file:
#         all_questions_data.append(json.loads(line))
        
# dev_questions_data = []
# with open(questions_dev_metqa_path, 'r') as file:
#     for line in file:
#         dev_questions_data.append(json.loads(line))

questions_metamap_data = []

with open(questions_dev_medqa_path, 'r') as file:
    for line in file:
        questions_metamap_data.append(json.loads(line))

with open(questions_train_medqa_path, 'r') as file:
    for line in file:
        questions_metamap_data.append(json.loads(line))
        
with open(questions_test_medqa_path, 'r') as file:
    for line in file:
        questions_metamap_data.append(json.loads(line))    

corpus = {}
for textbook_name in os.listdir(textbooks_data_dir):
    textbook_path = textbooks_data_dir + '/' + textbook_name
    with open(textbook_path, 'r') as textbook_file:
        textbook_content = textbook_file.read()
        corpus[textbook_name] = textbook_content

In [25]:
stop_words = stopwords.words('english')
snowball_stemmer = SnowballStemmer(language='english') 
# do not remove the '-' and '/'
custom_string_punctuation = string.punctuation.replace('-','').replace('/','').replace('.','')
punctuation = str.maketrans('', '', custom_string_punctuation)

In [5]:
def preprocess_content(content, remove_stopwords, stemming, remove_punctuation):
    if not remove_stopwords and not stemming and not remove_punctuation:
        return content.lower()
    if remove_punctuation:
        content = content.translate(punctuation).replace('“','').replace('’','')
    
    sentences = nltk.sent_tokenize(content.lower())
    cleaned_sentences = []
    
    for sentence in sentences:
        tokens = word_tokenize(sentence.lower())
        if remove_stopwords:
            tokens = [x for x in tokens if x not in stop_words]
        if stemming:
            tokens = [snowball_stemmer.stem(x) for x in tokens]
        cleaned_sentences.append(' '.join(tokens))
            
    return ' '.join(sentences)

def preprocess_corpus(corpus, remove_stopwords, stemming, remove_punctuation):
    for name, content in tqdm(corpus.items()):
        # TODO: removal of non-medical terms using MetaMap
        corpus[name] = preprocess_content(content, remove_stopwords, stemming, remove_punctuation)
        
        
def preprocess_questions(questions, remove_stopwords, stemming, remove_punctuation, metamap=False):    
    for question in tqdm(questions):
        question['question'] = preprocess_content(question['question'], remove_stopwords, stemming, remove_punctuation)
        for option, value in question['options'].items():
            question['options'][option] = preprocess_content(value, remove_stopwords, stemming, remove_punctuation)
        if metamap:
            question['answer'] = preprocess_content(question['answer'], remove_stopwords, stemming, remove_punctuation)
            for i, phrase in enumerate(question['metamap_phrases']):
                question['metamap_phrases'][i] = preprocess_content(phrase, remove_stopwords, stemming, remove_punctuation)

In [6]:
# def preprocess_content(content, remove_stopwords, stemming, remove_punctuation):
#     # lowercase and create tokens
#     tokens = word_tokenize(content.lower())
#     if remove_stopwords:
#         tokens = [x for x in tokens if x not in stop_words]
#     if stemming:
#         tokens = [snowball_stemmer.stem(x) for x in tokens]
#     result = ' '.join(tokens)
#     if remove_punctuation:
#         result = result.translate(punctuation).replace('“','').replace('’','')
#     return result

# def preprocess_corpus(corpus, remove_stopwords, stemming, remove_punctuation):
#     for name, content in tqdm(corpus.items()):
#         # TODO: removal of non-medical terms using MetaMap
#         corpus[name] = preprocess_content(content, remove_stopwords, stemming, remove_punctuation)
        
        
# def preprocess_questions(questions, remove_stopwords, stemming, remove_punctuation, metamap=False):    
#     for question in tqdm(questions):
#         question['question'] = preprocess_content(question['question'], remove_stopwords, stemming, remove_punctuation)
#         for option, value in question['options'].items():
#             question['options'][option] = preprocess_content(value, remove_stopwords, stemming, remove_punctuation)
#         if metamap:
#             question['answer'] = preprocess_content(question['answer'], remove_stopwords, stemming, remove_punctuation)
#             for i, phrase in enumerate(question['metamap_phrases']):
#                 question['metamap_phrases'][i] = preprocess_content(phrase, remove_stopwords, stemming, remove_punctuation)

In [7]:
def preprocess_data(remove_stopwords, stemming, remove_punctuation, metamap):
    preprocess_corpus(
        corpus=corpus,
        remove_stopwords=remove_stopwords,
        stemming=stemming,
        remove_punctuation=remove_punctuation
    )
    preprocess_questions(
        questions=questions_metamap_data,
        remove_stopwords=remove_stopwords,
        stemming=stemming,
        remove_punctuation=remove_punctuation,
        metamap=metamap
    )

In [26]:
preprocess_corpus(
        corpus=corpus,
        remove_stopwords=False,
        stemming=False,
        remove_punctuation=False
    )

100%|██████████| 18/18 [00:00<00:00, 32.77it/s]


In [8]:
preprocess_data(
    remove_stopwords=False, 
    stemming=False,
    remove_punctuation=False, 
    metamap=True
)

100%|██████████| 18/18 [03:49<00:00, 12.78s/it]
100%|██████████| 21628/21628 [02:17<00:00, 157.06it/s]


In [9]:
def create_corpus_chunks(chunk_length):
    corpus_chunks = []
    document_counter = 1
    for title, content in corpus.items():

        content_tokens = word_tokenize(content)

        counter = 0
        for i in range(0, len(content_tokens), chunk_length):
            chunk_name = title + str(counter)
            entry = {
                'name': chunk_name,
                'content': ' '.join(content_tokens[i:i+chunk_length])
            }
            corpus_chunks.append(entry)
            counter += 1

        clear_output(wait=True)
        print(f'Document {title} split in chunks ({document_counter}/{len(corpus.keys())})')
        document_counter += 1
    
    return corpus_chunks

In [18]:
chunk_length = 100
corpus_chunks = create_corpus_chunks(chunk_length)

Document Psichiatry_DSM-5.txt split in chunks (18/18)


In [10]:
def create_corpus_sentences():
    corpus_sentences = []
    for title, content in tqdm(corpus.items()):
        content_sentences = (nltk.sent_tokenize(content))
        sentence_counter = 0
        for sentence in content_sentences:
            corpus_sentences.append({
                'name': title + str(sentence_counter),
                'content': sentence
            })
    return corpus_sentences

In [27]:
corpus_sentences = create_corpus_sentences()

100%|██████████| 18/18 [00:21<00:00,  1.21s/it]


In [12]:
from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch()

def upload_documents(documents, index_name):
    create_index_body = """{
        "settings": {
            "index": {
                "similarity": {
                    "default": {
                        "type": "BM25"
                    }
                }
            }
        }
    }"""

    es.indices.create(index=index_name, body=create_index_body)

    id_counter = 1
    for document in tqdm(documents):
        res = es.index(index=index_name, id=id_counter, body=document)
        id_counter += 1

In [29]:
from enum import Enum

class Indexes(Enum):
    Unprocessed_chunks_100 = "unprocessed-chunks-100",
    Unprocessed_sentences = "sentences-unprocessed-shards-1",
    Stemmed_sentences = "sentences-stemmed-shards-1"
    #stemming-punctuation

In [22]:
upload_documents(corpus_sentences, Indexes.Stemmed_sentences.value)

100%|██████████| 657129/657129 [59:29<00:00, 184.08it/s] 


In [30]:
upload_documents(corpus_sentences, Indexes.Unprocessed_sentences.value)

100%|██████████| 657141/657141 [56:00<00:00, 195.56it/s]  


In [30]:
def search_documents(query_input, n, index_name):
    res = es.search(
        index=index_name, 
        body={
            "query": {
                "match": {
                    "content": query_input
                }
            },
            "from": 0,
            "size": n
        }
    )
    
    number_of_hits = len(res['hits']['hits'])
    
    results = []
    for i in range(number_of_hits):
        score = res['hits']['hits'][i]['_score']
        paragraph = res['hits']['hits'][i]['_source']
        result = {
            "score": score,
            "evidence": paragraph
        }
        results.append(result)
        
    return results

In [31]:
def ir_es(questions, no_documents_to_retrieve, index_name, metamap=False, all_questions_bank=False):
    start_time = time.time()

    correct_answer = 0
    incorrect_answer = 0

    for question_data in tqdm(questions):
        question = question_data['question']
        # for all_questions, the answer is the letter
        
        if all_questions_bank:
            answer = question_data['options'][question_data['answer']]
        else:
            answer = question_data['answer']

        final_answer = None
        final_score = 0

        for option, option_answer in question_data['options'].items():
            if metamap:
                query = ' '.join(question_data['metamap_phrases']) + " " + option_answer
            else:
                query = question + " " + option_answer
            top_documents = search_documents(query, no_documents_to_retrieve, index_name)
            if top_documents != []:
                score = 0
                for doc in top_documents:
                    score += doc['score']

                if final_score < score:
                    final_answer = option_answer
                    final_score = score

        correct = False
        if final_answer == answer:
            correct_answer += 1
            correct = True
        else:
            incorrect_answer += 1


    print(f'Accuracy: {100 * correct_answer / (correct_answer + incorrect_answer)}%')
    print(f'\tCorrect answers: {correct_answer}')
    print(f'\tInorrect answers: {incorrect_answer}')

In [105]:
x  = {'question': 'a 27-year-old male presents to urgent care complaining of pain with urination  he reports that the pain started 3 days ago  he has never experienced these symptoms before  he denies gross hematuria or pelvic pain  he is sexually active with his girlfriend  and they consistently use condoms  when asked about recent travel  he admits to recently returning from a  boys  trip  in cancun where he had unprotected sex 1 night with a girl he met at a bar  the patient  s medical history includes type i diabetes that is controlled with an insulin pump  his mother has rheumatoid arthritis  the patient  s temperature is 99°f  372°c   blood pressure is 112/74 mmhg  and pulse is 81/min  on physical examination  there are no lesions of the penis or other body rashes  no costovertebral tenderness is appreciated  a urinalysis reveals no blood  glucose  ketones  or proteins but is positive for leukocyte esterase  a urine microscopic evaluation shows a moderate number of white blood cells but no casts or crystals  a urine culture is negative  which of the following is the most likely cause for the patient  s symptoms ',
  'answer': 'A',
  'options': {'A': 'chlamydia trachomatis',
   'B': 'herpes simplex virus',
   'C': 'mycobacterium tuberculosis',
   'D': 'systemic lupus erythematosus',
   'E': 'treponema pallidum'},
  'meta_info': 'step1'}

In [32]:
# ir_es([x], 5, 'unprocessed')
def run_ir_es(questions, used_index, num_of_documents, metamap=False):
    print(f'Used index: {used_index}\nNumber of retrieved documents: {num_of_documents}\nUsing metamap phrases: {metamap}')
    ir_es(questions, num_of_documents, used_index, metamap)

In [35]:
run_ir_es(questions=questions_metamap_data,
          used_index=Indexes.Stemmed_sentences.value,
          num_of_documents=10,
          metamap=True
         )

  0%|          | 0/21628 [00:00<?, ?it/s]

Used index: sentences-stemmed
Number of retrieved documents: 10
Using metamap phrases: True


100%|██████████| 21628/21628 [43:41<00:00,  8.25it/s] 

Accuracy: 23.686887368226373%
	Correct answers: 5123
	Inorrect answers: 16505





In [60]:
from nltk import ngrams, FreqDist

grams = ngrams('one two scy blue one two babaloo'.split(), 1)
freq = [{key:value for nltk.FreqDist(grams)]
freq

FreqDist({('one',): 2, ('two',): 2, ('scy',): 1, ('blue',): 1, ('babaloo',): 1})

In [66]:
x = freq.get(('lol',))

In [None]:
from nltk import ngrams, FreqDist

corpus_unigrams = ngrams(corpus_joined.split(), 1)
corpus_unigrams_freq = nltk.FreqDist(corpus_unigrams)
def calculate_score(bm25_score, query):
    query_unigrams = ngrams(query.split(), 1)
    
    for unigram in query_unigrams:
        
    
    query_unigrams_freq = FreqDist(query_unigrams)

In [31]:
dev_questions = []

with open(questions_dev_medqa_path, 'r') as file:
    for line in file:
        dev_questions.append(json.loads(line))

In [32]:
dev_questions[0]

{'question': 'A 21-year-old sexually active male complains of fever, pain during urination, and inflammation and pain in the right knee. A culture of the joint fluid shows a bacteria that does not ferment maltose and has no polysaccharide capsule. The physician orders antibiotic therapy for the patient. The mechanism of action of action of the medication given blocks cell wall synthesis, which of the following was given?',
 'answer': 'Ceftriaxone',
 'options': {'A': 'Chloramphenicol',
  'B': 'Gentamicin',
  'C': 'Ciprofloxacin',
  'D': 'Ceftriaxone',
  'E': 'Trimethoprim'},
 'meta_info': 'step1',
 'answer_idx': 'D',
 'metamap_phrases': ['21-year-old sexually active male',
  'fever',
  'pain',
  'urination',
  'inflammation',
  'pain in the right knee',
  'culture',
  'joint',
  'bacteria',
  'not ferment maltose',
  'polysaccharide capsule',
  'physician orders antibiotic therapy',
  'patient',
  'mechanism of action',
  'medication given blocks cell wall synthesis',
  'following',
  '

In [38]:
import copy

x = copy.deepcopy(dev_questions)

In [39]:
x[1]['question'] = 'lol'

In [41]:
dev_questions[0]['question']

'lol'

In [46]:
snowball_stemmer = SnowballStemmer(language='english') 
stem_questions(x[:15], snowball_stemmer)

100%|██████████| 15/15 [00:00<00:00, 151.98it/s]


In [49]:
x[5]

{'question': 'a 41-year-old woman present to her primari care physician with complaint of fatigu and weak . she deni ani person histori of blood clot or bleed problem in her past , but she say that her mother has had to be treat for breast cancer recent and is start to wear her down . her past medic histori is signific for preeclampsia , hypertens , polycyst ovarian syndrom , and hypercholesterolemia . she current smoke 1 pack of cigarett per day , drink a glass of wine per day , and current deni ani illicit drug use . her vital sign includ : temperatur , 36.7°c ( 98.0°f ) ; blood pressur , 126/74 mm hg ; heart rate , 111/min ; and respiratori , rate 23/min . on physic examin , her puls are bound and irregular , complexion is pale , but breath sound remain clear . on examin , the physician find diffus skin pallor and order a complet blood count . her laboratori data demonstr a hematocrit of 27.1 % , mcv of 79 fl , and a reticulocyt count of 2.0 % . the patient is diagnos with anemia . 

In [114]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# model = AutoModelForQuestionAnswering.from_pretrained("bert-base-uncased")

tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

In [115]:
import torch

text = r"""🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
TensorFlow 2.0 and PyTorch."""

question = "How many pretrained models are available in 🤗 Transformers?"

inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="pt")
input_ids = inputs["input_ids"].tolist()[0]

text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
outputs = model(**inputs)
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits

answer_start = torch.argmax(answer_start_scores)  # Get the most likely beginning of answer with the argmax of the score
answer_end = torch.argmax(answer_end_scores) + 1  # Get the most likely end of answer with the argmax of the score

answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))

print(f"Question: {question}")
print(f"Answer: {answer}")

# print(len(inputs['input_ids'][0]))
# print(len(inputs['token_type_ids'][0]))
# print(len(inputs['attention_mask'][0]))

Question: How many pretrained models are available in 🤗 Transformers?
Answer: over 32 +


## Experiment: using BERT reader with the question from medqa and documents from ir_es retriever as evidence

In [116]:
# corpus_joined = ' '.join(list(corpus.values()))
chlamydia_question = '''A 27-year-old male presents to urgent care complaining of pain with urination. He reports that the pain started 3 days ago. He has never experienced these symptoms before. He denies
gross hematuria or pelvic pain. He is sexually active with his girlfriend, and they consistently use condoms. When asked about recent travel, he admits to recently returning from a
boys’ trip” in Cancun where he had unprotected sex 1 night with a girl he met at a bar. The patients medical history includes type I diabetes that is controlled with an insulin pump.
His mother has rheumatoid arthritis. The patients temperature is 99 F (37.2 C), blood pressure is 112/74 mmHg, and pulse is 81/min. On physical examination, there are no lesions of
the penis or other body rashes. No costovertebral tenderness is appreciated. A urinalysis reveals no blood, glucose, ketones, or proteins but is positive for leukocyte esterase. A urine
microscopic evaluation shows a moderate number of white blood cells but no casts or crystals. A urine culture is negative. Which of the following is the most likely cause for the
patient’s symptoms?'''

another_question = '''a 4670-g  10-lb 5-oz  male newborn is delivered at term to a 26-year-old woman after prolonged labor  apgar scores are 9 and 9 at 1 and 5 minutes  examination in the delivery room shows swelling  tenderness  and crepitus over the left clavicle  there is decreased movement of the left upper extremity  movement of the hands and wrists are normal  a grasping reflex is normal in both hands  an asymmetric moro reflex is present  the remainder of the examination shows no abnormalities and an anteroposterior x-ray confirms the diagnosis  which of the following is the most appropriate next step in management'''
# x_text = "Last Monday Mark started working on my thesis using BERT. So fat he was stuck on it and could not progress"
# x_question =  'What did Mark start on Sunday?''
# retrieved_documents = search_documents(query_input=another_question, n=5, index_name='unprocessed')
retrieved_documents = search_documents(query_input=x['question'], n=10, index_name='stemming-punctuation')

In [117]:
evidence = ' '.join([x['evidence']['content'] for x in retrieved_documents[:2]])

In [70]:
evidence

'has gotten darker more brownish over the last 2 days last night his mother noticed that his eyes had a yellow tint jf says he feels as though he has no energy pertinent findings the physical examination was remarkable for jf s pale appearance mild scleral icterus jaundice mild splenomegaly and increased heart rate tachycardia jf s urine tested positive for hemoglobin hemoglobinuria a peripheral blood smear reveals a lower-than-normal number of red blood cells rbc with some containing precipitated hemoglobin heinz bodies see image at right and a higher-than-normal number of reticulocytes immature rbc results of the complete blood intern because he had a history of buttock pain and impotence on examination he had a reduced peripheral pulse on the left foot compared to the right on direct questioning the patient revealed that he experienced severe left-sided buttock pain after walking 100 yards after a short period of rest he could walk another 100 yards before the same symptoms recurred

In [72]:
inputs = tokenizer(x['question'], evidence, 
                   add_special_tokens=True, 
                   return_tensors="pt"
                  )

input_ids = inputs["input_ids"].tolist()[0]

# print(len(inputs['input_ids'][0]))
# print(len(inputs['token_type_ids'][0]))
# print(len(inputs['attention_mask'][0]))

text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
outputs = model(**inputs)
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits

answer_start = torch.argmax(answer_start_scores)  # Get the most likely beginning of answer with the argmax of the score
answer_end = torch.argmax(answer_end_scores) + 1  # Get the most likely end of answer with the argmax of the score

answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))

print(f"Question: {x['question']}")
print(f"\nAnswer: {answer}")

Question: a 27-year-old male presents to urgent care complaining of pain with urination  he reports that the pain started 3 days ago  he has never experienced these symptoms before  he denies gross hematuria or pelvic pain  he is sexually active with his girlfriend  and they consistently use condoms  when asked about recent travel  he admits to recently returning from a  boys  trip  in cancun where he had unprotected sex 1 night with a girl he met at a bar  the patient  s medical history includes type i diabetes that is controlled with an insulin pump  his mother has rheumatoid arthritis  the patient  s temperature is 99°f  372°c   blood pressure is 112/74 mmhg  and pulse is 81/min  on physical examination  there are no lesions of the penis or other body rashes  no costovertebral tenderness is appreciated  a urinalysis reveals no blood  glucose  ketones  or proteins but is positive for leukocyte esterase  a urine microscopic evaluation shows a moderate number of white blood cells but n

In [137]:
def ir_es_bert(questions, no_documents_to_retrieve, index_name, metamap=False):
    start_time = time.time()
    for i, question_data in enumerate(questions):
        question = question_data['question']
        if metamap:
            query = ' '.join(question_data['metamap_phrases'])
        else:
            query = question_data['question']
        answer = question_data['answer']
        
        # obtain the top-N ranked passages from the large-scale document collection C
        retrieved_documents = search_documents(
            query_input=query,
            n=no_documents_to_retrieve,
            index_name=index_name
        )
        
        # contatenating them into a long sequence c
        c = ""
        for document in retrieved_documents:
            c += document['evidence']['content']
        for option, option_answer in question_data['options'].items():
            # then for each question and option pair qa_i = q + a_i
            question_answer = question + " " + option_answer
            print(question_answer)
            # qa_i and c are then passed to the document reader for reasoning and decision making
            inputs = tokenizer(question_answer, c, 
                   add_special_tokens=True, 
                   return_tensors="pt"
                  )

            input_ids = inputs["input_ids"].tolist()[0]

            # print(len(inputs['input_ids'][0]))
            # print(len(inputs['token_type_ids'][0]))
            # print(len(inputs['attention_mask'][0]))

            text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
            print(text_tokens)
            outputs = model(**inputs)
            answer_start_scores = outputs.start_logits
            answer_end_scores = outputs.end_logits

            answer_start = torch.argmax(answer_start_scores)  # Get the most likely beginning of answer with the argmax of the score
            answer_end = torch.argmax(answer_end_scores) + 1  # Get the most likely end of answer with the argmax of the score

            answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))

            print(f"Question: {question}")
            print(f"\nAnswer: {answer}")
            
        break

In [139]:
ir_es_bert(dev_questions_data, 2, 'stemming-punctuation', True)

a 21-year-old sexual activ male complain of fever pain dure urin and inflamm and pain in the right knee a cultur of the joint fluid show a bacteria that doe not ferment malto and has no polysaccharid capsul the physician order antibiot therapi for the patient the mechan of action of action of the medic given block cell wall synthesi which of the follow was given chloramphenicol
Question: a 21-year-old sexual activ male complain of fever pain dure urin and inflamm and pain in the right knee a cultur of the joint fluid show a bacteria that doe not ferment malto and has no polysaccharid capsul the physician order antibiot therapi for the patient the mechan of action of action of the medic given block cell wall synthesi which of the follow was given

Answer: [CLS] a 21 - year - old sexual activ male complain of fever pain dure urin and inflamm and pain in the right knee a cultur of the joint fluid show a bacteria that doe not ferment malto and has no polysaccharid capsul the physician orde

In [None]:
conda install -c huggingface transformers

In [None]:
python -c "from transformers import pipeline; print(pipeline('sentiment-analysis')('we love you'))"

# ElasticSearch usage

## Creating a document

In [2]:
from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch()

doc = {
    'author': 'author_name',
    'text': 'Interensting content...',
    'title': 'Test Title',
    'timestamp': datetime.now(),
}
res = es.index(index="test-index", id=1, body=doc)
print(res['result'])

updated


## Getting a document

In [None]:
res = es.get(index="test-index", id=1)
print(res['_source'])

## Refreshing index

In [15]:
es.indices.refresh(index="unprocessed")

{'_shards': {'total': 2, 'successful': 1, 'failed': 0}}

## Searching for a document

In [None]:
res = es.search(index="test-index", body={"query": {"match_all": {}}})
print("Got %d Hits:" % res['hits']['total']['value'])
for hit in res['hits']['hits']:
    print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])

## Deleting a document/index

In [19]:
from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch()

# delete document
# es.delete(index="test-index", id=1)
# delete index
es.indices.delete(index="sentences-stemmed")

{'acknowledged': True}

In [25]:
# curl "localhost:9200/_cat/indices?v=true"
# curl -X GET "localhost:9200/_cat/health?v=true&pretty"
# curl -X GET "localhost:9200/sentences-stemmed/_settings"


{'acknowledged': True, 'shards_acknowledged': True, 'index': 'test'}