# load example data

In [58]:
import json
import os
import random
from itertools import combinations

import openai
import requests

# Read the OpenAI credential from the environment instead of embedding it in
# the source. The key that used to be hard-coded on this line has been exposed
# and should be revoked. If OPENAI_API_KEY is unset this assigns None and the
# first API call will fail with an authentication error.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# qa bot

In [76]:
def extract_terms(question, model="gpt-3.5-turbo"):
    """Ask the chat model for the key biomedical entities in *question*.

    Args:
        question: Natural-language question to extract entities from.
        model: Name of the OpenAI chat model to query.

    Returns:
        The text content of the first completion choice, returned as-is.
        The prompt requests at most three noun phrases separated by '|',
        but the reply is neither parsed nor validated here.
    """
    system_turn = {"role": "system", "content": "You are an AI expert in question answering, research, and information retrieval in the biomedical domain."}
    user_turn = {"role": "user", "content": f"Extract the key biomedical entities from the question separated by '|': '{question}'.\
            Extract no more than three biomedical entities, according to their importance in the question. \
            Assume that phrases are not stemmed; therefore, generate useful variations. Return only the extracted entities that can \
            directly be used without any explanation text. Return only noun phrases. Focus on maintaining the returned entities' precision and relevance to the original question."}

    # temperature=0 is used so that the extraction is as repeatable as possible.
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[system_turn, user_turn],
        temperature=0,
        frequency_penalty=0.5,
        presence_penalty=0.1,
    )
    first_choice = completion.choices[0]
    return first_choice.get('message').get('content')

def query_neo4j(terms, relationships=False):
    """Collect textual evidence for '|'-separated search terms from the GLKB API.

    The terms are sent to the ``search/lexical_search`` endpoint, and the
    title and abstract of up to five of the returned articles are fetched
    from ``nodes/article/<pmid>``. When *relationships* is true, every
    unordered pair of terms is additionally sent to ``search/rel_text``.

    Args:
        terms: String of search terms separated by '|'. Whitespace around
            each term is ignored and empty terms are dropped.
        relationships: If true, also collect relationship text for each
            pair of terms.

    Returns:
        A dict mapping a source label to evidence text. Article entries are
        keyed by their PubMed URL; relationship entries are keyed by
        ``"<source> database"``. An empty dict is returned when *terms*
        contains no usable term.

    Raises:
        requests.HTTPError: If the lexical search does not answer with
            HTTP 200. Failed article or relationship lookups are skipped.
    """
    apis = 'https://glkb.dcmb.med.umich.edu/api'
    max_articles = 5
    results = dict()

    # Split once and reuse the list for both the search and the pairwise
    # relationship lookups.
    term_list = [t.strip() for t in terms.split('|') if t.strip()]
    if not term_list:
        return results

    params = [('query', t) for t in term_list]
    response = requests.get(f"{apis}/search/lexical_search", params=params)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"lexical_search returned HTTP {response.status_code}",
            response=response,
        )
    pmids = response.json()

    fetched = 0
    for pmid in pmids:
        if fetched >= max_articles:
            break
        response = requests.get(f"{apis}/nodes/article/{pmid}")
        if response.status_code != 200:
            # Best effort: a missing article must not abort the whole query.
            continue
        article = response.json()  # parse the body once
        text = ' '.join(
            str(article.get(field) or '') for field in ('title', 'abstract')
        ).replace('\n', ' ')
        results[f"http://www.ncbi.nlm.nih.gov/pubmed/{pmid}"] = text
        fetched += 1

    if relationships:
        # Pair up the individual terms (iterating the raw string would pair
        # up single characters).
        for ent1, ent2 in combinations(term_list, 2):
            response = requests.get(
                f"{apis}/search/rel_text",
                params={'ent1': ent1, 'ent2': ent2, 'level': 'abstract', 'semantic': 'False'},
            )
            if response.status_code != 200:
                continue
            # Assumes the payload maps a source name to an iterable of
            # strings -- TODO confirm against the API.
            for source, rel in response.json().items():
                key = f'{source} database'
                evidence = ' '.join(rel)
                # Accumulate so a later pair does not overwrite the evidence
                # an earlier pair found in the same source.
                results[key] = f"{results[key]} {evidence}" if key in results else evidence

    return results
    
def create_article_dict(pmid, title, abstract):
    """Build a normalized article record.

    Args:
        pmid: PubMed identifier, as a string or an integer.
        title: Article title; any falsy value becomes ''.
        abstract: Abstract text, or a list of parts that are converted with
            ``str`` and joined with single spaces; any falsy value becomes ''.

    Returns:
        A dict with the keys ``"id"`` (the PubMed URL of the article),
        ``"title"`` and ``"abstract"``.
    """
    if isinstance(abstract, list):
        abstract = " ".join([str(a) for a in abstract])

    return {
        # Formatting (rather than `+`) accepts integer PMIDs as well as
        # strings, matching how query_neo4j builds the same URL.
        "id": f"http://www.ncbi.nlm.nih.gov/pubmed/{pmid}",
        "title": title if title else '',
        "abstract": abstract if abstract else ''
    }

def get_answer(text):
    """Translate a multiple-choice letter into its answer label.

    Args:
        text: The option letter to look up; matching is exact and
            case-sensitive.

    Returns:
        'Yes' for 'A', 'No' for 'B', 'Maybe' for 'C', and 'dk' for any
        other value.
    """
    labels = {'A': 'Yes', 'B': 'No', 'C': 'Maybe'}
    # Every label is a non-empty string, so a defaulted lookup is enough.
    return labels.get(text, 'dk')

def qa_with_context(question, context, options="""(A) yes\n(B) no\n(C) maybe""", model="gpt-3.5-turbo"):
    """Answer a multiple-choice question from the supplied context passages.

    Args:
        question: The question to answer.
        context: Iterable of context strings; they are joined with single
            spaces and embedded in the prompt.
        options: The answer options shown to the model below the question.
        model: Name of the OpenAI chat model to query.

    Returns:
        The label produced by ``get_answer`` for the model's reply:
        'Yes', 'No', 'Maybe', or 'dk' when the reply is not 'A', 'B' or 'C'.
    """
    # Bias sampling toward token ids 32, 33 and 34 -- presumably the tokens
    # 'A', 'B' and 'C' in the model's tokenizer; verify when changing `model`.
    logit_bias = {32 : 33, 33 : 33, 34 : 33}
    messages = [
            {"role": "system", "content": "You are an AI expert in question answering, research, and information retrieval in the biomedical domain."},
            {"role": "user", "content": f"""Answer the following multiple choice question based on the provided context. Answer the question directly without any explanation text.
            **Context:** {' '.join(context)} 
            **Question:** {question}\n{options}
            **Answer:** """}
        ]
    # max_tokens=1 limits the reply to a single token, i.e. one option letter.
    response = openai.ChatCompletion.create(model=model, messages=messages, temperature=0, logit_bias=logit_bias, max_tokens=1)
    return get_answer(response.choices[0].get('message').get('content'))

def qa_bot(question, model="gpt-3.5-turbo"):
    """Answer *question* using evidence retrieved for its key terms.

    Args:
        question: The question to answer.
        model: Name of the OpenAI chat model used for both term extraction
            and answering.

    Returns:
        A tuple ``(answer, context)``: the label returned by
        ``qa_with_context`` and the evidence dict returned by
        ``query_neo4j``.
    """
    evidence = query_neo4j(extract_terms(question, model=model))
    # Only the evidence texts are shown to the model; the source keys stay in
    # the returned dict.
    passages = [*evidence.values()]
    verdict = qa_with_context(question=question, context=passages, model=model)
    return verdict, evidence

def interpret_result(question, answer, context, model="gpt-3.5-turbo"):
    """Ask the chat model to justify *answer* using the evidence in *context*.

    Args:
        question: The original question.
        answer: The answer that should be explained.
        context: Evidence to cite; it is formatted directly into the prompt,
            which describes it as a dictionary of sources to evidence.
        model: Name of the OpenAI chat model to query.

    Returns:
        The text content of the first completion choice. The request caps
        the reply at 300 tokens.
    """
    system_turn = {"role": "system", "content": "You are an AI expert in question answering, research, and information retrieval in the biomedical domain."}
    user_turn = {"role": "user", "content": f"The answer to the question '{question}' is '{answer}'. Interpret the answer to the question using \
                the following context in less than 100 words. The context is in the format of a dictionary, in which the keys are the sources, and the values are corresponding evidence. \
                Provide references to the specific pieces of evidence that support your interpretation in place. \
                Generate the explanation directly without repeating the question and the answer. Focus on maintaining the answers' precision and relevance to the original question. \
                Context: {context}"}

    completion = openai.ChatCompletion.create(
        model=model,
        messages=[system_turn, user_turn],
        temperature=0,
        frequency_penalty=0.5,
        presence_penalty=0.1,
        max_tokens=300,
    )
    first_choice = completion.choices[0]
    return first_choice.get('message').get('content')

def qa_pipeline(question, model="gpt-3.5-turbo"):
    """Run the full pipeline: answer *question*, then explain the answer.

    Args:
        question: The question to answer.
        model: Name of the OpenAI chat model used for every step.

    Returns:
        A single string of the form ``"<answer>. <explanation>"``.
    """
    verdict, evidence = qa_bot(question=question, model=model)
    rationale = interpret_result(question, verdict, evidence, model=model)
    return f"{verdict}. {rationale}"

In [78]:
# Demo: run the whole pipeline on one example question and print the
# "<answer>. <explanation>" string it returns.
question = "Does SOX2 regulate TP53?"
answer = qa_pipeline(question)
print(answer)

No. The answer to the question 'Does SOX2 regulate TP53?' is 'No'. This conclusion is supported by evidence from multiple sources. One study found that mutations in TP53 increase the risk of SOX2 copy number alterations, but there was no evidence of SOX2 regulating TP53 (source: http://www.ncbi.nlm.nih.gov/pubmed/26780934). Another study showed that loss of TP53 function promotes lineage plasticity and antiandrogen resistance in prostate cancer through increased expression of SOX2, but there was no mention of SOX2 regulating TP53 (source: http://www.ncbi.nlm.nih.gov/pubmed/28059768). Additionally, a study on astrocytomas found a positive correlation between ID4 and SOX2 expression levels, but did not mention any regulation between SOX2 and TP53 (source: http://www.ncbi.nlm.nih.gov/pubmed/23613880). Another study on anaplastic thyroid carcinoma identified potential functional partners of SOX2, including TP53, but did not provide evidence of direct regulation between the two (source: htt