In [1]:
import os
# Restrict this process to GPU 0. Done in the very first cell, ahead of the
# later imports and model loading.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [2]:
import sys
from pathlib import Path

# This notebook lives in the `notebooks` directory, so the project root is the
# parent of the current working directory.
project_root = Path().resolve().parent
# Register the root only once: re-running this cell must not pile up duplicate
# sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [3]:
import gradio as gr
import pandas as pd
from gdc_pipeline import execute_pipeline
from gdc_pipeline import setup_args
from gdc_pipeline import setup_models_and_data

In [4]:
# Replace the kernel's command line with a placeholder CLI invocation before
# calling setup_args().
# NOTE(review): presumably setup_args() parses sys.argv and would reject the
# Jupyter kernel's own arguments — confirm against gdc_pipeline.
sys.argv = ['prog', '--question', 'dummy']
args = setup_args()

In [5]:
# Display the parsed arguments to confirm the placeholder question was picked up.
args

Namespace(input_file=None, question='dummy')

In [6]:
# Load the models and reference data once, up front. The returned object is
# read later for gdc_genes_mutations, model, tok, intent_model, intent_tok and
# project_mappings whenever a question is answered.
qag_requirements = setup_models_and_data()

loading HF token
getting gdc project information
loading gdc genes and mutations
loading llama-3B model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

loading intent model


In [7]:
def wrapped_execute_pipeline(question: str):
    """Answer a single question about the Genomic Data Commons (GDC).

    The question is wrapped in a one-row DataFrame (column ``questions``) and
    handed to ``execute_pipeline`` together with the models and reference data
    held in ``qag_requirements``.

    Args:
        question: Natural-language question to answer.

    Returns:
        Whatever ``execute_pipeline`` returns on success; otherwise a fixed
        message asking the user to retry with a template question.
    """
    df = pd.DataFrame({'questions' : [question]})
    print(f'Question received: {question}')
    try:
        result = execute_pipeline(
            df,
            qag_requirements.gdc_genes_mutations,
            qag_requirements.model,
            qag_requirements.tok,
            qag_requirements.intent_model,
            qag_requirements.intent_tok,
            qag_requirements.project_mappings,
            output_file_prefix=None
        )
    except Exception as e:
        # Deliberately best-effort: the caller still receives a friendly
        # message, but the underlying error is reported instead of being
        # silently discarded, so failures can be diagnosed from the cell output.
        print(f'execute_pipeline failed: {e!r}')
        result = 'Unable to execute GDC API, can you please retry with a template question?'
    return result

In [8]:
# Example question kept for manual testing; it is the same text used as the
# placeholder of the search box in the UI below.
question = 'What is the co-occurence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?'

In [9]:
def visible_component(input_text):
    """Return a Gradio update that sets a component's value to "WHATEVER".

    ``input_text`` is accepted but not used. This helper is not attached to
    any event in the visible part of the notebook.
    """
    placeholder_update = gr.update(value="WHATEVER")
    return placeholder_update


# Build the Gradio interface: a heading, a question box, a search button and a
# result box. `demo` is launched in the next cell.
with gr.Blocks(title="GDC QAG MCP server") as demo:
    # Page heading.
    gr.Markdown(
        """
        # GDC QAG Service
        """
    )

    # Free-text question input; the placeholder shows an example question.
    with gr.Row():
        query_input = gr.Textbox(
            lines = 3,
            label="Search Query",
            placeholder='e.g. "What is the co-occurence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?"',
            info="Required: Enter your search query",
        )

    search_button = gr.Button("Search", variant="primary")

    # Text area that receives the value returned by the click handler.
    output = gr.Textbox(
        label="Query Result",
        lines=10,
        max_lines=25,
        info="The Result of the Query will appear here",
    )

    # Clicking "Search" passes the question text to wrapped_execute_pipeline
    # and writes its return value into `output`.
    search_button.click(
        fn=wrapped_execute_pipeline,
        inputs=[query_input],
        outputs=output,
    )

In [10]:
# Serve the UI on port 7862 and additionally expose it as an MCP server
# (mcp_server=True); the endpoint URL is printed in the output below.
demo.launch(server_port=7862, show_api=True, mcp_server=True)

None
* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.

🔨 MCP server (using SSE) running at: http://127.0.0.1:7862/gradio_api/mcp/sse




Question received: What is the co-occurence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?
starting pipeline
running test on input                                            questions
0  What is the co-occurence frequency of somatic ...


  0%|          | 0/1 [00:00<?, ?it/s]gpustat is not installed, run `pip install gpustat` to collect GPU stats.


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

query:
What is the co-occurence frequency of somatic homozygous deletions in CDKN2A and CDKN2B in the mesothelioma project TCGA-MESO in the genomic data commons?



  deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]


gene entities ['CDKN2A', 'CDKN2B']
mutation entities []
cancer entities ['TCGA-MESO']
user intent:
freq_cnv_loss_or_gain



100%|██████████| 1/1 [00:17<00:00, 17.62s/it]


api_call_result ['joint frequency in TCGA-MESO is 44.83%']


100%|██████████| 1/1 [00:03<00:00,  3.96s/it]


postprocessing response


100%|██████████| 1/1 [00:00<00:00, 570.19it/s]


result                                                                    0
questions          What is the co-occurence frequency of somatic ...
intent                                         freq_cnv_loss_or_gain
llama_base_output                          The final answer is: 0.0%
helper_output                 joint frequency in TCGA-MESO is 44.83%
cancer_entities                                            TCGA-MESO
gene_entities                                       [CDKN2A, CDKN2B]
mutation_entities                                                 []
modified_prompt    What is the co-occurence frequency of somatic ...
ground_truth_stat                                              44.83
llama_base_stat                                                  0.0
delta_llama                                                   -44.83
final_response                           The final answer is: 44.83%
completed
execute_pipeline took 21.6316 seconds
