# BioMedLM Inference Test Notebook
This notebook uses the Stanford BioMedLM model to perform inference on a custom JSONL dataset.

In [1]:
# Install required libraries
!pip install -q transformers
!pip install -q accelerate

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m105.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m94.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m54.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Import libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import json
from google.colab import files

In [3]:
# Load the BioMedLM checkpoint together with its non-fast tokenizer, and place
# the model on the GPU when one is available (CPU otherwise).
model_name = "stanford-crfm/BioMedLM"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# Switch to evaluation mode. As the last expression of the cell, the returned
# model is also displayed.
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/267 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/602k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/276k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/876 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/10.7G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/10.7G [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(28896, 2560)
    (wpe): Embedding(1024, 2560)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-31): 32 x GPT2Block(
        (ln_1): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=7680, nx=2560)
          (c_proj): Conv1D(nf=2560, nx=2560)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=10240, nx=2560)
          (c_proj): Conv1D(nf=2560, nx=10240)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=2560, out_features=28896, bias=False)
)

In [4]:
# Upload JSONL dataset
# files.upload() opens Colab's file picker and returns a dict keyed by the
# name each uploaded file was saved under.
expected_name = "task2data_simplified.jsonl"
print("Please upload the task2data_simplified.jsonl file:")
uploaded = files.upload()

# The dataset is opened later by this exact file name, so flag a mismatch
# right away instead of failing (or silently reading a stale copy) afterwards.
if expected_name not in uploaded:
    print(f"Warning: expected '{expected_name}' but received {sorted(uploaded) or 'no files'}.")

Please upload the task2data_simplified.jsonl file:


Saving task2data_simplified.jsonl to task2data_simplified.jsonl


In [5]:
# Read uploaded JSONL file: one JSON object per line, collected into `data`.
input_file = "task2data_simplified.jsonl"
data = []
with open(input_file, 'r', encoding='utf-8') as f:
    for line in f:
        # Blank lines (most commonly a trailing newline at the end of the
        # file) are not valid JSON and would make json.loads raise; skip them.
        if not line.strip():
            continue
        data.append(json.loads(line))
print(f"Loaded {len(data)} samples")

Loaded 10 samples


In [6]:
# Prompt template: an instruction, the case text, and a trailing "Diagnosis:"
# cue for the model to complete. Sections are separated by one blank line.
template = (
    "Instruction: Based on the following clinical case presentation, "
    "provide ONLY the most likely diagnosis as a single short phrase."
    "\n\n"
    "Presentation: {presentation}"
    "\n\n"
    "Diagnosis:"
)

In [7]:
# Token budget: the 1024-token limit is shared between the prompt and the
# generated continuation, so the prompt gets whatever generation leaves over.
max_model_length, max_generation_tokens = 1024, 100
max_input_tokens = max_model_length - max_generation_tokens

In [8]:
# Run inference
SEED = 42                      # fixes the sampling RNG so reruns on the same setup repeat
MAX_PRESENTATION_CHARS = 1500  # coarse character cap applied before tokenization


def encode_prompt(presentation):
    """Tokenize the filled-in template, keeping it within ``max_input_tokens``.

    When the prompt is too long, the case text is shortened and the prompt is
    rebuilt, rather than slicing the token tensor from the right. Slicing the
    assembled prompt would cut off the trailing "Diagnosis:" cue that the
    model is supposed to complete.

    Args:
        presentation: Case description text to insert into the template.

    Returns:
        The tokenizer output (PyTorch tensors) for the final prompt. It can
        only exceed ``max_input_tokens`` if the template alone is already
        longer than the budget.
    """
    text = presentation[:MAX_PRESENTATION_CHARS]
    while True:
        encoded = tokenizer(
            template.format(presentation=text),
            return_tensors="pt",
            add_special_tokens=False,
        )
        if encoded["input_ids"].shape[1] <= max_input_tokens or not text:
            return encoded
        # Drop roughly the last 10% of the case text and try again; the text
        # gets strictly shorter every round, so the loop always terminates.
        text = text[: int(len(text) * 0.9)]


# Generation below samples (do_sample=True); seed once so results are repeatable.
torch.manual_seed(SEED)

for i, item in enumerate(data[:10]):
    casename = item.get("casename", "N/A")
    # `or` also covers an explicit null in the JSON, which could not be sliced.
    presentation = item.get("presentation") or "N/A"
    clinical = item.get("clinical", "N/A")
    final = item.get("final", "N/A")

    inputs = encode_prompt(presentation).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_generation_tokens,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id
        )

    # Extract only generated part (remove prompt from output)
    output_ids = outputs[0]
    prompt_len = inputs['input_ids'].shape[1]
    generated_ids = output_ids[prompt_len:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Print results
    print("========================================")
    print(f"Case {i+1}: {casename}")
    print("Clinical Diagnosis:", clinical)
    print("Final Diagnosis:", final)
    print("LLM Diagnosis:")
    print(response if response else "[Empty response]")

Case 1: Case 16-2020: A 47-Year-Old Woman with Recurrent Melanoma and Pulmonary Nodules
Clinical Diagnosis: Metastatic melanoma
Final Diagnosis: Pulmonary histoplasmosis
LLM Diagnosis:
-   Type of data: The data set supporting the findings and illustration of the cases were classified using CT-guided biopsy was collected from an institutional review board-approved retrospective study of patients who received neoadjuvant systemic therapy, including clinical and imaging features, including those from the literature,

-   Dataset 3. Systemic therapy-associated immune-mediated adverse events,

-   The data set has not been discussed in the abstract or other data sets are
Case 2: Case 27-2020: A 53-Year-Old Woman with Headache and Gait Imbalance
Clinical Diagnosis: N/A
Final Diagnosis: Human immunodeficiency virus type 2 infection and cerebral toxoplasmosis
LLM Diagnosis:
**Conclusion:** The patient is nowadays, MRI findings of cerebellar hemorrhage.[@b11-jcnsd-3-2010-057]

Case presentatio