In [1]:
import json 
import pandas as pd

# Load the payload. JSON interchange text is UTF-8, so the encoding is stated
# explicitly instead of relying on the platform's locale default.
with open("../data/payload_mimic3.json", "r", encoding="utf-8") as f:
    loaded_payload = json.load(f)

# Tabular view of the payload records.
loaded_df = pd.DataFrame(loaded_payload)
# NumPy array with the subject_id of every row (repeated ids are kept as-is).
all_subject_ids = loaded_df['subject_id'].values

In [46]:
def get_most_recent_admission(df: pd.DataFrame, subject_id: int) -> dict:
    """Return one admission row of ``subject_id`` as a plain dict.

    The subject's rows are ordered by ``hadm_id`` in descending order and the
    first one is returned, i.e. the row with the largest ``hadm_id``.

    NOTE(review): this treats a larger ``hadm_id`` as "more recent" -- confirm
    that admission ids are chronological in this dataset.

    Args:
        df: Frame with at least ``subject_id`` and ``hadm_id`` columns.
        subject_id: Value matched against the ``subject_id`` column.

    Returns:
        Mapping of column name to value for the selected row.

    Raises:
        IndexError: If no row matches ``subject_id``.
    """
    rows_for_subject = df.loc[df['subject_id'] == subject_id]
    newest_first = rows_for_subject.sort_values(by='hadm_id', ascending=False)
    return newest_first.iloc[0].to_dict()

def format_admission_info(admission: dict, max_prescriptions: int = 10) -> str:
    """Render an admission record as a human-readable multi-line string.

    The result lists the admission's demographic fields, lab values and
    24h notes summary, followed by the last ``max_prescriptions`` entries of
    ``admission['prescricoes']``. Each prescription is printed on its own line
    and followed by a separator line.

    Args:
        admission: Mapping with the keys read below (``subject_id``,
            ``hadm_id``, ``admittime``, ``dischtime``, ``insurance``,
            ``age_estimada``, ``gender``, ``language``, ``marital_status``,
            ``ethnicity``, the four lab values with their ``*_uom`` units,
            ``notas_24h_resumo``) and ``prescricoes``, a list of mappings
            with ``drug``, ``drug_type``, ``dose_val``, ``dose_unit``,
            ``form``, ``route``, ``starttime`` and ``stoptime``.
        max_prescriptions: How many trailing prescriptions to include.
            Values <= 0 include none. Defaults to 10.

    Returns:
        The formatted text, terminated by a separator line and a newline.

    Raises:
        KeyError: If a required key is missing from ``admission`` or from
            one of the displayed prescriptions.
    """
    separator = "--------------------------------"

    prescriptions = admission['prescricoes']
    # Only the entries that will be shown are formatted. The explicit guard
    # avoids `[-0:]`, which would select the whole list instead of nothing.
    shown = prescriptions[-max_prescriptions:] if max_prescriptions > 0 else []

    prescription_blocks = [
        (
            f"  - Drug: {prescription['drug']}, "
            f"Type: {prescription['drug_type']}, "
            f"Dose: {prescription['dose_val']} {prescription['dose_unit']}, "
            f"Form: {prescription['form']}, "
            f"Route: {prescription['route']}, "
            f"Start: {prescription['starttime']}, "
            # The newline keeps the separator off the Stop value.
            f"Stop: {prescription['stoptime']}\n"
            f"{separator}"
        )
        for prescription in shown
    ]
    all_prescriptions_str = "\n".join(prescription_blocks)

    info = (
        f"Subject ID: {admission['subject_id']}\n"
        f"Admission ID: {admission['hadm_id']}\n"
        f"Admission Date: {admission['admittime']}\n"
        f"Discharge Date: {admission['dischtime']}\n"
        f"Insurance: {admission['insurance']}\n"
        f"Age: {admission['age_estimada']}\n"
        f"Gender: {admission['gender']}\n"
        f"Language: {admission['language']}\n"
        f"Marital Status: {admission['marital_status']}\n"
        f"Ethnicity: {admission['ethnicity']}\n"
        f"Creatinine: {admission['creatinina']} {admission['creatinina_uom']}\n"
        f"Hemoglobin: {admission['hemoglobina']} {admission['hemoglobina_uom']}\n"
        f"Potassium: {admission['potassio']} {admission['potassio_uom']}\n"
        f"Sodium: {admission['sodio']} {admission['sodio_uom']}\n"
        f"Notes (24h Summary): {admission['notas_24h_resumo']}\n"
        # Header on its own line so every prescription starts at column 0.
        f"Prescriptions:\n{all_prescriptions_str}\n"
        f"----------------------------------------\n"
    )
    return info

def get_last_prescription(admission: dict) -> dict:
    """Return the final entry of ``admission['prescricoes']``.

    Returns ``None`` when the prescription list is empty (or otherwise falsy).
    """
    entries = admission['prescricoes']
    if not entries:
        return None
    return entries[-1]

In [47]:
# admission = get_most_recent_admission(loaded_df, 41976)

In [48]:
def flatten_array(arr):
    """Flatten one level of nesting: concatenate the items of every inner iterable into a list."""
    flattened = []
    for inner in arr:
        flattened.extend(inner)
    return flattened

# Pool every prescription from every payload record into a single flat list.
all_prescricoes = flatten_array(record['prescricoes'] for record in loaded_payload)

In [49]:
# Tabular view of the flattened prescriptions (one row per entry of all_prescricoes).
all_prescricoes_df = pd.DataFrame(all_prescricoes)

In [50]:
# Distinct observed values for each prescription field, in order of first appearance.
all_drugs, all_drug_types, all_dose_values, all_dose_units, all_routes = (
    all_prescricoes_df[column].unique().tolist()
    for column in ('drug', 'drug_type', 'dose_val', 'dose_unit', 'route')
)

In [51]:
# Size of each value pool, in the order: drug, drug_type, dose_val, dose_unit, route.
tuple(
    len(pool)
    for pool in (all_drugs, all_drug_types, all_dose_values, all_dose_units, all_routes)
)

(589, 3, 220, 35, 37)

In [52]:
import random

N = 1000   # number of synthetic admissions to generate
p = 0.15   # probability that the held-out prescription gets one field corrupted
SEED = 42  # fixed seed so a fresh Restart & Run All regenerates the same dataset

# Dedicated generator: reproducible without touching the global `random` state.
rng = random.Random(SEED)

# Prescription field -> pool of observed values a corrupted field is redrawn from.
poison_pools = {
    'drug': all_drugs,
    'drug_type': all_drug_types,
    'dose_val': all_dose_values,
    'dose_unit': all_dose_units,
    'route': all_routes,
}

fake_admissions = []
for i in range(N):

    # Rejection-sample a subject whose selected admission has at least one prescription.
    # NOTE(review): this never terminates if no subject qualifies.
    while True:
        subject_id = rng.choice(all_subject_ids)
        most_recent_admission = get_most_recent_admission(loaded_df, subject_id)
        if len(most_recent_admission['prescricoes']) > 0:
            break

    prescriptions = most_recent_admission['prescricoes']
    # Hold out the last prescription as the one to be judged. It is COPIED:
    # the admission dict is fresh, but the nested prescription is still the
    # same object stored in loaded_df. Subjects are drawn with replacement, so
    # poisoning it in place would leak into later draws of the same subject
    # (labelled clean) and into records already appended.
    # Shallow copy assumes prescription values are scalars -- TODO confirm.
    current_prescription = dict(prescriptions[-1])
    # Rebinds the key on the fresh admission dict; the source list is untouched.
    most_recent_admission['prescricoes'] = prescriptions[:-1]

    # Monte Carlo draw: with probability p, corrupt exactly one field.
    z = rng.random()
    is_poisoned = z < p
    poison_choice = None
    if is_poisoned:
        poison_choice = rng.choice(list(poison_pools))
        pool = poison_pools[poison_choice]
        original_value = current_prescription[poison_choice]
        # Prefer a different value, so a record labelled poisoned really
        # differs from the original. Fall back to the full pool if the pool
        # holds nothing else.
        alternatives = [value for value in pool if value != original_value]
        current_prescription[poison_choice] = rng.choice(alternatives or pool)

    # Re-key the admission with a synthetic id; the real one is kept below.
    most_recent_admission['subject_id'] = i

    fake_admissions.append({
        # NOTE(review): keeps the element type of all_subject_ids; if that is a
        # NumPy integer, a later json.dump(..., default=str) writes it as a string.
        "old_subject_id": subject_id,
        "admission_str": format_admission_info(most_recent_admission),
        "current_prescription": current_prescription,
        "is_poisoned": is_poisoned,
        "poison_choice": poison_choice
    })


In [53]:
# Save as JSON (fake_admissions is a list, not a DataFrame).
import json

with open("../data/inputs_to_agent_fake_mimic3.json", "w", encoding="utf-8") as f:
    # default=str stringifies any value the json module cannot encode natively.
    json.dump(fake_admissions, f, indent=2, default=str)

# Count once and reuse; guard the rate so an empty list does not divide by zero.
total_records = len(fake_admissions)
poisoned_count = sum(1 for record in fake_admissions if record['is_poisoned'])
poisoned_rate = poisoned_count / total_records * 100 if total_records else 0.0

# The messages previously contained mojibake (UTF-8 bytes shown as cp1252);
# the intended emoji are restored here.
print(f"✅ Arquivo salvo com {total_records} registros!")
print(f"📊 Registros envenenados: {poisoned_count}")
print(f"📈 Taxa de envenenamento: {poisoned_rate:.1f}%")

âœ… Arquivo salvo com 1000 registros!
ðŸ“Š Registros envenenados: 169
ðŸ“ˆ Taxa de envenenamento: 16.9%
