# Simulator

In [1]:
from simulator import BNSimulator

import os

# Directory holding the BIF network files. The default is the original
# machine-specific location; set the BN_DATASET_DIR environment variable to run
# this notebook on another machine without editing the cell.
BN_DATASET_DIR = os.environ.get("BN_DATASET_DIR", "/home/ubuntu/ADS/BN_dataset")
ASIA_BIF_PATH = os.path.join(BN_DATASET_DIR, "asia.bif")

# Number of rows requested from each sampling call below.
N_SAMPLES = 1000

# NOTE(review): the sampling calls are stochastic and no seed is set in this
# cell; whether BNSimulator exposes a seed option is not visible here — confirm.
sim = BNSimulator.from_bif(ASIA_BIF_PATH)

# Observational: draw N_SAMPLES rows without any intervention.
data_obs = sim.sample_observational(N_SAMPLES)

# Interventional: draw N_SAMPLES rows with smoke forced to "yes".
data_do_smoke_yes = sim.sample_interventional({"smoke": "yes"}, N_SAMPLES)

# Marginal query: distribution of dysp for the {"smoke": "yes"} argument.
# NOTE(review): presumably the second argument is evidence rather than an
# intervention — confirm against BNSimulator.query_marginal.
print(sim.query_marginal("dysp", {"smoke": "yes"}))

  from .autonotebook import tqdm as notebook_tqdm
Generating for node: dysp: 100%|██████████| 8/8 [00:00<00:00, 548.27it/s]
Generating for node: dysp: 100%|██████████| 8/8 [00:00<00:00, 671.77it/s]

yes    0.552808
no     0.447192
Name: dysp, dtype: float64





# World_model

In [1]:
from simulator import BNSimulator
from world_model import WorldAgent, HFChatLLM


# Short alternative story, kept for quick experiments:
# ASIA_STORY = "You are in a small chest clinic."

# Background narrative handed to the WorldAgent as its `story`.
# This is a plain (non-f) string on purpose: the text has no placeholders, and a
# plain literal keeps any future "{" or "}" in the prose from being evaluated.
ASIA_STORY = """You are a physician working in a small chest clinic attached to a public hospital in a mid-size city. Most of your patients are adults referred by general practitioners because of persistent cough, chest pain, or shortness of breath.

The clinic serves a diverse population. Some patients are long-term residents who have never traveled outside the country; others are migrant workers or people who have recently returned from trips to regions with higher rates of tuberculosis. You routinely ask about recent travel, especially to parts of Asia where TB remains moderately prevalent, because it changes how you interpret symptoms and test results.

Smoking is very common in your patient population. Many of your patients have smoked for years and are at increased risk of both lung cancer and chronic bronchitis. You know that smoking does not guarantee disease, but it substantially changes the prior probability of those diagnoses.

When patients present with shortness of breath (dyspnea), you consider several possible explanations: tuberculosis (TB), lung cancer, and bronchitis are among the main suspects. TB and lung cancer often show up as abnormalities on a chest X-ray, while bronchitis may or may not visibly change the X-ray but still causes chronic cough and breathlessness. You order X-rays and other tests, and you interpret them in light of each patient’s smoking status and travel history.

In this world, your “variables” correspond to clinically meaningful properties: whether the patient recently visited Asia, whether they smoke, whether they actually have TB, lung cancer, bronchitis, whether at least one of the serious lung diseases is present, whether the X-ray appears abnormal, and whether they report dyspnea. You use all of this information to reason probabilistically about the most likely diagnosis for each patient."""

# Short description for each variable name, passed to the WorldAgent as
# `var_descriptions`. Insertion order is kept as written here.
ASIA_DESCS = dict(
    asia="Visited Asia recently",
    tub="Has tuberculosis",
    smoke="Smokes regularly",
    lung="Has lung cancer",
    bronc="Has bronchitis",
    either="Has TB or lung cancer",
    xray="Chest X-ray positive",
    dysp="Has shortness of breath",
)

import os

# Path to the asia.bif network. The directory defaults to the original
# machine-specific location and can be overridden with the BN_DATASET_DIR
# environment variable, so the cell runs on other machines unchanged.
ASIA_BIF_PATH = os.path.join(
    os.environ.get("BN_DATASET_DIR", "/home/ubuntu/ADS/BN_dataset"),
    "asia.bif",
)

# Model identifier handed to HFChatLLM.
LLM_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

# Build the simulator, the chat LLM, and the agent that ties them to the story
# and the variable descriptions.
sim = BNSimulator.from_bif(ASIA_BIF_PATH)
llm = HFChatLLM(model_name=LLM_MODEL_NAME)
agent = WorldAgent(simulator=sim, story=ASIA_STORY, var_descriptions=ASIA_DESCS, llm=llm)

# out = agent.handle("do(smoke=yes) give me 15 samples of lung and dysp")


  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.64s/it]


In [2]:
# Ask the agent, in natural language, for 200 observational samples of lung and dysp.
out_obs = agent.handle("give me 200 observational samples of lung and dysp")

# Same variables and sample count, requested under the intervention do(smoke=yes).
out_do = agent.handle("do(smoke=yes) give me 200 samples of lung and dysp")


Generating for node: dysp: 100%|██████████| 8/8 [00:00<00:00, 650.32it/s]
Generating for node: dysp: 100%|██████████| 8/8 [00:00<00:00, 756.19it/s]


In [3]:
def summarize(df, variables=("dysp", "lung"), positive="yes"):
    """Return the empirical frequency of the positive label for each variable.

    Parameters
    ----------
    df : pandas.DataFrame
        Sampled data with one column per variable.
    variables : iterable of str, optional
        Column names to summarize. Defaults to ("dysp", "lung"), which
        reproduces the original two-entry summary.
    positive : str, optional
        Label counted as the event of interest. Defaults to "yes".

    Returns
    -------
    dict
        Maps "P(<variable>=<positive>)" to the fraction of rows whose value
        equals `positive`, in the order given by `variables`.

    Raises
    ------
    KeyError
        If a requested variable is not a column of `df`.
    """
    return {
        f"P({name}={positive})": (df[name] == positive).mean()
        for name in variables
    }

# Print the two summaries one after the other so the observational and
# interventional frequencies can be compared directly.
for label, result in (("Observational:", out_obs), ("do(smoke=yes):", out_do)):
    print(label, summarize(result.data))


Observational: {'P(dysp=yes)': np.float64(0.425), 'P(lung=yes)': np.float64(0.11)}
do(smoke=yes): {'P(dysp=yes)': np.float64(0.595), 'P(lung=yes)': np.float64(0.15)}


In [4]:
def print_output(out):
    """Print the parsed command, data, narrative and added variables of a result.

    `out` must expose the attributes `parsed`, `data`, `narrative` and
    `added_variables`. Each one is printed under its own heading, and every
    heading after the first is preceded by a blank line.
    """
    sections = (
        ("Parsed:", "parsed"),
        ("\nData:", "data"),
        ("\nNarrative:", "narrative"),
        ("\nAdded Variables:", "added_variables"),
    )
    for heading, attribute in sections:
        print(heading)
        # Read the attribute only when its section is reached, so output is
        # emitted in the same order as attribute access.
        print(getattr(out, attribute))
    
# Show every section of the observational result requested earlier.
print_output(out_obs)

Parsed:
ParsedCommand(action='observational', n=200, variables=['lung', 'dysp'], interventions={}, notes=None)

Data:
    lung dysp
0     no  yes
1    yes  yes
2     no   no
3     no   no
4     no   no
..   ...  ...
195  yes  yes
196   no  yes
197   no   no
198   no  yes
199   no  yes

[200 rows x 2 columns]

Narrative:
This sample represents an observational study, where we have collected 200 instances without manipulating any variables. In these cases, we observed that out of the 200 patients, 150 reported no lung disease (lung = no) and 100 experienced shortness of breath (dysp = yes). This distribution suggests a significant overlap between dyspnea and lung conditions, which aligns with clinical observations where dyspnea can be a symptom of various lung diseases.

Added Variables:
[]


In [5]:
# Request a variable (oxygen_saturation) that has no entry in ASIA_DESCS, without
# stating a sample count.
# NOTE(review): presumably the agent adds the unknown variable to the network and
# falls back to a default number of samples — confirm against WorldAgent.handle.
out_extend = agent.handle("do(smoke=yes) give me samples of lung and oxygen_saturation")

Generating for node: dysp: 100%|██████████| 9/9 [00:00<00:00, 646.32it/s]    


In [6]:
# Show the parsed command, data, narrative and added variables for the extended query.
print_output(out_extend)

Parsed:
ParsedCommand(action='interventional', n=50, variables=['lung', 'oxygen_saturation'], interventions={'smoke': 'yes'}, notes=None)

Data:
   lung oxygen_saturation
0    no               yes
1    no               yes
2    no               yes
3    no               yes
4    no               yes
5    no               yes
6    no               yes
7    no               yes
8   yes                no
9    no               yes
10   no               yes
11   no               yes
12   no               yes
13   no               yes
14  yes               yes
15   no               yes
16   no               yes
17   no               yes
18   no               yes
19   no               yes
20   no               yes
21   no               yes
22   no                no
23   no               yes
24   no               yes
25   no               yes
26   no                no
27   no               yes
28   no               yes
29   no               yes
30   no               yes
31   no               y

In [None]:
# Rebuild the agent with debug_llm=True and without an explicit llm argument.
# NOTE(review): this rebinds `agent`, so every later cell uses this instance instead
# of the one built with HFChatLLM; the keyword set also differs from that earlier
# call — confirm it matches the current WorldAgent signature.
agent = WorldAgent(simulator=sim, story=ASIA_STORY, var_descriptions=ASIA_DESCS, debug_llm=True)

In [3]:
# Query a variable ("asthma history") that is not among the described variables.
# NOTE(review): `out` is indexed like a dict here (out["parsed"], out["data"],
# out["response"]), whereas earlier cells read attributes (out.parsed, out.data,
# out.narrative) — confirm which result type agent.handle currently returns.
out = agent.handle("give me 10 samples of asthma history and dysp")
print("PARSED:", out["parsed"], "\n")

# List any variables recorded under "added_variables_info" in the parsed command.
added_info = out["parsed"].get("added_variables_info", [])
if added_info:
    print("New variables added:")
    for info in added_info:
        print(f"  - {info['name']}: {info['description']}")
        print(f"    parents={info['parents']}, baseline P(yes)={info['default_yes_prob']:.2f}\n")

# Rich-display only the first rows of the sampled table, when one was returned.
if out["data"] is not None:
    display(out["data"].head())

# Text response produced by the agent.
print(out["response"])


Generating for node: dysp: 100%|██████████| 8/8 [00:00<00:00, 934.04it/s]

PARSED: {'action': 'sample_observational', 'n': 5, 'variables': ['lung', 'dysp'], 'interventions': None, 'unknown_variables': [], 'unknown_interventions': {}} 






Unnamed: 0,lung,dysp
0,no,no
1,no,no
2,no,no
3,no,no
4,no,yes


Here are 5 observational simulated cases.

lung dysp
  no   no
  no   no
  no   no
  no   no
  no  yes

Showing first 5 rows.


In [3]:
def report_agent_output(out):
    """Print a four-part report for a dict-style agent result.

    Parameters
    ----------
    out : mapping
        Result with the keys "parsed" (a dict that may contain
        "added_variables_info"), "data" (a table with a `.head()` method, or
        None) and "response" (text).

    The report consists of the parsed command, the definitions of any newly
    added variables, the head of the sampled data, and the agent's response.
    Kept as a function so that each query cell can reuse the same reporting
    steps instead of repeating them.
    """
    parsed = out["parsed"]

    # 1) Show parsed command
    print("PARSED COMMAND:")
    print(parsed)
    print()

    # 2) If new variables were added, print their detailed definitions
    added_info = parsed.get("added_variables_info", [])
    if added_info:
        print("New variables added to the world:")
        for info in added_info:
            print(f"  - Name: {info['name']}")
            print(f"    Definition: {info['description']}")
            if info.get("parents"):
                print(f"    Parents: {', '.join(info['parents'])}")
            else:
                print("    Parents: (none, independent root)")
            print(f"    Baseline P({info['name']} = 'yes'): {info['default_yes_prob']:.2f}")
            print()
    else:
        print("No new variables were added in this query.\n")

    # 3) Show data (if any); only the head is displayed to keep output short
    if out["data"] is not None:
        print("SAMPLED DATA (head):")
        display(out["data"].head())
    else:
        print("No tabular data returned.\n")

    # 4) Show the world-agent narrative
    print("WORLD RESPONSE:")
    print(out["response"])


out = agent.handle("give me 10 samples of stress and dysp")
report_agent_output(out)


  0%|          | 0/8 [00:00<?, ?it/s]

PARSED COMMAND:
{'action': 'sample_observational', 'n': 10, 'variables': ['dysp'], 'interventions': None, 'unknown_variables': [], 'unknown_interventions': {}}

No new variables were added in this query.

SAMPLED DATA (head):


  df = pd.DataFrame.from_records(samples)


Unnamed: 0,dysp
0,yes
1,yes
2,no
3,yes
4,no


WORLD RESPONSE:
Here are 10 observational cases sampled from the current world without any explicit interventions.

dysp
 yes
 yes
  no
 yes
  no
  no
 yes
 yes
  no
  no

Showing the first 10 rows. Variables shown: dysp.


In [4]:
out = agent.handle("give me 10 samples of weight with dysp")

# Parsed command, followed by a blank separator line.
print("PARSED COMMAND:", out["parsed"], "", sep="\n")

# Definitions of any variables the agent added while handling the query.
added_info = out["parsed"].get("added_variables_info", [])
if not added_info:
    print("No new variables were added in this query.\n")
else:
    print("New variables added to the world:")
    for info in added_info:
        if info.get("parents"):
            parents_text = ", ".join(info["parents"])
        else:
            parents_text = "(none, independent root)"
        print(f"  - Name: {info['name']}")
        print(f"    Definition: {info['description']}")
        print(f"    Parents: {parents_text}")
        print(f"    Baseline P({info['name']} = 'yes'): {info['default_yes_prob']:.2f}")
        print()

# Head of the sampled table, when the agent returned one.
if out["data"] is None:
    print("No tabular data returned.\n")
else:
    print("SAMPLED DATA (head):")
    display(out["data"].head())

# Text response from the agent.
print("WORLD RESPONSE:")
print(out["response"])


  0%|          | 0/8 [00:00<?, ?it/s]

PARSED COMMAND:
{'action': 'sample_observational', 'n': 10, 'variables': ['dysp'], 'interventions': None, 'unknown_variables': [], 'unknown_interventions': {}}

No new variables were added in this query.

SAMPLED DATA (head):


  df = pd.DataFrame.from_records(samples)


Unnamed: 0,dysp
0,yes
1,no
2,no
3,no
4,yes


WORLD RESPONSE:
Here are 10 observational cases sampled from the current world without any explicit interventions.

dysp
 yes
  no
  no
  no
 yes
  no
  no
  no
  no
  no

Showing the first 10 rows. Variables shown: dysp.
