# Simulator

In [None]:
from simulator import BNSimulator

import os

# Location of the ASIA network definition (.bif). The default is the original
# machine-specific path; set the ASIA_BIF_PATH environment variable to run the
# notebook on another machine without editing this cell.
ASIA_BIF_PATH = os.environ.get(
    "ASIA_BIF_PATH",
    "/home/ubuntu/ai_data_scientist/BN_dataset/asia.bif",
)

# Number of rows requested from each sampling call below.
N_SAMPLES = 1000

sim = BNSimulator.from_bif(ASIA_BIF_PATH)

# Observational: draw N_SAMPLES rows with no intervention applied.
data_obs = sim.sample_observational(N_SAMPLES)

# Interventional: draw N_SAMPLES rows with `smoke` set to "yes".
data_do_smoke_yes = sim.sample_interventional({"smoke": "yes"}, N_SAMPLES)

# Marginal queries: distribution of `dysp` given {"smoke": "yes"}.
# NOTE(review): the second argument is presumably conditioning evidence rather
# than an intervention - confirm against BNSimulator.query_marginal.
print(sim.query_marginal("dysp", {"smoke": "yes"}))

  0%|          | 0/8 [00:00<?, ?it/s]

  df = pd.DataFrame.from_records(samples)


  0%|          | 0/8 [00:00<?, ?it/s]

yes    0.552808
no     0.447192
Name: dysp, dtype: float64


  df = pd.DataFrame.from_records(samples)


# World model

In [None]:
from simulator import BNSimulator
from world_model import WorldAgent

# Narrative background for the ASIA world; passed to WorldAgent as `story`.
# Kept as a plain (non-f) string: the text has no placeholders, and an f-prefix
# would make any literal "{" or "}" added to the story later be evaluated.
# A one-sentence alternative for quick experiments: "You are in a small chest clinic."
ASIA_STORY = """You are a physician working in a small chest clinic attached to a public hospital in a mid-size city. Most of your patients are adults referred by general practitioners because of persistent cough, chest pain, or shortness of breath.

The clinic serves a diverse population. Some patients are long-term residents who have never traveled outside the country; others are migrant workers or people who have recently returned from trips to regions with higher rates of tuberculosis. You routinely ask about recent travel, especially to parts of Asia where TB remains moderately prevalent, because it changes how you interpret symptoms and test results.

Smoking is very common in your patient population. Many of your patients have smoked for years and are at increased risk of both lung cancer and chronic bronchitis. You know that smoking does not guarantee disease, but it substantially changes the prior probability of those diagnoses.

When patients present with shortness of breath (dyspnea), you consider several possible explanations: tuberculosis (TB), lung cancer, and bronchitis are among the main suspects. TB and lung cancer often show up as abnormalities on a chest X-ray, while bronchitis may or may not visibly change the X-ray but still causes chronic cough and breathlessness. You order X-rays and other tests, and you interpret them in light of each patient’s smoking status and travel history.

In this world, your “variables” correspond to clinically meaningful properties: whether the patient recently visited Asia, whether they smoke, whether they actually have TB, lung cancer, bronchitis, whether at least one of the serious lung diseases is present, whether the X-ray appears abnormal, and whether they report dyspnea. You use all of this information to reason probabilistically about the most likely diagnosis for each patient."""

# Short human-readable description for each ASIA variable, keyed by variable
# name; passed to WorldAgent as `var_descriptions`.
ASIA_DESCS = dict(
    asia="Visited Asia recently",
    tub="Has tuberculosis",
    smoke="Smokes regularly",
    lung="Has lung cancer",
    bronc="Has bronchitis",
    either="Has TB or lung cancer",
    xray="Chest X-ray positive",
    dysp="Has shortness of breath",
)

import os

# Location of the ASIA network definition (.bif). The default is the original
# machine-specific path; set the ASIA_BIF_PATH environment variable to run the
# notebook on another machine without editing this cell.
ASIA_BIF_PATH = os.environ.get(
    "ASIA_BIF_PATH",
    "/home/ubuntu/ai_data_scientist/BN_dataset/asia.bif",
)

# Build the simulator and wrap it in the natural-language world agent.
sim = BNSimulator.from_bif(ASIA_BIF_PATH)
agent = WorldAgent(simulator=sim, story=ASIA_STORY, var_descriptions=ASIA_DESCS)

# NOTE(review): the recorded output for this request shows
# action='sample_observational', n=10, interventions=None, i.e. neither the
# do(smoke=yes) clause nor the requested count of 15 was honoured. Check the
# agent's command parsing before relying on these samples.
out = agent.handle("do(smoke=yes) give me 15 samples of lung and dysp")
print(out["parsed"])
# The later cells treat out["data"] as possibly None (no table returned), so
# guard here too instead of failing on `.head()`.
if out["data"] is not None:
    display(out["data"].head())
print(out["response"])


[WorldAgent] Loading LLM: Qwen/Qwen2.5-1.5B-Instruct on cuda


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  0%|          | 0/8 [00:00<?, ?it/s]

{'asia': {'description': 'Visited Asia recently', 'states': ['yes', 'no']}, 'tub': {'description': 'Has tuberculosis', 'states': ['yes', 'no']}, 'smoke': {'description': 'Smokes regularly', 'states': ['yes', 'no']}, 'lung': {'description': 'Has lung cancer', 'states': ['yes', 'no']}, 'bronc': {'description': 'Has bronchitis', 'states': ['yes', 'no']}, 'either': {'description': 'Has TB or lung cancer', 'states': ['yes', 'no']}, 'xray': {'description': 'Chest X-ray positive', 'states': ['yes', 'no']}, 'dysp': {'description': 'Has shortness of breath', 'states': ['yes', 'no']}, 'action': 'sample_observational', 'n': 10, 'variables': ['lung', 'dysp'], 'interventions': None, 'unknown_variables': [], 'unknown_interventions': {}}


  df = pd.DataFrame.from_records(samples)


Unnamed: 0,lung,dysp
0,no,yes
1,no,no
2,no,no
3,no,yes
4,no,no


Here are 10 observational cases sampled from the current world without any explicit interventions.

lung dysp
  no  yes
  no   no
  no   no
  no  yes
  no   no
  no   no
  no   no
  no   no
  no  yes
  no  yes

Showing the first 10 rows. Variables shown: lung, dysp.


In [2]:
# Request a variable ("asthma history") that has no entry in ASIA_DESCS and
# show how the agent parses and answers it.
out = agent.handle("give me 10 samples of asthma history and dysp")

print(f"PARSED: {out['parsed']} \n")

# Describe any variables the agent reports having added for this query.
added_info = out["parsed"].get("added_variables_info", [])
if added_info:
    print("New variables added:")
    summary_line = (
        "  - {name}: {description} (parents={parents}, "
        "baseline P(yes)={default_yes_prob:.2f})"
    )
    for var_info in added_info:
        # One summary line per variable, followed by a blank separator line.
        print(summary_line.format_map(var_info), end="\n\n")

# Only preview the table when the agent actually returned one.
if out["data"] is not None:
    display(out["data"].head())

print(out["response"])


PARSED: {'asia': {'description': 'Visited Asia recently', 'states': ['yes', 'no']}, 'tub': {'description': 'Has tuberculosis', 'states': ['yes', 'no']}, 'smoke': {'description': 'Smokes regularly', 'states': ['yes', 'no']}, 'lung': {'description': 'Has lung cancer', 'states': ['yes', 'no']}, 'bronc': {'description': 'Has bronchitis', 'states': ['yes', 'no']}, 'either': {'description': 'Has TB or lung cancer', 'states': ['yes', 'no']}, 'xray': {'description': 'Chest X-ray positive', 'states': ['yes', 'no']}, 'dysp': {'description': 'Has shortness of breath', 'states': ['yes', 'no']}, 'action': 'sample_observational', 'n': 10, 'variables': ['dysp'], 'interventions': None, 'unknown_variables': ['asthma history'], 'unknown_interventions': {}} 

In this world, I don't yet know how to interpret some of the variables you mentioned: variables=['asthma history'], intervention targets=[]. I couldn't safely extend the world to include them.


In [3]:
def show_agent_result(out):
    """Print a structured report of one ``agent.handle(...)`` result.

    The report has four parts, in order: the parsed command, the definitions
    of any variables listed under ``added_variables_info``, a preview of the
    returned table (if any), and the agent's narrative response.

    Args:
        out: Mapping returned by ``agent.handle`` with the keys ``"parsed"``
            (a dict), ``"data"`` (an object with ``.head()``, or ``None``)
            and ``"response"``.
    """
    # 1) Show parsed command
    print("PARSED COMMAND:")
    print(out["parsed"])
    print()

    # 2) If new variables were added, print their detailed definitions
    added_info = out["parsed"].get("added_variables_info", [])
    if added_info:
        print("New variables added to the world:")
        for info in added_info:
            print(f"  - Name: {info['name']}")
            print(f"    Definition: {info['description']}")
            if info.get("parents"):
                print(f"    Parents: {', '.join(info['parents'])}")
            else:
                print("    Parents: (none, independent root)")
            print(f"    Baseline P({info['name']} = 'yes'): {info['default_yes_prob']:.2f}")
            print()
    else:
        print("No new variables were added in this query.\n")

    # 3) Show data (if any)
    if out["data"] is not None:
        print("SAMPLED DATA (head):")
        display(out["data"].head())
    else:
        print("No tabular data returned.\n")

    # 4) Show the world-agent narrative
    print("WORLD RESPONSE:")
    print(out["response"])


# NOTE(review): in the recorded output for this request the parsed command
# lists only 'dysp' and an empty 'unknown_variables', so "stress" appears to
# have been dropped without being reported. Check the agent's parsing.
out = agent.handle("give me 10 samples of stress and dysp")
show_agent_result(out)


  0%|          | 0/8 [00:00<?, ?it/s]

PARSED COMMAND:
{'action': 'sample_observational', 'n': 10, 'variables': ['dysp'], 'interventions': None, 'unknown_variables': [], 'unknown_interventions': {}}

No new variables were added in this query.

SAMPLED DATA (head):


  df = pd.DataFrame.from_records(samples)


Unnamed: 0,dysp
0,yes
1,yes
2,no
3,yes
4,no


WORLD RESPONSE:
Here are 10 observational cases sampled from the current world without any explicit interventions.

dysp
 yes
 yes
  no
 yes
  no
  no
 yes
 yes
  no
  no

Showing the first 10 rows. Variables shown: dysp.


In [4]:
# Query mentioning "weight", which has no entry in ASIA_DESCS, together with dysp.
out = agent.handle("give me 10 samples of weight with dysp")

# 1) Parsed command, followed by a blank line
print("PARSED COMMAND:")
print(out["parsed"], end="\n\n")

# 2) Definitions of any variables the agent reports having added
added_info = out["parsed"].get("added_variables_info", [])
if not added_info:
    print("No new variables were added in this query.\n")
else:
    print("New variables added to the world:")
    for var_info in added_info:
        print(f"  - Name: {var_info['name']}")
        print(f"    Definition: {var_info['description']}")
        # A missing or empty parent list is shown with a fixed placeholder.
        parents_text = (
            ", ".join(var_info["parents"])
            if var_info.get("parents")
            else "(none, independent root)"
        )
        print(f"    Parents: {parents_text}")
        print(
            f"    Baseline P({var_info['name']} = 'yes'): {var_info['default_yes_prob']:.2f}",
            end="\n\n",
        )

# 3) Preview of the returned table, when there is one
if out["data"] is None:
    print("No tabular data returned.\n")
else:
    print("SAMPLED DATA (head):")
    display(out["data"].head())

# 4) Narrative response from the world agent
print("WORLD RESPONSE:")
print(out["response"])


  0%|          | 0/8 [00:00<?, ?it/s]

PARSED COMMAND:
{'action': 'sample_observational', 'n': 10, 'variables': ['dysp'], 'interventions': None, 'unknown_variables': [], 'unknown_interventions': {}}

No new variables were added in this query.

SAMPLED DATA (head):


  df = pd.DataFrame.from_records(samples)


Unnamed: 0,dysp
0,yes
1,no
2,no
3,no
4,yes


WORLD RESPONSE:
Here are 10 observational cases sampled from the current world without any explicit interventions.

dysp
 yes
  no
  no
  no
 yes
  no
  no
  no
  no
  no

Showing the first 10 rows. Variables shown: dysp.
