In [7]:
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination

# Load the dataset. read_csv already returns a DataFrame, so it is used
# directly instead of being wrapped in a second pd.DataFrame(...) call.
heart_disease = pd.read_csv("heartdisease-IR.csv")
data = heart_disease  # alias kept so any later cell that refers to `data` still works
print("Dataset loaded successfully.\n")

# Display sample data and attribute types
print("Sample instances from the dataset:")
print(heart_disease.head(), "\n")
print("Attributes and their datatypes:")
print(heart_disease.dtypes, "\n")

# Define the structure of the Bayesian Network: every attribute is a direct
# parent of 'heartdisease'; there are no edges between the attributes.
model = BayesianNetwork([
    ('age', 'heartdisease'),
    ('Gender', 'heartdisease'),
    ('Family', 'heartdisease'),
    ('diet', 'heartdisease'),
    ('Lifestyle', 'heartdisease'),
    ('cholestrol', 'heartdisease'),
])

# Learning the CPD using Maximum Likelihood Estimator
print("Learning CPD using Maximum Likelihood Estimator...")
model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)

# Performing inference
HeartDisease_infer = VariableElimination(model)

# Query 1: Probability of Heart Disease given age = 3.
# The printed label is built from the evidence dict so the two cannot drift
# apart (the label used to say "age=45" while the query conditioned on 3).
# NOTE(review): 'age' appears to be stored as a small integer code rather than
# an age in years -- confirm which age range code 3 stands for.
age_evidence = {'age': 3}
print(f"\n1. Probability of Heart Disease given evidence age={age_evidence['age']}:")
q1 = HeartDisease_infer.query(variables=['heartdisease'], evidence=age_evidence)
print(q1)

# Query 2: Probability of Heart Disease given cholestrol = 2
cholestrol_evidence = {'cholestrol': 2}
print(
    "\n2. Probability of Heart Disease given evidence "
    f"cholestrol={cholestrol_evidence['cholestrol']}:"
)
q2 = HeartDisease_infer.query(variables=['heartdisease'], evidence=cholestrol_evidence)
print(q2)

Dataset loaded successfully.

Sample instances from the dataset:
   age  Gender  Family  diet  Lifestyle  cholestrol  heartdisease
0    0       0       1     1          3           0             1
1    0       1       1     1          3           0             1
2    1       0       0     0          2           1             1
3    4       0       1     1          3           2             0
4    3       1       1     0          0           2             0 

Attributes and their datatypes:
age             int64
Gender          int64
Family          int64
diet            int64
Lifestyle       int64
cholestrol      int64
heartdisease    int64
dtype: object 

Learning CPD using Maximum Likelihood Estimator...

1. Probability of Heart Disease given evidence age=45:
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.5235 |
+-----------------+---------------------+
| heartdisease(1) |              0.4765 |
+-----------------