In [18]:
#!pip install pgmpy

In [19]:
import pandas as pd

# Define column names as per the dataset
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 
         'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

# Load the dataset
heartDisease = pd.read_csv('heart.csv', names=names)

# Replace missing values marked as '?'
heartDisease = heartDisease.replace('?', np.nan)

# Display unique values for 'age' and 'sex'
print(heartDisease['age'].unique())
print(heartDisease['sex'].unique())

['age' '52' '53' '70' '61' '62' '58' '55' '46' '54' '71' '43' '34' '51'
 '50' '60' '67' '45' '63' '42' '44' '56' '57' '59' '64' '65' '41' '66'
 '38' '49' '48' '29' '37' '47' '68' '76' '40' '39' '77' '69' '35' '74']
['sex' '1' '0']


In [20]:
import numpy as np
import pandas as pd
from pgmpy.inference import VariableElimination
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator

# Define column names as per the dataset
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 
         'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

# Load the dataset, skipping the header row provided by the 'names' parameter
heartDisease = pd.read_csv('heart.csv', names=names, skiprows=1)

# Replace missing values marked as '?'
heartDisease = heartDisease.replace('?', np.nan)

# Convert relevant columns to numeric types
heartDisease['age'] = pd.to_numeric(heartDisease['age'])
heartDisease['sex'] = pd.to_numeric(heartDisease['sex'])

# Define the Bayesian Network structure
model = BayesianNetwork([
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'target'),
    ('fbs', 'target'),
    ('target', 'restecg'),
    ('target', 'thalach'),
    ('target', 'chol')
])

# Train the model using Maximum Likelihood Estimator
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Perform inference
HeartDisease_infer = VariableElimination(model)

# Query the model: Predict heart disease for a 37-year-old male
q = HeartDisease_infer.query(variables=['target'], evidence={'age': 37, 'sex': 1})

# Print the results
print("Probability Distribution for Heart Disease:")
print(q)
print("Values (Probabilities):")
print(q.values)  # Probabilities of each state of 'target'


Probability Distribution for Heart Disease:
+-----------+---------------+
| target    |   phi(target) |
| target(0) |        0.3700 |
+-----------+---------------+
| target(1) |        0.6300 |
+-----------+---------------+
Values (Probabilities):
[0.37001404 0.62998596]


In [21]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [22]:
# Define column names as per the dataset
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 
         'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease']

# Load the dataset, skipping the header row provided by the 'names' parameter
heartDisease = pd.read_csv('heart.csv', names=names, skiprows=1)
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [23]:
# Replace missing values marked as '?'
heartDisease = heartDisease.replace('?', np.nan)

# Convert relevant columns to numeric types
heartDisease = heartDisease.apply(pd.to_numeric, errors='coerce')

# Define the Bayesian Network structure
model = BayesianNetwork([
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol')
])

# Train the model using Maximum Likelihood Estimator
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

In [24]:
# Perform inference
HeartDisease_infer = VariableElimination(model)

# Query the model: Predict heart disease for a 37-year-old male
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 37, 'sex': 0})

# Print the results
print("Probability Distribution for Heart Disease:")
print(q)
print("Values (Probabilities):")
print(q.values)  # Probabilities of each state of 'heartdisease'

Probability Distribution for Heart Disease:
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.4535 |
+-----------------+---------------------+
| heartdisease(1) |              0.5465 |
+-----------------+---------------------+
Values (Probabilities):
[0.45346581 0.54653419]
