## 1. Importing Libraries and Loading Data

In [2]:
import pandas as pd
import plotly.express as px

# Read the raw CSV into `df`, the frame every later cell works from.
# The path is relative, so it resolves against the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="../data/raw/heart.csv")

# Preview the first five rows to confirm the columns parsed as expected.
df.head(n=5)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


## 2. Checking Missing Values

In [3]:
# Count missing entries in each column (one total per column name).
df.isna().sum(axis=0)

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

## 3. Distribution of Age

In [4]:
# Histogram of the `age` column, requesting 20 bins.
fig = px.histogram(
    data_frame=df,
    x="age",
    nbins=20,
    title="Age Distribution",
)
fig.show()


## 4. Gender Count

In [6]:
# Tally rows per `sex` code. `rename_axis` + `reset_index(name=...)` names both
# output columns explicitly, so the result does not depend on the default
# column names that `value_counts().reset_index()` produces (those differ
# between pandas 1.x and 2.x).
gender_counts = (
    df["sex"]
    .value_counts()
    .sort_index()  # ascending code order, so the bar order is deterministic
    .rename_axis("Sex")
    .reset_index(name="Count")
)

fig = px.bar(gender_counts, x="Sex", y="Count", title="Gender Distribution")

# `sex` is stored as integer codes (the data preview shows 0 and 1). Left as-is,
# Plotly places the bars on a continuous numeric axis with fractional ticks;
# forcing a categorical axis gives exactly one tick per code.
fig.update_xaxes(type="category")
fig.show()

## 5. Chest Pain Type Distribution

In [9]:
# Tally rows per `cp` code. `rename_axis` + `reset_index(name=...)` names both
# output columns explicitly, so the result does not depend on the default
# column names that `value_counts().reset_index()` produces (those differ
# between pandas 1.x and 2.x).
cp_counts = (
    df["cp"]
    .value_counts()
    .sort_index()  # ascending code order, so the bar order is deterministic
    .rename_axis("Chest Pain Type")
    .reset_index(name="Count")
)

fig = px.bar(
    cp_counts,
    x="Chest Pain Type",
    y="Count",
    title="Chest Pain Type Distribution",
    text_auto=True,  # print each bar's count on the bar itself
)

# `cp` is stored as integer codes, not a measurement. Force a categorical axis
# so Plotly shows one tick per code instead of a continuous numeric scale.
fig.update_xaxes(type="category")

fig.show()

## 6. Correlation Heatmap

In [16]:
import plotly.express as px

# Pairwise correlation between all columns (pandas' default method is Pearson).
corr_matrix = df.corr()

fig = px.imshow(
    corr_matrix,
    # Two-decimal annotations; `True` would print the full unrounded floats,
    # which overflow the cells of a matrix this size.
    text_auto=".2f",
    # Correlations are signed, so use a diverging scale and pin the range to
    # [-1, 1]: zero sits at the neutral midpoint and colours are comparable
    # regardless of the observed min/max.
    color_continuous_scale="RdBu_r",
    zmin=-1,
    zmax=1,
    title="Correlation Matrix",
)

# Enlarge the figure so every cell annotation stays readable.
fig.update_layout(
    height=800,
    width=1000,
)

fig.show()

## 7. Relationship Between Age and Maximum Heart Rate

In [13]:
# `target` is an integer column, so passing it straight to `color=` makes
# Plotly Express draw a continuous colour bar. Casting it to string makes each
# distinct value a discrete colour with its own legend entry.
# NOTE(review): presumably `target` is a class label — confirm against the data dictionary.
target_labels = df["target"].astype(str)

fig = px.scatter(
    # `assign` returns a new frame; `df` itself keeps its integer `target`.
    df.assign(target=target_labels),
    x="age",
    y="thalach",
    color="target",
    # Fix the legend/colour order instead of relying on row order in the data.
    category_orders={"target": sorted(target_labels.unique())},
    title="Age vs. Maximum Heart Rate",
)
fig.show()

## 8. Distribution of Target Variable

In [14]:
# Pie chart with one sector per distinct `target` value; since no `values`
# column is given, each sector is sized by how many rows carry that value.
fig = px.pie(
    data_frame=df,
    names="target",
    title="Target Distribution",
)
fig.show()