In [1]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np

In [2]:
df = pd.read_csv("cardio_train.csv", delimiter = ";")
df['age_in_years'] = round(df['age'] / 365.25)

BMI is calculated and categorized according to CDC (Center of Disease Control and Prevention).

In [4]:
df['BMI'] = round(df['weight']/((df['height']/100)**2), 1)

# Define conditions and corresponding categories
conditions = [
    (df['BMI'] <= 18.4),
    (df['BMI'] <= 24.9),
    (df['BMI'] <= 39.9),
    (df['BMI'] >= 40.0),
]

categories = [
    "Underweight",
    "Normal",
    "Overweight",
    "Obese"
    # Add more categories corresponding to your conditions
]

# Apply categorization using numpy.select
df['BMI'] = np.select(conditions, categories, default="Other")

american heart association

In [5]:
def interpret_blood_pressure(ap_hi, ap_low):
    if ap_hi < 120 and ap_hi >= 90 and ap_low < 80 and ap_low >= 60 :
        return 'Normal'
    elif ap_hi < 90 and ap_low <= 60:
        return 'Low Blood Pressure'
    elif ap_hi >= 120 and ap_hi <= 129 and ap_low < 80:
        return 'Elevated'
    elif ap_hi > 129 and ap_low >= 80:
        return 'High Blood Pressure'
    else:
        return 'Abnormal'

# Apply the function to create the blood_pressure column
df['blood_pressure'] = df.apply(lambda row: interpret_blood_pressure(row['ap_hi'], row['ap_lo']), axis=1)

In [3]:
# Define conditions and corresponding categories
conditions = [
    (df['cardio'] == 0),
    (df['cardio'] == 1)
]

categories = [
    "No Disease",
    "Have Disease",
]

# Apply categorization using numpy.select
df['cardio_cat'] = np.select(conditions, categories, default="Other")

In [6]:
df.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,age_in_years,cardio_cat,BMI,blood_pressure
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0,50.0,No Disease,Normal,Abnormal
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1,55.0,Have Disease,Overweight,High Blood Pressure
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1,52.0,Have Disease,Normal,Abnormal
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1,48.0,Have Disease,Overweight,High Blood Pressure
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0,48.0,No Disease,Normal,Normal


In [None]:
pip install dash

Collecting dash
  Downloading dash-2.16.1-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: dash-table, dash-html-components, dash-core-components, retrying, dash
Successfully installed dash-2.16.1 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 retrying-1.3.4


In [None]:
pip install altair_viewer

Collecting altair_viewer
  Downloading altair_viewer-0.4.0-py3-none-any.whl (844 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m844.5/844.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Collecting altair-data-server>=0.4.0 (from altair_viewer)
  Downloading altair_data_server-0.4.1-py3-none-any.whl (12 kB)
Installing collected packages: altair-data-server, altair_viewer
Successfully installed altair-data-server-0.4.1 altair_viewer-0.4.0


In [7]:
# Group the DataFrame and count the occurrences
counts = df.groupby(['age_in_years', 'cardio']).size().unstack(fill_value=0).reset_index()

# Reshape the data for Plotly
counts_melted = pd.melt(counts, id_vars='age_in_years', value_vars=[0, 1], var_name='Cardio', value_name='Count')

# Replace cardio values with descriptive labels
counts_melted['Cardio'] = counts_melted['Cardio'].map({0: 'No Disease', 1: 'Have Disease'})

# Plot the bar chart using Plotly
fig_age = px.bar(counts_melted, x='age_in_years', y='Count', color='Cardio', barmode='group',
             color_discrete_map={'No Disease': 'black', 'Have Disease': 'skyblue'})

# Update layout
fig_age.update_layout(title='Age of Cardio Vascular Disease Patients',
                  xaxis_title='Age',
                  yaxis_title='Count of Patients',
                  xaxis=dict(range=[35, counts_melted['age_in_years'].max()]),
                  bargap=0.2, # Expand bar width
                  plot_bgcolor='white')

  sf: grouped.get_group(s if len(s) > 1 else s[0])


In [8]:
# Group the DataFrame and count the occurrences
gender_count = df.groupby(['gender', 'cardio']).size().unstack(fill_value=0).reset_index()

# Reshape the data for Plotly
counts_melted = pd.melt(gender_count, id_vars='gender', value_vars=[0, 1], var_name='Cardio', value_name='Count')

# Replace cardio values with descriptive labels
counts_melted['Cardio'] = counts_melted['Cardio'].map({0: 'No Disease', 1: 'Have Disease'})
counts_melted['gender'] = counts_melted['gender'].map({1: 'Women', 2: 'Men'})

# Plot the bar chart using Plotly
fig_gender = px.bar(counts_melted, x='gender', y='Count', color='Cardio', barmode='group',
             color_discrete_map={'No Disease': 'black', 'Have Disease': 'skyblue'})

# Update layout
fig_gender.update_layout(title='Gender of Cardio Vascular Disease Patients',
                  xaxis_title='Gender',
                  yaxis_title='Count of Patients',
                  bargap=0.2, # Expand bar width
                  plot_bgcolor='white')





In [9]:
# Group the DataFrame and count the occurrences
BMI_count = df.groupby(['BMI', 'cardio']).size().unstack(fill_value=0).reset_index()

# Reshape the data for Plotly
counts_melted = pd.melt(BMI_count, id_vars='BMI', value_vars=[0, 1], var_name='Cardio', value_name='Count')

# Replace cardio values with descriptive labels
counts_melted['Cardio'] = counts_melted['Cardio'].map({0: 'No Disease', 1: 'Have Disease'})

# Plot the bar chart using Plotly
fig_BMI = px.bar(counts_melted, x='BMI', y='Count', color='Cardio', barmode='group',
             color_discrete_map={'No Disease': 'black', 'Have Disease': 'skyblue'})

# Update layout
fig_BMI.update_layout(title='BMI of Cardio Vascular Disease Patients',
                  xaxis_title='BMI',
                  yaxis_title='Count of Patients',
                  bargap=0.2, # Expand bar width
                  plot_bgcolor='white',
                  xaxis=dict(categoryorder='array', categoryarray=['Underweight', 'Normal', 'Overweight', 'Obese']))





In [10]:
# Group the DataFrame and count the occurrences
gender_count = df.groupby(['active', 'cardio']).size().unstack(fill_value=0).reset_index()

# Reshape the data for Plotly
counts_melted = pd.melt(gender_count, id_vars='active', value_vars=[0, 1], var_name='Cardio', value_name='Count')

# Replace cardio values with descriptive labels
counts_melted['Cardio'] = counts_melted['Cardio'].map({0: 'No Disease', 1: 'Have Disease'})
counts_melted['active'] = counts_melted['active'].map({0: 'Not Active', 1: 'Active'})

# Plot the bar chart using Plotly
fig_active = px.bar(counts_melted, x='active', y='Count', color='Cardio', barmode='group',
             color_discrete_map={'No Disease': 'black', 'Have Disease': 'skyblue'})

# Update layout
fig_active.update_layout(title='Physical Activity of Cardio Vascular Disease Patients',
                  xaxis_title='active',
                  yaxis_title='Count of Patients',
                  bargap=0.2, # Expand bar width
                  plot_bgcolor='white')





In [11]:
# Save the plot as an HTML file
fig_active.write_html("active_cardio_plot.html")

In [12]:
# Create the scatter plot
bp = px.scatter(df, x='ap_hi', y='ap_lo', color='blood_pressure',
                 labels={'ap_hi': 'Systolic Blood Pressure (ap_hi)', 'ap_lo': 'Diastolic Blood Pressure (ap_lo)', 'cardio': 'Cardiovascular Disease'},
                 title='Blood Pressure')

# Set x-axis and y-axis range
bp.update_layout(xaxis=dict(range=[0, 300]), yaxis=dict(range=[0, 250]))

# Define custom color map and legend labels
color_map = {'No Disease': 'black', 'Have Disease': 'skyblue'}

# Create the scatter plot
bp_c = px.scatter(df, x='ap_hi', y='ap_lo', color='cardio_cat',
                  labels={'ap_hi': 'Systolic Blood Pressure (ap_hi)', 'ap_lo': 'Diastolic Blood Pressure (ap_lo)', 'cardio': 'Cardiovascular Disease'},
                  title='Blood Pressure of Cardiovascular Disease', color_discrete_map=color_map)

# Set x-axis and y-axis range
bp_c.update_layout(xaxis=dict(range=[0, 300]), yaxis=dict(range=[0, 250]), legend_title = 'Cardio')

# Show the plot
bp.show()
bp_c.show()







In [13]:
bp.write_html("bp.html")
bp_c.write_html("bp_c.html")

In [None]:
# Show the plot
fig_age.show()
fig_gender.show()
fig_BMI.show()
fig_active.show()