Data: https://www.kaggle.com/ronitf/heart-disease-uci
Tutorial: https://www.bilibili.com/video/BV1Wf4y1U7EL?p=6

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd

In [2]:
df = pd.read_csv("heart.csv")
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [3]:
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

In [4]:
# make column names more readable
df.columns=['Age','Gender','Chest Pain Type','Resting Blood Pressure','Cholesterol',
           'Fasting Blood Sugar','Rest Ecg','Max Heart Rate','Exercise Induced Angina',
           'St_depression','St_slope','# Major Vessels','Thalassemia','Target']

In [5]:
df.tail()

Unnamed: 0,Age,Gender,Chest Pain Type,Resting Blood Pressure,Cholesterol,Fasting Blood Sugar,Rest Ecg,Max Heart Rate,Exercise Induced Angina,St_depression,St_slope,# Major Vessels,Thalassemia,Target
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0
302,57,0,1,130,236,0,0,174,0,0.0,1,1,2,0


In [6]:
# Change integer representation of categorial value into string
df['Gender'][df['Gender']==0]='Female'
df['Gender'][df['Gender']==1]='Male'

df['Chest Pain Type'][df['Chest Pain Type']==0] = 'typical angina'
df['Chest Pain Type'][df['Chest Pain Type']==1] = 'atypical angina'
df['Chest Pain Type'][df['Chest Pain Type']==2] = 'non-angina pain'
df['Chest Pain Type'][df['Chest Pain Type']==3] = 'asymptomatic'

df['Fasting Blood Sugar'][df['Fasting Blood Sugar']==0] = '<120mg/ml'
df['Fasting Blood Sugar'][df['Fasting Blood Sugar']==1] = '>120mg/ml'

df['Rest Ecg'][df['Rest Ecg']==0] = 'normal'
df['Rest Ecg'][df['Rest Ecg']==0] = 'ST-T wave abnormality'
df['Rest Ecg'][df['Rest Ecg']==0] = 'left ventricular hypertrophy'

df['Exercise Induced Angina'][df['Exercise Induced Angina']==0] = 'No'
df['Exercise Induced Angina'][df['Exercise Induced Angina']==1] = 'Yes'

df['St_slope'][df['St_slope']==0] = 'unsloping'
df['St_slope'][df['St_slope']==1] = 'flat'
df['St_slope'][df['St_slope']==2] = 'downsloping'

df['Thalassemia'][df['Thalassemia']==0] = 'unknwon'
df['Thalassemia'][df['Thalassemia']==1] = 'normal'
df['Thalassemia'][df['Thalassemia']==2] = 'fixed defect'
df['Thalassemia'][df['Thalassemia']==3] = 'reversable defect'

In [7]:
df.tail()

Unnamed: 0,Age,Gender,Chest Pain Type,Resting Blood Pressure,Cholesterol,Fasting Blood Sugar,Rest Ecg,Max Heart Rate,Exercise Induced Angina,St_depression,St_slope,# Major Vessels,Thalassemia,Target
298,57,Female,typical angina,140,241,<120mg/ml,1,123,Yes,0.2,flat,0,reversable defect,0
299,45,Male,asymptomatic,110,264,<120mg/ml,1,132,No,1.2,flat,0,reversable defect,0
300,68,Male,typical angina,144,193,>120mg/ml,1,141,No,3.4,flat,2,reversable defect,0
301,57,Male,typical angina,130,131,<120mg/ml,1,115,Yes,1.2,flat,1,reversable defect,0
302,57,Female,atypical angina,130,236,<120mg/ml,normal,174,No,0.0,flat,1,fixed defect,0


In [10]:
df.dtypes

Age                          int64
Gender                      object
Chest Pain Type             object
Resting Blood Pressure       int64
Cholesterol                  int64
Fasting Blood Sugar         object
Rest Ecg                    object
Max Heart Rate               int64
Exercise Induced Angina     object
St_depression              float64
St_slope                    object
# Major Vessels              int64
Thalassemia                 object
Target                       int64
dtype: object

###### One-hot Encoding

In [11]:
df = pd.get_dummies(df)
df.columns

Index(['Age', 'Resting Blood Pressure', 'Cholesterol', 'Max Heart Rate',
       'St_depression', '# Major Vessels', 'Target', 'Gender_Female',
       'Gender_Male', 'Chest Pain Type_asymptomatic',
       'Chest Pain Type_atypical angina', 'Chest Pain Type_non-angina pain',
       'Chest Pain Type_typical angina', 'Fasting Blood Sugar_<120mg/ml',
       'Fasting Blood Sugar_>120mg/ml', 'Rest Ecg_1', 'Rest Ecg_2',
       'Rest Ecg_normal', 'Exercise Induced Angina_No',
       'Exercise Induced Angina_Yes', 'St_slope_downsloping', 'St_slope_flat',
       'St_slope_unsloping', 'Thalassemia_fixed defect', 'Thalassemia_normal',
       'Thalassemia_reversable defect', 'Thalassemia_unknwon'],
      dtype='object')

In [12]:
df.tail()

Unnamed: 0,Age,Resting Blood Pressure,Cholesterol,Max Heart Rate,St_depression,# Major Vessels,Target,Gender_Female,Gender_Male,Chest Pain Type_asymptomatic,...,Rest Ecg_normal,Exercise Induced Angina_No,Exercise Induced Angina_Yes,St_slope_downsloping,St_slope_flat,St_slope_unsloping,Thalassemia_fixed defect,Thalassemia_normal,Thalassemia_reversable defect,Thalassemia_unknwon
298,57,140,241,123,0.2,0,0,1,0,0,...,0,0,1,0,1,0,0,0,1,0
299,45,110,264,132,1.2,0,0,0,1,1,...,0,1,0,0,1,0,0,0,1,0
300,68,144,193,141,3.4,2,0,0,1,0,...,0,1,0,0,1,0,0,0,1,0
301,57,130,131,115,1.2,1,0,0,1,0,...,0,0,1,0,1,0,0,0,1,0
302,57,130,236,174,0.0,1,0,1,0,0,...,1,1,0,0,1,0,1,0,0,0


In [13]:
df.iloc[0]

Age                                 63.0
Resting Blood Pressure             145.0
Cholesterol                        233.0
Max Heart Rate                     150.0
St_depression                        2.3
# Major Vessels                      0.0
Target                               1.0
Gender_Female                        0.0
Gender_Male                          1.0
Chest Pain Type_asymptomatic         1.0
Chest Pain Type_atypical angina      0.0
Chest Pain Type_non-angina pain      0.0
Chest Pain Type_typical angina       0.0
Fasting Blood Sugar_<120mg/ml        0.0
Fasting Blood Sugar_>120mg/ml        1.0
Rest Ecg_1                           0.0
Rest Ecg_2                           0.0
Rest Ecg_normal                      1.0
Exercise Induced Angina_No           1.0
Exercise Induced Angina_Yes          0.0
St_slope_downsloping                 0.0
St_slope_flat                        0.0
St_slope_unsloping                   1.0
Thalassemia_fixed defect             0.0
Thalassemia_norm

In [14]:
df.to_csv('ProcessedHeart.csv',index=False)