In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the heart-disease dataset from Heart.csv (path is relative to the
# notebook's working directory). The frame is loaded with pandas defaults;
# a later cell removes the "Unnamed: 0" column that comes in with it.
df = pd.read_csv("Heart.csv")

In [3]:
# Remove the "Unnamed: 0" column picked up from the CSV.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps
# this cell idempotent: re-running it after the column is already gone is a
# no-op rather than a KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")

In [4]:
# Preview the first ten rows of the dataset.
df.head(n=10)

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,63,1,typical,145,233,1,2,150,0,2.3,3,0.0,fixed,No
1,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3.0,normal,Yes
2,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2.0,reversable,Yes
3,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0.0,normal,No
4,41,0,nontypical,130,204,0,2,172,0,1.4,1,0.0,normal,No
5,56,1,nontypical,120,236,0,0,178,0,0.8,1,0.0,normal,No
6,62,0,asymptomatic,140,268,0,2,160,0,3.6,3,2.0,normal,Yes
7,57,0,asymptomatic,120,354,0,0,163,1,0.6,1,0.0,normal,No
8,63,1,asymptomatic,130,254,0,2,147,0,1.4,2,1.0,reversable,Yes
9,53,1,asymptomatic,140,203,1,2,155,1,3.1,3,0.0,reversable,Yes


## Shape of Data 

In [5]:
# Unpack the shape so the two dimensions are named, then report them
# in the same "(rows, cols)" tuple form.
n_rows, n_cols = df.shape
print(f"(Rows, Cols): {(n_rows, n_cols)}")

(Rows, Cols): (303, 14)


## All rows with missing (NaN) values

In [6]:
# Keep only the rows in which at least one column holds a missing value.
df.loc[df.isnull().any(axis="columns")]

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
87,53,0,nonanginal,128,216,0,2,115,0,0.0,1,0.0,,No
166,52,1,nonanginal,138,223,0,0,169,0,0.0,1,,normal,No
192,43,1,asymptomatic,132,247,1,2,143,1,0.1,2,,reversable,Yes
266,52,1,asymptomatic,128,204,1,0,156,1,1.0,2,0.0,,Yes
287,58,1,nontypical,125,220,0,0,144,0,0.4,2,,reversable,No
302,38,1,nonanginal,138,175,0,0,173,0,0.0,1,,normal,No


## Data type of all columns

In [7]:
# Show the dtype pandas inferred for each column; text columns appear as `object`.
df.dtypes

Age            int64
Sex            int64
ChestPain     object
RestBP         int64
Chol           int64
Fbs            int64
RestECG        int64
MaxHR          int64
ExAng          int64
Oldpeak      float64
Slope          int64
Ca           float64
Thal          object
AHD           object
dtype: object

## Mean age of patients

In [8]:
# Average only the Age column. Calling df.mean() on the whole frame also
# tries to average the text columns (ChestPain, Thal, AHD), which raises a
# TypeError on pandas >= 2.0 and does needless work on older versions.
mean_age = df["Age"].mean()
print(f"Mean Age: {round(mean_age, 2)}")

Mean Age: 54.44


## Modified DataFrame (first five columns only)

In [9]:
# Build the working subset: every row, restricted to the first five columns.
df1 = df.loc[:, df.columns[:5]]

In [10]:
# Display the five-column subset (pandas abbreviates long frames in the output).
df1

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol
0,63,1,typical,145,233
1,67,1,asymptomatic,160,286
2,67,1,asymptomatic,120,229
3,37,1,nonanginal,130,250
4,41,0,nontypical,130,204
...,...,...,...,...,...
298,45,1,typical,110,264
299,68,1,asymptomatic,144,193
300,57,1,asymptomatic,130,131
301,57,0,nontypical,130,236


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the subset.
# With default arguments only the numeric columns are summarised, so the
# text column ChestPain does not appear in the result.
df1.describe()

Unnamed: 0,Age,Sex,RestBP,Chol
count,303.0,303.0,303.0,303.0
mean,54.438944,0.679868,131.689769,246.693069
std,9.038662,0.467299,17.599748,51.776918
min,29.0,0.0,94.0,126.0
25%,48.0,0.0,120.0,211.0
50%,56.0,1.0,130.0,241.0
75%,61.0,1.0,140.0,275.0
max,77.0,1.0,200.0,564.0


## Divide the dataset into training (75%) and testing (25%) sets

In [12]:
# Split Age (feature) and RestBP (target) into 75% training / 25% testing.
# random_state pins the shuffle so the same rows land in each partition on
# every run (Restart & Run All reproduces the outputs below); without it the
# split changes each time the cell executes.
x_train, x_test, y_train, y_test = train_test_split(
    df1["Age"],
    df1["RestBP"],
    test_size=0.25,
    random_state=42,
)

In [13]:
# Inspect the training portion of the Age column produced by the split.
x_train

114    62
84     52
176    52
54     60
142    52
       ..
215    56
34     44
180    48
171    53
183    59
Name: Age, Length: 227, dtype: int64

In [14]:
# Inspect the held-out testing portion of the Age column produced by the split.
x_test

235    54
40     65
194    68
83     68
63     54
       ..
297    57
173    62
284    61
243    61
285    58
Name: Age, Length: 76, dtype: int64