# **Adult Data**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy import stats

In [None]:


# Read the Adult census extract from the working directory into `df`.
df = pd.read_csv(filepath_or_buffer='adult.csv')

# Preview the first 20 rows.
df.head(n=20)

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,?,103497,Some-college,10,Never-married,?,Own-child,White,Female,0,0,30,United-States,<=50K
5,34,Private,198693,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
6,29,?,227026,HS-grad,9,Never-married,?,Unmarried,Black,Male,0,0,40,United-States,<=50K
7,63,Self-emp-not-inc,104626,Prof-school,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,3103,0,32,United-States,>50K
8,24,Private,369667,Some-college,10,Never-married,Other-service,Unmarried,White,Female,0,0,40,United-States,<=50K
9,55,Private,104996,7th-8th,4,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,10,United-States,<=50K


In [None]:
# (number of rows, number of columns) of the frame loaded from adult.csv.
df.shape

(48842, 15)

In [None]:
# Column names, non-null counts and dtypes of the Adult frame.
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   age              48842 non-null  int64 
 1   workclass        48842 non-null  object
 2   fnlwgt           48842 non-null  int64 
 3   education        48842 non-null  object
 4   educational-num  48842 non-null  int64 
 5   marital-status   48842 non-null  object
 6   occupation       48842 non-null  object
 7   relationship     48842 non-null  object
 8   race             48842 non-null  object
 9   gender           48842 non-null  object
 10  capital-gain     48842 non-null  int64 
 11  capital-loss     48842 non-null  int64 
 12  hours-per-week   48842 non-null  int64 
 13  native-country   48842 non-null  object
 14  income           48842 non-null  object
dtypes: int64(6), object(9)
memory usage: 5.6+ MB
None


In [None]:
# Per-column count of missing (NaN/None) cells in the Adult frame.
df.isna().sum()

Unnamed: 0,0
age,0
workclass,0
fnlwgt,0
education,0
educational-num,0
marital-status,0
occupation,0
relationship,0
race,0
gender,0


In [None]:
# Deliberately blank out two "hours-per-week" entries (rows 4 and 9) so the
# imputation step has something to fill.
# numpy is already imported as `np` in the notebook's import cell, so the
# redundant mid-notebook re-import is dropped.

# NaN cannot be stored in an int64 column. Cast to float explicitly instead of
# relying on the silent int -> float upcast on assignment, which recent pandas
# releases deprecate.
df["hours-per-week"] = df["hours-per-week"].astype("float64")
df.loc[[4, 9], "hours-per-week"] = np.nan

# Per-column missing-value counts after the injection.
df.isnull().sum()

Unnamed: 0,0
age,0
workclass,0
fnlwgt,0
education,0
educational-num,0
marital-status,0
occupation,0
relationship,0
race,0
gender,0


In [None]:
# Fill missing values in "hours-per-week" with the column's most frequent value.
imputer2 = SimpleImputer(strategy="most_frequent")

# Work on an independent copy. Plain assignment (df_copy = df) only creates a
# second name for the same DataFrame, so the imputation would overwrite df too.
df_copy = df.copy()

# Fit and apply on the same frame; the double brackets keep the data 2-D, which
# is the shape the imputer expects and returns.
df_copy[["hours-per-week"]] = imputer2.fit_transform(df_copy[["hours-per-week"]])

# Sanity check: number of missing values left in the column.
print(df_copy["hours-per-week"].isnull().sum())

0


In [None]:
# Handling categorical attributes

# Encode the income label with an explicit category order:
# "<=50K" -> 0.0, ">50K" -> 1.0.
ordinal_encoder = OrdinalEncoder(categories=[["<=50K", ">50K"]])
df_copy["Income_Encoded"] = ordinal_encoder.fit_transform(df_copy[["income"]])

# One-hot encode "workclass". Read it from df_copy (the frame being
# transformed) rather than mixing df and df_copy within one step.
onehot_encoder = OneHotEncoder()
encoded_data = onehot_encoder.fit_transform(df_copy[["workclass"]])
encoded_array = encoded_data.toarray()  # convert the encoder output to a dense array

# Reuse df_copy's index so that concat pairs rows by label even when the index
# is not the default 0..n-1 range.
encoded_df = pd.DataFrame(
    encoded_array,
    columns=onehot_encoder.get_feature_names_out(["workclass"]),
    index=df_copy.index,
)

# Append the indicator columns, then drop the source columns they replace.
# drop() without inplace returns a new frame and leaves df_copy's columns intact.
df_encoded = pd.concat([df_copy, encoded_df], axis=1).drop(columns=["income", "workclass"])

print(df_encoded.head())

   age  fnlwgt     education  educational-num      marital-status  \
0   25  226802          11th                7       Never-married   
1   38   89814       HS-grad                9  Married-civ-spouse   
2   28  336951    Assoc-acdm               12  Married-civ-spouse   
3   44  160323  Some-college               10  Married-civ-spouse   
4   18  103497  Some-college               10       Never-married   

          occupation relationship   race  gender  capital-gain  ...  \
0  Machine-op-inspct    Own-child  Black    Male             0  ...   
1    Farming-fishing      Husband  White    Male             0  ...   
2    Protective-serv      Husband  White    Male             0  ...   
3  Machine-op-inspct      Husband  Black    Male          7688  ...   
4                  ?    Own-child  White  Female             0  ...   

   Income_Encoded  workclass_? workclass_Federal-gov  workclass_Local-gov  \
0             0.0          0.0                   0.0                  0.0   
1   

In [None]:
# Data transformation
# Min-max normalization: rescale 'fnlwgt' so its values span the range 0-1.

# Learn the column's min/max first, then apply the rescaling in a second step.
normalizer = MinMaxScaler().fit(df_encoded[['fnlwgt']])
df_encoded[['fnlwgt']] = normalizer.transform(df_encoded[['fnlwgt']])

df_encoded.head()

Unnamed: 0,age,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,...,Income_Encoded,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,workclass_Without-pay
0,25,0.145129,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,38,0.052451,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,28,0.219649,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,44,0.100153,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,18,0.061708,Some-college,10,Never-married,?,Own-child,White,Female,0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Standardization: shift/scale 'educational-num' to mean 0 and variance 1.

# Estimate mean and standard deviation first, then apply them in a second step.
scaler = StandardScaler().fit(df_encoded[['educational-num']])
df_encoded[['educational-num']] = scaler.transform(df_encoded[['educational-num']])

df_encoded.head()


Unnamed: 0,age,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,...,Income_Encoded,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,workclass_Without-pay
0,25,0.145129,11th,-1.197259,Never-married,Machine-op-inspct,Own-child,Black,Male,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,38,0.052451,HS-grad,-0.419335,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,28,0.219649,Assoc-acdm,0.74755,Married-civ-spouse,Protective-serv,Husband,White,Male,0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,44,0.100153,Some-college,-0.030373,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,18,0.061708,Some-college,-0.030373,Never-married,?,Own-child,White,Female,0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Outlier treatment using the IQR rule: cap 'fnlwgt' at the Tukey fences.

# Take real copies. Plain assignment would bind all three names to the very
# same DataFrame as df_encoded, so every outlier method would be applied on top
# of the previous one and df_encoded itself would be overwritten as well.
df_encoded_copy1 = df_encoded.copy()
df_encoded_copy2 = df_encoded.copy()
df_encoded_copy3 = df_encoded.copy()

# Fences sit 1.5 * IQR below the first and above the third quartile.
Q1 = df_encoded_copy1['fnlwgt'].quantile(0.25)
Q3 = df_encoded_copy1['fnlwgt'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Values outside the fences are pulled back onto the nearest fence; values
# inside them (and NaN) are left unchanged.
df_encoded_copy1['fnlwgt'] = df_encoded_copy1['fnlwgt'].clip(lower=lower_bound, upper=upper_bound)

print(df_encoded_copy1.head())


   age    fnlwgt     education  educational-num      marital-status  \
0   25  0.145129          11th        -1.197259       Never-married   
1   38  0.052451       HS-grad        -0.419335  Married-civ-spouse   
2   28  0.219649    Assoc-acdm         0.747550  Married-civ-spouse   
3   44  0.100153  Some-college        -0.030373  Married-civ-spouse   
4   18  0.061708  Some-college        -0.030373       Never-married   

          occupation relationship   race  gender  capital-gain  ...  \
0  Machine-op-inspct    Own-child  Black    Male             0  ...   
1    Farming-fishing      Husband  White    Male             0  ...   
2    Protective-serv      Husband  White    Male             0  ...   
3  Machine-op-inspct      Husband  Black    Male          7688  ...   
4                  ?    Own-child  White  Female             0  ...   

   Income_Encoded  workclass_? workclass_Federal-gov  workclass_Local-gov  \
0             0.0          0.0                   0.0                 

In [None]:
# Outlier treatment using z-scores.
# The z-score of every 'fnlwgt' value is stored in its own column; values whose
# absolute z-score exceeds 3 are then blanked out (set to NaN).

df_encoded_copy2['fnlwgt_zscore'] = stats.zscore(df_encoded_copy2['fnlwgt'])
df_encoded_copy2['fnlwgt'] = df_encoded_copy2['fnlwgt'].mask(
    df_encoded_copy2['fnlwgt_zscore'].abs() > 3
)
print(df_encoded_copy2.head())

   age    fnlwgt     education  educational-num      marital-status  \
0   25  0.145129          11th        -1.197259       Never-married   
1   38  0.052451       HS-grad        -0.419335  Married-civ-spouse   
2   28  0.219649    Assoc-acdm         0.747550  Married-civ-spouse   
3   44  0.100153  Some-college        -0.030373  Married-civ-spouse   
4   18  0.061708  Some-college        -0.030373       Never-married   

          occupation relationship   race  gender  capital-gain  ...  \
0  Machine-op-inspct    Own-child  Black    Male             0  ...   
1    Farming-fishing      Husband  White    Male             0  ...   
2    Protective-serv      Husband  White    Male             0  ...   
3  Machine-op-inspct      Husband  Black    Male          7688  ...   
4                  ?    Own-child  White  Female             0  ...   

   workclass_?  workclass_Federal-gov workclass_Local-gov  \
0          0.0                    0.0                 0.0   
1          0.0          

In [None]:
# Outlier treatment: replace 'fnlwgt' values with |z-score| > 3 by the median.

# nan_policy='omit' computes the z-scores from the non-missing values only.
# With the default ('propagate'), a single NaN in the column turns every
# z-score into NaN, no value passes the |z| > 3 test, and this cell silently
# replaces nothing.
df_encoded_copy3['fnlwgt_zscore'] = stats.zscore(df_encoded_copy3['fnlwgt'], nan_policy='omit')

# Series.median() skips NaN by default.
median_fnlwgt = df_encoded_copy3['fnlwgt'].median()

# Where the condition holds, substitute the median; elsewhere keep the value.
df_encoded_copy3['fnlwgt'] = df_encoded_copy3['fnlwgt'].mask(
    df_encoded_copy3['fnlwgt_zscore'].abs() > 3, median_fnlwgt
)
print(df_encoded_copy3.head())

   age    fnlwgt     education  educational-num      marital-status  \
0   25  0.145129          11th        -1.197259       Never-married   
1   38  0.052451       HS-grad        -0.419335  Married-civ-spouse   
2   28  0.219649    Assoc-acdm         0.747550  Married-civ-spouse   
3   44  0.100153  Some-college        -0.030373  Married-civ-spouse   
4   18  0.061708  Some-college        -0.030373       Never-married   

          occupation relationship   race  gender  capital-gain  ...  \
0  Machine-op-inspct    Own-child  Black    Male             0  ...   
1    Farming-fishing      Husband  White    Male             0  ...   
2    Protective-serv      Husband  White    Male             0  ...   
3  Machine-op-inspct      Husband  Black    Male          7688  ...   
4                  ?    Own-child  White  Female             0  ...   

   workclass_?  workclass_Federal-gov workclass_Local-gov  \
0          0.0                    0.0                 0.0   
1          0.0          

# **Diabetes Data**

In [None]:
# Read the diabetes dataset from the working directory into `df2`.
df2 = pd.read_csv(filepath_or_buffer='Dataset_of_Diabetes.csv')

# Preview the first 20 rows.
df2.head(n=20)

Unnamed: 0,ID,No_Pation,Gender,AGE,Urea,Cr,HbA1c,Chol,TG,HDL,LDL,VLDL,BMI,CLASS
0,502,17975,F,50,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,24.0,N
1,735,34221,M,26,4.5,62,4.9,3.7,1.4,1.1,2.1,0.6,23.0,N
2,420,47975,F,50,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,24.0,N
3,680,87656,F,50,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,24.0,N
4,504,34223,M,33,7.1,46,4.9,4.9,1.0,0.8,2.0,0.4,21.0,N
5,634,34224,F,45,2.3,24,4.0,2.9,1.0,1.0,1.5,0.4,21.0,N
6,721,34225,F,50,2.0,50,4.0,3.6,1.3,0.9,2.1,0.6,24.0,N
7,421,34227,M,48,4.7,47,4.0,2.9,0.8,0.9,1.6,0.4,24.0,N
8,670,34229,M,43,2.6,67,4.0,3.8,0.9,2.4,3.7,1.0,21.0,N
9,759,34230,F,32,3.6,28,4.0,3.8,2.0,2.4,3.8,1.0,24.0,N


In [None]:
# Column names, non-null counts and dtypes of the diabetes frame.
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ID         1000 non-null   int64  
 1   No_Pation  1000 non-null   int64  
 2   Gender     1000 non-null   object 
 3   AGE        1000 non-null   int64  
 4   Urea       1000 non-null   float64
 5   Cr         1000 non-null   int64  
 6   HbA1c      1000 non-null   float64
 7   Chol       1000 non-null   float64
 8   TG         1000 non-null   float64
 9   HDL        1000 non-null   float64
 10  LDL        1000 non-null   float64
 11  VLDL       1000 non-null   float64
 12  BMI        1000 non-null   float64
 13  CLASS      1000 non-null   object 
dtypes: float64(8), int64(4), object(2)
memory usage: 109.5+ KB
None


In [None]:
# Per-column count of missing (NaN/None) cells in the diabetes frame.
df2.isna().sum()

Unnamed: 0,0
ID,0
No_Pation,0
Gender,0
AGE,0
Urea,0
Cr,0
HbA1c,0
Chol,0
TG,0
HDL,0


In [None]:
# Deliberately blank out two "AGE" entries (rows 4 and 9) so the imputation
# step has something to fill.
# numpy is already imported as `np` in the notebook's import cell, so the
# redundant mid-notebook re-import is dropped.

# NaN cannot be stored in an int64 column. Cast to float explicitly instead of
# relying on the silent int -> float upcast on assignment, which recent pandas
# releases deprecate.
df2["AGE"] = df2["AGE"].astype("float64")
df2.loc[[4, 9], "AGE"] = np.nan

# Per-column missing-value counts after the injection.
df2.isnull().sum()

Unnamed: 0,0
ID,0
No_Pation,0
Gender,0
AGE,2
Urea,0
Cr,0
HbA1c,0
Chol,0
TG,0
HDL,0


In [None]:
# Fill missing values in "AGE" with the column's most frequent value.
imputer2 = SimpleImputer(strategy="most_frequent")

# Work on an independent copy. Plain assignment (df_copy2 = df2) only creates a
# second name for the same DataFrame, so the imputation would overwrite df2 too.
df_copy2 = df2.copy()

# Fit and apply on the same frame; the double brackets keep the data 2-D, which
# is the shape the imputer expects and returns.
df_copy2[["AGE"]] = imputer2.fit_transform(df_copy2[["AGE"]])

# Sanity check: number of missing values left in the column.
print(df_copy2["AGE"].isnull().sum())

0


In [None]:
# Handling categorical attributes

# Upper-case the labels so the encoder only sees the two categories listed
# below (presumably the raw column mixes case -- confirm against the data).
df_copy2["Gender"] = df_copy2["Gender"].str.upper()

# Explicit category order: "M" -> 0.0, "F" -> 1.0.
ordinal_encoder = OrdinalEncoder(categories=[["M", "F"]])
df_copy2["Gender_Encoded"] = ordinal_encoder.fit_transform(df_copy2[["Gender"]])

# Strip surrounding whitespace so padded labels do not become separate one-hot
# categories. Work on df_copy2 throughout instead of switching between df2 and
# df_copy2 within one step.
df_copy2["CLASS"] = df_copy2["CLASS"].str.strip()
onehot_encoder = OneHotEncoder()
encoded_data = onehot_encoder.fit_transform(df_copy2[["CLASS"]])
encoded_array = encoded_data.toarray()  # convert the encoder output to a dense array

# Reuse df_copy2's index so that concat pairs rows by label even when the index
# is not the default 0..n-1 range.
encoded_df2 = pd.DataFrame(
    encoded_array,
    columns=onehot_encoder.get_feature_names_out(["CLASS"]),
    index=df_copy2.index,
)

# Append the indicator columns, then drop the source columns they replace.
# drop() without inplace returns a new frame and leaves df_copy2's columns intact.
df_encoded2 = pd.concat([df_copy2, encoded_df2], axis=1).drop(columns=["Gender", "CLASS"])

print(df_encoded2.head())

    ID  No_Pation   AGE  Urea  Cr  HbA1c  Chol   TG  HDL  LDL  VLDL   BMI  \
0  502      17975  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  24.0   
1  735      34221  26.0   4.5  62    4.9   3.7  1.4  1.1  2.1   0.6  23.0   
2  420      47975  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  24.0   
3  680      87656  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  24.0   
4  504      34223  55.0   7.1  46    4.9   4.9  1.0  0.8  2.0   0.4  21.0   

   Gender_Encoded  CLASS_N  CLASS_P  CLASS_Y  
0             1.0      1.0      0.0      0.0  
1             0.0      1.0      0.0      0.0  
2             1.0      1.0      0.0      0.0  
3             1.0      1.0      0.0      0.0  
4             0.0      1.0      0.0      0.0  


In [None]:
# Data transformation
# Min-max normalization: rescale 'BMI' so its values span the range 0-1.

# Learn the column's min/max first, then apply the rescaling in a second step.
normalizer = MinMaxScaler().fit(df_encoded2[['BMI']])
df_encoded2[['BMI']] = normalizer.transform(df_encoded2[['BMI']])

df_encoded2.head()

Unnamed: 0,ID,No_Pation,AGE,Urea,Cr,HbA1c,Chol,TG,HDL,LDL,VLDL,BMI,Gender_Encoded,CLASS_N,CLASS_P,CLASS_Y
0,502,-0.074747,50.0,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,0.173913,1.0,1.0,0.0,0.0
1,735,-0.06994,26.0,4.5,62,4.9,3.7,1.4,1.1,2.1,0.6,0.13913,0.0,1.0,0.0,0.0
2,420,-0.065869,50.0,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,0.173913,1.0,1.0,0.0,0.0
3,680,-0.054126,50.0,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,0.173913,1.0,1.0,0.0,0.0
4,504,-0.069939,55.0,7.1,46,4.9,4.9,1.0,0.8,2.0,0.4,0.069565,0.0,1.0,0.0,0.0


In [None]:
# Standardization: shift/scale 'No_Pation' to mean 0 and variance 1.

# Estimate mean and standard deviation first, then apply them in a second step.
scaler = StandardScaler().fit(df_encoded2[['No_Pation']])
df_encoded2[['No_Pation']] = scaler.transform(df_encoded2[['No_Pation']])

df_encoded2.head()

Unnamed: 0,ID,No_Pation,AGE,Urea,Cr,HbA1c,Chol,TG,HDL,LDL,VLDL,BMI,Gender_Encoded,CLASS_N,CLASS_P,CLASS_Y
0,502,-0.074747,50.0,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,24.0,1.0,1.0,0.0,0.0
1,735,-0.06994,26.0,4.5,62,4.9,3.7,1.4,1.1,2.1,0.6,23.0,0.0,1.0,0.0,0.0
2,420,-0.065869,50.0,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,24.0,1.0,1.0,0.0,0.0
3,680,-0.054126,50.0,4.7,46,4.9,4.2,0.9,2.4,1.4,0.5,24.0,1.0,1.0,0.0,0.0
4,504,-0.069939,55.0,7.1,46,4.9,4.9,1.0,0.8,2.0,0.4,21.0,0.0,1.0,0.0,0.0


In [None]:
# Outlier treatment using the IQR rule: cap 'No_Pation' at the Tukey fences.

# Take real copies. Plain assignment would bind all three names to the very
# same DataFrame as df_encoded2, so every outlier method would be applied on
# top of the previous one and df_encoded2 itself would be overwritten as well.
df_encoded_copy1 = df_encoded2.copy()
df_encoded_copy2 = df_encoded2.copy()
df_encoded_copy3 = df_encoded2.copy()

# Fences sit 1.5 * IQR below the first and above the third quartile.
Q1 = df_encoded_copy1['No_Pation'].quantile(0.25)
Q3 = df_encoded_copy1['No_Pation'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Values outside the fences are pulled back onto the nearest fence; values
# inside them (and NaN) are left unchanged.
df_encoded_copy1['No_Pation'] = df_encoded_copy1['No_Pation'].clip(lower=lower_bound, upper=upper_bound)

print(df_encoded_copy1.head())


    ID  No_Pation   AGE  Urea  Cr  HbA1c  Chol   TG  HDL  LDL  VLDL       BMI  \
0  502  -0.074747  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
1  735  -0.069940  26.0   4.5  62    4.9   3.7  1.4  1.1  2.1   0.6  0.139130   
2  420  -0.065869  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
3  680  -0.057172  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
4  504  -0.069939  55.0   7.1  46    4.9   4.9  1.0  0.8  2.0   0.4  0.069565   

   Gender_Encoded  CLASS_N  CLASS_P  CLASS_Y  
0             1.0      1.0      0.0      0.0  
1             0.0      1.0      0.0      0.0  
2             1.0      1.0      0.0      0.0  
3             1.0      1.0      0.0      0.0  
4             0.0      1.0      0.0      0.0  


In [None]:
# Outlier treatment using z-scores: blank out (NaN) 'No_Pation' values whose
# absolute z-score exceeds 3.

# Keep the z-scores in their own column, as the Adult section's z-score cell
# does with 'fnlwgt_zscore'. Writing them back into 'No_Pation' would replace
# the feature itself by its z-score and lose the values being screened.
df_encoded_copy2['No_Pation_zscore'] = stats.zscore(df_encoded_copy2['No_Pation'])

# Replace outliers with NaN; all other values are kept as they are.
df_encoded_copy2['No_Pation'] = df_encoded_copy2['No_Pation'].mask(
    df_encoded_copy2['No_Pation_zscore'].abs() > 3
)
print(df_encoded_copy2.head())

    ID  No_Pation   AGE  Urea  Cr  HbA1c  Chol   TG  HDL  LDL  VLDL       BMI  \
0  502  -0.919118  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
1  735  -0.087690  26.0   4.5  62    4.9   3.7  1.4  1.1  2.1   0.6  0.139130   
2  420   0.616204  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
3  680   2.120307  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
4  504  -0.087588  55.0   7.1  46    4.9   4.9  1.0  0.8  2.0   0.4  0.069565   

   Gender_Encoded  CLASS_N  CLASS_P  CLASS_Y  
0             1.0      1.0      0.0      0.0  
1             0.0      1.0      0.0      0.0  
2             1.0      1.0      0.0      0.0  
3             1.0      1.0      0.0      0.0  
4             0.0      1.0      0.0      0.0  


In [None]:
# Outlier treatment: replace 'No_Pation' values with |z-score| > 3 by the median.

# nan_policy='omit' computes the z-scores from the non-missing values only.
# With the default ('propagate'), a single NaN in the column turns every
# z-score into NaN, no value passes the |z| > 3 test, and this cell silently
# replaces nothing.
df_encoded_copy3['No_Pation_zscore'] = stats.zscore(df_encoded_copy3['No_Pation'], nan_policy='omit')

# Named after the column it describes (the earlier name, median_fnlwgt, was a
# leftover from the Adult section). Series.median() skips NaN by default.
median_no_pation = df_encoded_copy3['No_Pation'].median()

# Where the condition holds, substitute the median; elsewhere keep the value.
df_encoded_copy3['No_Pation'] = df_encoded_copy3['No_Pation'].mask(
    df_encoded_copy3['No_Pation_zscore'].abs() > 3, median_no_pation
)
print(df_encoded_copy3.head())

    ID  No_Pation   AGE  Urea  Cr  HbA1c  Chol   TG  HDL  LDL  VLDL       BMI  \
0  502  -0.919118  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
1  735  -0.087690  26.0   4.5  62    4.9   3.7  1.4  1.1  2.1   0.6  0.139130   
2  420   0.616204  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
3  680   2.120307  50.0   4.7  46    4.9   4.2  0.9  2.4  1.4   0.5  0.173913   
4  504  -0.087588  55.0   7.1  46    4.9   4.9  1.0  0.8  2.0   0.4  0.069565   

   Gender_Encoded  CLASS_N  CLASS_P  CLASS_Y  No_Pation_zscore  
0             1.0      1.0      0.0      0.0         -0.919118  
1             0.0      1.0      0.0      0.0         -0.087690  
2             1.0      1.0      0.0      0.0          0.616204  
3             1.0      1.0      0.0      0.0          2.120307  
4             0.0      1.0      0.0      0.0         -0.087588  
