In [1]:
import pandas as pd
import numpy  as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
import random
from sklearn.utils import resample
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow.keras.models import Model,Sequential,load_model
from tensorflow.keras.layers import Input,Dense,Flatten,Embedding,Conv1D,MaxPool1D,concatenate,Dropout
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import SVC
from catboost import CatBoostClassifier
from imblearn.under_sampling import RandomUnderSampler 

2022-12-14 19:50:50.132474: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-14 19:50:50.132506: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-12-14 19:50:51.863537: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-12-14 19:50:51.863819: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


In [2]:
n = 319795
skip = sorted(random.sample(range(1,319795),319795-n))
DF = pd.read_csv('../../../datasets/heart_2020_cleaned.csv',skiprows=skip).reset_index(drop=True).rename(
columns={'HeartDisease':'target'})

In [3]:
TARGET = 'target'
NumCols = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']
CatCols = DF.columns.difference([*NumCols,'target','Race'])
DF = DF[[TARGET,*NumCols,*CatCols]]

In [4]:
def label_encoder(DF,cols = []):
    DF = DF.copy()
    for col in cols:
        encoder = LabelEncoder()
        DF[col] = encoder.fit_transform(DF[col])
    return DF

# <font color='green'>Label Encoder </font>

In [5]:
DF = label_encoder(DF,[TARGET,*CatCols])[[TARGET,*NumCols,*CatCols]]

# <font color='green'>Scale Numerical Feature</font>

In [6]:
def feature_scaler(DF,Cols):
    DF = DF.copy()
    for col in Cols:
        ss = MinMaxScaler()
        DF[col] = ss.fit_transform(DF[[col]])
    return DF

In [7]:
DF = feature_scaler(DF,NumCols)

# <font color='green'>Train Test Split</font>

In [8]:
X_train, X_test, Y_train, Y_test = train_test_split(
    DF[DF.columns.difference([TARGET])],
    DF[TARGET],
    test_size=0.2, random_state=42
)
DF_train = pd.concat([X_train,Y_train],axis=1)

# <font color='green'>Drop Noise Data</font>

In [9]:
DF_train.shape

(255836, 17)

In [10]:
# OutlierDetector = LocalOutlierFactor(n_neighbors=int(DF_train.shape[0]/100))
# Y_predit = OutlierDetector.fit_predict(DF_train[DF_train.columns.difference([TARGET])])
# print('noise data : '+str(Y_predit[Y_predit==-1].shape[0] / DF_train.shape[0] * 100)+'%') 

In [11]:
# DF_train['is_noise'] = Y_predit
# DF_train = DF_train[DF_train['is_noise'] == 1]
# DF_train = DF_train[DF_train.columns.difference(['is_noise'])]

In [12]:
DF_train.shape

(255836, 17)

# <font color='green'>Handle Imbalanced Data</font>

In [13]:
smoteomek = RandomUnderSampler(random_state=3020)
X_train, Y_train = smoteomek.fit_resample(DF_train[DF_train.columns.difference([TARGET])], DF_train[TARGET] )

In [14]:
class_weight = dict(enumerate(
    compute_class_weight(
        class_weight='balanced',
        classes=np.unique(Y_train),
        y=Y_train
    )
))
class_weight

{0: 1.0, 1: 1.0}

# <font color='green'>Model Evaluation : (Random Forest)</font>

In [15]:
model = RandomForestClassifier(
    random_state=3020,
    max_features=17,
#     max_depth=10,
    class_weight=class_weight,
    verbose=True
)
model.fit(X_train, Y_train)

Y_predicted = model.predict(X_test)
print(metrics.classification_report(Y_test,Y_predicted))
print(metrics.confusion_matrix(Y_test,Y_predicted))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   12.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


              precision    recall  f1-score   support

           0       0.97      0.71      0.82     58367
           1       0.20      0.76      0.32      5592

    accuracy                           0.71     63959
   macro avg       0.58      0.73      0.57     63959
weighted avg       0.90      0.71      0.78     63959

[[41447 16920]
 [ 1343  4249]]


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    1.4s finished


In [16]:
model = CatBoostClassifier(
#     iterations=200,
#     learning_rate=.01,
#     depth=16,
#     eval_metric='Accuracy',
    random_state=3020,
#     verbose=False
    class_weights=class_weight
)
model.fit(X_train, Y_train)

Y_predicted = model.predict(X_test)
print(metrics.classification_report(Y_test,Y_predicted))
print(metrics.confusion_matrix(Y_test,Y_predicted))

Learning rate set to 0.051622
0:	learn: 0.6744364	total: 70.8ms	remaining: 1m 10s
1:	learn: 0.6568843	total: 84.4ms	remaining: 42.1s
2:	learn: 0.6425082	total: 97.7ms	remaining: 32.5s
3:	learn: 0.6298706	total: 114ms	remaining: 28.3s
4:	learn: 0.6175121	total: 127ms	remaining: 25.3s
5:	learn: 0.6058621	total: 138ms	remaining: 22.8s
6:	learn: 0.5960257	total: 153ms	remaining: 21.8s
7:	learn: 0.5874155	total: 167ms	remaining: 20.7s
8:	learn: 0.5797113	total: 177ms	remaining: 19.5s
9:	learn: 0.5722098	total: 188ms	remaining: 18.6s
10:	learn: 0.5655423	total: 202ms	remaining: 18.2s
11:	learn: 0.5592682	total: 215ms	remaining: 17.7s
12:	learn: 0.5536223	total: 230ms	remaining: 17.4s
13:	learn: 0.5485079	total: 246ms	remaining: 17.3s
14:	learn: 0.5441146	total: 259ms	remaining: 17s
15:	learn: 0.5403179	total: 270ms	remaining: 16.6s
16:	learn: 0.5366575	total: 278ms	remaining: 16.1s
17:	learn: 0.5329053	total: 289ms	remaining: 15.8s
18:	learn: 0.5294683	total: 299ms	remaining: 15.4s
19:	learn

163:	learn: 0.4780961	total: 2.23s	remaining: 11.4s
164:	learn: 0.4780413	total: 2.25s	remaining: 11.4s
165:	learn: 0.4779959	total: 2.26s	remaining: 11.4s
166:	learn: 0.4779396	total: 2.28s	remaining: 11.4s
167:	learn: 0.4778556	total: 2.29s	remaining: 11.4s
168:	learn: 0.4778019	total: 2.3s	remaining: 11.3s
169:	learn: 0.4777266	total: 2.31s	remaining: 11.3s
170:	learn: 0.4776731	total: 2.32s	remaining: 11.3s
171:	learn: 0.4776017	total: 2.33s	remaining: 11.2s
172:	learn: 0.4775448	total: 2.35s	remaining: 11.2s
173:	learn: 0.4774503	total: 2.35s	remaining: 11.2s
174:	learn: 0.4773765	total: 2.37s	remaining: 11.2s
175:	learn: 0.4773139	total: 2.38s	remaining: 11.1s
176:	learn: 0.4772248	total: 2.39s	remaining: 11.1s
177:	learn: 0.4771626	total: 2.4s	remaining: 11.1s
178:	learn: 0.4770836	total: 2.41s	remaining: 11s
179:	learn: 0.4770043	total: 2.42s	remaining: 11s
180:	learn: 0.4769572	total: 2.43s	remaining: 11s
181:	learn: 0.4769001	total: 2.44s	remaining: 11s
182:	learn: 0.4768304	

332:	learn: 0.4671191	total: 4.55s	remaining: 9.11s
333:	learn: 0.4670523	total: 4.56s	remaining: 9.09s
334:	learn: 0.4669993	total: 4.58s	remaining: 9.08s
335:	learn: 0.4669480	total: 4.59s	remaining: 9.07s
336:	learn: 0.4668950	total: 4.6s	remaining: 9.06s
337:	learn: 0.4668358	total: 4.62s	remaining: 9.04s
338:	learn: 0.4667811	total: 4.63s	remaining: 9.03s
339:	learn: 0.4667441	total: 4.65s	remaining: 9.02s
340:	learn: 0.4666908	total: 4.66s	remaining: 9.01s
341:	learn: 0.4666432	total: 4.67s	remaining: 8.99s
342:	learn: 0.4665896	total: 4.69s	remaining: 8.98s
343:	learn: 0.4665238	total: 4.7s	remaining: 8.97s
344:	learn: 0.4664669	total: 4.72s	remaining: 8.96s
345:	learn: 0.4664061	total: 4.73s	remaining: 8.94s
346:	learn: 0.4663430	total: 4.75s	remaining: 8.94s
347:	learn: 0.4662724	total: 4.76s	remaining: 8.92s
348:	learn: 0.4662006	total: 4.78s	remaining: 8.91s
349:	learn: 0.4661201	total: 4.79s	remaining: 8.89s
350:	learn: 0.4660529	total: 4.81s	remaining: 8.89s
351:	learn: 0.

491:	learn: 0.4587772	total: 6.4s	remaining: 6.61s
492:	learn: 0.4587365	total: 6.41s	remaining: 6.59s
493:	learn: 0.4586669	total: 6.43s	remaining: 6.58s
494:	learn: 0.4586167	total: 6.44s	remaining: 6.57s
495:	learn: 0.4585923	total: 6.46s	remaining: 6.56s
496:	learn: 0.4585462	total: 6.47s	remaining: 6.54s
497:	learn: 0.4584659	total: 6.48s	remaining: 6.53s
498:	learn: 0.4584166	total: 6.49s	remaining: 6.51s
499:	learn: 0.4583639	total: 6.5s	remaining: 6.5s
500:	learn: 0.4583378	total: 6.51s	remaining: 6.48s
501:	learn: 0.4582936	total: 6.52s	remaining: 6.47s
502:	learn: 0.4582637	total: 6.53s	remaining: 6.45s
503:	learn: 0.4582187	total: 6.54s	remaining: 6.43s
504:	learn: 0.4581692	total: 6.55s	remaining: 6.42s
505:	learn: 0.4581227	total: 6.56s	remaining: 6.4s
506:	learn: 0.4580815	total: 6.57s	remaining: 6.39s
507:	learn: 0.4580260	total: 6.58s	remaining: 6.37s
508:	learn: 0.4579564	total: 6.59s	remaining: 6.36s
509:	learn: 0.4579053	total: 6.61s	remaining: 6.35s
510:	learn: 0.45

662:	learn: 0.4509633	total: 8.26s	remaining: 4.2s
663:	learn: 0.4509280	total: 8.27s	remaining: 4.18s
664:	learn: 0.4508698	total: 8.29s	remaining: 4.17s
665:	learn: 0.4508288	total: 8.3s	remaining: 4.16s
666:	learn: 0.4507923	total: 8.31s	remaining: 4.15s
667:	learn: 0.4507525	total: 8.32s	remaining: 4.13s
668:	learn: 0.4507131	total: 8.33s	remaining: 4.12s
669:	learn: 0.4506742	total: 8.34s	remaining: 4.11s
670:	learn: 0.4506209	total: 8.35s	remaining: 4.09s
671:	learn: 0.4505753	total: 8.36s	remaining: 4.08s
672:	learn: 0.4505333	total: 8.37s	remaining: 4.06s
673:	learn: 0.4504928	total: 8.38s	remaining: 4.05s
674:	learn: 0.4504370	total: 8.39s	remaining: 4.04s
675:	learn: 0.4503974	total: 8.4s	remaining: 4.02s
676:	learn: 0.4503627	total: 8.41s	remaining: 4.01s
677:	learn: 0.4503039	total: 8.42s	remaining: 4s
678:	learn: 0.4502509	total: 8.43s	remaining: 3.98s
679:	learn: 0.4501854	total: 8.44s	remaining: 3.97s
680:	learn: 0.4501587	total: 8.45s	remaining: 3.96s
681:	learn: 0.4501

834:	learn: 0.4435503	total: 10.1s	remaining: 2s
835:	learn: 0.4435172	total: 10.1s	remaining: 1.98s
836:	learn: 0.4434662	total: 10.1s	remaining: 1.97s
837:	learn: 0.4434246	total: 10.1s	remaining: 1.96s
838:	learn: 0.4433653	total: 10.2s	remaining: 1.95s
839:	learn: 0.4433429	total: 10.2s	remaining: 1.94s
840:	learn: 0.4433099	total: 10.2s	remaining: 1.92s
841:	learn: 0.4432603	total: 10.2s	remaining: 1.91s
842:	learn: 0.4432140	total: 10.2s	remaining: 1.9s
843:	learn: 0.4431709	total: 10.2s	remaining: 1.89s
844:	learn: 0.4431322	total: 10.2s	remaining: 1.87s
845:	learn: 0.4430988	total: 10.2s	remaining: 1.86s
846:	learn: 0.4430770	total: 10.2s	remaining: 1.85s
847:	learn: 0.4430269	total: 10.2s	remaining: 1.84s
848:	learn: 0.4429879	total: 10.3s	remaining: 1.82s
849:	learn: 0.4429421	total: 10.3s	remaining: 1.81s
850:	learn: 0.4429030	total: 10.3s	remaining: 1.8s
851:	learn: 0.4428674	total: 10.3s	remaining: 1.79s
852:	learn: 0.4428289	total: 10.3s	remaining: 1.77s
853:	learn: 0.442

              precision    recall  f1-score   support

           0       0.97      0.72      0.83     58367
           1       0.22      0.80      0.34      5592

    accuracy                           0.73     63959
   macro avg       0.60      0.76      0.59     63959
weighted avg       0.91      0.73      0.79     63959

[[42312 16055]
 [ 1132  4460]]


# <font color='green'>Model Evaluation : (CNN Deep learning)</font>

In [17]:
input1 = Input(shape=(X_train.shape[1],1))
flat1 = Flatten()(input1)

flatX = concatenate([flat1])
dense1 = Dense(50,activation='relu')(flatX)
output = Dense(2,activation='softmax')(dense1)

model = Model(inputs=[input1],outputs=output)
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.01),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
)

2022-12-14 19:51:23.118114: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-12-14 19:51:23.118144: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-14 19:51:23.118183: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: masoud-Aspire-V3-571G
2022-12-14 19:51:23.118192: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: masoud-Aspire-V3-571G
2022-12-14 19:51:23.118317: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program
2022-12-14 19:51:23.118379: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is:

In [18]:
history = model.fit(
    [X_train],
    Y_train,
    epochs=40,
    batch_size=200,
    validation_data=([X_test],Y_test),
    class_weight=class_weight
)

Epoch 1/40


  output, from_logits = _get_logits(




  output, from_logits = _get_logits(


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [20]:
Y_predicted = model.predict([X_test])
Y_predicted = [np.argmax(x) for x in Y_predicted]
# Y_predicted = [1 if x[1]>.4 else 0 for x in Y_predicted]

print(metrics.classification_report(Y_test,Y_predicted))
print(metrics.confusion_matrix(Y_test,Y_predicted))

              precision    recall  f1-score   support

           0       0.97      0.71      0.82     58367
           1       0.21      0.81      0.34      5592

    accuracy                           0.72     63959
   macro avg       0.59      0.76      0.58     63959
weighted avg       0.91      0.72      0.78     63959

[[41516 16851]
 [ 1067  4525]]
