## Klasifikacija zdravlja fetusa na temelju kardiotokografije

### Projekt u sklopu predmeta Strojno učenje

Učitavanje podataka:

In [1]:
import pandas as pd

In [2]:
#imena stupaca u CTG.xls koje ćemo koristiti
column_names = ['LB', 'AC', 'FM', 'UC', 'DL', 'DS', 'DP', 'ASTV', 'MSTV', 'ALTV', 'MLTV', 'Width', 'Min', 'Max',
                   'Nmax', 'Nzeros', 'Mode', 'Mean', 'Median', 'Variance', 'Tendency', 'NSP']

#stupci koji sadrže int vrijednosti, samo za ljepši ispis
int_columns = ['LB' ,'ASTV' ,'ALTV' ,'Width' ,'Min' ,'Max' ,'Nmax' ,'Nzeros' ,'Mode' ,'Mean' ,'Median' ,'Variance',
               'Tendency', 'NSP']

In [3]:
data = pd.read_excel('CTG.xls', sheet_name = 'Data', skiprows = 1, 
                     usecols = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 45]
                     , names = column_names)

#izbacujemo sve retke u kojima je barem jedna varijabla nepoznata (NaN)
data = data.dropna()

for col in int_columns:
    data[col] = (data[col]).astype(int)

In [4]:
data.head(15)

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,...,Min,Max,Nmax,Nzeros,Mode,Mean,Median,Variance,Tendency,NSP
0,120,0.0,0.0,0.0,0.0,0.0,0.0,73,0.5,43,...,62,126,2,0,120,137,121,73,1,2
1,132,0.00638,0.0,0.00638,0.00319,0.0,0.0,17,2.1,0,...,68,198,6,1,141,136,140,12,0,1
2,133,0.003322,0.0,0.008306,0.003322,0.0,0.0,16,2.1,0,...,68,198,5,1,141,135,138,13,0,1
3,134,0.002561,0.0,0.007682,0.002561,0.0,0.0,16,2.4,0,...,53,170,11,0,137,134,137,13,1,1
4,132,0.006515,0.0,0.008143,0.0,0.0,0.0,16,2.4,0,...,53,170,9,0,137,136,138,11,1,1
5,134,0.001049,0.0,0.010493,0.009444,0.0,0.002099,26,5.9,0,...,50,200,5,3,76,107,107,170,0,3
6,134,0.001403,0.0,0.012623,0.008415,0.0,0.002805,29,6.3,0,...,50,200,6,3,71,107,106,215,0,3
7,122,0.0,0.0,0.0,0.0,0.0,0.0,83,0.5,6,...,62,130,0,0,122,122,123,3,1,3
8,122,0.0,0.0,0.001517,0.0,0.0,0.0,84,0.5,5,...,62,130,0,0,122,122,123,3,1,3
9,122,0.0,0.0,0.002967,0.0,0.0,0.0,86,0.3,6,...,62,130,1,0,122,122,123,1,1,3


In [5]:
X = data.iloc[:,:-1].values # podaci bez NSP dijagnoze
y = data.iloc[:,-1].values # NSP dijagnoza

Standarizacija podataka:

In [6]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit_transform( X )

array([[-1.35222005, -0.82140279, -0.20304786, ..., -1.18164215,
         1.87056871,  1.11298001],
       [-0.1325256 ,  0.8318261 , -0.20304786, ...,  0.13203796,
        -0.23499819, -0.52452553],
       [-0.03088439,  0.0395395 , -0.20304786, ..., -0.00624416,
        -0.2004807 , -0.52452553],
       ...,
       [ 0.68060404, -0.56734041, -0.20304786, ...,  0.96173066,
        -0.51113811,  1.11298001],
       [ 0.68060404, -0.64547365, -0.20304786, ...,  0.8925896 ,
        -0.51113811,  1.11298001],
       [ 0.88388645, -0.40275396, -0.16842402, ...,  0.47774325,
        -0.61469058, -0.52452553]])

Testiranje nekoliko modela na nepromijenjenim podacima (prije oversamplinga/undesamplinga):

XGBoost s 5-fold stratified cross-validation(sačuvan omjer klasa u svim podskupovima podataka):

In [8]:
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold # koristit ćemo 5-fold stratified cross-validation
from sklearn.model_selection import cross_val_score

xgb = xgb.XGBClassifier()
skfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 7) 
# I suggest you select a random state value at random and use it for all your experiments. 
xgb_results = cross_val_score(xgb, X, y, cv = skfold)
# The fitting will be done inside the cross_val_score function, you don't need to worry about this beforehand.
# But if you want to use the model afterwards, you will need to fit() on the whole data again.
print("Točnost XGBoost modela: %.2f%% (standardna devijacija %.2f%%)" % (xgb_results.mean()*100, xgb_results.std()*100))

Točnost XGBoost modela: 94.59% (standardna devijacija 0.65%)


Prosjek recall-a za patološku klasu (ako se koristi stratifikacija, prosjek recall-a je OK):

In [10]:
import numpy as np
from sklearn.metrics import recall_score
xgb_recall = 0
for train_index, test_index in skfold.split(X,y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    xgb.fit(X_train, y_train)
    xgb_recall = xgb_recall + (recall_score(y_test, xgb.predict(X_test), average=None))[2]
xgb_recall = ( xgb_recall / 5 ) * 100
print("Recall za patološku klasu s XGBoost: %.2f%% " % xgb_recall)

Recall za patološku klasu s XGBoost: 91.48% 


In [78]:
# konfuzijske matrice za svaki od 5 podskupova:
#from sklearn.metrics import confusion_matrix
#for train_index, test_index in skfold.split(X,y):
#    X_train, X_test = X[train_index], X[test_index]
#    y_train, y_test = y[train_index], y[test_index]
#    xgb.fit(X_train, y_train)
#    print(confusion_matrix(y_test, xgb.predict(X_test)))
#    print("Recall za patološku klasu %f" % (recall_score(y_test, xgb.predict(X_test), average=None))[2])

Dalje ćemo u svim modelima koristiti 5-fold stratified cross-validation.

SVC s 5-fold stratified cross-validation:

In [11]:
from sklearn.svm import SVC 
svc = SVC(kernel = 'linear', random_state = 7)
svc_results = cross_val_score(svc, X, y, cv = skfold)
print("Točnost SVC modela: %.2f%% (standardna devijacija %.2f%%)" % (svc_results.mean()*100, svc_results.std()*100))

Točnost SVC modela: 87.91% (standardna devijacija 0.65%)


Prosjek recall-a za patološku klasu:

In [12]:
svc_recall = 0
for train_index, test_index in skfold.split(X,y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    svc.fit(X_train, y_train)
    svc_recall = svc_recall + (recall_score(y_test, svc.predict(X_test), average=None))[2]
svc_recall = ( svc_recall / 5 ) * 100
print("Recall za patološku klasu s XGBoost: %.2f%% " % svc_recall)

Recall za patološku klasu s XGBoost: 74.95% 


Random forest:

In [14]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features=21, max_leaf_nodes=None,
            min_impurity_decrease=1e-07, min_samples_leaf=3,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=10, oob_score=False, random_state=100,
            verbose=0, warm_start=False)
rf_results = cross_val_score(rf, X, y, cv = skfold)
print("Točnost Random Forest modela: %.2f%% (standardna devijacija %.2f%%)" %(rf_results.mean()*100, rf_results.std()*100))

Točnost Random Forest modela: 93.79% (standardna devijacija 0.65%)


Prosjek recall-a za patološku klasu:

In [15]:
rf_recall = 0
for train_index, test_index in skfold.split(X,y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    rf.fit(X_train, y_train)
    rf_recall = rf_recall + (recall_score(y_test, rf.predict(X_test), average=None))[2]
rf_recall = ( rf_recall / 5 ) * 100
print("Recall za patološku klasu s Random Forest: %.2f%% " % rf_recall)

Recall za patološku klasu s Random Forest: 89.17% 


In [17]:
#!pip install imblearn

Collecting imblearn
  Downloading https://files.pythonhosted.org/packages/81/a7/4179e6ebfd654bd0eac0b9c06125b8b4c96a9d0a8ff9e9507eb2a26d2d7e/imblearn-0.0-py2.py3-none-any.whl
Collecting imbalanced-learn (from imblearn)
  Using cached https://files.pythonhosted.org/packages/e5/4c/7557e1c2e791bd43878f8c82065bddc5798252084f26ef44527c02262af1/imbalanced_learn-0.4.3-py3-none-any.whl
Installing collected packages: imbalanced-learn, imblearn
Successfully installed imbalanced-learn-0.4.3 imblearn-0.0


In [60]:
#Mijenjamo podatke pomoću 3 vrste oversamplinga
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, ADASYN

In [61]:
X_resampled, y_resampled = SMOTE().fit_resample(X, y)
X_resampled2, y_resampled2 = BorderlineSMOTE().fit_resample(X, y)
X_resampled3, y_resampled3 = ADASYN().fit_resample(X, y)

SVC s oversamplingom (3 vrste):

In [62]:
svc_resampled_results = cross_val_score(svc, X_resampled, y_resampled, cv = skfold)
print("Točnost SVC modela (SMOTE): %.2f%% (standardna devijacija %.2f%%)" % (svc_resampled_results.mean()*100, svc_resampled_results.std()*100))

Točnost SVC modela (SMOTE): 85.94% (standardna devijacija 0.66%)


In [63]:
svc_resampled_recall = 0
for train_index, test_index in skfold.split(X_resampled, y_resampled):
    X_train, X_test = X_resampled[train_index], X_resampled[test_index]
    y_train, y_test = y_resampled[train_index], y_resampled[test_index]
    svc.fit(X_train, y_train)
    svc_resampled_recall = svc_resampled_recall + (recall_score(y_test, svc.predict(X_test), average=None))[2]
svc_resampled_recall = ( svc_resampled_recall / 5 ) * 100
print("Recall za patološku klasu sa SVC (SMOTE): %.2f%% " % svc_resampled_recall)

Recall za patološku klasu sa SVC (SMOTE): 90.27% 


In [64]:
svc_resampled2_results = cross_val_score(svc, X_resampled2, y_resampled2, cv = skfold)
print("Točnost SVC modela (BorderlineSMOTE): %.2f%% (standardna devijacija %.2f%%)" % (svc_resampled2_results.mean()*100, svc_resampled2_results.std()*100))

Točnost SVC modela (BorderlineSMOTE): 84.91% (standardna devijacija 1.11%)


In [65]:
svc_resampled2_recall = 0
for train_index, test_index in skfold.split(X_resampled2, y_resampled2):
    X_train, X_test = X_resampled2[train_index], X_resampled2[test_index]
    y_train, y_test = y_resampled2[train_index], y_resampled2[test_index]
    svc.fit(X_train, y_train)
    svc_resampled2_recall = svc_resampled2_recall + (recall_score(y_test, svc.predict(X_test), average=None))[2]
svc_resampled2_recall = ( svc_resampled2_recall / 5 ) * 100
print("Recall za patološku klasu sa SVC (BorderlineSMOTE): %.2f%% " % svc_resampled2_recall)

Recall za patološku klasu sa SVC (BorderlineSMOTE): 93.11% 


In [66]:
svc_resampled3_results = cross_val_score(svc, X_resampled3, y_resampled3, cv = skfold)
print("Točnost SVC modela (ADASYN): %.2f%% (standardna devijacija %.2f%%)" % (svc_resampled3_results.mean()*100, svc_resampled3_results.std()*100))

Točnost SVC modela (ADASYN): 82.24% (standardna devijacija 0.79%)


In [67]:
svc_resampled3_recall = 0
for train_index, test_index in skfold.split(X_resampled3, y_resampled3):
    X_train, X_test = X_resampled3[train_index], X_resampled3[test_index]
    y_train, y_test = y_resampled3[train_index], y_resampled3[test_index]
    svc.fit(X_train, y_train)
    svc_resampled3_recall = svc_resampled3_recall + (recall_score(y_test, svc.predict(X_test), average=None))[2]
svc_resampled3_recall = ( svc_resampled3_recall / 5 ) * 100
print("Recall za patološku klasu sa SVC (ADASYN): %.2f%% " % svc_resampled3_recall)

Recall za patološku klasu sa SVC (ADASYN): 92.10% 


Random forest s oversamplingom:

In [47]:
rf_resampled_results = cross_val_score(rf, X_resampled, y_resampled, cv = skfold)
print("Točnost Random Forest modela (SMOTE): %.2f%% (standardna devijacija %.2f%%)" %(rf_resampled_results.mean()*100, rf_resampled_results.std()*100))

Točnost Random Forest modela (SMOTE): 96.90% (standardna devijacija 0.57%)


In [48]:
rf_resampled_recall = 0
for train_index, test_index in skfold.split(X_resampled, y_resampled):
    X_train, X_test = X_resampled[train_index], X_resampled[test_index]
    y_train, y_test = y_resampled[train_index], y_resampled[test_index]
    rf.fit(X_train, y_train)
    rf_resampled_recall = rf_resampled_recall + (recall_score(y_test, rf.predict(X_test), average=None))[2]
rf_resampled_recall = ( rf_resampled_recall / 5 ) * 100
print("Recall za patološku klasu s Random forest (SMOTE): %.2f%% " % rf_resampled_recall)

Recall za patološku klasu s Random forest (SMOTE): 98.73% 


In [49]:
rf_resampled2_results = cross_val_score(rf, X_resampled2, y_resampled2, cv = skfold)
print("Točnost Random Forest modela (BorderlineSMOTE) : %.2f%% (standardna devijacija %.2f%%)" %(rf_resampled2_results.mean()*100, rf_resampled2_results.std()*100))

Točnost Random Forest modela (BorderlineSMOTE) : 97.36% (standardna devijacija 0.42%)


In [50]:
rf_resampled2_recall = 0
for train_index, test_index in skfold.split(X_resampled2, y_resampled2):
    X_train, X_test = X_resampled2[train_index], X_resampled2[test_index]
    y_train, y_test = y_resampled2[train_index], y_resampled2[test_index]
    rf.fit(X_train, y_train)
    rf_resampled2_recall = rf_resampled2_recall + (recall_score(y_test, rf.predict(X_test), average=None))[2]
rf_resampled2_recall = ( rf_resampled2_recall / 5 ) * 100
print("Recall za patološku klasu s Random forest (BorderlineSMOTE): %.2f%% " % rf_resampled2_recall)

Recall za patološku klasu s Random forest (BorderlineSMOTE): 98.91% 


In [52]:
rf_resampled3_results = cross_val_score(rf, X_resampled3, y_resampled3, cv = skfold)
print("Točnost Random Forest modela (ADASYN) : %.2f%% (standardna devijacija %.2f%%)" %(rf_resampled3_results.mean()*100, rf_resampled3_results.std()*100))

Točnost Random Forest modela (ADASYN) : 97.12% (standardna devijacija 0.80%)


In [59]:
rf_resampled3_recall = 0
for train_index, test_index in skfold.split(X_resampled3, y_resampled3):
    X_train, X_test = X_resampled3[train_index], X_resampled3[test_index]
    y_train, y_test = y_resampled3[train_index], y_resampled3[test_index]
    rf.fit(X_train, y_train)
    rf_resampled3_recall = rf_resampled3_recall + (recall_score(y_test, rf.predict(X_test), average=None))[2]
rf_resampled3_recall = ( rf_resampled3_recall / 5 ) * 100
print("Recall za patološku klasu s Random forest (ADASYN): %.2f%% " % rf_resampled3_recall)

Recall za patološku klasu s Random forest (ADASYN): 98.85% 


XGBoost s oversamplingom:

In [53]:
xgb_resampled_results = cross_val_score(xgb, X_resampled, y_resampled, cv = skfold)
print("Točnost XGBoost modela (SMOTE): %.2f%% (standardna devijacija %.2f%%)" %(xgb_resampled_results.mean()*100, xgb_resampled_results.std()*100))

Točnost XGBoost modela (SMOTE): 96.35% (standardna devijacija 0.56%)


In [54]:
xgb_resampled_recall = 0
for train_index, test_index in skfold.split(X_resampled, y_resampled):
    X_train, X_test = X_resampled[train_index], X_resampled[test_index]
    y_train, y_test = y_resampled[train_index], y_resampled[test_index]
    xgb.fit(X_train, y_train)
    xgb_resampled_recall = xgb_resampled_recall + (recall_score(y_test, xgb.predict(X_test), average=None))[2]
xgb_resampled_recall = ( xgb_resampled_recall / 5 ) * 100
print("Recall za patološku klasu s XGBoost (SMOTE): %.2f%% " % xgb_resampled_recall)

Recall za patološku klasu s XGBoost (SMOTE): 98.67% 


In [55]:
xgb_resampled2_results = cross_val_score(xgb, X_resampled2, y_resampled2, cv = skfold)
print("Točnost XGBoost modela (BorderlineSMOTE): %.2f%% (standardna devijacija %.2f%%)" %(xgb_resampled2_results.mean()*100, xgb_resampled2_results.std()*100))

Točnost XGBoost modela (BorderlineSMOTE): 96.41% (standardna devijacija 0.55%)


In [56]:
xgb_resampled2_recall = 0
for train_index, test_index in skfold.split(X_resampled2, y_resampled2):
    X_train, X_test = X_resampled2[train_index], X_resampled2[test_index]
    y_train, y_test = y_resampled2[train_index], y_resampled2[test_index]
    xgb.fit(X_train, y_train)
    xgb_resampled2_recall = xgb_resampled2_recall + (recall_score(y_test, xgb.predict(X_test), average=None))[2]
xgb_resampled2_recall = ( xgb_resampled2_recall / 5 ) * 100
print("Recall za patološku klasu s XGBoost (BorderlineSMOTE): %.2f%% " % xgb_resampled2_recall)

Recall za patološku klasu s XGBoost (BorderlineSMOTE): 98.25% 


In [57]:
xgb_resampled3_results = cross_val_score(xgb, X_resampled3, y_resampled3, cv = skfold)
print("Točnost XGBoost modela (ADASYN): %.2f%% (standardna devijacija %.2f%%)" %(xgb_resampled3_results.mean()*100, xgb_resampled3_results.std()*100))

Točnost XGBoost modela (ADASYN): 95.79% (standardna devijacija 0.58%)


In [58]:
xgb_resampled3_recall = 0
for train_index, test_index in skfold.split(X_resampled3, y_resampled3):
    X_train, X_test = X_resampled3[train_index], X_resampled3[test_index]
    y_train, y_test = y_resampled3[train_index], y_resampled3[test_index]
    xgb.fit(X_train, y_train)
    xgb_resampled3_recall = xgb_resampled3_recall + (recall_score(y_test, xgb.predict(X_test), average=None))[2]
xgb_resampled3_recall = ( xgb_resampled3_recall / 5 ) * 100
print("Recall za patološku klasu s XGBoost (ADASYN): %.2f%% " % xgb_resampled3_recall)

Recall za patološku klasu s XGBoost (ADASYN): 98.12% 


Usporedba recalla i točnosti:

In [88]:
print("Točnost za SVC:")
print("SVC: %.2f%%, SVC-SMOTE: %.2f%%, razlika: %.2f%%" % (svc_results.mean()*100, svc_resampled_results.mean()*100, (svc_resampled_results.mean()*100 - svc_results.mean()*100)))
print("SVC: %.2f%%, SVC-BorderlineSMOTE: %.2f%%, razlika: %.2f%%" % (svc_results.mean()*100, svc_resampled2_results.mean()*100, (svc_resampled2_results.mean()*100 - svc_results.mean()*100)))
print("SVC: %.2f%%, SVC-ADASYN: %.2f%%, razlika: %.2f%%" % (svc_results.mean()*100, svc_resampled3_results.mean()*100, (svc_resampled3_results.mean()*100 - svc_results.mean()*100)))

Točnost za SVC:
SVC: 87.91%, SVC-SMOTE: 85.94%, razlika: -1.97%
SVC: 87.91%, SVC-BorderlineSMOTE: 84.91%, razlika: -3.00%
SVC: 87.91%, SVC-ADASYN: 82.24%, razlika: -5.67%


In [90]:
print("Recall za patološku klasu za SVC:")
print("SVC: %.2f%%, SVC-SMOTE: %.2f%%, razlika: %.2f%%" % (svc_recall, svc_resampled_recall, (svc_resampled_recall - svc_recall)))
print("SVC: %.2f%%, SVC-BorderlineSMOTE: %.2f%%, razlika: %.2f%%" % (svc_recall, svc_resampled2_recall, (svc_resampled2_recall - svc_recall)))
print("SVC: %.2f%%, SVC-ADASYN: %.2f%%, razlika: %.2f%%" % (svc_recall, svc_resampled3_recall, (svc_resampled3_recall - svc_recall)))

Recall za patološku klasu za SVC:
SVC: 74.95%, SVC-SMOTE: 90.27%, razlika: 15.32%
SVC: 74.95%, SVC-BorderlineSMOTE: 93.11%, razlika: 18.16%
SVC: 74.95%, SVC-ADASYN: 92.10%, razlika: 17.15%


In [92]:
print("Točnost za RF:")
print("RF: %.2f%%, RF-SMOTE: %.2f%%, razlika: %.2f%%" % (rf_results.mean()*100, rf_resampled_results.mean()*100, (rf_resampled_results.mean()*100 - rf_results.mean()*100)))
print("RF: %.2f%%, RF-BorderlineSMOTE: %.2f%%, razlika: %.2f%%" % (rf_results.mean()*100, rf_resampled2_results.mean()*100, (rf_resampled2_results.mean()*100 - rf_results.mean()*100)))
print("RF: %.2f%%, RF-ADASYN: %.2f%%, razlika: %.2f%%" % (rf_results.mean()*100, rf_resampled3_results.mean()*100, (rf_resampled3_results.mean()*100 - rf_results.mean()*100)))

Točnost za RF:
RF: 93.79%, RF-SMOTE: 96.90%, razlika: 3.11%
RF: 93.79%, RF-BorderlineSMOTE: 97.36%, razlika: 3.57%
RF: 93.79%, RF-ADASYN: 97.12%, razlika: 3.33%


In [93]:
print("Recall za patološku klasu za RF:")
print("RF: %.2f%%, RF-SMOTE: %.2f%%, razlika: %.2f%%" % (rf_recall, rf_resampled_recall, (rf_resampled_recall - rf_recall)))
print("RF: %.2f%%, RF-BorderlineSMOTE: %.2f%%, razlika: %.2f%%" % (rf_recall, rf_resampled2_recall, (rf_resampled2_recall - rf_recall)))
print("RF: %.2f%%, RF-ADASYN: %.2f%%, razlika: %.2f%%" % (rf_recall, rf_resampled3_recall, (rf_resampled3_recall - rf_recall)))

Recall za patološku klasu za RF:
RF: 89.17%, RF-SMOTE: 98.73%, razlika: 9.56%
RF: 89.17%, RF-BorderlineSMOTE: 98.91%, razlika: 9.74%
RF: 89.17%, RF-ADASYN: 98.85%, razlika: 9.67%


In [94]:
print("Točnost za XGB:")
print("XGB: %.2f%%, XGB-SMOTE: %.2f%%, razlika: %.2f%%" % (xgb_results.mean()*100, xgb_resampled_results.mean()*100, (xgb_resampled_results.mean()*100 - xgb_results.mean()*100)))
print("XGB: %.2f%%, XGB-BorderlineSMOTE: %.2f%%, razlika: %.2f%%" % (xgb_results.mean()*100, xgb_resampled2_results.mean()*100, (xgb_resampled2_results.mean()*100 - xgb_results.mean()*100)))
print("XGB: %.2f%%, XGB-ADASYN: %.2f%%, razlika: %.2f%%" % (xgb_results.mean()*100, xgb_resampled3_results.mean()*100, (xgb_resampled3_results.mean()*100 - xgb_results.mean()*100)))

Točnost za XGB:
XGB: 94.59%, XGB-SMOTE: 96.35%, razlika: 1.76%
XGB: 94.59%, XGB-BorderlineSMOTE: 96.41%, razlika: 1.82%
XGB: 94.59%, XGB-ADASYN: 95.79%, razlika: 1.20%


In [95]:
print("Recall za patološku klasu za XGB:")
print("XGB: %.2f%%, XGB-SMOTE: %.2f%%, razlika: %.2f%%" % (xgb_recall, xgb_resampled_recall, (xgb_resampled_recall - xgb_recall)))
print("XGB: %.2f%%, XGB-BorderlineSMOTE: %.2f%%, razlika: %.2f%%" % (xgb_recall, xgb_resampled2_recall, (xgb_resampled2_recall - xgb_recall)))
print("XGB: %.2f%%, XGB-ADASYN: %.2f%%, razlika: %.2f%%" % (xgb_recall, xgb_resampled3_recall, (xgb_resampled3_recall - xgb_recall)))

Recall za patološku klasu za XGB:
XGB: 91.48%, XGB-SMOTE: 98.67%, razlika: 7.19%
XGB: 91.48%, XGB-BorderlineSMOTE: 98.25%, razlika: 6.77%
XGB: 91.48%, XGB-ADASYN: 98.12%, razlika: 6.64%
