In [29]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.model_selection import GridSearchCV

In [19]:
# Load the dataset. The first CSV column is read as the index and then copied
# back as a regular "age" column, which places "age" LAST in the column order;
# cells that build feature rows positionally depend on that order.
# NOTE(review): assumes the first column of heart.csv is the patient's age — confirm.
data = pd.read_csv("heart.csv", index_col=0)

data["age"] = data.index
# Replace the age-valued index with a plain 0..n-1 index.
data = data.reset_index(drop=True)

# Features are every column except the target column "output".
X = data.drop("output", axis=1)
y = data["output"]

# A fixed random_state makes the train/test split identical on every
# Restart & Run All; without it each run trains and evaluates on different rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [20]:
# Base estimator handed to the hyper-parameter search.
# `n_jobs` and `random_state` are the scikit-learn-style parameter names of the
# XGBoost wrapper; `nthread` and `seed` are their deprecated aliases.
model = XGBClassifier(
    objective="binary:logistic",
    n_jobs=4,
    random_state=42,
)

In [21]:
# Search space: tree depths 2..9, 60/100/140/180 estimators and three learning
# rates, i.e. 8 * 4 * 3 = 96 candidate combinations.
parameters = dict(
    max_depth=list(range(2, 10)),
    n_estimators=list(range(60, 220, 40)),
    learning_rate=[0.1, 0.01, 0.05],
)

# Exhaustive search over the grid, scored by ROC AUC with 10-fold cross-validation.
grid_search = GridSearchCV(
    model,
    parameters,
    scoring="roc_auc",
    cv=10,
    n_jobs=10,
    verbose=True,
)
grid_search.fit(X_train, y_train)

# Estimator with the best cross-validated score, as exposed by the fitted search.
best_model = grid_search.best_estimator_

In [31]:
# Predicted class labels for the held-out test rows; the bare name on the last
# line makes the notebook display the resulting array.
prediction = best_model.predict(X_test)
prediction

array([0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1])

In [34]:
# predict_proba returns one column per class; keep only the second column
# (index 1) for every test row and display it.
class_probabilities = best_model.predict_proba(X_test)
probs = class_probabilities[:, 1]
probs

array([0.10866175, 0.57038313, 0.02858287, 0.96986896, 0.46750018,
       0.9097626 , 0.17987405, 0.9436689 , 0.6458603 , 0.04746833,
       0.06273849, 0.06189865, 0.42763665, 0.8104258 , 0.03655528,
       0.98549694, 0.22280906, 0.49866745, 0.62333757, 0.04015618,
       0.98459715, 0.29236513, 0.94624865, 0.45595643, 0.07949298,
       0.07292709, 0.07747218, 0.18445781, 0.02740588, 0.85481465,
       0.9744819 , 0.9518455 , 0.610071  , 0.70655745, 0.5865048 ,
       0.33167297, 0.05090255, 0.50548506, 0.9479781 , 0.04241505,
       0.40234733, 0.76954114, 0.3789977 , 0.7550168 , 0.8156378 ,
       0.02262044, 0.9793506 , 0.09185307, 0.7409831 , 0.420089  ,
       0.64316064, 0.98549694, 0.7602962 , 0.15543514, 0.578574  ,
       0.04664843, 0.06406511, 0.5309286 , 0.98549694, 0.93505913,
       0.29097277, 0.5257172 , 0.81233644, 0.02654627, 0.9268592 ,
       0.80177045, 0.53007215, 0.65902066, 0.66630435, 0.21481383,
       0.8922502 , 0.4187577 , 0.91661215, 0.516734  , 0.53066

In [64]:
# A single hand-written patient record; values are positional and must follow
# the column order of X exactly.
patient_values = [1, 0, 100, 200, 0, 1, 140, 1, 0.8, 1, 1, 2, 90]
row = pd.DataFrame([patient_values], columns=X.columns)

In [70]:
# Second-class probability (column index 1) for the single-row frame, reported
# as a percentage with two decimals.
positive_class_probability = best_model.predict_proba(row)[:, 1][0]
print("Вероятность инсульта у пациента: {0:.2f}%".format(positive_class_probability * 100))

Вероятность инсульта у пациента: 15.74%
