# Бинарная классификация

In [1]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# 1. Импортировать датасет с бинарным целевым признаком

In [2]:
# Read the dataset from a CSV file; the relative path is resolved against
# the current working directory.
df = pd.read_csv(filepath_or_buffer='Heart_Disease_and_Hospitals.csv')

# 2. Очистить датасет

In [3]:
# Drop the name columns and the treatment date, then discard every row that
# still contains a missing value.
df = df.drop(columns=['full_name', 'first_name', 'last_name', 'treatment_date']).dropna()

# Column groups used later when the preprocessing transformer is built.
numeric_features = ['age', 'blood_pressure', 'cholesterol', 'bmi', 'glucose_level']
categorical_features = ['country', 'state', 'gender', 'hospital', 'treatment']

# IQR outlier filter with 1.5 * IQR fences, applied one numeric column at a
# time. The quartiles are recomputed on the frame that survived the previous
# columns, so the filtering is sequential rather than simultaneous.
for col in numeric_features:
    q1, q3 = df[col].quantile([0.25, 0.75])
    spread = q3 - q1
    # `between` is inclusive on both ends, i.e. lower <= value <= upper.
    df = df[df[col].between(q1 - 1.5 * spread, q3 + 1.5 * spread)]

# 3. Выбрать целевой признак и сформировать выборку

In [4]:
# Target is the `heart_disease` column; predictors are all remaining columns.
y = df['heart_disease']
X = df.drop(columns=['heart_disease'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize the numeric columns and one-hot encode the categorical ones.
# Categories that were not seen during fitting are ignored at transform time
# instead of raising an error.
column_transformers = [
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_transformers)

# 4. Создание моделей бинарной классификации

In [5]:
def build_pipeline(classifier):
    """Return an unfitted pipeline: preprocessing followed by `classifier`.

    Each pipeline receives its own clone of the shared `preprocessor`.
    `Pipeline.fit` fits its steps in place, so reusing one transformer
    instance across several pipelines would let every later `fit` overwrite
    the fitted state the earlier pipelines depend on.

    Parameters
    ----------
    classifier : estimator
        Unfitted scikit-learn classifier used as the final step.

    Returns
    -------
    Pipeline
        Pipeline with the steps 'preprocessor' and 'classifier'.
    """
    return Pipeline(steps=[('preprocessor', clone(preprocessor)),
                           ('classifier', classifier)])


# Logistic regression.
logreg_pipeline = build_pipeline(LogisticRegression(random_state=42))
logreg_pipeline.fit(X_train, y_train)
y_pred_logreg = logreg_pipeline.predict(X_test)

# Support vector machine.
svm_pipeline = build_pipeline(SVC(random_state=42))
svm_pipeline.fit(X_train, y_train)
y_pred_svm = svm_pipeline.predict(X_test)

# Decision tree (CART).
cart_pipeline = build_pipeline(DecisionTreeClassifier(random_state=42))
cart_pipeline.fit(X_train, y_train)
y_pred_cart = cart_pipeline.predict(X_test)

# 5. Оценка качества моделей

In [6]:
print("--- Оценка качества моделей ---")

# (section title, test-set predictions) for each fitted model, in report order.
model_predictions = [
    ("\n--- Логистическая регрессия ---", y_pred_logreg),
    ("\n--- Метод опорных векторов (SVM) ---", y_pred_svm),
    ("\n--- Дерево решений (CART) ---", y_pred_cart),
]

# Print accuracy, the confusion matrix and the per-class report for every
# model against the same held-out labels.
for section_title, predictions in model_predictions:
    print(section_title)
    print("Точность (Accuracy):", accuracy_score(y_test, predictions))
    print("Матрица ошибок:\n", confusion_matrix(y_test, predictions))
    print("Отчет о классификации:\n", classification_report(y_test, predictions))

--- Оценка качества моделей ---

--- Логистическая регрессия ---
Точность (Accuracy): 0.7784583971413986
Матрица ошибок:
 [[805 209]
 [225 720]]
Отчет о классификации:
               precision    recall  f1-score   support

           0       0.78      0.79      0.79      1014
           1       0.78      0.76      0.77       945

    accuracy                           0.78      1959
   macro avg       0.78      0.78      0.78      1959
weighted avg       0.78      0.78      0.78      1959


--- Метод опорных векторов (SVM) ---
Точность (Accuracy): 0.9310872894333844
Матрица ошибок:
 [[937  77]
 [ 58 887]]
Отчет о классификации:
               precision    recall  f1-score   support

           0       0.94      0.92      0.93      1014
           1       0.92      0.94      0.93       945

    accuracy                           0.93      1959
   macro avg       0.93      0.93      0.93      1959
weighted avg       0.93      0.93      0.93      1959


--- Дерево решений (CART) ---
Точн