<a href="https://colab.research.google.com/github/ttk66/Chem_analyzis/blob/main/ic50_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
)
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
import warnings

# Suppress every warning for the rest of the run.
# NOTE(review): this is a blanket filter, so it also hides any warnings
# emitted by the libraries used below while fitting the models.
warnings.filterwarnings("ignore")

# Load the dataset and fill missing numeric values with the column medians.
# NOTE(review): these medians are computed on the whole dataset, i.e. before
# the train/test split; `df` is modified in place here.
df = pd.read_csv("final_filtered_data.csv")
df.fillna(df.median(numeric_only=True), inplace=True)

# Binary target: 1 when log_IC50 is above the dataset median, otherwise 0.
target = (df["log_IC50"] > df["log_IC50"].median()).astype(int)

# Features: all numeric columns except IC50/CC50/SI and their log_* columns
# (the target is derived from log_IC50, so these must not be used as inputs).
X = df.select_dtypes(include=[np.number]).drop(columns=["IC50", "CC50", "SI", "log_IC50", "log_CC50", "log_SI"])
y = target

# Split BEFORE scaling: fitting the scaler on the full matrix would let the
# test rows influence the mean/std used to transform the training rows
# (data leakage). test_size and random_state are unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using statistics learned from the training rows only.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code relying on the full scaled matrix still finds it;
# it is now scaled with the training-set statistics.
X_scaled = scaler.transform(X)

# Candidate models and their hyper-parameter grids.
# Maps a display name to an (estimator, param_grid) pair; the grid is handed
# to GridSearchCV in the training loop.
model_configs = {
    "Logistic Regression": (LogisticRegression(), {
        "C": [0.1, 1, 10]
    }),
    "Ridge Classifier": (RidgeClassifier(), {
        "alpha": [0.1, 1, 10]
    }),
    "Random Forest": (RandomForestClassifier(random_state=42), {
        "n_estimators": [100, 200],
        "max_depth": [None, 10, 20]
    }),
    "Gradient Boosting": (GradientBoostingClassifier(random_state=42), {
        "n_estimators": [100, 200],
        "learning_rate": [0.05, 0.1],
        "max_depth": [3, 5]
    }),
    "HistGradientBoosting": (HistGradientBoostingClassifier(random_state=42), {
        "learning_rate": [0.05, 0.1],
        "max_iter": [100, 200]
    }),
    "XGBoost": (XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42), {
        "n_estimators": [100, 200],
        "max_depth": [3, 6],
        "learning_rate": [0.05, 0.1]
    }),
    # probability=True makes SVC calibrate probabilities with an internal,
    # shuffled cross-validation; seed it like the other stochastic models so
    # the reported probabilities (and ROC AUC) are reproducible.
    "SVC": (SVC(probability=True, random_state=42), {
        "C": [0.1, 1, 10],
        "kernel": ["rbf", "linear"]
    }),
    "KNN": (KNeighborsClassifier(), {
        "n_neighbors": [3, 5, 7]
    }),
}

# Train every configured model and print its metrics on the held-out split.
for name, (model, params) in model_configs.items():
    if params:
        # Tune the grid with 3-fold CV on the training split, selecting by accuracy.
        model = GridSearchCV(model, params, cv=3, scoring="accuracy", n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # ROC AUC needs a continuous ranking score. Prefer class-1 probabilities;
    # for estimators without predict_proba (e.g. RidgeClassifier) use the
    # signed decision_function values instead of hard 0/1 labels, which would
    # collapse the ROC curve to a single operating point.
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, "decision_function"):
        y_proba = model.decision_function(X_test)
    else:
        # Last resort: no continuous score is exposed by this estimator.
        y_proba = y_pred

    print(f"\n==== Модель: {name} ====")
    if isinstance(model, GridSearchCV):
        print("Лучшие параметры:", model.best_params_)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall:", recall_score(y_test, y_pred))
    print("F1 Score:", f1_score(y_test, y_pred))
    print("ROC AUC:", roc_auc_score(y_test, y_proba))
    print(classification_report(y_test, y_pred, target_names=["Class 0", "Class 1"]))



==== Модель: Logistic Regression ====
Лучшие параметры: {'C': 0.1}
Accuracy: 0.7164179104477612
Precision: 0.7326732673267327
Recall: 0.7115384615384616
F1 Score: 0.7219512195121951
ROC AUC: 0.7669012688342585
              precision    recall  f1-score   support

     Class 0       0.70      0.72      0.71        97
     Class 1       0.73      0.71      0.72       104

    accuracy                           0.72       201
   macro avg       0.72      0.72      0.72       201
weighted avg       0.72      0.72      0.72       201


==== Модель: Ridge Classifier ====
Лучшие параметры: {'alpha': 1}
Accuracy: 0.6915422885572139
Precision: 0.7058823529411765
Recall: 0.6923076923076923
F1 Score: 0.6990291262135923
ROC AUC: 0.6915146708961142
              precision    recall  f1-score   support

     Class 0       0.68      0.69      0.68        97
     Class 1       0.71      0.69      0.70       104

    accuracy                           0.69       201
   macro avg       0.69      0.69 