<a href="https://colab.research.google.com/github/ttk66/Chem_analyzis/blob/main/ic50_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset and separate the features from the regression target.
df = pd.read_csv("/content/final_filtered_data.csv")
# Every IC50 / CC50 / SI column (raw and log_ variants) is removed from the
# feature matrix; log_IC50 is the value being predicted.
X = df.drop(columns=["IC50", "CC50", "SI", "log_IC50", "log_CC50", "log_SI"])
y = df["log_IC50"]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage), which makes the held-out metrics optimistic.
# Same test_size / random_state as before, so the split membership is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling: statistics are learned from the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell referring to X_scaled still works; it is the
# full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# Models and their hyper-parameter grids.
# Each spec is (display name, estimator instance, parameter grid). An empty
# grid means the estimator has nothing to search over.
_model_specs = [
    ("Linear Regression", LinearRegression(), {}),
    ("Ridge", Ridge(), {"alpha": [0.1, 1.0, 10.0]}),
    ("Lasso", Lasso(), {"alpha": [0.001, 0.01, 0.1, 1.0]}),
    (
        "Random Forest",
        RandomForestRegressor(random_state=42),
        {"n_estimators": [100, 200], "max_depth": [None, 10, 20]},
    ),
    (
        "Gradient Boosting",
        GradientBoostingRegressor(random_state=42),
        {
            "n_estimators": [100, 200],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5],
        },
    ),
    (
        "HistGradientBoosting",
        HistGradientBoostingRegressor(random_state=42),
        {"learning_rate": [0.05, 0.1], "max_iter": [100, 200]},
    ),
    (
        "XGBoost",
        XGBRegressor(random_state=42, objective="reg:squarederror"),
        {
            "n_estimators": [100, 200],
            "max_depth": [3, 6],
            "learning_rate": [0.05, 0.1],
        },
    ),
]

# Name -> (estimator, grid); insertion order follows the list above.
model_configs = {
    label: (estimator, grid) for label, estimator, grid in _model_specs
}
# Training and evaluation.
# For every configured model: optionally wrap it in a grid search, fit it on
# the training split, predict the test split and print the log-scale metrics.
for name, (model, params) in model_configs.items():
    # A non-empty grid switches on the 3-fold grid search scored by
    # negative RMSE; an empty grid leaves the plain estimator untouched.
    tuned = bool(params)
    if tuned:
        model = GridSearchCV(
            estimator=model,
            param_grid=params,
            scoring="neg_root_mean_squared_error",
            cv=3,
            n_jobs=-1,
        )

    model.fit(X_train, y_train)
    y_pred_log = model.predict(X_test)

    # Metrics are computed on the values as predicted (log scale), without
    # transforming back.
    rmse_log = np.sqrt(mean_squared_error(y_test, y_pred_log))
    r2_log = r2_score(y_test, y_pred_log)

    print(f"\n{name}")
    if tuned:
        # Only the grid-search wrapper exposes best_params_.
        print("Лучшие параметры:", model.best_params_)
    print("RMSE (лог-шкала):", rmse_log)
    print("R² (лог-шкала):", r2_log)


Linear Regression
RMSE (лог-шкала): 1.8085605424901117
R² (лог-шкала): 0.15023677304158567

Ridge
Лучшие параметры: {'alpha': 10.0}
RMSE (лог-шкала): 1.618915088475444
R² (лог-шкала): 0.3191052718100309

Lasso
Лучшие параметры: {'alpha': 0.1}
RMSE (лог-шкала): 1.685312243119321
R² (лог-шкала): 0.2621083740960283

Random Forest
Лучшие параметры: {'max_depth': 20, 'n_estimators': 200}
RMSE (лог-шкала): 1.453926380065631
R² (лог-шкала): 0.4508175337116427

Gradient Boosting
Лучшие параметры: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
RMSE (лог-шкала): 1.4743499093162435
R² (лог-шкала): 0.43528026514111096

HistGradientBoosting
Лучшие параметры: {'learning_rate': 0.05, 'max_iter': 100}
RMSE (лог-шкала): 1.4837150442200688
R² (лог-шкала): 0.42808323081685107

XGBoost
Лучшие параметры: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
RMSE (лог-шкала): 1.4694373561052305
R² (лог-шкала): 0.43903730255495454
