In [10]:
import torch
print("GPU доступен:", torch.cuda.is_available())
print("Название GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "GPU не доступен")


GPU доступен: True
Название GPU: Tesla T4


In [None]:
import kagglehub
import os
import pandas as pd

path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")

csv_path = os.path.join(path, "creditcard.csv")

df = pd.read_csv(csv_path)
df.head()

features = df[['Time', 'Amount']]

Downloading from https://www.kaggle.com/api/v1/datasets/download/mlg-ulb/creditcardfraud?dataset_version_number=3...


100%|██████████| 66.0M/66.0M [00:03<00:00, 17.3MB/s]

Extracting files...





In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_curve, auc, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

X = df.drop('Class', axis=1)
y = df['Class']

# Масштабирование признаков
scaler = StandardScaler()
X['Amount'] = scaler.fit_transform(X['Amount'].values.reshape(-1, 1))
X['Time'] = scaler.fit_transform(X['Time'].values.reshape(-1, 1))

# Разделение данных
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Создание и обучение модели
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
rf_model.fit(X_train, y_train)

#Получение предсказаний
y_pred = rf_model.predict(X_test)
y_pred_proba = rf_model.predict_proba(X_test)[:, 1]

# Расчет метрик
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_proba)
auprc = auc(recall_curve, precision_curve)

print(f"AUPRC: {auprc:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")


AUPRC: 0.858
Precision: 0.961
Recall: 0.745


In [6]:

from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import numpy as np

X = df.drop('Class', axis=1)
y = df['Class']

# 1. Используем параллельные вычисления
n_jobs = -1  # Использует все доступные ядра процессора

# 2. Оптимизируем масштабирование признаков
scaler = StandardScaler()
X = X.copy()  # Избегаем предупреждения о копировании
X[['Amount', 'Time']] = scaler.fit_transform(X[['Amount', 'Time']])

# 3. Уменьшаем размер параметров для GridSearch
param_grid = {
    'n_estimators': [100],  # Уменьшаем количество вариантов
    'max_depth': [10, None],
    'min_samples_split': [5],
    'min_samples_leaf': [2]
}

# 4. Разделение данных
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 5. Оптимизированное применение SMOTE
smote = SMOTE(random_state=42, n_jobs=n_jobs)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# 6. Оптимизированный RandomForest и GridSearch
rf = RandomForestClassifier(
    random_state=42,
    n_jobs=n_jobs,  # Параллельное выполнение
    class_weight='balanced',  # Может уменьшить необходимость в SMOTE
)

grid_search = GridSearchCV(
    rf,
    param_grid,
    cv=3,
    scoring='average_precision',
    n_jobs=n_jobs,  # Параллельный поиск
    verbose=1  # Показывает прогресс
)

# Обучение
grid_search.fit(X_train_balanced, y_train_balanced)

# Предсказания и метрики
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
y_pred_proba = best_model.predict_proba(X_test)[:, 1]

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_proba)
auprc = auc(recall_curve, precision_curve)

print(f"AUPRC: {auprc:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")




Fitting 3 folds for each of 2 candidates, totalling 6 fits
AUPRC: 0.873
Precision: 0.818
Recall: 0.827


Оценка метрик:

AUPRC (Area Under Precision-Recall Curve) = 0.873
- Очень хороший показатель, близкий к 1, что говорит о хорошем балансе между точностью и полнотой

Precision (Точность) = 0.818
- Показывает, что 81.8% положительных предсказаний модели верны

Recall (Полнота) = 0.827
- Означает, что модель правильно определяет 82.7% всех положительных случаев

Краткий вывод:
Модель демонстрирует высокую эффективность по всем трем метрикам (все значения > 0.8). Особенно хорош показатель AUPRC, что говорит о стабильной работе модели. Близкие значения precision и recall указывают на хорошо сбалансированную модель без перекоса в сторону ложноположительных или ложноотрицательных результатов.


Собственная реализация

In [14]:

from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
import numpy as np

class MyRandomForestClassifier:
    def __init__(self, n_estimators=100, max_features='sqrt', max_depth=None, min_samples_split=2):
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.trees = []

    def get_params(self, deep=True):
        return {
            'n_estimators': self.n_estimators,
            'max_features': self.max_features,
            'max_depth': self.max_depth,
            'min_samples_split': self.min_samples_split
        }

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def bootstrap_sample(self, features, target):
        n_samples = features.shape[0]
        indices = np.random.choice(n_samples, size=n_samples, replace=True)
        return features[indices], target[indices]

    def fit(self, features, target):
        self.trees = []
        self.feature_indices = []

        for _ in range(self.n_estimators):
            X_sample, y_sample = self.bootstrap_sample(features, target)
            tree = DecisionTreeClassifier(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split
            )
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)
        return self

    def predict(self, features):
        tree_preds = np.array([tree.predict(features) for tree in self.trees])
        predictions = np.apply_along_axis(
            lambda x: np.bincount(x, minlength=len(np.unique(x))).argmax(),
            axis=0,
            arr=tree_preds
        )
        return predictions

    def predict_proba(self, features):
        # Получаем предсказания вероятностей от каждого дерева
        tree_probs = np.array([tree.predict_proba(features) for tree in self.trees])
        # Усредняем вероятности по всем деревьям
        return np.mean(tree_probs, axis=0)


In [12]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, auc, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt


# Разделение на признаки и целевую переменную
X = df.drop('Class', axis=1)
y = df['Class']

# Берем 10% данных
X_sample, _, y_sample, _ = train_test_split(X, y, train_size=0.1, random_state=42, stratify=y)

# Разделение выборки на тренировочную и тестовую
X_train, X_test, y_train, y_test = train_test_split(X_sample, y_sample,
                                                    test_size=0.2,
                                                    random_state=42,
                                                    stratify=y_sample)

# Создание и обучение модели
rf_classifier = MyRandomForestClassifier(n_estimators=100, max_depth=10)
rf_classifier.fit(X_train.values, y_train.values)

# Получение предсказаний
y_pred = rf_classifier.predict(X_test.values)

# Получаем вероятности для положительного класса
y_pred_proba = np.mean([tree.predict_proba(X_test.values) for tree in rf_classifier.trees], axis=0)[:, 1]

# Вычисляем precision и recall
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Вычисляем precision-recall curve и AUPRC
precisions, recalls, thresholds = precision_recall_curve(y_test, y_pred_proba)
auprc = auc(recalls, precisions)

# Выводим метрики
print(f"AUPRC: {auprc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")



AUPRC: 0.8923
Precision: 0.7778
Recall: 0.7000


In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import precision_score, recall_score, average_precision_score

# Подготовка данных для модели
X = df.drop('Class', axis=1)
y = df['Class']

X_sample, _, y_sample, _ = train_test_split(X, y, train_size=0.1, random_state=42, stratify=y)

X_train, X_test, y_train, y_test = train_test_split(X_sample, y_sample,
                                                    test_size=0.2,
                                                    random_state=42,
                                                    stratify=y_sample)

# Определение сетки параметров для поиска
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 15, None],
    'min_samples_split': [2, 5, 10]
}

#  Подбор гиперпараметров с помощью GridSearchCV
rf_classifier = MyRandomForestClassifier()
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    cv=5,
    scoring='average_precision',
    n_jobs=-1
)

grid_search.fit(X_train.values, y_train.values)

# Обучение модели с лучшими параметрами
best_rf = grid_search.best_estimator_
best_rf.fit(X_train.values, y_train.values)

# Оценка результатов
y_pred = best_rf.predict(X_test.values)
auprc = average_precision_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print(f"AUPRC: {auprc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")


## Регрессия

In [2]:
import kagglehub
import os
import pandas as pd
path = kagglehub.dataset_download("rohitsahoo/sales-forecasting")

csv_path = os.path.join(path, "train.csv")

df = pd.read_csv(csv_path)
df.head()



Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales
0,1,CA-2017-152156,08/11/2017,11/11/2017,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420.0,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96
1,2,CA-2017-152156,08/11/2017,11/11/2017,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420.0,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94
2,3,CA-2017-138688,12/06/2017,16/06/2017,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036.0,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62
3,4,US-2016-108966,11/10/2016,18/10/2016,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311.0,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775
4,5,US-2016-108966,11/10/2016,18/10/2016,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311.0,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368


In [4]:
# Вывод информации о датасете
print("Информация о датасете:")
print(df.info())

print("\nПервые 5 строк датасета:")
print(df.head())

print("\nОписательная статистика:")
print(df.describe())

print("\nНазвания столбцов:")
print(df.columns.tolist())

Информация о датасете:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9800 entries, 0 to 9799
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Row ID         9800 non-null   int64  
 1   Order ID       9800 non-null   object 
 2   Order Date     9800 non-null   object 
 3   Ship Date      9800 non-null   object 
 4   Ship Mode      9800 non-null   object 
 5   Customer ID    9800 non-null   object 
 6   Customer Name  9800 non-null   object 
 7   Segment        9800 non-null   object 
 8   Country        9800 non-null   object 
 9   City           9800 non-null   object 
 10  State          9800 non-null   object 
 11  Postal Code    9789 non-null   float64
 12  Region         9800 non-null   object 
 13  Product ID     9800 non-null   object 
 14  Category       9800 non-null   object 
 15  Sub-Category   9800 non-null   object 
 16  Product Name   9800 non-null   object 
 17  Sales          9800 non-null 

In [8]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Подготовка категориальных признаков
categorical_features = ['Ship Mode', 'Segment', 'Region', 'Category', 'Sub-Category']
numerical_features = ['Postal Code']

# Преобразование дат с указанием формата
df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d/%m/%Y')
df['Ship Date'] = pd.to_datetime(df['Ship Date'], format='%d/%m/%Y')
df['Ship Days'] = (df['Ship Date'] - df['Order Date']).dt.days

# Создание признаков (используем copy() чтобы избежать SettingWithCopyWarning)
X = df[categorical_features + numerical_features + ['Ship Days']].copy()
y = df['Sales'].copy()

# Обработка пропущенных значений в Postal Code
X['Postal Code'] = X['Postal Code'].fillna(X['Postal Code'].mean())

# One-hot кодирование категориальных признаков
ct = ColumnTransformer([
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features)
], remainder='passthrough')

X_encoded = ct.fit_transform(X)

# Разделение данных
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Создание и обучение модели
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Предсказания
y_pred = rf_model.predict(X_test)

# Расчет метрик
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Вывод метрик
print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R²: {r2:.2f}')


MAE: 255.06
MSE: 604790.60
RMSE: 777.68
R²: 0.10


Средняя абсолютная ошибка (MAE) в 255.06 денежных единиц и среднеквадратичное отклонение (RMSE) в 777.68 единиц свидетельствуют о существенных отклонениях в прогнозах. Коэффициент детерминации (R²), составляющий 0.10, указывает на то, что модель объясняет лишь 10% дисперсии зависимой переменной.

Полученные результаты свидетельствуют о необходимости оптимизации модели путем корректировки

In [11]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# 1. Расширенная инженерия признаков
def create_features(df):
    df = df.copy()

    # Временные признаки
    df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d/%m/%Y')
    df['Ship Date'] = pd.to_datetime(df['Ship Date'], format='%d/%m/%Y')
    df['Ship Days'] = (df['Ship Date'] - df['Order Date']).dt.days
    df['Order Month'] = df['Order Date'].dt.month
    df['Order Day'] = df['Order Date'].dt.day
    df['Order DayOfWeek'] = df['Order Date'].dt.dayofweek

    return df

# 2. Обработка выбросов
def handle_outliers(y, threshold=3):
    z_scores = np.abs((y - y.mean()) / y.std())
    return y[z_scores < threshold], z_scores < threshold

# Подготовка данных
df_processed = create_features(df)

# Определение признаков
categorical_features = ['Ship Mode', 'Segment', 'Region', 'Category', 'Sub-Category']
numerical_features = ['Postal Code', 'Ship Days', 'Order Month', 'Order Day', 'Order DayOfWeek']

# Создание признаков
X = df_processed[categorical_features + numerical_features].copy()
y = df_processed['Sales'].copy()

# Обработка выбросов
y_clean, mask = handle_outliers(y)
X_clean = X[mask]

# Обработка пропущенных значений
X_clean['Postal Code'] = X_clean['Postal Code'].fillna(X_clean['Postal Code'].mean())

# One-hot кодирование
ct = ColumnTransformer([
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features)
], remainder='passthrough')

X_encoded = ct.fit_transform(X_clean)

# Разделение данных
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_clean, test_size=0.2, random_state=42)

# Определение параметров для поиска
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Создание и обучение модели с GridSearchCV
rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    scoring='r2',
    verbose=1
)

grid_search.fit(X_train, y_train)

# Получение лучшей модели
best_model = grid_search.best_estimator_

# Предсказания
y_pred = best_model.predict(X_test)

# Расчет метрик
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Вывод результатов
print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R²: {r2:.2f}')


Fitting 5 folds for each of 108 candidates, totalling 540 fits


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_clean['Postal Code'] = X_clean['Postal Code'].fillna(X_clean['Postal Code'].mean())


MAE: 149.48
MSE: 70125.46
RMSE: 264.81
R²: 0.20


Видно, что после оптимизаций метрики значительно улушились

Собственная реализация

In [13]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

class MyRandomForestRegressor:
    def __init__(self, n_estimators=100, max_depth=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth

    def fit(self, features, target):
        self.trees = []
        for _ in range(self.n_estimators):
            bootstrap_indices = np.random.choice(len(features), size=len(features), replace=True)
            X_bootstrap = features[bootstrap_indices]
            y_bootstrap = target[bootstrap_indices]
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X_bootstrap, y_bootstrap)
            self.trees.append(tree)

    def predict(self, features):
        predictions = np.zeros((len(features), self.n_estimators))
        for i, tree in enumerate(self.trees):
            predictions[:, i] = tree.predict(features)
        return predictions.mean(axis=1)

# Подготовка категориальных признаков
categorical_features = ['Ship Mode', 'Segment', 'Region', 'Category', 'Sub-Category']
numerical_features = ['Postal Code']

# Преобразование дат с указанием формата
df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d/%m/%Y')
df['Ship Date'] = pd.to_datetime(df['Ship Date'], format='%d/%m/%Y')
df['Ship Days'] = (df['Ship Date'] - df['Order Date']).dt.days

# Создание признаков
X = df[categorical_features + numerical_features + ['Ship Days']].copy()
y = df['Sales'].copy()

# Обработка пропущенных значений в Postal Code
X['Postal Code'] = X['Postal Code'].fillna(X['Postal Code'].mean())

# One-hot кодирование категориальных признаков
ct = ColumnTransformer([
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features)
], remainder='passthrough')

X_encoded = ct.fit_transform(X)

# Разделение данных
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Создание и обучение модели
rf_model = MyRandomForestRegressor(n_estimators=100, max_depth=None)
rf_model.fit(X_train, y_train)

# Предсказания
y_pred = rf_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R²: {r2:.2f}')


MAE: 180.45
MSE: 89234.67
RMSE: 298.72
R²: 0.84


Метрики сильно хуже, чем модель в sklearn. Попробуем добавить с оптимизированными гиперпараметрами

In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import randint

# Подготовка данных
categorical_features = ['Ship Mode', 'Segment', 'Region', 'Category', 'Sub-Category']
numerical_features = ['Postal Code']

# Преобразование дат
df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d/%m/%Y')
df['Ship Date'] = pd.to_datetime(df['Ship Date'], format='%d/%m/%Y')
df['Ship Days'] = (df['Ship Date'] - df['Order Date']).dt.days

X = df[categorical_features + numerical_features + ['Ship Days']].copy()
y = df['Sales'].copy()

# Обработка пропущенных значений
X['Postal Code'] = X['Postal Code'].fillna(X['Postal Code'].mean())

# One-hot кодирование
ct = ColumnTransformer([
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features)
], remainder='passthrough')

X_encoded = ct.fit_transform(X)

# Разделение данных
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Определение параметров для поиска
random_params = {
    'n_estimators': randint(50, 300),
    'max_depth': randint(10, 50),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 10)
}

# Оптимизация гиперпараметров
random_search = RandomizedSearchCV(
    estimator=MyRandomForestRegressor(),
    param_distributions=random_params,
    n_iter=20,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=42,
    n_jobs=-1
)

# Обучение модели
random_search.fit(X_train, y_train)

# Получение лучших параметров и создание модели
best_model = MyRandomForestRegressor(**random_search.best_params_)
best_model.fit(X_train, y_train)

# Предсказания
y_pred = best_model.predict(X_test)

# Расчет и вывод метрик
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'R²: {r2:.2f}')
