In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier, TPOTRegressor

# load_boston was removed in scikit-learn 1.2. Keep the name importable on older
# releases without letting the whole import cell fail on newer ones.
try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None

# Classification example
# ----------------------
# Load the iris data and hold out 20% of the rows for the final evaluation.
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Configure the TPOT search for classification.
tpot_clf = TPOTClassifier(
    generations=10,          # number of generations of the evolutionary search
    population_size=50,      # number of pipelines kept in every generation
    cv=5,                    # cross-validation folds used to score each pipeline
    random_state=42,
    verbosity=2,
    n_jobs=-1,               # use every CPU core
    # Iris is a dense array, so the default search space (None) is used here.
    # Built-in presets are 'TPOT light', 'TPOT MDR' and 'TPOT sparse'; the
    # sparse preset is intended for scipy.sparse feature matrices.
    config_dict=None,
    warm_start=True          # a later fit() call continues from the last population
)

# Run the search on the training split.
tpot_clf.fit(X_train, y_train)

# Evaluate the best pipeline on the held-out split.
y_pred = tpot_clf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

# Export Python code for the best pipeline.
# NOTE: the exported file is a stand-alone template script that reads its data
# from a placeholder path, so it has to be edited before it can be run and it
# cannot be imported as a module. Use tpot_clf.fitted_pipeline_ to reuse the
# pipeline inside this notebook.
tpot_clf.export('tpot_iris_pipeline.py')

# Regression example
# ------------------
# Load the California housing data. The regression variables carry a `_reg`
# suffix so they do not overwrite the classification split (X_train, y_train,
# X_test, y_test, y_pred) that later classification cells read.
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
X_reg, y_reg = housing.data, housing.target
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Configure the TPOT search for regression.
tpot_reg = TPOTRegressor(
    generations=10,
    population_size=50,
    cv=5,
    random_state=42,
    verbosity=2,
    n_jobs=-1,
    scoring='neg_mean_squared_error'  # metric optimised during the search
)

# Run the search on the training split.
tpot_reg.fit(X_train_reg, y_train_reg)

# Evaluate the best pipeline on the held-out split.
y_pred_reg = tpot_reg.predict(X_test_reg)
print(f"RMSE: {np.sqrt(mean_squared_error(y_test_reg, y_pred_reg))}")

# Export Python code for the best pipeline.
tpot_reg.export('tpot_housing_pipeline.py')

In [None]:
from tpot.config import classifier_config_dict

# Custom TPOT configuration dictionary: maps the import path of every operator
# the search may use to the hyper-parameter values it may try for it.
# An empty dict means the operator is used with its default parameters only.
custom_config = {
    'sklearn.ensemble.RandomForestClassifier': {
        'n_estimators': [100, 200, 500],
        # 'auto' is intentionally absent: for classifiers it was an alias of
        # 'sqrt', deprecated in scikit-learn 1.1 and removed in 1.3, where it
        # makes every pipeline that samples it fail.
        'max_features': ['sqrt', 'log2'],
        'max_depth': [None, 5, 10, 15, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    },
    'sklearn.ensemble.GradientBoostingClassifier': {
        'n_estimators': [100, 200, 500],
        'learning_rate': [0.01, 0.05, 0.1, 0.5],
        'max_depth': [3, 5, 10],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'subsample': [0.6, 0.8, 1.0]
    },
    'sklearn.preprocessing.StandardScaler': {},
    'sklearn.preprocessing.RobustScaler': {},
    'sklearn.decomposition.PCA': {
        # Floats in (0, 1) are the fraction of variance PCA should retain.
        'n_components': [0.85, 0.9, 0.95]
    }
}

# TPOT classifier whose search is restricted to the operators and
# hyper-parameter values listed in custom_config.
search_settings = dict(generations=10, population_size=50, cv=5, random_state=42)
tpot_clf = TPOTClassifier(config_dict=custom_config, **search_settings)

In [None]:
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

# Preprocessing for imbalanced classes: standardise the features, then let
# SMOTE synthesise extra samples for the minority classes.
preprocessor = ImbPipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42))
])

# SMOTE needs discrete class labels, so build the classification split here
# instead of relying on whichever X_train / y_train an earlier cell left in
# the kernel (the regression example above reuses those names).
iris_smote = load_iris()
X_train_smote, X_test_smote, y_train_smote, y_test_smote = train_test_split(
    iris_smote.data, iris_smote.target, test_size=0.2, random_state=42
)

# Apply the preprocessing to the training split only.
X_train_resampled, y_train_resampled = preprocessor.fit_resample(X_train_smote, y_train_smote)

# Then run TPOT on the scaled, resampled data.
# NOTE: the model is trained on scaled features, so the held-out rows must go
# through the same fitted scaler before tpot_clf.predict() is called on them.
tpot_clf = TPOTClassifier(generations=10, population_size=50, cv=5, random_state=42)
tpot_clf.fit(X_train_resampled, y_train_resampled)

In [None]:
import xgboost as xgb
from tpot.builtins import StackingEstimator
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier

# Reuse the best pipeline found by TPOT.
# The file written by export() is a template script that loads data from a
# placeholder path, so importing it fails. Take an unfitted copy of the
# in-memory pipeline from the most recently fitted tpot_clf instead.
from sklearn.base import clone

if not hasattr(tpot_clf, 'fitted_pipeline_'):
    raise RuntimeError(
        "tpot_clf has no fitted pipeline yet; run a cell that calls tpot_clf.fit() first."
    )
exported_pipeline = clone(tpot_clf.fitted_pipeline_)

# Build the classification split explicitly so this cell does not depend on
# whichever X_train / y_train an earlier cell left in the kernel.
iris_ens = load_iris()
X_train_ens, X_test_ens, y_train_ens, y_test_ens = train_test_split(
    iris_ens.data, iris_ens.target, test_size=0.2, random_state=42
)

# Ensemble of the TPOT pipeline and XGBoost.
xgb_model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1)
# Soft voting averages predict_proba outputs; fall back to hard voting when the
# TPOT pipeline ends in an estimator without predict_proba (e.g. LinearSVC).
voting_mode = 'soft' if hasattr(exported_pipeline, 'predict_proba') else 'hard'
ensemble = VotingClassifier([
    ('tpot', exported_pipeline),
    ('xgb', xgb_model)
], voting=voting_mode)

ensemble.fit(X_train_ens, y_train_ens)
y_pred = ensemble.predict(X_test_ens)
print(f"Ensemble Accuracy: {accuracy_score(y_test_ens, y_pred)}")