In [1]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [2]:
!pip install -U lightautoml

Collecting lightautoml
  Downloading lightautoml-0.3.8.1-py3-none-any.whl.metadata (16 kB)
Collecting autowoe>=1.2 (from lightautoml)
  Downloading AutoWoE-1.3.2-py3-none-any.whl.metadata (2.8 kB)
Collecting catboost>=0.26.1 (from lightautoml)
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting cmaes (from lightautoml)
  Downloading cmaes-0.11.1-py3-none-any.whl.metadata (18 kB)
Collecting joblib<1.3.0 (from lightautoml)
  Downloading joblib-1.2.0-py3-none-any.whl.metadata (5.3 kB)
Collecting json2html (from lightautoml)
  Downloading json2html-1.3.0.tar.gz (7.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting lightgbm<=3.2.1,>=2.3 (from lightautoml)
  Downloading lightgbm-3.2.1-py3-none-manylinux1_x86_64.whl.metadata (14 kB)
Collecting optuna (from lightautoml)
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting pandas<2.0.0 (from lightautoml)
  Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.

In [3]:
import numpy as np
import pandas as pd
import scipy.io as io
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, SGDRegressor, SGDClassifier

In [4]:
dataX = pd.read_csv("/content/gdrive/MyDrive/Colab Notebooks/ML/data1.txt", header=None, usecols=[0], sep=',') #change the path
dataY = pd.read_csv("/content/gdrive/MyDrive/Colab Notebooks/ML/data1.txt", header=None, usecols=[1], sep=',') #change the path
data_X = np.asarray(dataX)
data_Y = np.asarray(dataY)
print(data_Y[:10])

[[17.592 ]
 [ 9.1302]
 [13.662 ]
 [11.854 ]
 [ 6.8233]
 [11.886 ]
 [ 4.3483]
 [12.    ]
 [ 6.5987]
 [ 3.8166]]


In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(data_X, data_Y, test_size=0.3,random_state=0)
print(X_train[:5], "\n", Y_train[:5])

[[8.8254]
 [5.1077]
 [8.5172]
 [5.8707]
 [5.3077]] 
 [[5.1694]
 [2.0576]
 [4.2415]
 [7.2029]
 [1.8396]]


In [6]:
from sklearn import metrics

def calc_metrics(true, pred):
    stats={
   'RMSE': round(np.sqrt(metrics.mean_squared_error(true, pred)), 4),
    'MAE': round(metrics.mean_absolute_error(true, pred),4),
    'R2': round(metrics.r2_score(true, pred),4)
    }
    print(f"RMSE: {stats['RMSE']}\nMAE: {stats['MAE']}\nR2: {stats['R2']}")
    return stats

In [7]:
models_metrics = {}

In [8]:
model_lasso = Lasso()
model_lasso.fit(X_train, Y_train)

y_pred_lasso = model_lasso.predict(X_test)
models_metrics['Lasso'] = calc_metrics(Y_test, y_pred_lasso)

RMSE: 3.0182
MAE: 2.3411
R2: 0.592


In [9]:
model_linear = LinearRegression()
model_linear.fit(X_train, Y_train)

y_pred_linear = model_linear.predict(X_test)
models_metrics['LinearRegression'] = calc_metrics(Y_test, y_pred_linear)

RMSE: 3.0162
MAE: 2.3417
R2: 0.5925


In [10]:
model_ridge = Ridge()
model_ridge.fit(X_train, Y_train)

y_pred_ridge = model_ridge.predict(X_test)
models_metrics['Ridge'] = calc_metrics(Y_test, y_pred_ridge)

RMSE: 3.0162
MAE: 2.3416
R2: 0.5925


Метод наименьших квадратов (Одномерная регрессия) МНК W=〖(X^T∙X+α∙I)〗^(-1)∙X^T∙y, α=0


In [11]:
data = pd.read_csv("/content/gdrive/MyDrive/Colab Notebooks/ML/data1.txt", header=None, sep=',')
X = data.iloc[:, 0].values.reshape(-1, 1)
Y = data.iloc[:, 1].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

X_b_train = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_b_test = np.c_[np.ones((X_test.shape[0], 1)), X_test]

theta_best = np.linalg.inv(X_b_train.T.dot(X_b_train)).dot(X_b_train.T).dot(Y_train)

#print(f"Intercept: {theta_best[0]}, Coef: {theta_best[1]}")


In [12]:
print(X_b_train[:5])

[[1.     8.8254]
 [1.     5.1077]
 [1.     8.5172]
 [1.     5.8707]
 [1.     5.3077]]


In [13]:
Y_pred = X_b_test.dot(theta_best)
#calc_metrics(Y_test, Y_pred)

models_metrics['MNK'] = calc_metrics(Y_test, Y_pred)

RMSE: 3.0162
MAE: 2.3417
R2: 0.5925


Метод наименьших квадратов (Полиномиальная регрессия)


In [14]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn import metrics

data = pd.read_csv("/content/gdrive/MyDrive/Colab Notebooks/ML/data1.txt", header=None)

def toDegree(X, degree): # возведение полнинома в степень
    X_new = np.ones((len(X), 1))  # Создаем первый столбец из единиц (для свободного члена)
    for d in range(1, degree + 1):
        X_new = np.hstack((X_new, X ** d))  # Добавляем столбцы с возведением в степень от 1 до degree
    return X_new

X_train_new = toDegree(X_train, 3)
X_test_new = toDegree(X_test, 3)

print(X_train_new[:5])

[[  1.           8.8254      77.88768516 687.38997661]
 [  1.           5.1077      26.08859929 133.25273859]
 [  1.           8.5172      72.54269584 617.86064901]
 [  1.           5.8707      34.46511849 202.33437112]
 [  1.           5.3077      28.17167929 149.52682217]]


In [15]:
#Метод МНК W=〖(X^T∙X+α∙I)〗^(-1)∙X^T∙y, α=0
X_train_T=X_train_new.transpose()
print(X_train_T.shape,X_train_new.shape)
teta = np.linalg.inv(X_train_T.dot(X_train_new)).dot(X_train_T).dot(Y_train)
print(teta)
y_predict_mnk_poly=X_test_new.dot(teta)
#print(y_predict_mnk)

models_metrics['MNK_poly'] = calc_metrics(Y_test, y_predict_mnk_poly)

(4, 67) (67, 4)
[-1.14339650e+00  6.77564920e-01  1.45971480e-02  3.33712762e-04]
RMSE: 3.2079
MAE: 2.4205
R2: 0.5391


In [16]:
df = pd.DataFrame({'X': X.reshape(-1), 'Y': Y.reshape(-1)})
df.head()

Unnamed: 0,X,Y
0,6.1101,17.592
1,5.5277,9.1302
2,8.5186,13.662
3,7.0032,11.854
4,5.8598,6.8233


In [17]:
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

N_THREADS = 4
N_FOLDS = 5
RANDOM_STATE = 42
TEST_SIZE = 0.3
TIMEOUT = 300
TARGET_NAME = 'Y'

In [18]:
task = Task('reg')

roles = {
    'target': TARGET_NAME,
}

train_data, test_data = train_test_split(
    df,
    test_size=TEST_SIZE,
    #stratify=df[TARGET_NAME],
    random_state=RANDOM_STATE
)

print(f'Data is splitted. Parts sizes: train_data = {train_data.shape}, test_data = {test_data.shape}')

train_data.head()

Data is splitted. Parts sizes: train_data = (67, 2), test_data = (30, 2)


Unnamed: 0,X,Y
95,13.394,9.0551
30,5.2524,-1.22
28,13.176,14.692
87,6.002,0.92695
5,8.3829,11.886


In [19]:
automl = TabularAutoML(
    task = task,
    timeout = TIMEOUT,
    cpu_limit = N_THREADS,
    reader_params = {'n_jobs': N_THREADS, 'cv': N_FOLDS, 'random_state': RANDOM_STATE},
)

In [20]:
%%time
out_of_fold_predictions = automl.fit_predict(train_data, roles = roles, verbose = 1)

[08:24:52] Stdout logging level is INFO.


INFO:lightautoml.automl.presets.base:Stdout logging level is INFO.


[08:24:52] Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer




[08:24:52] Task: reg



INFO:lightautoml.automl.presets.base:Task: reg



[08:24:52] Start automl preset with listed constraints:


INFO:lightautoml.automl.presets.base:Start automl preset with listed constraints:


[08:24:52] - time: 300.00 seconds


INFO:lightautoml.automl.presets.base:- time: 300.00 seconds


[08:24:52] - CPU: 4 cores


INFO:lightautoml.automl.presets.base:- CPU: 4 cores


[08:24:52] - memory: 16 GB



INFO:lightautoml.automl.presets.base:- memory: 16 GB



[08:24:52] [1mTrain data shape: (67, 2)[0m



INFO:lightautoml.reader.base:[1mTrain data shape: (67, 2)[0m

INFO3:lightautoml.reader.base:Feats was rejected during automatic roles guess: []


[08:24:52] Layer [1m1[0m train process start. Time left 299.84 secs


INFO:lightautoml.automl.base:Layer [1m1[0m train process start. Time left 299.84 secs


[08:24:52] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'tol': 1e-06, 'max_iter': 100, 'cs': [1e-05, 5e-05, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000], 'early_stopping': 2, 'categorical_idx': [], 'embed_sizes': (), 'data_size': 1}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m =====
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 1e-05 score = -36.44446209252187
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 5e-05 score = -36.05335319969867
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 0.0001 score = -35.577308719903094
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 0.0005 score = -32.22474545018119
INFO3:lightautoml.ml_algo.torch_based.linear_model:Linear model: C = 0.001 score = -28.9469312941

[08:24:55] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m-8.334612205892775[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m-8.334612205892775[0m


[08:24:55] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed


[08:24:55] Time left 297.28 secs



INFO:lightautoml.automl.base:Time left 297.28 secs

INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 200 rounds
DEBUG:lightautoml.ml_algo.boost_lgbm:[100]	valid's l2: 24.64
DEBUG:lightautoml.ml_algo.boost_lgbm:[200]	valid's l2: 20.7474
DEBUG:lightautoml.ml_algo.boost_lgbm:[300]	valid's l2: 19.9382
DEBUG:lightautoml.ml_algo.boost_lgbm:[400]	valid's l2: 19.7017
DEBUG:lightautoml.ml_algo.boost_lgbm:[500]	valid's l2: 19.7648
DEBUG:lightautoml.ml_algo.boost_lgbm:Early stopping, best iteration is:
[381]	valid's l2: 19.685


[08:24:55] [1mSelector_LightGBM[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mSelector_LightGBM[0m fitting and predicting completed


[08:24:55] Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task': 'train', 'learning_rate': 0.01, 'num_leaves': 16, 'feature_fraction': 0.9, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'max_depth': -1, 'verbosity': -1, 'reg_alpha': 1, 'reg_lambda': 0.0, 'min_split_gain': 0.0, 'zero_as_missing': False, 'num_threads': 2, 'max_bin': 255, 'min_data_in_bin': 3, 'num_trees': 3000, 'early_stopping_rounds': 200, 'random_state': 42}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m =====
INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 200 rounds
DEBUG:lightautoml.ml_algo.boost_lgbm:[100]	valid's l2: 24.64
DEBUG:lightautoml.ml_algo.boost_lgbm:[200]	valid's l2: 20.7474
DEBUG:lightautoml.ml_algo.boost_lgbm:[300]	valid's l2: 19.9382
DEBUG:lightautoml.ml_algo.boost_lgbm:[400]	valid's l2: 19.7017
DEBUG:lightautoml.ml_algo.boost

[08:24:58] Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m-20.149283752816427[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m-20.149283752816427[0m


[08:24:58] [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_0_LightGBM[0m fitting and predicting completed


[08:24:58] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ... Time budget is 35.57 secs


INFO:lightautoml.ml_algo.tuning.optuna:Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ... Time budget is 35.57 secs
INFO:optuna.storages._in_memory:A new study created in memory with name: no-name-c6d34a0a-81cf-44eb-8131-07a7ec0b1208
INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 200 rounds
DEBUG:lightautoml.ml_algo.boost_lgbm:[100]	valid's l2: 24.7405
DEBUG:lightautoml.ml_algo.boost_lgbm:[200]	valid's l2: 21.9329
DEBUG:lightautoml.ml_algo.boost_lgbm:[300]	valid's l2: 20.9544
DEBUG:lightautoml.ml_algo.boost_lgbm:[400]	valid's l2: 20.6924
DEBUG:lightautoml.ml_algo.boost_lgbm:[500]	valid's l2: 20.7855
DEBUG:lightautoml.ml_algo.boost_lgbm:[600]	valid's l2: 20.6584
DEBUG:lightautoml.ml_algo.boost_lgbm:[700]	valid's l2: 20.7357
DEBUG:lightautoml.ml_algo.boost_lgbm:Early stopping, best iteration is:
[579]	valid's l2: 20.5994
INFO:optuna.study.study:Trial 0 finished with value: -20.6207793438078 and parameters: {'featu

[08:25:34] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m completed


INFO:lightautoml.ml_algo.tuning.optuna:Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m completed
INFO2:lightautoml.ml_algo.tuning.optuna:The set of hyperparameters [1m{'feature_fraction': 0.865455651151708, 'num_leaves': 116, 'bagging_fraction': 0.9037367221438722, 'min_sum_hessian_in_leaf': 0.024578739597451413, 'reg_alpha': 0.15203858593612088, 'reg_lambda': 1.036963650861359e-06}[0m
 achieve -19.6760 mse


[08:25:34] Start fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task': 'train', 'learning_rate': 0.05, 'num_leaves': 116, 'feature_fraction': 0.865455651151708, 'bagging_fraction': 0.9037367221438722, 'bagging_freq': 1, 'max_depth': -1, 'verbosity': -1, 'reg_alpha': 0.15203858593612088, 'reg_lambda': 1.036963650861359e-06, 'min_split_gain': 0.0, 'zero_as_missing': False, 'num_threads': 2, 'max_bin': 255, 'min_data_in_bin': 3, 'num_trees': 3000, 'early_stopping_rounds': 100, 'random_state': 42, 'min_sum_hessian_in_leaf': 0.024578739597451413}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m =====
INFO3:lightautoml.ml_algo.boost_lgbm:Training until validation scores don't improve for 100 rounds
DEBUG:lightautoml.ml_algo.boost_lgbm:[100]	valid's l2: 20.4175
DEBUG:lightautoml.ml_algo.boost_lgbm:[200]	valid's l2: 20.0515
DEBUG:lightautoml.ml_algo.bo

[08:25:35] Fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m finished. score = [1m-20.283294245031588[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m finished. score = [1m-20.283294245031588[0m


[08:25:35] [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m fitting and predicting completed


[08:25:35] Start fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task_type': 'CPU', 'thread_count': 2, 'random_seed': 42, 'num_trees': 2000, 'learning_rate': 0.05, 'l2_leaf_reg': 0.01, 'bootstrap_type': 'Bernoulli', 'grow_policy': 'SymmetricTree', 'max_depth': 5, 'min_data_in_leaf': 1, 'one_hot_max_size': 10, 'fold_permutation_block': 1, 'boosting_type': 'Plain', 'boost_from_average': True, 'od_type': 'Iter', 'od_wait': 300, 'max_bin': 32, 'feature_border_type': 'GreedyLogSum', 'nan_mode': 'Min', 'verbose': 100, 'allow_writing_files': False}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m =====
INFO3:lightautoml.ml_algo.boost_cb:0:	learn: 5.2747208	test: 5.8601133	best: 5.8601133 (0)	total: 48.3ms	remaining: 1m 36s
DEBUG:lightautoml.ml_algo.boost_cb:100:	learn: 2.1985471	test: 3.9434678	best: 3.6893830 (49)	total: 62.2ms	remaining: 1.17s
DEBUG:lightautoml.

[08:25:36] Fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m finished. score = [1m-11.035201398822274[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m finished. score = [1m-11.035201398822274[0m


[08:25:36] [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_2_CatBoost[0m fitting and predicting completed


[08:25:36] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ... Time budget is 190.70 secs


INFO:lightautoml.ml_algo.tuning.optuna:Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ... Time budget is 190.70 secs
INFO:optuna.storages._in_memory:A new study created in memory with name: no-name-d4e2f7e5-f71a-48aa-b712-91b42c0f1632
INFO3:lightautoml.ml_algo.boost_cb:0:	learn: 5.3506953	test: 5.9322212	best: 5.9322212 (0)	total: 2.11ms	remaining: 4.21s
DEBUG:lightautoml.ml_algo.boost_cb:100:	learn: 2.6947538	test: 3.5745575	best: 3.5745575 (100)	total: 22ms	remaining: 414ms
DEBUG:lightautoml.ml_algo.boost_cb:200:	learn: 2.3349674	test: 3.7451184	best: 3.5477215 (128)	total: 40.3ms	remaining: 360ms
DEBUG:lightautoml.ml_algo.boost_cb:300:	learn: 2.0974448	test: 3.9897529	best: 3.5477215 (128)	total: 59.8ms	remaining: 337ms
DEBUG:lightautoml.ml_algo.boost_cb:400:	learn: 1.9983347	test: 4.0942116	best: 3.5477215 (128)	total: 74.3ms	remaining: 296ms
INFO3:lightautoml.ml_algo.boost_cb:Stopped by overfitting detector  (300 iterations wait)
INFO3:lightautoml

[08:25:52] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m completed


INFO:lightautoml.ml_algo.tuning.optuna:Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m completed
INFO2:lightautoml.ml_algo.tuning.optuna:The set of hyperparameters [1m{'max_depth': 3, 'l2_leaf_reg': 1.0639538312722558, 'min_data_in_leaf': 14}[0m
 achieve -11.8472 mse


[08:25:52] Start fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ...


INFO:lightautoml.ml_algo.base:Start fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ...
DEBUG:lightautoml.ml_algo.base:Training params: {'task_type': 'CPU', 'thread_count': 2, 'random_seed': 42, 'num_trees': 3000, 'learning_rate': 0.03, 'l2_leaf_reg': 1.0639538312722558, 'bootstrap_type': 'Bernoulli', 'grow_policy': 'SymmetricTree', 'max_depth': 3, 'min_data_in_leaf': 14, 'one_hot_max_size': 10, 'fold_permutation_block': 1, 'boosting_type': 'Plain', 'boost_from_average': True, 'od_type': 'Iter', 'od_wait': 100, 'max_bin': 32, 'feature_border_type': 'GreedyLogSum', 'nan_mode': 'Min', 'verbose': 100, 'allow_writing_files': False}
INFO2:lightautoml.ml_algo.base:===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m =====
INFO3:lightautoml.ml_algo.boost_cb:0:	learn: 5.3652716	test: 5.9512749	best: 5.9512749 (0)	total: 1.46ms	remaining: 4.39s
DEBUG:lightautoml.ml_algo.boost_cb:100:	learn: 2.8074271	test: 3.5608523	best: 3.5595629 (97)	total: 20.3ms	remaining

[08:25:52] Fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m finished. score = [1m-10.414642052987574[0m


INFO:lightautoml.ml_algo.base:Fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m finished. score = [1m-10.414642052987574[0m


[08:25:52] [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m fitting and predicting completed


INFO:lightautoml.ml_algo.base:[1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m fitting and predicting completed


[08:25:52] Time left 239.84 secs



INFO:lightautoml.automl.base:Time left 239.84 secs



[08:25:52] [1mLayer 1 training completed.[0m



INFO:lightautoml.automl.base:[1mLayer 1 training completed.[0m



[08:25:52] Blending: optimization starts with equal weights and score [1m-11.722005223548043[0m


INFO:lightautoml.automl.blend:Blending: optimization starts with equal weights and score [1m-11.722005223548043[0m


[08:25:52] Blending: iteration [1m0[0m: score = [1m-8.334612205892775[0m, weights = [1m[1. 0. 0. 0. 0.][0m


INFO:lightautoml.automl.blend:Blending: iteration [1m0[0m: score = [1m-8.334612205892775[0m, weights = [1m[1. 0. 0. 0. 0.][0m


[08:25:53] Blending: iteration [1m1[0m: score = [1m-8.334612205892775[0m, weights = [1m[1. 0. 0. 0. 0.][0m


INFO:lightautoml.automl.blend:Blending: iteration [1m1[0m: score = [1m-8.334612205892775[0m, weights = [1m[1. 0. 0. 0. 0.][0m


[08:25:53] Blending: no score update. Terminated



INFO:lightautoml.automl.blend:Blending: no score update. Terminated



[08:25:53] [1mAutoml preset training completed in 60.34 seconds[0m



INFO:lightautoml.automl.presets.base:[1mAutoml preset training completed in 60.34 seconds[0m



[08:25:53] Model description:
Final prediction for new objects (level 0) = 
	 1.00000 * (5 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2) 



INFO:lightautoml.automl.presets.base:Model description:
Final prediction for new objects (level 0) = 
	 1.00000 * (5 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2) 



CPU times: user 47.6 s, sys: 2.02 s, total: 49.6 s
Wall time: 1min


In [21]:
test_predictions = automl.predict(test_data)

In [24]:
models_metrics['LAMA'] = calc_metrics(test_data[TARGET_NAME].values, test_predictions.data[:, 0])

RMSE: 3.2859
MAE: 2.1056
R2: 0.6076


In [25]:
model_cls_compare = pd.DataFrame(models_metrics).T
model_cls_compare

Unnamed: 0,RMSE,MAE,R2
Lasso,3.0182,2.3411,0.592
LinearRegression,3.0162,2.3417,0.5925
Ridge,3.0162,2.3416,0.5925
MNK,3.0162,2.3417,0.5925
MNK_poly,3.2079,2.4205,0.5391
LAMA,3.2859,2.1056,0.6076
