In [1]:
import os
os.chdir("..")

In [11]:
import pandas as pd
import datetime
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_log_error
import argparse
import json
import numpy as np
import optuna
from utils import load_datasets, load_target, evaluate_score
from models import LightGBM, LinearRegressionWrapper, LassoWrapper, RidgeWrapper, ElasticNetWrapper, KernelRidgeWrapper, SVRWrapper



In [6]:

def train_and_predict_lightgbm(X_train_all, y_train_all, X_test, config):
    params = config['params']
    # 学習前にy_trainに、log(y+1)で変換
    y_train_all = np.log(y_train_all + 1)  # np.log1p() でもOK

    y_preds = []
    models = []
    kf = KFold(n_splits=5)
    for train_index, valid_index in kf.split(X_train_all):
        X_train, X_valid = (X_train_all.iloc[train_index, :], X_train_all.iloc[valid_index, :])
        y_train, y_valid = (y_train_all.iloc[train_index], y_train_all.iloc[valid_index])

        # lgbmの実行
        lgbm = LightGBM()
        y_pred, y_valid_pred, model = lgbm.train_and_predict(X_train, X_valid, y_train, y_valid, X_test, params)

        # 結果の保存
        y_preds.append(y_pred)
        models.append(model)

    # CVスコア
    scores = [
        m.best_score['valid_0'][config['loss']] for m in models
    ]
    score = sum(scores) / len(scores)
    return score

def train_and_predict_linear(X_train_all, y_train_all, X_test, config):

    params = config['params']
    
    # 学習前にy_trainに、log(y+1)で変換
    y_train_all = np.log(y_train_all + 1)  # np.log1p() でもOK

    y_preds = []
    scores = []  # CVスコア
    kf = KFold(n_splits=5)
    for train_index, valid_index in kf.split(X_train_all):
        X_train, X_valid = (X_train_all.iloc[train_index, :], X_train_all.iloc[valid_index, :])
        y_train, y_valid = (y_train_all.iloc[train_index], y_train_all.iloc[valid_index])

        if config['model'] == "LinearRegression":
            lr = LinearRegressionWrapper()
        elif config['model'] == "Lasso":
            lr = LassoWrapper()
        elif config['model'] == "Ridge":
            lr = RidgeWrapper()
        elif config['model'] == "ErasticNet":
            lr = ElasticNetWrapper()
        elif config['model'] == "KernelRidge":
            lr = KernelRidgeWrapper()
        elif config['model'] == "SVR":
            lr = SVRWrapper()

        y_pred, y_valid_pred, m = lr.train_and_predict(X_train, X_valid, y_train, y_valid, X_test, params)

        # 結果の保存
        y_preds.append(y_pred)

        # スコア
        rmse_valid = evaluate_score(y_valid, y_valid_pred, config['loss'])
        scores.append(rmse_valid)

    score = sum(scores) / len(scores)

    return score


# Lasso

In [44]:


# 1. Define an objective function to be maximized.
def objective(trial):
    config = {
            "model": "Ridge",
            "features": [
                "numerical_features",
                "object_features"
                ],
            "params": {
                "alpha" : trial.suggest_loguniform('alpha', 1e-15, 100)

            },
            "loss": "rmse",
            "target_name": "SalePrice",
            "ID_name": "Id"
    }
    feats = config['features']
    target_name = config['target_name']

    # 指定した特徴量からデータをロード
    X_train_all, X_test = load_datasets(feats)
    y_train_all = load_target(target_name)
    if config['model'] == 'LightGBM':
        return train_and_predict_lightgbm(X_train_all, y_train_all, X_test, config)
    elif config['model'] in ['LinearRegression', 'Lasso', 'Ridge', 'ErasticNet', 'KernelRidge']:
        return train_and_predict_linear(X_train_all, y_train_all, X_test, config)
        

In [42]:

# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[32m[I 2020-11-21 08:28:42,430][0m A new study created in memory with name: no-name-a295d735-f9f7-49f7-b82e-9671bb1a53e5[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:28:44,427][0m Trial 0 finished with value: 0.11431364183913868 and parameters: {'alpha': 1.0103501313444229e-10}. Best is trial 0 with value: 0.11431364183913868.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:28:46,270][0m Trial 1 finished with value: 0.11431472976834649 and parameters: {'alpha': 3.395782440540975e-10}. Best is trial 0 with value: 0.11431364183913868.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:28:47,978][0m Trial 2 finished with value: 0.11431342207403743 and parameters: {'alpha': 5.7319626947166656e-11}. Best is trial 2 with value: 0.11431342207403743.[0m
[32m[I 2020-11-21 08:28:48,593][0m Trial 3 finished with value: 0.3967191668696852 and parameters: {'alpha': 1.08927806359

  positive)
  positive)
[32m[I 2020-11-21 08:28:55,241][0m Trial 7 finished with value: 0.11431315918329055 and parameters: {'alpha': 5.009946686486284e-12}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:28:57,427][0m Trial 8 finished with value: 0.11426218488924353 and parameters: {'alpha': 1.6205922309720042e-07}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:28:58,176][0m Trial 9 finished with value: 0.3472626001297029 and parameters: {'alpha': 0.2572851326815849}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:28:59,814][0m Trial 10 finished with value: 0.11431313403531178 and parameters: {'alpha': 5.1310584388935986e-15}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
[32m[I 2020-11-21 08:29:00,865][0m Trial 11 finished with value: 0.10064118805770723 an

  positive)
[32m[I 2020-11-21 08:29:07,558][0m Trial 17 finished with value: 0.11254720312435854 and parameters: {'alpha': 7.947506071402209e-06}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:09,461][0m Trial 18 finished with value: 0.11431201365115484 and parameters: {'alpha': 7.965372663396136e-09}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:10,139][0m Trial 19 finished with value: 0.1293432403785495 and parameters: {'alpha': 0.016050887880921808}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:11,690][0m Trial 20 finished with value: 0.1131826327481404 and parameters: {'alpha': 4.7685321036575875e-06}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:12,705][0m Trial 21 finished with value: 0.10209931402203976 and parameters: {'alpha

[32m[I 2020-11-21 08:29:20,043][0m Trial 27 finished with value: 0.10252199876489532 and parameters: {'alpha': 0.000995306009463438}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:20,680][0m Trial 28 finished with value: 0.30885958674621045 and parameters: {'alpha': 0.17376495523733165}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:22,540][0m Trial 29 finished with value: 0.107106132358967 and parameters: {'alpha': 5.673456460535539e-05}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:24,718][0m Trial 30 finished with value: 0.11430798929893374 and parameters: {'alpha': 1.9217075650262568e-08}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:25,837][0m Trial 31 finished with value: 0.10063028265355216 and parameters: {'alpha': 0.0005363074887711287}.

  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:34,536][0m Trial 38 finished with value: 0.11404035657070762 and parameters: {'alpha': 8.897474302260418e-07}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:35,243][0m Trial 39 finished with value: 0.21513862932172073 and parameters: {'alpha': 0.09590147051962639}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:37,045][0m Trial 40 finished with value: 0.114277371563317 and parameters: {'alpha': 1.1584992969910107e-07}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:37,988][0m Trial 41 finished with value: 0.10243808259333612 and parameters: {'alpha': 0.0009833460823740862}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
[32m[I 2020-11-21 08:29:39,198][0m Trial 42 finished with value: 0.10095989594748203 and parameters: {'alpha': 0.0002833620092322921}

  positive)
  positive)
[32m[I 2020-11-21 08:29:47,387][0m Trial 49 finished with value: 0.1136703523459123 and parameters: {'alpha': 2.470608520066647e-06}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:48,963][0m Trial 50 finished with value: 0.11431454140482525 and parameters: {'alpha': 9.065002740389775e-10}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:50,339][0m Trial 51 finished with value: 0.10318302064532173 and parameters: {'alpha': 0.00014202558420212978}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:51,128][0m Trial 52 finished with value: 0.10867241220074841 and parameters: {'alpha': 0.0020455499192649036}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:29:51,753][0m Trial 53 finished with value: 0.13006407306412743 and parameters: {'alpha': 0.01674

  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:29:58,573][0m Trial 57 finished with value: 0.11431314058578039 and parameters: {'alpha': 1.3087851027378585e-12}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
[32m[I 2020-11-21 08:29:59,877][0m Trial 58 finished with value: 0.10086537693903366 and parameters: {'alpha': 0.00029963910735065634}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:00,627][0m Trial 59 finished with value: 0.12182472142218717 and parameters: {'alpha': 0.008280487561937824}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:30:02,328][0m Trial 60 finished with value: 0.10650236817812916 and parameters: {'alpha': 6.53377369518323e-05}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
[32m[I 2020-11-21 08:30:03,757][0m Trial 61 finished with value: 0.10107377707806584 and parameters: {'alpha': 0.00026738272800728

  positive)
  positive)
[32m[I 2020-11-21 08:30:12,088][0m Trial 68 finished with value: 0.11349937930207812 and parameters: {'alpha': 3.246350692826362e-06}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:13,143][0m Trial 69 finished with value: 0.10061424593603978 and parameters: {'alpha': 0.00046350741565734924}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:14,036][0m Trial 70 finished with value: 0.10833000429999291 and parameters: {'alpha': 0.0019630910508601873}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:15,026][0m Trial 71 finished with value: 0.10063123412909132 and parameters: {'alpha': 0.0005375810910167195}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:15,958][0m Trial 72 finished with value: 0.10080006026799096 and parameters: {'alpha': 0.0006506856521928056}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:

  positive)
[32m[I 2020-11-21 08:30:28,891][0m Trial 83 finished with value: 0.11269575076653093 and parameters: {'alpha': 7.156580412004382e-06}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:29,647][0m Trial 84 finished with value: 0.10968713438819633 and parameters: {'alpha': 0.002277491570008347}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:30:31,217][0m Trial 85 finished with value: 0.114200440329565 and parameters: {'alpha': 3.5718898153915515e-07}. Best is trial 4 with value: 0.10061349198263705.[0m
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:30:32,619][0m Trial 86 finished with value: 0.1036052428131257 and parameters: {'alpha': 0.00012902975990090754}. Best is trial 4 with value: 0.10061349198263705.[0m
[32m[I 2020-11-21 08:30:33,580][0m Trial 87 finished with value: 0.10063893807491295 and parameters: {'alpha': 0.0005479028086906424

  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:30:44,071][0m Trial 95 finished with value: 0.11386496749909683 and parameters: {'alpha': 1.5740899554757206e-06}. Best is trial 93 with value: 0.10061286670395464.[0m
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:30:45,726][0m Trial 96 finished with value: 0.10364061160405917 and parameters: {'alpha': 0.00012804192820727598}. Best is trial 93 with value: 0.10061286670395464.[0m
[32m[I 2020-11-21 08:30:46,462][0m Trial 97 finished with value: 0.1210786040468618 and parameters: {'alpha': 0.007432578904891715}. Best is trial 93 with value: 0.10061286670395464.[0m
[32m[I 2020-11-21 08:30:47,119][0m Trial 98 finished with value: 0.26039123964346594 and parameters: {'alpha': 0.13242859937378226}. Best is trial 93 with value: 0.10061286670395464.[0m
  positive)
[32m[I 2020-11-21 08:30:48,314][0m Trial 99 finished with value: 0.10061302753272879 and parameters: {'alpha': 0.00045321780030038093}. Best

In [43]:
study.best_trial

FrozenTrial(number=93, value=0.10061286670395464, datetime_start=datetime.datetime(2020, 11, 21, 8, 30, 40, 413833), datetime_complete=datetime.datetime(2020, 11, 21, 8, 30, 41, 572286), params={'alpha': 0.00044227928994987245}, distributions={'alpha': LogUniformDistribution(high=10, low=1e-15)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=93, state=TrialState.COMPLETE)

# Ridge

In [46]:

# 1. Define an objective function to be maximized.
def objective(trial):
    config = {
            "model": "Ridge",
            "features": [
                "numerical_features",
                "object_features"
                ],
            "params": {
                "alpha" : trial.suggest_loguniform('alpha', 1e-15, 100)

            },
            "loss": "rmse",
            "target_name": "SalePrice",
            "ID_name": "Id"
    }
    feats = config['features']
    target_name = config['target_name']

    # 指定した特徴量からデータをロード
    X_train_all, X_test = load_datasets(feats)
    y_train_all = load_target(target_name)
    if config['model'] == 'LightGBM':
        return train_and_predict_lightgbm(X_train_all, y_train_all, X_test, config)
    elif config['model'] in ['LinearRegression', 'Lasso', 'Ridge', 'ErasticNet', 'KernelRidge']:
        return train_and_predict_linear(X_train_all, y_train_all, X_test, config)

# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

In [48]:
study.best_trial

FrozenTrial(number=96, value=0.1033346156851342, datetime_start=datetime.datetime(2020, 11, 21, 8, 34, 33, 890435), datetime_complete=datetime.datetime(2020, 11, 21, 8, 34, 34, 534665), params={'alpha': 13.97332056206028}, distributions={'alpha': LogUniformDistribution(high=100, low=1e-15)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=96, state=TrialState.COMPLETE)

# Elastic Net

In [52]:

# 1. Define an objective function to be maximized.
def objective(trial):
    config = {
            "model": "ErasticNet",
            "features": [
                "numerical_features",
                "object_features"
                ],
            "params": {
                "alpha" : trial.suggest_loguniform('alpha', 1e-15, 100),
                "l1_ratio": trial.suggest_loguniform('l1_ratio', 1e-5, 10)

            },
            "loss": "rmse",
            "target_name": "SalePrice",
            "ID_name": "Id"
    }

    feats = config['features']
    target_name = config['target_name']
    # 指定した特徴量からデータをロード
    X_train_all, X_test = load_datasets(feats)
    y_train_all = load_target(target_name)
    if config['model'] == 'LightGBM':
        return train_and_predict_lightgbm(X_train_all, y_train_all, X_test, config)
    elif config['model'] in ['LinearRegression', 'Lasso', 'Ridge', 'ErasticNet', 'KernelRidge']:
        return train_and_predict_linear(X_train_all, y_train_all, X_test, config)

# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[32m[I 2020-11-21 08:40:06,933][0m A new study created in memory with name: no-name-ccaa9f2c-84e2-45f1-82c3-6cd8ba85800f[0m
[32m[I 2020-11-21 08:40:07,705][0m Trial 0 finished with value: 0.2709490801822532 and parameters: {'alpha': 12.243571498341566, 'l1_ratio': 1.3028575501954168e-05}. Best is trial 0 with value: 0.2709490801822532.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:09,613][0m Trial 1 finished with value: 0.11431322658854157 and parameters: {'alpha': 1.5423398985368704e-09, 'l1_ratio': 0.00013929759884353268}. Best is trial 1 with value: 0.11431322658854157.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:12,026][0m Trial 2 finished with value: 0.11431319920978836 and parameters: {'alpha': 6.211674185606148e-10, 'l1_ratio': 0.009173405927473467}. Best is trial 2 with value: 0.11431319920978836.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40

  positive)
[32m[I 2020-11-21 08:40:18,565][0m Trial 6 finished with value: 0.11431313445054248 and parameters: {'alpha': 2.655700578155715e-13, 'l1_ratio': 0.3224682005248994}. Best is trial 6 with value: 0.11431313445054248.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:20,647][0m Trial 7 finished with value: 0.1143157574201114 and parameters: {'alpha': 4.186666790032987e-08, 'l1_ratio': 0.03886245471308523}. Best is trial 6 with value: 0.11431313445054248.[0m
[32m[I 2020-11-21 08:40:22,064][0m Trial 8 finished with value: 0.10637028986167682 and parameters: {'alpha': 0.06828117942249066, 'l1_ratio': 4.662125221811583e-05}. Best is trial 8 with value: 0.10637028986167682.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:24,269][0m Trial 9 finished with value: 0.11245158417925785 and parameters: {'alpha': 6.0725841174124345e-05, 'l1_ratio': 0.031166755200855212}. Best is trial 8 with value: 0.106

  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:32,510][0m Trial 14 finished with value: 0.11418893122571669 and parameters: {'alpha': 3.7065268595574312e-06, 'l1_ratio': 2.7840689630309023e-05}. Best is trial 12 with value: 0.10337344419791691.[0m
[32m[I 2020-11-21 08:40:33,732][0m Trial 15 finished with value: 0.10381582885920995 and parameters: {'alpha': 0.026260045329826294, 'l1_ratio': 0.0009547186801016565}. Best is trial 12 with value: 0.10337344419791691.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:35,633][0m Trial 16 finished with value: 0.1143131340095963 and parameters: {'alpha': 1.0037830767239888e-15, 'l1_ratio': 0.001354681759637507}. Best is trial 12 with value: 0.10337344419791691.[0m
[32m[I 2020-11-21 08:40:36,491][0m Trial 17 finished with value: 0.10605435125222354 and parameters: {'alpha': 0.06012452026649422, 'l1_ratio': 0.0015379867253679839}. Best is trial 12 with value: 0.10337344419791691.[0m
[

  positive)
[32m[I 2020-11-21 08:40:45,432][0m Trial 23 finished with value: 0.11309853591006125 and parameters: {'alpha': 4.266061037183296e-05, 'l1_ratio': 0.0022779737114705613}. Best is trial 12 with value: 0.10337344419791691.[0m
[32m[I 2020-11-21 08:40:46,614][0m Trial 24 finished with value: 0.10333330942652734 and parameters: {'alpha': 0.0151340814909565, 'l1_ratio': 0.0005057299328132176}. Best is trial 24 with value: 0.10333330942652734.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:48,463][0m Trial 25 finished with value: 0.11337595392130295 and parameters: {'alpha': 3.077218843776124e-05, 'l1_ratio': 0.0003969234135613931}. Best is trial 24 with value: 0.10333330942652734.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:40:50,241][0m Trial 26 finished with value: 0.1143161127653108 and parameters: {'alpha': 5.198645060228494e-08, 'l1_ratio': 0.005508295586591925}. Best is trial 24 with v

[32m[I 2020-11-21 08:41:00,770][0m Trial 34 finished with value: 0.11997678038815718 and parameters: {'alpha': 0.21269472378625584, 'l1_ratio': 0.012763729916737037}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:01,812][0m Trial 35 finished with value: 0.10722931834497292 and parameters: {'alpha': 0.01185271583005807, 'l1_ratio': 0.13042988241096024}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:41:04,132][0m Trial 36 finished with value: 0.11396290475877065 and parameters: {'alpha': 9.487631198984934e-06, 'l1_ratio': 0.0034744312346151975}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:41:06,155][0m Trial 37 finished with value: 0.10892158188757436 and parameters: {'alpha': 0.0004192213365540012, 'l1_ratio': 0.012656282380677165}. Best is trial 32 with value: 0.1032300

  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:41:16,852][0m Trial 45 finished with value: 0.11007815012508744 and parameters: {'alpha': 0.00029530502319696913, 'l1_ratio': 0.002572030000924523}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:18,337][0m Trial 46 finished with value: 0.10341950991510071 and parameters: {'alpha': 0.018246565204798705, 'l1_ratio': 0.0007406453102314456}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:19,025][0m Trial 47 finished with value: 0.3149229597128955 and parameters: {'alpha': 24.03106988237258, 'l1_ratio': 2.1902508986558814e-05}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:19,942][0m Trial 48 finished with value: 0.10653979477755918 and parameters: {'alpha': 0.07071566775682818, 'l1_ratio': 0.00021741333870147842}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  posi

  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:41:30,510][0m Trial 55 finished with value: 0.11388175744601021 and parameters: {'alpha': 1.2071286419534137e-05, 'l1_ratio': 0.00026952804133522506}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:31,120][0m Trial 56 finished with value: 0.18284369708669765 and parameters: {'alpha': 2.3045226747842302, 'l1_ratio': 0.005073928747745702}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:31,801][0m Trial 57 finished with value: 0.11540390230379752 and parameters: {'alpha': 0.22763553247225782, 'l1_ratio': 0.0018118349942627687}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:41:33,807][0m Trial 58 finished with value: 0.11431314603454645 and parameters: {'alpha': 8.01260661730284e-11, 'l1_ratio': 0.01824152553910137}. Best is trial 32 with value: 0.10323007513323941.

[32m[I 2020-11-21 08:41:41,712][0m Trial 63 finished with value: 0.10680514958302836 and parameters: {'alpha': 0.07544888617175997, 'l1_ratio': 9.043181787793187e-05}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:43,249][0m Trial 64 finished with value: 0.1033813042730575 and parameters: {'alpha': 0.009229845028984232, 'l1_ratio': 0.00018223912132182658}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:41:45,545][0m Trial 65 finished with value: 0.10745784067525696 and parameters: {'alpha': 0.0008431979379609467, 'l1_ratio': 0.0036105915224228916}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:46,280][0m Trial 66 finished with value: 0.11915635640082231 and parameters: {'alpha': 0.3540677354589443, 'l1_ratio': 3.969392852075224e-05}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:41:47,672][0m Trial

  positive)
[32m[I 2020-11-21 08:41:58,989][0m Trial 75 finished with value: 0.10876044428137477 and parameters: {'alpha': 0.0005349474374457317, 'l1_ratio': 0.0005592615923698878}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:42:00,845][0m Trial 76 finished with value: 0.10335510431351425 and parameters: {'alpha': 0.00827925480717639, 'l1_ratio': 0.001216972358756709}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:42:03,572][0m Trial 77 finished with value: 0.11283932978927473 and parameters: {'alpha': 5.64164660532435e-05, 'l1_ratio': 8.144373988659507e-05}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:42:04,737][0m Trial 78 finished with value: 0.12864474641017126 and parameters: {'alpha': 0.6570216297016152, 'l1_ratio': 0.0002761941449011401}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:42:05,640

[32m[I 2020-11-21 08:42:26,771][0m Trial 91 finished with value: 0.10328487128617495 and parameters: {'alpha': 0.015766200686644295, 'l1_ratio': 0.0015099949946666368}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:42:28,280][0m Trial 92 finished with value: 0.10338588920252863 and parameters: {'alpha': 0.007565877042664609, 'l1_ratio': 0.0015333099619739717}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  positive)
  positive)
[32m[I 2020-11-21 08:42:30,058][0m Trial 93 finished with value: 0.10804371202033218 and parameters: {'alpha': 0.0007094218399199496, 'l1_ratio': 0.0007880964332990084}. Best is trial 32 with value: 0.10323007513323941.[0m
[32m[I 2020-11-21 08:42:30,804][0m Trial 94 finished with value: 0.11226795019694238 and parameters: {'alpha': 0.16208333583342815, 'l1_ratio': 0.0019996166922787643}. Best is trial 32 with value: 0.10323007513323941.[0m
  positive)
  positive)
  positive)
  po

In [53]:
study.best_trial

FrozenTrial(number=32, value=0.10323007513323941, datetime_start=datetime.datetime(2020, 11, 21, 8, 40, 56, 456995), datetime_complete=datetime.datetime(2020, 11, 21, 8, 40, 57, 973190), params={'alpha': 0.008134728198737267, 'l1_ratio': 0.0029556742579328124}, distributions={'alpha': LogUniformDistribution(high=100, low=1e-15), 'l1_ratio': LogUniformDistribution(high=10, low=1e-05)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=32, state=TrialState.COMPLETE)

# kernel ridge (poly)

In [55]:
# 1. Define an objective function to be maximized.
def objective(trial):
    config = {
            "model": "KernelRidge",
            "features": [
                "numerical_features",
                "object_features"
                ],
            "params": {
                "alpha" : trial.suggest_loguniform('alpha', 1e-5, 100),
                "kernel" : 'poly',
                "degree" : trial.suggest_int('degree', 2, 4),
                "coef0": trial.suggest_loguniform('coef0', 1e-4, 10)

            },
            "loss": "rmse",
            "target_name": "SalePrice",
            "ID_name": "Id"
    }

    feats = config['features']
    target_name = config['target_name']
    # 指定した特徴量からデータをロード
    X_train_all, X_test = load_datasets(feats)
    y_train_all = load_target(target_name)
    if config['model'] == 'LightGBM':
        return train_and_predict_lightgbm(X_train_all, y_train_all, X_test, config)
    elif config['model'] in ['LinearRegression', 'Lasso', 'Ridge', 'ErasticNet', 'KernelRidge']:
        return train_and_predict_linear(X_train_all, y_train_all, X_test, config)

# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[32m[I 2020-11-21 08:45:28,552][0m A new study created in memory with name: no-name-69e9149c-a868-413f-b3ff-bca8c93ccdeb[0m
[32m[I 2020-11-21 08:45:30,195][0m Trial 0 finished with value: 5.729316343058227 and parameters: {'alpha': 0.11008125303523662, 'degree': 3, 'coef0': 0.01081862760809356}. Best is trial 0 with value: 5.729316343058227.[0m
[32m[I 2020-11-21 08:45:31,839][0m Trial 1 finished with value: 4.195135762079753 and parameters: {'alpha': 0.02014836055340176, 'degree': 3, 'coef0': 0.003330513756887406}. Best is trial 1 with value: 4.195135762079753.[0m
[32m[I 2020-11-21 08:45:33,423][0m Trial 2 finished with value: 3.9285933261176114 and parameters: {'alpha': 1.9274948404147034e-05, 'degree': 4, 'coef0': 0.0009621907228265197}. Best is trial 2 with value: 3.9285933261176114.[0m
[32m[I 2020-11-21 08:45:34,958][0m Trial 3 finished with value: 11.068964248650701 and parameters: {'alpha': 12.103920972563515, 'degree': 4, 'coef0': 0.0029421175556205168}. Best is tr

[32m[I 2020-11-21 08:46:21,713][0m Trial 36 finished with value: 0.10558295702622952 and parameters: {'alpha': 7.364466795177675, 'degree': 4, 'coef0': 4.62946092975351}. Best is trial 12 with value: 0.10320502707772308.[0m
[32m[I 2020-11-21 08:46:23,246][0m Trial 37 finished with value: 10.792690836131353 and parameters: {'alpha': 21.918347503382577, 'degree': 3, 'coef0': 0.011015497660465081}. Best is trial 12 with value: 0.10320502707772308.[0m
[32m[I 2020-11-21 08:46:24,792][0m Trial 38 finished with value: 6.607431453088107 and parameters: {'alpha': 0.0955326746656912, 'degree': 3, 'coef0': 0.00016313818978307953}. Best is trial 12 with value: 0.10320502707772308.[0m
[32m[I 2020-11-21 08:46:26,301][0m Trial 39 finished with value: 0.20710267660168008 and parameters: {'alpha': 0.010918852726148065, 'degree': 4, 'coef0': 0.25568938918111866}. Best is trial 12 with value: 0.10320502707772308.[0m
[32m[I 2020-11-21 08:46:27,370][0m Trial 40 finished with value: 0.10515519

[32m[I 2020-11-21 08:47:17,946][0m Trial 72 finished with value: 0.1060364111826095 and parameters: {'alpha': 7.731450812824452, 'degree': 3, 'coef0': 3.2192028857109602}. Best is trial 52 with value: 0.1031500943094874.[0m
[32m[I 2020-11-21 08:47:19,497][0m Trial 73 finished with value: 0.10366204285160689 and parameters: {'alpha': 3.741452365544266, 'degree': 3, 'coef0': 6.798190098897142}. Best is trial 52 with value: 0.1031500943094874.[0m
[32m[I 2020-11-21 08:47:21,048][0m Trial 74 finished with value: 0.1408681704149991 and parameters: {'alpha': 0.002662064893918104, 'degree': 3, 'coef0': 9.517227764255702}. Best is trial 52 with value: 0.1031500943094874.[0m
[32m[I 2020-11-21 08:47:22,546][0m Trial 75 finished with value: 0.104102797526392 and parameters: {'alpha': 10.427531419049075, 'degree': 3, 'coef0': 4.952266716364914}. Best is trial 52 with value: 0.1031500943094874.[0m
[32m[I 2020-11-21 08:47:24,190][0m Trial 76 finished with value: 0.12913189105475573 and 

In [56]:
study.best_trial

FrozenTrial(number=83, value=0.10305224643848125, datetime_start=datetime.datetime(2020, 11, 21, 8, 47, 34, 60205), datetime_complete=datetime.datetime(2020, 11, 21, 8, 47, 35, 713292), params={'alpha': 9.2304694336833, 'degree': 3, 'coef0': 7.405346770365897}, distributions={'alpha': LogUniformDistribution(high=100, low=1e-05), 'degree': IntUniformDistribution(high=4, low=2, step=1), 'coef0': LogUniformDistribution(high=10, low=0.0001)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=83, state=TrialState.COMPLETE)

# kernel ridge (rbf)
複雑過ぎてダメそう

In [8]:
# 1. Define an objective function to be maximized.
def objective(trial):
    config = {
            "model": "KernelRidge",
            "features": [
                "numerical_features",
                "object_features"
                ],
            "params": {
                "alpha" : trial.suggest_loguniform('alpha', 1e-5, 100),
                "kernel" : 'rbf',
                "degree": 2, # 意味無しだが、無いとエラーに
                "coef0": trial.suggest_loguniform('coef0', 1e-4, 10)

            },
            "loss": "rmse",
            "target_name": "SalePrice",
            "ID_name": "Id"
    }
    feats = config['features']
    target_name = config['target_name']

    # 指定した特徴量からデータをロード
    X_train_all, X_test = load_datasets(feats)
    y_train_all = load_target(target_name)
    if config['model'] == 'LightGBM':
        return train_and_predict_lightgbm(X_train_all, y_train_all, X_test, config)
    elif config['model'] in ['LinearRegression', 'Lasso', 'Ridge', 'ErasticNet', 'KernelRidge', 'SVR']:
        return train_and_predict_linear(X_train_all, y_train_all, X_test, config)

# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[32m[I 2020-11-21 09:09:58,632][0m A new study created in memory with name: no-name-3b76c2ec-7278-4792-9fd2-a5976e75bbbd[0m
[32m[I 2020-11-21 09:10:00,387][0m Trial 0 finished with value: 2.059916792529886 and parameters: {'alpha': 58.418500493925556, 'coef0': 0.15599901052077841}. Best is trial 0 with value: 2.059916792529886.[0m
[32m[I 2020-11-21 09:10:01,813][0m Trial 1 finished with value: 0.27256758029040284 and parameters: {'alpha': 0.10020407497906486, 'coef0': 0.6810480667996159}. Best is trial 1 with value: 0.27256758029040284.[0m
[32m[I 2020-11-21 09:10:03,202][0m Trial 2 finished with value: 0.22246882757714684 and parameters: {'alpha': 0.0015292261203993618, 'coef0': 0.21998294974984267}. Best is trial 2 with value: 0.22246882757714684.[0m
[32m[I 2020-11-21 09:10:04,548][0m Trial 3 finished with value: 0.38837191535867305 and parameters: {'alpha': 0.6590750852952686, 'coef0': 0.6079627001973277}. Best is trial 2 with value: 0.22246882757714684.[0m
[32m[I 202

[32m[I 2020-11-21 09:10:49,718][0m Trial 37 finished with value: 0.2221726154407843 and parameters: {'alpha': 0.00039607344498090297, 'coef0': 0.26750632526579515}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:10:51,124][0m Trial 38 finished with value: 0.2494472716977691 and parameters: {'alpha': 0.04425978960157087, 'coef0': 0.0048919108871860245}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:10:52,423][0m Trial 39 finished with value: 0.22478827835548948 and parameters: {'alpha': 0.004812239305797234, 'coef0': 0.11798902333511407}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:10:53,710][0m Trial 40 finished with value: 0.22241902583527606 and parameters: {'alpha': 0.0014328649838112077, 'coef0': 0.000353118069853339}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:10:55,016][0m Trial 41 finished with value: 0.22215943763467577 and parameters: {'alpha': 0.00

[32m[I 2020-11-21 09:11:38,419][0m Trial 75 finished with value: 0.22217445314636203 and parameters: {'alpha': 0.00038888731237845484, 'coef0': 1.14927472390617}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:11:39,625][0m Trial 76 finished with value: 0.22223810853282994 and parameters: {'alpha': 0.0010124927617846083, 'coef0': 0.18583812091262586}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:11:40,844][0m Trial 77 finished with value: 0.2223821303868644 and parameters: {'alpha': 7.675526903667977e-05, 'coef0': 0.08948748607174693}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:11:42,117][0m Trial 78 finished with value: 0.22346723524187845 and parameters: {'alpha': 0.0030635970187411765, 'coef0': 0.1197946114900947}. Best is trial 31 with value: 0.22215306600109147.[0m
[32m[I 2020-11-21 09:11:43,393][0m Trial 79 finished with value: 0.22256508713567164 and parameters: {'alpha': 0.00170

In [9]:
study.best_trial

FrozenTrial(number=80, value=0.22215243327751505, datetime_start=datetime.datetime(2020, 11, 21, 9, 11, 43, 394357), datetime_complete=datetime.datetime(2020, 11, 21, 9, 11, 44, 659192), params={'alpha': 0.0005741952109157125, 'coef0': 3.754170390708927}, distributions={'alpha': LogUniformDistribution(high=100, low=1e-05), 'coef0': LogUniformDistribution(high=10, low=0.0001)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=80, state=TrialState.COMPLETE)

# SVR

In [12]:
# 1. Define an objective function to be maximized.
def objective(trial):
    config = {
            "model": "SVR",
            "features": [
                "numerical_features",
                "object_features"
                ],
            "params": {
                "kernel":"poly",
                "degree": trial.suggest_int('degree', 2, 4),
                "coef0": trial.suggest_loguniform('coef0', 1e-4, 10),
                "C":trial.suggest_uniform('C', 0, 2.0),
                "epsilon": trial.suggest_loguniform('epsilon', 1e-5, 1)

            },
            "loss": "rmse",
            "target_name": "SalePrice",
            "ID_name": "Id"
    }
    feats = config['features']
    target_name = config['target_name']

    # 指定した特徴量からデータをロード
    X_train_all, X_test = load_datasets(feats)
    y_train_all = load_target(target_name)
    if config['model'] == 'LightGBM':
        return train_and_predict_lightgbm(X_train_all, y_train_all, X_test, config)
    elif config['model'] in ['LinearRegression', 'Lasso', 'Ridge', 'ErasticNet', 'KernelRidge', 'SVR']:
        return train_and_predict_linear(X_train_all, y_train_all, X_test, config)

# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[32m[I 2020-11-21 09:15:24,119][0m A new study created in memory with name: no-name-5c2c9372-439f-4005-b432-fccd3c84d410[0m
[32m[I 2020-11-21 09:15:38,306][0m Trial 0 finished with value: 0.13097691912541237 and parameters: {'degree': 4, 'coef0': 4.691430744876913, 'C': 1.9539292586411596, 'epsilon': 0.0018125620092745707}. Best is trial 0 with value: 0.13097691912541237.[0m
[32m[I 2020-11-21 09:15:46,613][0m Trial 1 finished with value: 0.20300234617621263 and parameters: {'degree': 4, 'coef0': 0.0002159024580783942, 'C': 1.8326749076025461, 'epsilon': 0.0004463383997343004}. Best is trial 0 with value: 0.13097691912541237.[0m
[32m[I 2020-11-21 09:15:54,706][0m Trial 2 finished with value: 0.23949986784146943 and parameters: {'degree': 4, 'coef0': 0.000839539393376443, 'C': 0.6233285588669752, 'epsilon': 0.0021832854035424717}. Best is trial 0 with value: 0.13097691912541237.[0m
[32m[I 2020-11-21 09:16:03,026][0m Trial 3 finished with value: 0.15982672495251643 and param

[32m[I 2020-11-21 09:19:36,637][0m Trial 32 finished with value: 0.1072930290338681 and parameters: {'degree': 2, 'coef0': 2.249723477833821, 'C': 0.714351548769813, 'epsilon': 0.016107295657564694}. Best is trial 32 with value: 0.1072930290338681.[0m
[32m[I 2020-11-21 09:19:43,085][0m Trial 33 finished with value: 0.10722188499279761 and parameters: {'degree': 2, 'coef0': 5.311064426562288, 'C': 0.611016824981415, 'epsilon': 0.04377935010630321}. Best is trial 33 with value: 0.10722188499279761.[0m
[32m[I 2020-11-21 09:19:49,097][0m Trial 34 finished with value: 0.10605481665576762 and parameters: {'degree': 2, 'coef0': 2.67705431973581, 'C': 0.6339134888897136, 'epsilon': 0.04645210942679866}. Best is trial 34 with value: 0.10605481665576762.[0m
[32m[I 2020-11-21 09:19:49,930][0m Trial 35 finished with value: 0.154924222560829 and parameters: {'degree': 4, 'coef0': 4.904754491540857, 'C': 0.6206900521210983, 'epsilon': 0.3929678086550641}. Best is trial 34 with value: 0.10

[32m[I 2020-11-21 09:22:34,458][0m Trial 64 finished with value: 0.10351245551685986 and parameters: {'degree': 2, 'coef0': 0.9052221084294821, 'C': 0.25417778087639975, 'epsilon': 0.024719617688751812}. Best is trial 41 with value: 0.10257032668815132.[0m
[32m[I 2020-11-21 09:22:42,418][0m Trial 65 finished with value: 0.17181265845799032 and parameters: {'degree': 2, 'coef0': 1.13980457133137, 'C': 0.0036452975711258606, 'epsilon': 0.001847407180699512}. Best is trial 41 with value: 0.10257032668815132.[0m
[32m[I 2020-11-21 09:22:49,493][0m Trial 66 finished with value: 0.1047170302681486 and parameters: {'degree': 2, 'coef0': 2.716053257690923, 'C': 0.42570330549086427, 'epsilon': 0.018795798105165254}. Best is trial 41 with value: 0.10257032668815132.[0m
[32m[I 2020-11-21 09:22:57,387][0m Trial 67 finished with value: 0.14472038418991834 and parameters: {'degree': 2, 'coef0': 0.013641602234551674, 'C': 0.07775045331078159, 'epsilon': 0.005478577402383002}. Best is trial 

[32m[I 2020-11-21 09:26:23,833][0m Trial 96 finished with value: 0.10207493707111881 and parameters: {'degree': 2, 'coef0': 7.865345479478388, 'C': 0.09111385520064191, 'epsilon': 0.0026048466307931843}. Best is trial 96 with value: 0.10207493707111881.[0m
[32m[I 2020-11-21 09:26:33,009][0m Trial 97 finished with value: 0.10327487898175984 and parameters: {'degree': 2, 'coef0': 7.452118874609367, 'C': 0.16996223643524255, 'epsilon': 0.0025488741622280166}. Best is trial 96 with value: 0.10207493707111881.[0m
[32m[I 2020-11-21 09:26:41,194][0m Trial 98 finished with value: 0.1029619389249424 and parameters: {'degree': 2, 'coef0': 4.277585892183445, 'C': 0.07176719321092725, 'epsilon': 0.0007520142094004234}. Best is trial 96 with value: 0.10207493707111881.[0m
[32m[I 2020-11-21 09:26:51,377][0m Trial 99 finished with value: 0.1291048866963515 and parameters: {'degree': 2, 'coef0': 0.00010306560700002056, 'C': 0.16421030757523208, 'epsilon': 0.0027792546077877677}. Best is tri

In [13]:
study.best_trial

FrozenTrial(number=96, value=0.10207493707111881, datetime_start=datetime.datetime(2020, 11, 21, 9, 26, 15, 423265), datetime_complete=datetime.datetime(2020, 11, 21, 9, 26, 23, 833224), params={'degree': 2, 'coef0': 7.865345479478388, 'C': 0.09111385520064191, 'epsilon': 0.0026048466307931843}, distributions={'degree': IntUniformDistribution(high=4, low=2, step=1), 'coef0': LogUniformDistribution(high=10, low=0.0001), 'C': UniformDistribution(high=2.0, low=0), 'epsilon': LogUniformDistribution(high=1, low=1e-05)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=96, state=TrialState.COMPLETE)