In [None]:
import pandas as pd
import numpy as np
import datetime
import random
import os
import sys
import holidays


import sklearn
from sklearn.ensemble import VotingRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold


import xgboost
from xgboost import XGBRegressor
import catboost
from catboost import CatBoostRegressor

from optuna.trial import Trial
import optuna

In [None]:
print(f"파이썬 버전 : {sys.version}")
print(f"pandas 버전 : {pd.__version__}")
print(f"numpy 버전 : {np.__version__}")
print(f"sklearn 버전 : {sklearn.__version__}")
print(f"xgboost 버전 확인 : {xgboost.__version__}")
print(f"catboost 버전 : {catboost.__version__}")
print(f"optuna 버전 : {optuna.__version__}")

In [None]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(2024) # Seed 고정

In [None]:
train = pd.read_csv('/mnt/c/Users/wschu/OneDrive/Documents/data/jeju_specialty/open/train.csv')
test = pd.read_csv('/mnt/c/Users/wschu/OneDrive/Documents/data/jeju_specialty/open/test.csv')
international_data = pd.read_csv('/mnt/c/Users/wschu/OneDrive/Documents/data/jeju_specialty/open/international_trade.csv')

In [None]:
international_data

In [None]:
print(international_data['품목명'].value_counts())

In [None]:
print(international_data['품목명'].unique())

In [None]:
relevant_dict = {"TG": "감귤", "BC": "꽃양배추와 브로콜리(broccoli)", "RD": None, "CR": "당근", "CB": "양배추"} # RD: 무는 없음

In [None]:
international_data.columns

In [None]:
new_column_names = {"기간": "timestamp", "품목명": "item", "수출 중량": "export_weight", "수출 금액": "export_amount", "수입 중량": "import_weight", "수입 금액": "import_amount", "무역수지": "trade_profit"}
international_data.rename(columns=new_column_names, inplace=True)

In [None]:
international_data['timestamp'] = pd.to_datetime(international_data['timestamp'])

In [None]:
type(international_data.timestamp[0])

In [None]:
def change_type(type_):
    result = None
    for key, value in relevant_dict.items():
            if value == type_:
                result = key
                break
    return result

In [None]:
international_data['item'] = international_data.apply(lambda x: change_type(x['item']), axis=1)

In [None]:
# data leakage를 피하기 위해 international_data는 2023년 1월분까지만 사용
print(international_data.shape)
international_data = international_data.loc[international_data.timestamp < pd.to_datetime("2023-02-01"), ]
print(international_data.shape)

In [None]:
scaler = StandardScaler()
international_data.loc[:, ["export_weight", "export_amount", "import_weight", "import_amount", "trade_profit"]] = scaler.fit_transform(international_data.loc[:, ["export_weight", "export_amount", "import_weight", "import_amount", "trade_profit"]])

In [None]:
international_data

In [None]:
def pre_all(train, test):
    print(f"전처리 전 train 크기 : {train.shape}")
    print(f"전처리 전 test 크기 : {test.shape}")
    print("=================전처리 중=================")

    # 합쳐서 전처리하기
    train["timestamp"] = pd.to_datetime(train["timestamp"])
    test["timestamp"] = pd.to_datetime(test["timestamp"])
    df = pd.concat([train,test]).reset_index(drop = True)
    
    df.rename(columns={'supply(kg)':'supply', 'price(원/kg)':'price'},inplace=True)

    #년/월/일 추가
    df['year']=df['timestamp'].dt.year
    df['month']=df['timestamp'].dt.month
    df['day']=df['timestamp'].dt.day

    #요일 추가
    df['week_day']=df['timestamp'].dt.weekday

    # 년-월 변수 추가 : year-month의 형태, 개월단위 누적값
    le = LabelEncoder()
    df["year_month"] = df["timestamp"].map(lambda x :str(x.year) + "-"+str(x.month))

    # 라벨 인코딩
    df["year_month"] = le.fit_transform(df["year_month"])


    # 주차 변수 추가
    df["week"] = df["timestamp"].map(lambda x: datetime.datetime(x.year, x.month, x.day).isocalendar()[1])

    # 주차 누적값
    week_list=[]
    for i in range(len(df['year'])) :
        if df['year'][i] == 2019 :
            week_list.append(int(df['week'][i]))
        elif df['year'][i] == 2020 :
            week_list.append(int(df['week'][i])+52)
        elif df['year'][i] == 2021 :
            week_list.append(int(df['week'][i])+52+53)
        elif df['year'][i] == 2022 :
            week_list.append(int(df['week'][i])+52+53+53)
        elif df['year'][i] == 2023 :
            week_list.append(int(df['week'][i])+52+53+53+52)
    df['week_num']= week_list

    # datetime 패키지에서 19년 12월 마지막주가 첫째주로 들어가는거 발견하여 수정
    df.loc[df['timestamp']=='2019-12-30','week_num']=52
    df.loc[df['timestamp']=='2019-12-31','week_num']=52


    # 공휴일 변수 추가
    def make_holi(x):
        kr_holi = holidays.KR()

        if x in kr_holi:
            return 1
        else:
            return 0
        
    df["holiday"] = df["timestamp"].map(lambda x : make_holi(x))

    # 국제 무역 데이터 접합하기
    # subsidiary_data = []
    # for i in range(len(df)):
    #     row = df.iloc[i, :]
    #     item_ = row['item']
    #     row_date = row.timestamp
    #     target = relevant_dict[item_]
    #     if target == None:
    #         subsidiary_data.append([None] * 5)
    #         continue
    #     target_row = international_data.loc[(international_data.type == relevant_dict["TG"]) & (international_data.date_year_month == row_date), ["export_weight", "export_amount", "import_weight", "import_amount", "trade_profit"]]
    #     if target_row.empty:
    #         subsidiary_data.append([None] * 5)
    #         continue
    #     target_row_list = target_row.values.tolist()[0]
    #     subsidiary_data.append(target_row_list)

    # subsidiary_df = pd.DataFrame(subsidiary_data)

    # print(df.shape)
    # print(subsidiary_df.shape)

    # df = pd.concat([df, subsidiary_df], axis=1)
    # df = pd.merge(df, international_data, on=["timestamp", "item"], how="inner")
    df = pd.merge(df, international_data, on=["timestamp", "item"], how="left")

    # train, test 분리하기
    train = df[~df["price"].isnull()].sort_values("timestamp").reset_index(drop = True)
    test = df[df["price"].isnull()].sort_values("timestamp").reset_index(drop=True)


    print(f"전처리 후 train 크기 : {train.shape}")
    print(f"전처리 후 test 크기 : {test.shape}")

    return train, test

In [None]:
train_pre, test_pre = pre_all(train, test)

In [None]:
print(train_pre)
print(train_pre.columns)

In [None]:
print(test_pre)

# 0. TG 외 품목들
### 0-1. 전처리

In [None]:
## 전처리

# 극 이상치 제거
tg_idx = train_pre[(train_pre["item"]=="TG") & (train_pre["price"]>20000)].index
rd_idx = train_pre[(train_pre["item"]=="RD") & (train_pre["price"]>5000)].index
bc_idx = train_pre[(train_pre["item"]=="BC") & (train_pre["price"]>8000)].index
cb_idx = train_pre[(train_pre["item"]=="CB") & (train_pre["price"]>2300)].index

train_pre.loc[tg_idx,"price"] = train_pre[(train_pre["item"]=="TG") & (train_pre["price"]!=0)]["price"].mean()
train_pre.loc[rd_idx,"price"] = train_pre[(train_pre["item"]=="RD") & (train_pre["price"]!=0)]["price"].mean()
train_pre.loc[bc_idx,"price"] = train_pre[(train_pre["item"]=="BC") & (train_pre["price"]!=0)]["price"].mean()
train_pre.loc[cb_idx,"price"] = train_pre[(train_pre["item"]=="CB") & (train_pre["price"]!=0)]["price"].mean()


# 감귤이 아닌것
print(f"train의 컬럼 : {train_pre.columns}")
print(f"test의 컬럼 : {test_pre.columns}")

train_notg = train_pre[train_pre["item"] !="TG"]
test_notg = test_pre[test_pre["item"] != "TG"]


#인코딩
Xy = pd.get_dummies(train_notg.sort_values(by = ["timestamp"]).reset_index(drop=True).drop(columns = ["supply"]), columns = ["item","corporation","location"])
answer_notg = pd.get_dummies(test_notg.drop(columns = ["timestamp","supply","price"]), columns = [ "item","corporation","location"])
print(Xy.columns)

In [None]:
def objective_cat(trial: Trial, model, x, y, eval_metric):
    """
    Optuna의 하이퍼파라미터 튜닝을 위한 목적 함수
    
    Parameters:
    - trial: Optuna의 Trial 객체
    - model: 튜닝할 머신러닝 모델
    - x: 입력 데이터
    - y: 타겟 데이터
    - eval_metric: 모델을 평가하기 위한 메트릭
    """
    
    # 모델에 맞는 하이퍼파라미터 범위를 정의합니다.
    # cat = CatBoostRegressor(random_state = 2024, 
    #                             n_estimators = n_estimators, 
    #                             learning_rate = lrs, 
    #                             depth = max_depths, 
    #                             l2_leaf_reg = l2_leaf_reg,
    #                             metric_period = 1000)
    params = {
        'random_state': 2024,
        'thread_count': -1,
        # 'criterion': trial.suggest_categorical('criterion', ['squared_error', 'absolute_error']),
        'n_estimators': trial.suggest_int('n_estimators', 500, 1000, step=100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.05, step=0.01),
        'depth': trial.suggest_int('depth', 5, 15, step=1),
        'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', 1, 5, step=1),
        # 'max_features': trial.suggest_float('max_features', 0.6, 0.9, step=0.1),
    }

    # 데이터 프레임이나 시리즈 형태의 x와 y를 numpy 배열로 변환합니다.
    x_np = x.values if isinstance(x, pd.DataFrame) else x    
    y_np = y.values if isinstance(y, pd.Series) else y

    results = dict()
    
    # 5-Fold Cross Validation을 정의합니다.
    # fold = KFold(n_splits=5, shuffle=True, random_state=2024)
    
    # 각 Fold에 대해 모델을 학습하고 평가합니다.
    # for i, (train_idx, test_idx) in enumerate(fold.split(x, y)):
    years = [2019, 2020, 2021, 2022]
    for i, year in enumerate(years):
        filtered_df = x[(x['year'] == year) & (x['month'] == 3) & (x['day'].between(4, 31))]
        train_idx = x.index.difference(filtered_df.index)
        test_idx = filtered_df.index

        x_train, y_train = x_np[train_idx], y_np[train_idx]
        x_test, y_test = x_np[test_idx], y_np[test_idx]
        
        # 현재의 하이퍼파라미터 세트로 모델을 생성하고 학습합니다.
        fold_model = model(**params)  
        fold_model.fit(x_train, y_train)
        fold_pred = fold_model.predict(x_test)
        
        # 해당 Fold의 평가 결과를 저장합니다.
        fold_error = eval_metric(y_test, fold_pred)
        
        results[i] = {
            'model': fold_model, 
            'error': fold_error
        }
    
    # 모든 Fold의 평가 결과의 평균을 반환합니다.
    errors = [v['error'] for k, v in results.items()]
    return np.array(errors).mean()

In [None]:
# 최소화 방향으로 하이퍼 파라미터 학습을 위한 스터디 객체를 생성합니다.
study_cat_no_tg = optuna.create_study(direction='minimize', study_name='cat_regressor_no_tg')

# 하이퍼파라미터 튜닝을 시작합니다.
study_cat_no_tg.optimize(lambda trial: objective_cat(trial,
                                                   CatBoostRegressor,
                                                   Xy.drop(columns = ["timestamp", "ID", "price"]),
                                                   Xy["price"],
                                                   mean_squared_error),
                                                   n_trials=20,
                      )

In [None]:
def objective_xgb(trial: Trial, model, x, y, eval_metric):
    """
    Optuna의 하이퍼파라미터 튜닝을 위한 목적 함수
    
    Parameters:
    - trial: Optuna의 Trial 객체
    - model: 튜닝할 머신러닝 모델
    - x: 입력 데이터
    - y: 타겟 데이터
    - eval_metric: 모델을 평가하기 위한 메트릭
    """
    
    # 모델에 맞는 하이퍼파라미터 범위를 정의합니다.
    # xgb = XGBRegressor(n_estimators = 1000,
    #                    random_state = 2024,
    #                    learning_rate = 0.01,
    #                    max_depth = 10,
    #                    device='cuda')
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 1000, step=100),
        # 'random_state': 2024,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.05, step=0.01),
        'max_depth': trial.suggest_int('max_depth', 5, 15, step=1),
        # 'device': 'cuda',
    }

    # 데이터 프레임이나 시리즈 형태의 x와 y를 numpy 배열로 변환합니다.
    x_np = x.values if isinstance(x, pd.DataFrame) else x    
    y_np = y.values if isinstance(y, pd.Series) else y

    results = dict()
    
    years = [2019, 2020, 2021, 2022]
    for i, year in enumerate(years):
        filtered_df = x[(x['year'] == year) & (x['month'] == 3) & (x['day'].between(4, 31))]
        train_idx = x.index.difference(filtered_df.index)
        test_idx = filtered_df.index

        x_train, y_train = x_np[train_idx], y_np[train_idx]
        x_test, y_test = x_np[test_idx], y_np[test_idx]
        
        # 현재의 하이퍼파라미터 세트로 모델을 생성하고 학습합니다.
        fold_model = model(random_state=2024,
                           **params)  
        fold_model.fit(x_train, y_train)
        fold_pred = fold_model.predict(x_test)
        
        # 해당 Fold의 평가 결과를 저장합니다.
        fold_error = eval_metric(y_test, fold_pred)
        
        results[i] = {
            'model': fold_model, 
            'error': fold_error
        }
    
    # 모든 Fold의 평가 결과의 평균을 반환합니다.
    errors = [v['error'] for k, v in results.items()]
    return np.array(errors).mean()

In [None]:
# 최소화 방향으로 하이퍼 파라미터 학습을 위한 스터디 객체를 생성합니다.
study_xgb_no_tg = optuna.create_study(direction='minimize', study_name='xgb_regressor_no_tg')

# 하이퍼파라미터 튜닝을 시작합니다.
study_xgb_no_tg.optimize(lambda trial: objective_xgb(trial,
                                                   XGBRegressor,
                                                   Xy.drop(columns = ["timestamp", "ID","price"]),
                                                   Xy["price"],
                                                   mean_squared_error),
                                                   n_trials=80,
                      )

In [None]:
## 앙상블 모델 정의
cat = CatBoostRegressor(random_state = 2024,
                        metric_period = 1000,
                        **study_cat_no_tg.best_params)

xgb = XGBRegressor(random_state = 2024,
                   device='cuda',
                   **study_xgb_no_tg.best_params)

In [None]:
# voting
vote_model = VotingRegressor(
    estimators =[("cat",cat), ("xgb", xgb)]
)

vote_model.fit(Xy.drop(columns = ["timestamp", "ID", "price"]), Xy["price"])

pred = vote_model.predict(answer_notg.drop(columns = ["ID", "answer"]))
for idx in range(len(pred)):
    if pred[idx]<0:
        pred[idx]= 0
answer_notg["answer"] = pred

answer_notg[["ID","answer"]]

# 1. TG (1)
### 1-1. 전처리

In [151]:
train_pre, test_pre = pre_all(train, test)

# 공휴일이지만 안쉬는 날 제외하기
no_holi = list(train_pre[(train_pre["item"] =="TG") &(train_pre["holiday"]==1) & (train_pre["price"]!=0)].groupby("timestamp").count().reset_index()["timestamp"])
noholi_idx = train_pre[train_pre["timestamp"].isin(no_holi)]["holiday"].index
for idx in noholi_idx:
    train_pre.loc[idx, "holiday"] = 0

# train 및 test 시간 순서로 정렬하기
train_tg = train_pre[train_pre["item"] == "TG"].sort_values(by = ["timestamp"]).reset_index(drop= True)
test_tg = test_pre[test_pre["item"] == "TG"].sort_values(by = ["timestamp"]).reset_index(drop= True)

Xy = pd.get_dummies(train_tg, columns = [ "item","corporation","location"]).drop(columns = ["supply"])
answer_tg1 = pd.get_dummies(test_tg, columns = [ "item","corporation","location"]).drop(columns = ["timestamp","supply","price"])
print(f"train의 컬럼 : {Xy.columns}")
print(f"test의 컬럼 : {answer_tg1.columns}")
Xy["price"] = np.sqrt(Xy["price"])

전처리 전 train 크기 : (59397, 7)
전처리 전 test 크기 : (1092, 5)
전처리 후 train 크기 : (59397, 20)
전처리 후 test 크기 : (1092, 20)
train의 컬럼 : Index(['ID', 'timestamp', 'price', 'year', 'month', 'day', 'week_day',
       'year_month', 'week', 'week_num', 'holiday', 'export_weight',
       'export_amount', 'import_weight', 'import_amount', 'trade_profit',
       'item_TG', 'corporation_A', 'corporation_B', 'corporation_C',
       'corporation_D', 'corporation_E', 'location_J', 'location_S'],
      dtype='object')
test의 컬럼 : Index(['ID', 'year', 'month', 'day', 'week_day', 'year_month', 'week',
       'week_num', 'holiday', 'export_weight', 'export_amount',
       'import_weight', 'import_amount', 'trade_profit', 'item_TG',
       'corporation_A', 'corporation_B', 'corporation_C', 'corporation_D',
       'corporation_E', 'location_J', 'location_S'],
      dtype='object')


### 3-2. 모델링 & 훈련 예측

In [152]:
# 최소화 방향으로 하이퍼 파라미터 학습을 위한 스터디 객체를 생성합니다.
study_cat_tg = optuna.create_study(direction='minimize', study_name='cat_regressor_tg')

# 하이퍼파라미터 튜닝을 시작합니다.
study_cat_tg.optimize(lambda trial: objective_cat(trial,
                                                   CatBoostRegressor,
                                                   Xy.drop(columns = ["timestamp", "ID", "price"]),
                                                   Xy["price"],
                                                   mean_squared_error),
                                                   n_trials=20,
                      )

study_xgb_tg = optuna.create_study(direction='minimize', study_name='xgb_regressor_tg')

# 하이퍼파라미터 튜닝을 시작합니다.
study_xgb_tg.optimize(lambda trial: objective_xgb(trial,
                                                   XGBRegressor,
                                                   Xy.drop(columns = ["timestamp", "ID", "price"]),
                                                   Xy["price"],
                                                   mean_squared_error),
                                                   n_trials=20,
                      )

[I 2024-03-28 19:20:12,610] A new study created in memory with name: cat_regressor_tg


0:	learn: 30.8074600	total: 1.31ms	remaining: 786ms
1:	learn: 30.4481477	total: 8.07ms	remaining: 2.41s
2:	learn: 30.1009041	total: 14.2ms	remaining: 2.83s
3:	learn: 29.7506784	total: 20.4ms	remaining: 3.04s
4:	learn: 29.4295596	total: 23.5ms	remaining: 2.8s
5:	learn: 29.1273507	total: 24.9ms	remaining: 2.46s
6:	learn: 28.8082572	total: 31.1ms	remaining: 2.64s
7:	learn: 28.5055680	total: 37.8ms	remaining: 2.8s
8:	learn: 28.2176856	total: 41ms	remaining: 2.69s
9:	learn: 27.9421693	total: 42.4ms	remaining: 2.5s
10:	learn: 27.6678230	total: 48.6ms	remaining: 2.6s
11:	learn: 27.3925184	total: 51.3ms	remaining: 2.51s
12:	learn: 27.1348712	total: 56.7ms	remaining: 2.56s
13:	learn: 26.8791884	total: 58.8ms	remaining: 2.46s
14:	learn: 26.6121076	total: 63.9ms	remaining: 2.49s
15:	learn: 26.3680723	total: 69.1ms	remaining: 2.52s
16:	learn: 26.1176103	total: 74.7ms	remaining: 2.56s
17:	learn: 25.8957884	total: 80.3ms	remaining: 2.6s
18:	learn: 25.6642827	total: 85.7ms	remaining: 2.62s
19:	learn:

[I 2024-03-28 19:20:26,100] Trial 0 finished with value: 300.1993210979238 and parameters: {'n_estimators': 600, 'learning_rate': 0.02, 'depth': 11, 'l2_leaf_reg': 2}. Best is trial 0 with value: 300.1993210979238.


575:	learn: 14.9068847	total: 3.08s	remaining: 129ms
576:	learn: 14.9032593	total: 3.09s	remaining: 123ms
577:	learn: 14.8986890	total: 3.1s	remaining: 118ms
578:	learn: 14.8920629	total: 3.1s	remaining: 113ms
579:	learn: 14.8873667	total: 3.11s	remaining: 107ms
580:	learn: 14.8831368	total: 3.11s	remaining: 102ms
581:	learn: 14.8798403	total: 3.12s	remaining: 96.5ms
582:	learn: 14.8745827	total: 3.13s	remaining: 91.2ms
583:	learn: 14.8694196	total: 3.13s	remaining: 85.8ms
584:	learn: 14.8648665	total: 3.14s	remaining: 80.5ms
585:	learn: 14.8586965	total: 3.14s	remaining: 75.1ms
586:	learn: 14.8551743	total: 3.15s	remaining: 69.7ms
587:	learn: 14.8513703	total: 3.15s	remaining: 64.4ms
588:	learn: 14.8454145	total: 3.16s	remaining: 59ms
589:	learn: 14.8401819	total: 3.17s	remaining: 53.6ms
590:	learn: 14.8354886	total: 3.17s	remaining: 48.3ms
591:	learn: 14.8323112	total: 3.18s	remaining: 42.9ms
592:	learn: 14.8281104	total: 3.18s	remaining: 37.6ms
593:	learn: 14.8246142	total: 3.19s	re

[I 2024-03-28 19:21:04,289] Trial 1 finished with value: 309.61651239917126 and parameters: {'n_estimators': 500, 'learning_rate': 0.04, 'depth': 13, 'l2_leaf_reg': 5}. Best is trial 0 with value: 300.1993210979238.


0:	learn: 30.2870466	total: 1.65ms	remaining: 987ms
1:	learn: 29.4856183	total: 3.67ms	remaining: 1.09s
2:	learn: 28.7201461	total: 45.8ms	remaining: 9.12s
3:	learn: 27.9796703	total: 89.2ms	remaining: 13.3s
4:	learn: 27.3139674	total: 90.6ms	remaining: 10.8s
5:	learn: 26.6805597	total: 130ms	remaining: 12.9s
6:	learn: 26.0847217	total: 172ms	remaining: 14.6s
7:	learn: 25.5232550	total: 212ms	remaining: 15.7s
8:	learn: 24.9973059	total: 256ms	remaining: 16.8s
9:	learn: 24.4862047	total: 297ms	remaining: 17.5s
10:	learn: 24.0307976	total: 340ms	remaining: 18.2s
11:	learn: 23.6106436	total: 342ms	remaining: 16.8s
12:	learn: 23.2075485	total: 343ms	remaining: 15.5s
13:	learn: 22.8325144	total: 346ms	remaining: 14.5s
14:	learn: 22.4531654	total: 388ms	remaining: 15.1s
15:	learn: 22.1150358	total: 477ms	remaining: 17.4s
16:	learn: 21.7846339	total: 520ms	remaining: 17.8s
17:	learn: 21.5003731	total: 561ms	remaining: 18.1s
18:	learn: 21.2073050	total: 603ms	remaining: 18.4s
19:	learn: 20.946

[I 2024-03-28 19:22:35,430] Trial 2 finished with value: 321.29691572253654 and parameters: {'n_estimators': 600, 'learning_rate': 0.05, 'depth': 14, 'l2_leaf_reg': 4}. Best is trial 0 with value: 300.1993210979238.


0:	learn: 30.4460439	total: 1.19ms	remaining: 831ms
1:	learn: 29.7210802	total: 92.1ms	remaining: 32.1s
2:	learn: 29.0261058	total: 179ms	remaining: 41.7s
3:	learn: 28.4057398	total: 185ms	remaining: 32.1s
4:	learn: 27.7925068	total: 203ms	remaining: 28.1s
5:	learn: 27.2442790	total: 208ms	remaining: 24s
6:	learn: 26.7333932	total: 209ms	remaining: 20.7s
7:	learn: 26.2254693	total: 211ms	remaining: 18.3s
8:	learn: 25.7682551	total: 212ms	remaining: 16.3s
9:	learn: 25.2685851	total: 302ms	remaining: 20.9s
10:	learn: 24.7808700	total: 391ms	remaining: 24.5s
11:	learn: 24.3352165	total: 476ms	remaining: 27.3s
12:	learn: 23.9152720	total: 561ms	remaining: 29.6s
13:	learn: 23.5227968	total: 646ms	remaining: 31.6s
14:	learn: 23.1401539	total: 731ms	remaining: 33.4s
15:	learn: 22.8369185	total: 733ms	remaining: 31.3s
16:	learn: 22.4874185	total: 821ms	remaining: 33s
17:	learn: 22.1562474	total: 907ms	remaining: 34.4s
18:	learn: 21.9057187	total: 917ms	remaining: 32.9s
19:	learn: 21.6335849	to

[I 2024-03-28 19:26:18,947] Trial 3 finished with value: 332.72435471949353 and parameters: {'n_estimators': 700, 'learning_rate': 0.04, 'depth': 15, 'l2_leaf_reg': 1}. Best is trial 0 with value: 300.1993210979238.


0:	learn: 30.8046495	total: 1.53ms	remaining: 1.53s
1:	learn: 30.4381552	total: 7.04ms	remaining: 3.52s
2:	learn: 30.0834750	total: 12.4ms	remaining: 4.11s
3:	learn: 29.7233638	total: 18.3ms	remaining: 4.55s
4:	learn: 29.3990367	total: 21.3ms	remaining: 4.24s
5:	learn: 29.0957669	total: 22.6ms	remaining: 3.75s
6:	learn: 28.7683867	total: 28.4ms	remaining: 4.03s
7:	learn: 28.4610222	total: 34.2ms	remaining: 4.24s
8:	learn: 28.1717770	total: 37.4ms	remaining: 4.12s
9:	learn: 27.8944906	total: 39.3ms	remaining: 3.89s
10:	learn: 27.6091335	total: 46.4ms	remaining: 4.17s
11:	learn: 27.3287691	total: 50.4ms	remaining: 4.15s
12:	learn: 27.0682290	total: 57.5ms	remaining: 4.37s
13:	learn: 26.8038569	total: 64.4ms	remaining: 4.54s
14:	learn: 26.5317186	total: 70.8ms	remaining: 4.65s
15:	learn: 26.2995367	total: 72.4ms	remaining: 4.45s
16:	learn: 26.0390895	total: 79.1ms	remaining: 4.57s
17:	learn: 25.8160064	total: 84.7ms	remaining: 4.62s
18:	learn: 25.5946744	total: 90.1ms	remaining: 4.65s
19:

[I 2024-03-28 19:26:41,941] Trial 4 finished with value: 302.15664554395084 and parameters: {'n_estimators': 1000, 'learning_rate': 0.02, 'depth': 11, 'l2_leaf_reg': 1}. Best is trial 0 with value: 300.1993210979238.


992:	learn: 12.7024652	total: 5.4s	remaining: 38.1ms
993:	learn: 12.6982007	total: 5.4s	remaining: 32.6ms
994:	learn: 12.6920971	total: 5.41s	remaining: 27.2ms
995:	learn: 12.6910175	total: 5.42s	remaining: 21.7ms
996:	learn: 12.6887896	total: 5.42s	remaining: 16.3ms
997:	learn: 12.6849878	total: 5.42s	remaining: 10.9ms
998:	learn: 12.6808796	total: 5.43s	remaining: 5.44ms
999:	learn: 12.6779128	total: 5.44s	remaining: 0us
0:	learn: 30.9900972	total: 1.2ms	remaining: 957ms
1:	learn: 30.8219752	total: 88.3ms	remaining: 35.2s
2:	learn: 30.6586068	total: 173ms	remaining: 46.1s
3:	learn: 30.4926866	total: 174ms	remaining: 34.7s
4:	learn: 30.3249909	total: 177ms	remaining: 28.2s
5:	learn: 30.1626754	total: 179ms	remaining: 23.7s
6:	learn: 30.0027847	total: 183ms	remaining: 20.8s
7:	learn: 29.8464282	total: 192ms	remaining: 19s
8:	learn: 29.6887709	total: 264ms	remaining: 23.2s
9:	learn: 29.5360139	total: 265ms	remaining: 20.9s
10:	learn: 29.3927867	total: 281ms	remaining: 20.2s
11:	learn: 2

[I 2024-03-28 19:30:06,696] Trial 5 finished with value: 305.39829900842903 and parameters: {'n_estimators': 800, 'learning_rate': 0.01, 'depth': 15, 'l2_leaf_reg': 5}. Best is trial 0 with value: 300.1993210979238.


0:	learn: 30.6329745	total: 1.27ms	remaining: 1.02s
1:	learn: 30.1199330	total: 4.87ms	remaining: 1.94s
2:	learn: 29.6272117	total: 8.05ms	remaining: 2.14s
3:	learn: 29.1650536	total: 11.6ms	remaining: 2.31s
4:	learn: 28.7227991	total: 14.9ms	remaining: 2.36s
5:	learn: 28.3037763	total: 16.1ms	remaining: 2.14s
6:	learn: 27.8825130	total: 19.2ms	remaining: 2.17s
7:	learn: 27.4558852	total: 22.4ms	remaining: 2.21s
8:	learn: 27.0762289	total: 25.5ms	remaining: 2.24s
9:	learn: 26.7370885	total: 28.5ms	remaining: 2.25s
10:	learn: 26.3930200	total: 29.8ms	remaining: 2.13s
11:	learn: 26.0234106	total: 33ms	remaining: 2.17s
12:	learn: 25.6804474	total: 36.1ms	remaining: 2.18s
13:	learn: 25.3782040	total: 38.9ms	remaining: 2.18s
14:	learn: 25.0725095	total: 42.1ms	remaining: 2.2s
15:	learn: 24.7670868	total: 46ms	remaining: 2.25s
16:	learn: 24.4988079	total: 49.8ms	remaining: 2.29s
17:	learn: 24.2229693	total: 53.4ms	remaining: 2.32s
18:	learn: 23.9638206	total: 56.5ms	remaining: 2.32s
19:	lear

[I 2024-03-28 19:30:17,108] Trial 6 finished with value: 297.9502935507792 and parameters: {'n_estimators': 800, 'learning_rate': 0.03, 'depth': 10, 'l2_leaf_reg': 3}. Best is trial 6 with value: 297.9502935507792.


757:	learn: 13.7430043	total: 2.37s	remaining: 131ms
758:	learn: 13.7406998	total: 2.37s	remaining: 128ms
759:	learn: 13.7378753	total: 2.37s	remaining: 125ms
760:	learn: 13.7345523	total: 2.38s	remaining: 122ms
761:	learn: 13.7290640	total: 2.38s	remaining: 119ms
762:	learn: 13.7279521	total: 2.38s	remaining: 116ms
763:	learn: 13.7272315	total: 2.39s	remaining: 112ms
764:	learn: 13.7227033	total: 2.39s	remaining: 109ms
765:	learn: 13.7217369	total: 2.39s	remaining: 106ms
766:	learn: 13.7178070	total: 2.4s	remaining: 103ms
767:	learn: 13.7146091	total: 2.4s	remaining: 100ms
768:	learn: 13.7089166	total: 2.4s	remaining: 96.9ms
769:	learn: 13.7044860	total: 2.4s	remaining: 93.7ms
770:	learn: 13.7034050	total: 2.41s	remaining: 90.6ms
771:	learn: 13.7001768	total: 2.41s	remaining: 87.5ms
772:	learn: 13.6989298	total: 2.41s	remaining: 84.3ms
773:	learn: 13.6959666	total: 2.42s	remaining: 81.2ms
774:	learn: 13.6917512	total: 2.42s	remaining: 78.1ms
775:	learn: 13.6879953	total: 2.42s	remaini

[I 2024-03-28 19:31:32,951] Trial 7 finished with value: 315.77094087574574 and parameters: {'n_estimators': 1000, 'learning_rate': 0.02, 'depth': 13, 'l2_leaf_reg': 1}. Best is trial 6 with value: 297.9502935507792.


0:	learn: 30.9878047	total: 8.31ms	remaining: 5.81s
1:	learn: 30.8117384	total: 58.6ms	remaining: 20.4s
2:	learn: 30.6378330	total: 100ms	remaining: 23.3s
3:	learn: 30.4628517	total: 134ms	remaining: 23.3s
4:	learn: 30.2912456	total: 144ms	remaining: 20s
5:	learn: 30.1258745	total: 145ms	remaining: 16.7s
6:	learn: 29.9547074	total: 154ms	remaining: 15.2s
7:	learn: 29.7904579	total: 194ms	remaining: 16.8s
8:	learn: 29.6343414	total: 211ms	remaining: 16.2s
9:	learn: 29.4689377	total: 220ms	remaining: 15.2s
10:	learn: 29.3152787	total: 258ms	remaining: 16.1s
11:	learn: 29.1621550	total: 259ms	remaining: 14.8s
12:	learn: 29.0081228	total: 300ms	remaining: 15.9s
13:	learn: 28.8570899	total: 304ms	remaining: 14.9s
14:	learn: 28.7101506	total: 344ms	remaining: 15.7s
15:	learn: 28.5641857	total: 383ms	remaining: 16.4s
16:	learn: 28.4226859	total: 384ms	remaining: 15.4s
17:	learn: 28.2728826	total: 425ms	remaining: 16.1s
18:	learn: 28.1343266	total: 467ms	remaining: 16.7s
19:	learn: 27.9909781	

[I 2024-03-28 19:33:06,015] Trial 8 finished with value: 301.06615083047683 and parameters: {'n_estimators': 700, 'learning_rate': 0.01, 'depth': 14, 'l2_leaf_reg': 3}. Best is trial 6 with value: 297.9502935507792.


0:	learn: 30.2870466	total: 1.31ms	remaining: 1.18s
1:	learn: 29.4856183	total: 3.71ms	remaining: 1.67s
2:	learn: 28.7363322	total: 25.6ms	remaining: 7.66s
3:	learn: 28.0247589	total: 45.3ms	remaining: 10.2s
4:	learn: 27.3455273	total: 66.9ms	remaining: 12s
5:	learn: 26.6906062	total: 84.7ms	remaining: 12.6s
6:	learn: 26.1049710	total: 93.7ms	remaining: 12s
7:	learn: 25.5461228	total: 114ms	remaining: 12.8s
8:	learn: 25.0223703	total: 137ms	remaining: 13.6s
9:	learn: 24.5400093	total: 138ms	remaining: 12.3s
10:	learn: 24.0926642	total: 155ms	remaining: 12.5s
11:	learn: 23.6906140	total: 156ms	remaining: 11.6s
12:	learn: 23.2650062	total: 175ms	remaining: 12s
13:	learn: 22.8811798	total: 196ms	remaining: 12.4s
14:	learn: 22.5079606	total: 204ms	remaining: 12.1s
15:	learn: 22.1800898	total: 209ms	remaining: 11.6s
16:	learn: 21.8427363	total: 229ms	remaining: 11.9s
17:	learn: 21.5386472	total: 275ms	remaining: 13.5s
18:	learn: 21.2803183	total: 278ms	remaining: 12.9s
19:	learn: 21.0246946

[I 2024-03-28 19:34:14,910] Trial 9 finished with value: 323.54458789231836 and parameters: {'n_estimators': 900, 'learning_rate': 0.05, 'depth': 13, 'l2_leaf_reg': 4}. Best is trial 6 with value: 297.9502935507792.


0:	learn: 30.6329745	total: 2.07ms	remaining: 1.65s
1:	learn: 30.1203527	total: 3.34ms	remaining: 1.33s
2:	learn: 29.6299855	total: 4.36ms	remaining: 1.16s
3:	learn: 29.1587383	total: 5.73ms	remaining: 1.14s
4:	learn: 28.7141220	total: 6.75ms	remaining: 1.07s
5:	learn: 28.2889808	total: 7.68ms	remaining: 1.02s
6:	learn: 27.8803451	total: 8.63ms	remaining: 977ms
7:	learn: 27.4884243	total: 9.54ms	remaining: 944ms
8:	learn: 27.1160417	total: 10.4ms	remaining: 916ms
9:	learn: 26.7626203	total: 11.4ms	remaining: 898ms
10:	learn: 26.4239101	total: 12.2ms	remaining: 877ms
11:	learn: 26.1028039	total: 13ms	remaining: 856ms
12:	learn: 25.7881139	total: 13.9ms	remaining: 839ms
13:	learn: 25.4898263	total: 14.6ms	remaining: 822ms
14:	learn: 25.2228814	total: 15.4ms	remaining: 804ms
15:	learn: 24.9566763	total: 16.3ms	remaining: 799ms
16:	learn: 24.6833442	total: 17.3ms	remaining: 795ms
17:	learn: 24.4430509	total: 18ms	remaining: 782ms
18:	learn: 24.2207449	total: 18.9ms	remaining: 778ms
19:	lea

[I 2024-03-28 19:34:17,959] Trial 10 finished with value: 287.3358266924869 and parameters: {'n_estimators': 800, 'learning_rate': 0.03, 'depth': 6, 'l2_leaf_reg': 3}. Best is trial 10 with value: 287.3358266924869.


630:	learn: 16.3855298	total: 530ms	remaining: 142ms
631:	learn: 16.3841657	total: 531ms	remaining: 141ms
632:	learn: 16.3820392	total: 532ms	remaining: 140ms
633:	learn: 16.3760658	total: 532ms	remaining: 139ms
634:	learn: 16.3705814	total: 533ms	remaining: 139ms
635:	learn: 16.3681265	total: 534ms	remaining: 138ms
636:	learn: 16.3672681	total: 535ms	remaining: 137ms
637:	learn: 16.3642248	total: 536ms	remaining: 136ms
638:	learn: 16.3612133	total: 537ms	remaining: 135ms
639:	learn: 16.3591739	total: 538ms	remaining: 134ms
640:	learn: 16.3563208	total: 538ms	remaining: 134ms
641:	learn: 16.3537507	total: 539ms	remaining: 133ms
642:	learn: 16.3516973	total: 540ms	remaining: 132ms
643:	learn: 16.3495802	total: 541ms	remaining: 131ms
644:	learn: 16.3486020	total: 542ms	remaining: 130ms
645:	learn: 16.3476578	total: 543ms	remaining: 129ms
646:	learn: 16.3458697	total: 544ms	remaining: 129ms
647:	learn: 16.3426414	total: 545ms	remaining: 128ms
648:	learn: 16.3410678	total: 546ms	remaining:

[I 2024-03-28 19:34:20,544] Trial 11 finished with value: 291.9982308948853 and parameters: {'n_estimators': 800, 'learning_rate': 0.03, 'depth': 5, 'l2_leaf_reg': 3}. Best is trial 10 with value: 287.3358266924869.


652:	learn: 16.8632556	total: 456ms	remaining: 103ms
653:	learn: 16.8604274	total: 457ms	remaining: 102ms
654:	learn: 16.8594067	total: 457ms	remaining: 101ms
655:	learn: 16.8581101	total: 458ms	remaining: 101ms
656:	learn: 16.8563650	total: 459ms	remaining: 99.9ms
657:	learn: 16.8537235	total: 459ms	remaining: 99.2ms
658:	learn: 16.8504258	total: 460ms	remaining: 98.4ms
659:	learn: 16.8493974	total: 461ms	remaining: 97.7ms
660:	learn: 16.8482354	total: 461ms	remaining: 97ms
661:	learn: 16.8438324	total: 462ms	remaining: 96.4ms
662:	learn: 16.8397822	total: 463ms	remaining: 95.7ms
663:	learn: 16.8361319	total: 464ms	remaining: 95ms
664:	learn: 16.8348021	total: 464ms	remaining: 94.3ms
665:	learn: 16.8340484	total: 465ms	remaining: 93.6ms
666:	learn: 16.8332151	total: 466ms	remaining: 92.8ms
667:	learn: 16.8321039	total: 466ms	remaining: 92.1ms
668:	learn: 16.8299466	total: 467ms	remaining: 91.5ms
669:	learn: 16.8269927	total: 468ms	remaining: 90.8ms
670:	learn: 16.8258538	total: 469ms	

[I 2024-03-28 19:34:23,432] Trial 12 finished with value: 290.4951933256592 and parameters: {'n_estimators': 900, 'learning_rate': 0.03, 'depth': 5, 'l2_leaf_reg': 2}. Best is trial 10 with value: 287.3358266924869.


670:	learn: 16.7639029	total: 471ms	remaining: 161ms
671:	learn: 16.7620214	total: 472ms	remaining: 160ms
672:	learn: 16.7614887	total: 472ms	remaining: 159ms
673:	learn: 16.7590893	total: 473ms	remaining: 159ms
674:	learn: 16.7554375	total: 474ms	remaining: 158ms
675:	learn: 16.7540004	total: 475ms	remaining: 157ms
676:	learn: 16.7519465	total: 475ms	remaining: 157ms
677:	learn: 16.7488014	total: 476ms	remaining: 156ms
678:	learn: 16.7469727	total: 477ms	remaining: 155ms
679:	learn: 16.7458662	total: 477ms	remaining: 154ms
680:	learn: 16.7435253	total: 478ms	remaining: 154ms
681:	learn: 16.7412255	total: 479ms	remaining: 153ms
682:	learn: 16.7405247	total: 479ms	remaining: 152ms
683:	learn: 16.7364159	total: 480ms	remaining: 152ms
684:	learn: 16.7335603	total: 481ms	remaining: 151ms
685:	learn: 16.7297939	total: 481ms	remaining: 150ms
686:	learn: 16.7288945	total: 482ms	remaining: 149ms
687:	learn: 16.7278517	total: 483ms	remaining: 149ms
688:	learn: 16.7242888	total: 484ms	remaining:

[I 2024-03-28 19:34:26,364] Trial 13 finished with value: 287.46975539710155 and parameters: {'n_estimators': 900, 'learning_rate': 0.04, 'depth': 5, 'l2_leaf_reg': 2}. Best is trial 10 with value: 287.3358266924869.


832:	learn: 16.1143539	total: 598ms	remaining: 48.1ms
833:	learn: 16.1125728	total: 599ms	remaining: 47.4ms
834:	learn: 16.1106859	total: 600ms	remaining: 46.7ms
835:	learn: 16.1096229	total: 600ms	remaining: 46ms
836:	learn: 16.1083137	total: 601ms	remaining: 45.2ms
837:	learn: 16.1062517	total: 602ms	remaining: 44.5ms
838:	learn: 16.1055233	total: 603ms	remaining: 43.8ms
839:	learn: 16.1050519	total: 603ms	remaining: 43.1ms
840:	learn: 16.1046674	total: 604ms	remaining: 42.4ms
841:	learn: 16.1041343	total: 604ms	remaining: 41.6ms
842:	learn: 16.1033165	total: 605ms	remaining: 40.9ms
843:	learn: 16.1026190	total: 606ms	remaining: 40.2ms
844:	learn: 16.0997675	total: 606ms	remaining: 39.5ms
845:	learn: 16.0992697	total: 607ms	remaining: 38.8ms
846:	learn: 16.0975869	total: 608ms	remaining: 38ms
847:	learn: 16.0948640	total: 609ms	remaining: 37.3ms
848:	learn: 16.0933053	total: 609ms	remaining: 36.6ms
849:	learn: 16.0916592	total: 610ms	remaining: 35.9ms
850:	learn: 16.0909885	total: 61

[I 2024-03-28 19:34:30,351] Trial 14 finished with value: 291.97759121267507 and parameters: {'n_estimators': 900, 'learning_rate': 0.04, 'depth': 7, 'l2_leaf_reg': 2}. Best is trial 10 with value: 287.3358266924869.


889:	learn: 14.6964021	total: 887ms	remaining: 9.97ms
890:	learn: 14.6942435	total: 888ms	remaining: 8.97ms
891:	learn: 14.6919663	total: 889ms	remaining: 7.97ms
892:	learn: 14.6895709	total: 890ms	remaining: 6.98ms
893:	learn: 14.6874096	total: 891ms	remaining: 5.98ms
894:	learn: 14.6865764	total: 892ms	remaining: 4.98ms
895:	learn: 14.6823231	total: 893ms	remaining: 3.99ms
896:	learn: 14.6820286	total: 894ms	remaining: 2.99ms
897:	learn: 14.6792434	total: 895ms	remaining: 1.99ms
898:	learn: 14.6772652	total: 896ms	remaining: 996us
899:	learn: 14.6736014	total: 897ms	remaining: 0us
0:	learn: 30.4516243	total: 1.33ms	remaining: 1.19s
1:	learn: 29.7934160	total: 2.27ms	remaining: 1.02s
2:	learn: 29.1739047	total: 3.22ms	remaining: 963ms
3:	learn: 28.5835908	total: 4.1ms	remaining: 918ms
4:	learn: 28.0167132	total: 5.1ms	remaining: 912ms
5:	learn: 27.4756578	total: 6.1ms	remaining: 909ms
6:	learn: 26.9591733	total: 6.99ms	remaining: 892ms
7:	learn: 26.4918805	total: 7.83ms	remaining: 873

[I 2024-03-28 19:34:34,342] Trial 15 finished with value: 291.97759121267507 and parameters: {'n_estimators': 900, 'learning_rate': 0.04, 'depth': 7, 'l2_leaf_reg': 2}. Best is trial 10 with value: 287.3358266924869.


723:	learn: 15.0765647	total: 724ms	remaining: 176ms
724:	learn: 15.0697531	total: 725ms	remaining: 175ms
725:	learn: 15.0667619	total: 726ms	remaining: 174ms
726:	learn: 15.0647174	total: 727ms	remaining: 173ms
727:	learn: 15.0621853	total: 728ms	remaining: 172ms
728:	learn: 15.0592412	total: 729ms	remaining: 171ms
729:	learn: 15.0568238	total: 730ms	remaining: 170ms
730:	learn: 15.0563196	total: 730ms	remaining: 169ms
731:	learn: 15.0534906	total: 731ms	remaining: 168ms
732:	learn: 15.0507840	total: 732ms	remaining: 167ms
733:	learn: 15.0458775	total: 733ms	remaining: 166ms
734:	learn: 15.0423671	total: 735ms	remaining: 165ms
735:	learn: 15.0395122	total: 736ms	remaining: 164ms
736:	learn: 15.0383187	total: 737ms	remaining: 163ms
737:	learn: 15.0377364	total: 738ms	remaining: 162ms
738:	learn: 15.0341164	total: 739ms	remaining: 161ms
739:	learn: 15.0317742	total: 740ms	remaining: 160ms
740:	learn: 15.0289947	total: 741ms	remaining: 159ms
741:	learn: 15.0282705	total: 742ms	remaining:

[I 2024-03-28 19:34:38,691] Trial 16 finished with value: 291.25439168070056 and parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'depth': 7, 'l2_leaf_reg': 4}. Best is trial 10 with value: 287.3358266924869.


990:	learn: 14.3026864	total: 971ms	remaining: 8.81ms
991:	learn: 14.3013704	total: 972ms	remaining: 7.83ms
992:	learn: 14.3000033	total: 973ms	remaining: 6.86ms
993:	learn: 14.2988326	total: 974ms	remaining: 5.88ms
994:	learn: 14.2979337	total: 975ms	remaining: 4.9ms
995:	learn: 14.2962075	total: 976ms	remaining: 3.92ms
996:	learn: 14.2943461	total: 977ms	remaining: 2.94ms
997:	learn: 14.2938064	total: 978ms	remaining: 1.96ms
998:	learn: 14.2886007	total: 979ms	remaining: 979us
999:	learn: 14.2863997	total: 980ms	remaining: 0us
0:	learn: 30.4566984	total: 1.19ms	remaining: 831ms
1:	learn: 29.8021064	total: 3.06ms	remaining: 1.07s
2:	learn: 29.1725330	total: 5.05ms	remaining: 1.17s
3:	learn: 28.5644415	total: 6.92ms	remaining: 1.2s
4:	learn: 27.9750431	total: 8.7ms	remaining: 1.21s
5:	learn: 27.4476103	total: 10.5ms	remaining: 1.22s
6:	learn: 26.8984599	total: 12.4ms	remaining: 1.23s
7:	learn: 26.3885797	total: 14.4ms	remaining: 1.24s
8:	learn: 25.9204089	total: 16.2ms	remaining: 1.24s

[I 2024-03-28 19:34:44,348] Trial 17 finished with value: 289.6829739470321 and parameters: {'n_estimators': 700, 'learning_rate': 0.04, 'depth': 9, 'l2_leaf_reg': 3}. Best is trial 10 with value: 287.3358266924869.


619:	learn: 14.2068145	total: 1.19s	remaining: 153ms
620:	learn: 14.2019443	total: 1.19s	remaining: 151ms
621:	learn: 14.2006201	total: 1.19s	remaining: 149ms
622:	learn: 14.1962131	total: 1.19s	remaining: 147ms
623:	learn: 14.1918218	total: 1.2s	remaining: 146ms
624:	learn: 14.1881529	total: 1.2s	remaining: 144ms
625:	learn: 14.1843962	total: 1.2s	remaining: 142ms
626:	learn: 14.1824460	total: 1.2s	remaining: 140ms
627:	learn: 14.1787329	total: 1.2s	remaining: 138ms
628:	learn: 14.1775138	total: 1.21s	remaining: 136ms
629:	learn: 14.1734398	total: 1.21s	remaining: 134ms
630:	learn: 14.1695573	total: 1.21s	remaining: 132ms
631:	learn: 14.1622329	total: 1.21s	remaining: 130ms
632:	learn: 14.1617255	total: 1.21s	remaining: 128ms
633:	learn: 14.1612825	total: 1.22s	remaining: 127ms
634:	learn: 14.1575783	total: 1.22s	remaining: 125ms
635:	learn: 14.1516015	total: 1.22s	remaining: 123ms
636:	learn: 14.1494357	total: 1.22s	remaining: 121ms
637:	learn: 14.1425901	total: 1.22s	remaining: 119m

[I 2024-03-28 19:34:47,364] Trial 18 finished with value: 288.07922741820164 and parameters: {'n_estimators': 800, 'learning_rate': 0.03, 'depth': 6, 'l2_leaf_reg': 2}. Best is trial 10 with value: 287.3358266924869.


677:	learn: 16.2004227	total: 580ms	remaining: 104ms
678:	learn: 16.1977127	total: 581ms	remaining: 104ms
679:	learn: 16.1957070	total: 582ms	remaining: 103ms
680:	learn: 16.1918929	total: 583ms	remaining: 102ms
681:	learn: 16.1892499	total: 583ms	remaining: 101ms
682:	learn: 16.1860538	total: 584ms	remaining: 100ms
683:	learn: 16.1845005	total: 585ms	remaining: 99.2ms
684:	learn: 16.1828798	total: 586ms	remaining: 98.4ms
685:	learn: 16.1804901	total: 587ms	remaining: 97.5ms
686:	learn: 16.1783561	total: 588ms	remaining: 96.7ms
687:	learn: 16.1757494	total: 589ms	remaining: 95.8ms
688:	learn: 16.1730628	total: 589ms	remaining: 94.9ms
689:	learn: 16.1725392	total: 590ms	remaining: 94.1ms
690:	learn: 16.1715683	total: 591ms	remaining: 93.2ms
691:	learn: 16.1704916	total: 592ms	remaining: 92.4ms
692:	learn: 16.1671123	total: 593ms	remaining: 91.5ms
693:	learn: 16.1658146	total: 594ms	remaining: 90.7ms
694:	learn: 16.1636939	total: 595ms	remaining: 89.8ms
695:	learn: 16.1614787	total: 596m

[I 2024-03-28 19:34:52,307] Trial 19 finished with value: 291.8686818855866 and parameters: {'n_estimators': 900, 'learning_rate': 0.02, 'depth': 8, 'l2_leaf_reg': 4}. Best is trial 10 with value: 287.3358266924869.
[I 2024-03-28 19:34:52,307] A new study created in memory with name: xgb_regressor_tg


780:	learn: 15.8135572	total: 979ms	remaining: 149ms
781:	learn: 15.8132296	total: 980ms	remaining: 148ms
782:	learn: 15.8106200	total: 981ms	remaining: 147ms
783:	learn: 15.8076188	total: 983ms	remaining: 145ms
784:	learn: 15.8042223	total: 984ms	remaining: 144ms
785:	learn: 15.8017090	total: 985ms	remaining: 143ms
786:	learn: 15.7984518	total: 987ms	remaining: 142ms
787:	learn: 15.7981851	total: 988ms	remaining: 140ms
788:	learn: 15.7949675	total: 990ms	remaining: 139ms
789:	learn: 15.7909122	total: 991ms	remaining: 138ms
790:	learn: 15.7905930	total: 992ms	remaining: 137ms
791:	learn: 15.7873808	total: 994ms	remaining: 136ms
792:	learn: 15.7857246	total: 996ms	remaining: 134ms
793:	learn: 15.7840744	total: 997ms	remaining: 133ms
794:	learn: 15.7804018	total: 1000ms	remaining: 132ms
795:	learn: 15.7790163	total: 1s	remaining: 131ms
796:	learn: 15.7764923	total: 1s	remaining: 130ms
797:	learn: 15.7753993	total: 1s	remaining: 128ms
798:	learn: 15.7722889	total: 1.01s	remaining: 127ms
7

[I 2024-03-28 19:34:56,251] Trial 0 finished with value: 305.0655482229911 and parameters: {'n_estimators': 500, 'learning_rate': 0.02, 'max_depth': 8}. Best is trial 0 with value: 305.0655482229911.
[I 2024-03-28 19:35:01,073] Trial 1 finished with value: 290.60122203190883 and parameters: {'n_estimators': 900, 'learning_rate': 0.03, 'max_depth': 6}. Best is trial 1 with value: 290.60122203190883.
[I 2024-03-28 19:35:08,753] Trial 2 finished with value: 312.71145971533406 and parameters: {'n_estimators': 1000, 'learning_rate': 0.03, 'max_depth': 8}. Best is trial 1 with value: 290.60122203190883.
[I 2024-03-28 19:35:22,189] Trial 3 finished with value: 390.01038607193135 and parameters: {'n_estimators': 600, 'learning_rate': 0.02, 'max_depth': 14}. Best is trial 1 with value: 290.60122203190883.
[I 2024-03-28 19:35:45,583] Trial 4 finished with value: 404.38787060959515 and parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'max_depth': 15}. Best is trial 1 with value: 290.6012

In [156]:
print(study_cat_tg.best_params)
print(study_xgb_tg.best_params)

{'n_estimators': 800, 'learning_rate': 0.03, 'depth': 6, 'l2_leaf_reg': 3}
{'n_estimators': 900, 'learning_rate': 0.03, 'max_depth': 6}


In [157]:
cat = CatBoostRegressor(random_state = 2024,
                        metric_period = 1000,
                        **study_cat_tg.best_params)

xgb = XGBRegressor(random_state = 2024,
                   device='cuda',
                   **study_xgb_tg.best_params)

In [159]:
# voting
vote_model = VotingRegressor(
    estimators =[("cat",cat), ("xgb", xgb)]
)

vote_model.fit(Xy.drop(columns = ["timestamp", "ID","price"]), Xy["price"])

pred = vote_model.predict(answer_tg1.drop(columns = ["ID"]))
for idx in range(len(pred)):
    if pred[idx]<0:
        pred[idx]= 0
answer_tg1["answer"] = np.power(pred,2)

answer_tg1[["ID","answer"]]

0:	learn: 30.6073935	total: 1.3ms	remaining: 1.04s
799:	learn: 16.0157061	total: 726ms	remaining: 0us
Parameters: { "device" } are not used.



Unnamed: 0,ID,answer
0,TG_A_J_20230304,2769.258597
1,TG_E_S_20230304,3371.770351
2,TG_E_J_20230304,1247.973414
3,TG_D_S_20230304,3674.167752
4,TG_D_J_20230304,324.027678
...,...,...
275,TG_D_J_20230331,1142.198745
276,TG_D_S_20230331,5159.619945
277,TG_A_S_20230331,5839.199820
278,TG_E_S_20230331,4820.895274


# 4. TG (2)
- 일반화를 위한 추가 모델링
### 4-1. 전처리

In [160]:
train_tg2 = train_pre[train_pre["item"] =="TG"]
test_tg2 = test_pre[test_pre["item"] == "TG"]

Xy2 = pd.get_dummies(train_tg2.sort_values(by = ["timestamp", "corporation","location"]).reset_index(drop=True).drop(columns = ["item","supply"]), columns = [ "corporation","location"])
answer_tg2 = pd.get_dummies(test_tg2.drop(columns = ["timestamp","supply","price","item"]), columns = [ "corporation","location"])
print(Xy2.columns)

# 종속변수 루트값
Xy2["price"] = np.sqrt(Xy2["price"])

Index(['ID', 'timestamp', 'price', 'year', 'month', 'day', 'week_day',
       'year_month', 'week', 'week_num', 'holiday', 'export_weight',
       'export_amount', 'import_weight', 'import_amount', 'trade_profit',
       'corporation_A', 'corporation_B', 'corporation_C', 'corporation_D',
       'corporation_E', 'location_J', 'location_S'],
      dtype='object')


### 4-2. 모델링 & 훈련 예측

In [162]:
study_xgb_tg_2 = optuna.create_study(direction='minimize', study_name='xgb_regressor_tg_2')

# 하이퍼파라미터 튜닝을 시작합니다.
study_xgb_tg_2.optimize(lambda trial: objective_xgb(trial,
                                                   XGBRegressor,
                                                   Xy2.drop(columns = ["timestamp", "ID", "price"]),
                                                   Xy2["price"],
                                                   mean_squared_error),
                                                   n_trials=30,
                      )

[I 2024-03-28 19:40:24,885] A new study created in memory with name: xgb_regressor_tg_2
[I 2024-03-28 19:40:27,494] Trial 0 finished with value: 295.068835676664 and parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'max_depth': 6}. Best is trial 0 with value: 295.068835676664.
[I 2024-03-28 19:40:31,057] Trial 1 finished with value: 307.914325277217 and parameters: {'n_estimators': 700, 'learning_rate': 0.05, 'max_depth': 6}. Best is trial 0 with value: 295.068835676664.
[I 2024-03-28 19:40:34,663] Trial 2 finished with value: 295.53720266238633 and parameters: {'n_estimators': 700, 'learning_rate': 0.04, 'max_depth': 6}. Best is trial 0 with value: 295.068835676664.
[I 2024-03-28 19:40:39,039] Trial 3 finished with value: 321.0932221916768 and parameters: {'n_estimators': 500, 'learning_rate': 0.03, 'max_depth': 9}. Best is trial 0 with value: 295.068835676664.
[I 2024-03-28 19:40:54,914] Trial 4 finished with value: 393.62063971768293 and parameters: {'n_estimators': 600, 'le

In [163]:
# 모델 정의 및 훈련 예측
cat = CatBoostRegressor(random_state = 2024,
                        metric_period = 1000,
                        l2_leaf_reg = 3,
                        **study_xgb_tg_2.best_params)
cat.fit(Xy2.drop(columns = ["timestamp", "ID", "price"]), Xy2["price"])

pred2 = cat.predict(answer_tg2.drop(columns = ["ID"]))
for idx in range(len(pred2)):
    if pred2[idx]<0:
        pred2[idx]= 0
answer_tg2["answer"] = np.power(pred2,2)

answer_tg2[["ID","answer"]]

0:	learn: 30.9627769	total: 1.03ms	remaining: 825ms
799:	learn: 17.5576027	total: 621ms	remaining: 0us


Unnamed: 0,ID,answer
0,TG_A_J_20230304,2649.827660
1,TG_E_S_20230304,3051.385401
3,TG_E_J_20230304,1308.403333
5,TG_D_S_20230304,3431.358627
8,TG_D_J_20230304,2124.296045
...,...,...
1074,TG_D_S_20230331,4703.574936
1077,TG_A_S_20230331,5096.694778
1079,TG_E_S_20230331,4156.976128
1087,TG_A_J_20230331,3819.401204


# 5. TG 앙상블

In [164]:
total1 = pd.concat([answer_tg1[["ID","answer"]],answer_notg[["ID","answer"]]])
total2 = pd.concat([answer_tg2[["ID","answer"]],answer_notg[["ID","answer"]]])

In [165]:
print(total1.shape)
print(total2.shape)

(1092, 2)
(1092, 2)


In [166]:
# TG 앙상블 (평균)

df = pd.merge(total1, total2, how = "inner", on="ID")

In [167]:
df

Unnamed: 0,ID,answer_x,answer_y
0,TG_A_J_20230304,2769.258597,2649.827660
1,TG_E_S_20230304,3371.770351,3051.385401
2,TG_E_J_20230304,1247.973414,1308.403333
3,TG_D_S_20230304,3674.167752,3431.358627
4,TG_D_J_20230304,324.027678,2124.296045
...,...,...,...
1087,RD_D_S_20230331,536.485698,536.485698
1088,CR_C_J_20230331,1912.146164,1912.146164
1089,RD_D_J_20230331,293.934552,293.934552
1090,CR_D_J_20230331,2015.732452,2015.732452


In [168]:

df["answer"] = (df["answer_x"]+df["answer_y"])/2
df["item"] = df["ID"].map(lambda x :x.split("_")[0])

In [169]:
# 후처리 
## 전체 min값 | 3월의 min값 확인
df.loc[(df['item']=='TG')&(df['answer']<400),'answer'] =0 # 551   #3월 675
df.loc[(df['item']=='CB')&(df['answer']<50),'answer'] =0 # 162  # 3월 200
df.loc[(df['item']=='RD')&(df['answer']<10),'answer'] =0 # 50     # 3월 124
df.loc[(df['item']=='CR')&(df['answer']<150),'answer'] =0 # 250   # 3월 450
df.loc[(df['item']=='BC')&(df['answer']<100),'answer'] =0 #205 3월 205.0

In [170]:
df = df.drop(columns = ["answer_x","answer_y", "item"])
df

Unnamed: 0,ID,answer
0,TG_A_J_20230304,2709.543128
1,TG_E_S_20230304,3211.577876
2,TG_E_J_20230304,1278.188374
3,TG_D_S_20230304,3552.763190
4,TG_D_J_20230304,1224.161862
...,...,...
1087,RD_D_S_20230331,536.485698
1088,CR_C_J_20230331,1912.146164
1089,RD_D_J_20230331,293.934552
1090,CR_D_J_20230331,2015.732452


In [171]:
# 제출용
df.to_csv('/mnt/c/Users/wschu/OneDrive/Documents/data/jeju_specialty/open/answer.csv',index=False)