In [1]:
import random
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import lightgbm as lgb
from sktime.forecasting.model_selection import temporal_train_test_split

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(0) # Seed 고정

def weighted_mse(alpha = 1):
    def weighted_mse_fixed(label, pred):
        residual = (label - pred).astype("float")
        grad = np.where(residual>0, -2*alpha*residual, -2*residual)
        hess = np.where(residual>0, 2*alpha, 2.0)
        return grad, hess
    return weighted_mse_fixed

def mae(y, pred):
    return np.mean(abs(y-pred))
 #점수 측정을 위한 코드  
def SMAPE(y_true, y_pred):
    return 100/len(y_true) * np.sum(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred)))

def validate(valid_x, valid_y, model):
    pred = model.predict(valid_x)
    smape_score, mae_score = SMAPE(valid_y, pred), mae(valid_y, pred)
    return smape_score, mae_score

In [2]:
smape_scores = []
mae_scores = []
for n_estimator in [40,42,45,47,50]:
    for i in range(1,101,1):
        train_df = pd.read_csv(f'./pretest/train_building{i}.csv')
        pretest_df = pd.read_csv(f'./pretest/pretest_building{i}.csv')
        x_train = train_df.drop(columns=['power_consumption'])
        y_train = train_df['power_consumption']
        y_valid = pretest_df['power_consumption']
        x_valid = pretest_df.drop(columns=['power_consumption'])
        
        #print(f"building_{i}")
        model_lgb1 = lgb.LGBMRegressor(objective='regression',
                                    n_estimators= n_estimator,
                                    verbose=-1)
        model_lgb1.fit(x_train, y_train)
        smape_score, mae_score = validate(x_valid, y_valid, model_lgb1)
        smape_scores.append(smape_score)
        mae_scores.append(mae_score)
    smape_mean = np.mean(smape_scores)
    mae_mean = np.mean(mae_scores)
        
    print(f"n_estimator: {n_estimator}")
    print(f'SMAPE: {smape_mean}\nMAE: {mae_mean}')

[WinError 2] 지정된 파일을 찾을 수 없습니다
  File "c:\ProgramData\anaconda3\envs\poum\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\ProgramData\anaconda3\envs\poum\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ProgramData\anaconda3\envs\poum\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\ProgramData\anaconda3\envs\poum\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


n_estimator: 40
SMAPE: 5.50302137165282
MAE: 108.02073797549805
n_estimator: 42
SMAPE: 5.499612304604798
MAE: 108.02991798643043
n_estimator: 45
SMAPE: 5.49971441094895
MAE: 108.07215360631554
n_estimator: 47
SMAPE: 5.503815162864925
MAE: 108.18706354083344
n_estimator: 50
SMAPE: 5.510124253435037
MAE: 108.33577517776507


n_estimators= 45 최적

In [5]:
preds = np.array([])

for i in tqdm(range(1,101,1)):
    train_df = pd.read_csv(f'./submit/train_building{i}.csv')
    test_df = pd.read_csv(f'./submit/test_building{i}.csv')
    pred_df = pd.DataFrame()   # 시드별 예측값을 담을 data frame
    
    t_x = train_df.drop(columns=['power_consumption'])
    t_y = train_df['power_consumption']
 
    #print(f"building_{i}")
    for seed in [0,1,2,3,4,5,6]:
        
        model = lgb.LGBMRegressor(objective='regression',
                                    n_estimators= 45,
                                    verbose=-1,seed = seed)
        model.fit(t_x, t_y)
        y_pred = model.predict(test_df)
        pred_df.loc[:,seed] = y_pred  
    pred = pred_df.mean(axis=1)
    preds = np.append(preds, pred)
    


100%|██████████| 100/100 [00:33<00:00,  2.99it/s]


In [6]:
########################################
csv_name = './submission/lightGBM_wsw_seed_ensemble.csv'
########################################

In [7]:
submission = pd.read_csv('./data/sample_submission.csv')
submission['answer'] = preds
submission.to_csv(f'{csv_name}', index = False)