# Environment
- CPU : AMD Ryzen 9 6900HX with Radeon Graphics

## Import

In [416]:
import random
import glob
import os
import re
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import warnings

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Already covered by the blanket filter above; it only names the specific
# "DataFrame is highly fragmented" warning that is being silenced.
warnings.filterwarnings("ignore", message="DataFrame is highly fragmented")

In [417]:
import sys
import tqdm as tq
import catboost as cat
import matplotlib
import seaborn as sns
import sklearn as skl
import pandas as pd
import numpy as np
# Report the interpreter and library versions this notebook was executed with.
version_lines = [
    ("Python version: {}", sys.version),
    ("pandas version: {}", pd.__version__),
    ("numpy version: {}", np.__version__),
    ("matplotlib version: {}", matplotlib.__version__),
    ("tqdm version: {}", tq.__version__),
    ("catboost version: {}", cat.__version__),
    ("seaborn version: {}", sns.__version__),
    ("scikit-learn version: {}", skl.__version__),
]

print("-------------------------- Python & library version --------------------------")
for template, version in version_lines:
    print(template.format(version))
print("------------------------------------------------------------------------------")

-------------------------- Python & library version --------------------------
Python version: 3.9.13 (main, Aug 25 2022, 23:51:50) [MSC v.1916 64 bit (AMD64)]
pandas version: 2.0.3
numpy version: 1.21.5
matplotlib version: 3.5.2
tqdm version: 4.64.1
catboost version: 1.1.1
seaborn version: 0.11.2
scikit-learn version: 1.0.2
------------------------------------------------------------------------------


In [418]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [419]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # hash randomization of the running interpreter is fixed at startup.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly pick different kernels
    # from run to run, which works against the deterministic flag above.
    torch.backends.cudnn.benchmark = False

seed_everything(42)  # fix the seed

## Hyperparameter Setting

### 데이터 불러오기

In [420]:
# glob returns matches in arbitrary, OS-dependent order. Sort them so the row
# order of the concatenated training frame (and therefore the seeded
# train/validation split) is the same on every run and machine.
train_paths = sorted(glob.glob('./train/*.csv'))
# Paths of the test recordings, taken from the `data_path` column of test.csv.
test_paths = pd.read_csv('./test.csv')['data_path'].values

### 데이터 전처리

In [421]:
# Module-level accumulator: make_train_data() appends one feature frame per
# training file to this list, and a later cell concatenates its contents.
datas = []

In [422]:
def make_train_data(train_paths):
    """Load every training CSV and add engineered features plus the label.

    The file name (directories stripped, text before the first ``.``) is split
    on ``_``: the first part minus its last two characters is parsed as the
    float label, and the first character of the second part is the driver
    code (``'A'`` -> 0, anything else -> 1).

    Columns appended to each frame, in this order:
      * ``driver``
      * ``A``..``I``: pairwise sums of the Signal channels, then of the Sensor
        channels
      * ``A_1``..``I_1``: absolute pairwise differences of the same pairs
      * ``diff_1``..``diff_7``: first difference of each raw channel
      * ``rolling_diff_<k>`` / ``<k>B`` / ``<k>C``: rolling sums of the
        differences over 5 / 20 / 50 rows
      * ``rolling_<channel>_<w>``: rolling means over 5, 3, 7, 10, 20, 50 and
        100 rows
      * ``label``

    Rolling features are back-filled, so the first ``window - 1`` rows repeat
    the first complete window; they stay NaN when a file is shorter than the
    window.

    Side effect: every processed frame is appended to the module-level
    ``datas`` list, which later cells read.

    Args:
        train_paths: Iterable of CSV paths. Forward and backward slashes are
            both accepted as directory separators.

    Returns:
        Tuple ``(datas, data)``: the module-level list and the last processed
        frame (``None`` when ``train_paths`` is empty).
    """
    signals = ['Signal A', 'Signal B', 'Signal C']
    sensors = ['Sensor A', 'Sensor B', 'Sensor C', 'Sensor D']
    channels = signals + sensors
    # Unordered pairs within each group, in the order of columns A..I.
    pairs = [(first, second)
             for group in (signals, sensors)
             for pos, first in enumerate(group)
             for second in group[pos + 1:]]
    pair_names = 'ABCDEFGHI'
    diff_windows = (('', 5), ('B', 20), ('C', 50))
    mean_windows = (5, 3, 7, 10, 20, 50, 100)

    data = None
    for path in tqdm(train_paths):
        # Normalise separators first so the file name is parsed the same way
        # for Windows-style and POSIX-style paths.
        stem = os.path.basename(path.replace('\\', '/')).split('.')[0]
        name_parts = stem.split('_')
        driver = str(name_parts[1][0])
        label = float(name_parts[0][:-2])

        data = pd.read_csv(path)
        data['driver'] = 0 if driver == 'A' else 1

        # Collect the new columns in a dict and attach them in one concat;
        # inserting ~100 columns one by one fragments the frame.
        features = {}
        for name, (left, right) in zip(pair_names, pairs):
            features[name] = data[left] + data[right]
        for name, (left, right) in zip(pair_names, pairs):
            features[name + '_1'] = np.abs(data[left] - data[right])
        for idx, channel in enumerate(channels, start=1):
            features['diff_{}'.format(idx)] = (data[channel] - data[channel].shift(1)).fillna(0)
        for suffix, window in diff_windows:
            for idx in range(1, len(channels) + 1):
                diff = features['diff_{}'.format(idx)]
                features['rolling_diff_{}{}'.format(idx, suffix)] = diff.rolling(window).sum().bfill()
        for channel in channels:
            for window in mean_windows:
                features['rolling_{}_{}'.format(channel, window)] = (
                    data[channel].rolling(window).mean().bfill())

        data = pd.concat([data, pd.DataFrame(features)], axis=1)

        #label = label / 902.  (label normalisation, disabled)
        data['label'] = label
        datas.append(data)
    return datas, data

In [423]:
# NOTE(review): the second return value is the last processed DataFrame, not a
# label array, so `train_labels` is a misnomer. Later cells read the frames
# from the module-level `datas` list.
train_window_data, train_labels = make_train_data(train_paths)

  0%|          | 0/16 [00:00<?, ?it/s]

In [424]:
# Module-level accumulator: make_predict_data() appends one feature frame per
# test file, and the prediction cell iterates over this list.
tests = []


def make_predict_data(test_paths):
    """Load every test CSV and add the same engineered features as training.

    Original author's note (translated): this function was written assuming
    that most test samples consist of 500 time steps, i.e. it is tuned for an
    inference window size of 500. Nothing in the body enforces that length.

    The driver code is the first character after the first ``_`` of the file
    name (``'A'`` -> 0, anything else -> 1). The appended columns, in order,
    are ``driver``, pairwise sums ``A``..``I``, absolute pairwise differences
    ``A_1``..``I_1``, first differences ``diff_1``..``diff_7``, rolling sums of
    the differences over 5 / 20 / 50 rows, and rolling means of each raw
    channel over 5, 3, 7, 10, 20, 50 and 100 rows. No ``label`` column is
    added.

    Side effect: every processed frame is appended to the module-level
    ``tests`` list.

    Args:
        test_paths: Iterable of CSV paths. Forward and backward slashes are
            both accepted as directory separators.

    Returns:
        The module-level ``tests`` list, so the call site receives the frames
        instead of ``None``.
    """
    signals = ['Signal A', 'Signal B', 'Signal C']
    sensors = ['Sensor A', 'Sensor B', 'Sensor C', 'Sensor D']
    channels = signals + sensors
    # Unordered pairs within each group, in the order of columns A..I.
    pairs = [(first, second)
             for group in (signals, sensors)
             for pos, first in enumerate(group)
             for second in group[pos + 1:]]
    pair_names = 'ABCDEFGHI'
    diff_windows = (('', 5), ('B', 20), ('C', 50))
    mean_windows = (5, 3, 7, 10, 20, 50, 100)

    for path in tqdm(test_paths):
        # Normalise separators so the file name is parsed identically for
        # Windows-style and POSIX-style paths.
        stem = os.path.basename(path.replace('\\', '/')).split('.')[0]
        driver = str(stem.split('_')[1][0])

        data = pd.read_csv(path)
        data['driver'] = 0 if driver == 'A' else 1

        # Collect the new columns in a dict and attach them in one concat;
        # inserting ~100 columns one by one fragments the frame.
        features = {}
        for name, (left, right) in zip(pair_names, pairs):
            features[name] = data[left] + data[right]
        for name, (left, right) in zip(pair_names, pairs):
            features[name + '_1'] = np.abs(data[left] - data[right])
        for idx, channel in enumerate(channels, start=1):
            features['diff_{}'.format(idx)] = (data[channel] - data[channel].shift(1)).fillna(0)
        for suffix, window in diff_windows:
            for idx in range(1, len(channels) + 1):
                diff = features['diff_{}'.format(idx)]
                features['rolling_diff_{}{}'.format(idx, suffix)] = diff.rolling(window).sum().bfill()
        for channel in channels:
            for window in mean_windows:
                features['rolling_{}_{}'.format(channel, window)] = (
                    data[channel].rolling(window).mean().bfill())

        tests.append(pd.concat([data, pd.DataFrame(features)], axis=1))
    return tests
        
# Build the feature frames for every test file. The frames are collected in the
# module-level `tests` list, which is what the prediction cell iterates over.
test_window_data = make_predict_data(test_paths)

  0%|          | 0/4048 [00:00<?, ?it/s]

In [425]:
# Start from an empty frame; `df` is reassigned below to the concatenation of
# the per-file training frames.
df = pd.DataFrame()

In [426]:
# Stack all per-file training frames with a single concat. Growing `df` with
# pd.concat inside a loop re-copies every previously added row on each
# iteration (quadratic in the number of rows).
df = pd.concat(datas, axis=0) if datas else pd.DataFrame()

In [427]:
# Give the time column a plain name and renumber the rows 0..n-1, since the
# concatenated frame repeats each file's own row index.
df = df.rename(columns={'Time[s]': 'time'}).reset_index(drop=True)


## label 값이 900보다 작은 샘플만으로 학습

In [428]:
# Keep only the rows whose label is strictly below 900.
df = df[df['label']<900]

In [429]:
# Separate the regression target (cast to int) from the feature matrix.
y_train = df['label'].astype(int)
x_train = df.drop('label', axis=1)

In [430]:
# Display the feature matrix (the notebook renders a truncated preview).
x_train

Unnamed: 0,time,Signal A,Signal B,Signal C,Sensor A,Sensor B,Sensor C,Sensor D,driver,A,...,rolling_Sensor C_20,rolling_Sensor C_50,rolling_Sensor C_100,rolling_Sensor D_5,rolling_Sensor D_3,rolling_Sensor D_7,rolling_Sensor D_10,rolling_Sensor D_20,rolling_Sensor D_50,rolling_Sensor D_100
0,0.00,316.071968,316.071968,316.071968,703.956361,414.964554,316.071968,524.105077,0,632.143936,...,316.071968,316.071968,316.071968,524.459802,524.170845,524.534246,524.660516,524.602350,524.624446,524.326355
1,0.01,316.071968,316.071968,316.071968,704.638445,415.191915,316.071968,524.105077,0,632.143936,...,316.071968,316.071968,316.071968,524.459802,524.170845,524.534246,524.660516,524.602350,524.624446,524.326355
2,0.02,316.071968,316.071968,316.071968,704.789776,415.711543,316.071968,524.302381,0,632.143936,...,316.071968,316.071968,316.071968,524.459802,524.170845,524.534246,524.660516,524.602350,524.624446,524.326355
3,0.03,316.071968,316.071968,316.071968,704.702741,415.616157,316.071968,524.772794,0,632.143936,...,316.071968,316.071968,316.071968,524.459802,524.393417,524.534246,524.660516,524.602350,524.624446,524.326355
4,0.04,316.071968,316.071968,316.071968,705.826273,414.964554,316.071968,525.013680,0,632.143936,...,316.071968,316.071968,316.071968,524.459802,524.696285,524.534246,524.660516,524.602350,524.624446,524.326355
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168128,158.07,316.071968,316.071968,316.071968,713.691900,377.740575,316.071968,498.694601,1,632.143936,...,316.071968,316.071968,316.071968,498.650347,498.377077,498.546361,498.550472,498.623333,498.508059,498.392669
168129,158.08,316.071968,316.071968,316.071968,712.920778,377.355638,316.071968,498.111178,1,632.143936,...,316.071968,316.071968,316.071968,498.473056,498.102331,498.514543,498.536815,498.597701,498.510826,498.387537
168130,158.09,316.071968,316.071968,316.071968,712.317109,377.953263,316.071968,497.941999,1,632.143936,...,316.071968,316.071968,316.071968,498.236881,498.249260,498.472130,498.442077,498.544919,498.510209,498.378066
168131,158.10,316.071968,316.071968,316.071968,715.783478,377.606627,316.071968,496.209051,1,632.143936,...,316.071968,316.071968,316.071968,497.691608,497.420743,498.073761,498.208675,498.405490,498.474933,498.354729


In [431]:
# Upper bound on boosting rounds, passed to CatBoostRegressor(iterations=...).
iterations =50000
# Early-stopping patience, passed to model.fit(early_stopping_rounds=...).
patience = 200
# NOTE(review): not referenced anywhere else in the visible notebook — confirm
# whether this flag can be removed.
is_holdout = False

In [432]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost import XGBRegressor
import lightgbm as lgb
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split  # 수정된 부분

# Turn off pandas' chained-assignment warning.
pd.set_option('mode.chained_assignment', None)

from catboost import CatBoostRegressor, Pool

# Hold out 10% of the rows for early stopping.
# NOTE(review): the split is row-level and random, so rows from the same
# recording end up in both train and validation. The validation MAE is then
# likely optimistic — confirm whether a per-file split is wanted.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
)
train_pool = Pool(X_train, label=Y_train)
validation_pool = Pool(X_valid, label=Y_valid)

# MAE is both the optimised loss and the reported metric.
model = CatBoostRegressor(
    iterations=iterations,
    learning_rate=0.1,
    depth=6,
    random_seed=42,
    custom_metric=['MAE'],
    loss_function='MAE',
)

# Stop once the validation metric has not improved for `patience` rounds; log every 100 rounds.
model.fit(
    train_pool,
    eval_set=validation_pool,
    early_stopping_rounds=patience,
    verbose=100,
)

0:	learn: 231.3296244	test: 231.0258107	best: 231.0258107 (0)	total: 72.6ms	remaining: 1h 29s
100:	learn: 119.2875933	test: 119.2865476	best: 119.2865476 (100)	total: 5.49s	remaining: 45m 14s
200:	learn: 95.9052085	test: 96.3089408	best: 96.3089408 (200)	total: 10.9s	remaining: 44m 57s
300:	learn: 89.9194952	test: 90.3716622	best: 90.3716622 (300)	total: 16s	remaining: 43m 55s
400:	learn: 84.6530836	test: 85.1179751	best: 85.1179751 (400)	total: 21s	remaining: 43m 14s
500:	learn: 82.1231261	test: 82.6929795	best: 82.6929795 (500)	total: 26s	remaining: 42m 53s
600:	learn: 79.6061194	test: 80.1452308	best: 80.1452308 (600)	total: 31.2s	remaining: 42m 41s
700:	learn: 75.3823847	test: 76.0006247	best: 76.0006247 (700)	total: 36.2s	remaining: 42m 25s
800:	learn: 70.6417981	test: 71.3547553	best: 71.3547553 (800)	total: 41.1s	remaining: 42m 6s
900:	learn: 67.0754734	test: 67.7700922	best: 67.7700922 (900)	total: 46.5s	remaining: 42m 16s
1000:	learn: 62.2916869	test: 63.0650964	best: 63.06509

8500:	learn: 10.7608077	test: 14.1286755	best: 14.1286755 (8500)	total: 7m 45s	remaining: 37m 50s
8600:	learn: 10.6875681	test: 14.0684721	best: 14.0684721 (8600)	total: 7m 50s	remaining: 37m 43s
8700:	learn: 10.6209757	test: 14.0193868	best: 14.0193868 (8700)	total: 7m 55s	remaining: 37m 38s
8800:	learn: 10.5524074	test: 13.9735041	best: 13.9735041 (8800)	total: 8m 1s	remaining: 37m 32s
8900:	learn: 10.4746141	test: 13.9093827	best: 13.9093827 (8900)	total: 8m 6s	remaining: 37m 27s
9000:	learn: 10.4095412	test: 13.8535699	best: 13.8535699 (9000)	total: 8m 11s	remaining: 37m 20s
9100:	learn: 10.3389320	test: 13.7950101	best: 13.7950101 (9100)	total: 8m 17s	remaining: 37m 15s
9200:	learn: 10.2763704	test: 13.7496812	best: 13.7496409 (9199)	total: 8m 22s	remaining: 37m 8s
9300:	learn: 10.2123539	test: 13.7010431	best: 13.7010431 (9300)	total: 8m 27s	remaining: 37m 1s
9400:	learn: 10.1555759	test: 13.6577917	best: 13.6577917 (9400)	total: 8m 33s	remaining: 36m 56s
9500:	learn: 10.0996393	

16800:	learn: 7.6876970	test: 11.9390073	best: 11.9390073 (16800)	total: 15m 10s	remaining: 29m 59s
16900:	learn: 7.6699652	test: 11.9272691	best: 11.9272473 (16899)	total: 15m 15s	remaining: 29m 53s
17000:	learn: 7.6482618	test: 11.9140052	best: 11.9140040 (16999)	total: 15m 21s	remaining: 29m 47s
17100:	learn: 7.6303159	test: 11.9037050	best: 11.9036251 (17097)	total: 15m 26s	remaining: 29m 41s
17200:	learn: 7.6134980	test: 11.8929059	best: 11.8929059 (17200)	total: 15m 31s	remaining: 29m 36s
17300:	learn: 7.5967500	test: 11.8815980	best: 11.8815365 (17298)	total: 15m 36s	remaining: 29m 30s
17400:	learn: 7.5788093	test: 11.8695770	best: 11.8695770 (17400)	total: 15m 41s	remaining: 29m 24s
17500:	learn: 7.5610031	test: 11.8554373	best: 11.8554373 (17500)	total: 15m 46s	remaining: 29m 18s
17600:	learn: 7.5418101	test: 11.8460378	best: 11.8460378 (17600)	total: 15m 52s	remaining: 29m 12s
17700:	learn: 7.5225780	test: 11.8307927	best: 11.8307927 (17700)	total: 15m 57s	remaining: 29m 7s
1

25100:	learn: 6.6018824	test: 11.2939890	best: 11.2939506 (25098)	total: 22m 19s	remaining: 22m 8s
25200:	learn: 6.5912129	test: 11.2870339	best: 11.2870339 (25200)	total: 22m 24s	remaining: 22m 2s
25300:	learn: 6.5833092	test: 11.2834589	best: 11.2834589 (25300)	total: 22m 29s	remaining: 21m 57s
25400:	learn: 6.5741128	test: 11.2792797	best: 11.2792797 (25400)	total: 22m 34s	remaining: 21m 51s
25500:	learn: 6.5664628	test: 11.2764004	best: 11.2762696 (25490)	total: 22m 39s	remaining: 21m 46s
25600:	learn: 6.5585420	test: 11.2731333	best: 11.2731310 (25599)	total: 22m 45s	remaining: 21m 41s
25700:	learn: 6.5469640	test: 11.2653401	best: 11.2653057 (25698)	total: 22m 50s	remaining: 21m 35s
25800:	learn: 6.5380923	test: 11.2587182	best: 11.2586600 (25794)	total: 22m 55s	remaining: 21m 30s
25900:	learn: 6.5296789	test: 11.2531663	best: 11.2531663 (25900)	total: 23m	remaining: 21m 24s
26000:	learn: 6.5211505	test: 11.2471664	best: 11.2471482 (25998)	total: 23m 6s	remaining: 21m 19s
26100:	

33400:	learn: 6.0190739	test: 10.9962582	best: 10.9962220 (33399)	total: 29m 21s	remaining: 14m 35s
33500:	learn: 6.0139631	test: 10.9941148	best: 10.9941148 (33500)	total: 29m 26s	remaining: 14m 29s
33600:	learn: 6.0079443	test: 10.9919458	best: 10.9919312 (33598)	total: 29m 31s	remaining: 14m 24s
33700:	learn: 6.0017437	test: 10.9889690	best: 10.9889690 (33700)	total: 29m 36s	remaining: 14m 19s
33800:	learn: 5.9972661	test: 10.9868848	best: 10.9868662 (33797)	total: 29m 41s	remaining: 14m 13s
33900:	learn: 5.9912523	test: 10.9816717	best: 10.9816717 (33900)	total: 29m 47s	remaining: 14m 8s
34000:	learn: 5.9865992	test: 10.9791090	best: 10.9790604 (33991)	total: 29m 52s	remaining: 14m 3s
34100:	learn: 5.9803957	test: 10.9740920	best: 10.9740630 (34097)	total: 29m 57s	remaining: 13m 57s
34200:	learn: 5.9758297	test: 10.9727515	best: 10.9727515 (34200)	total: 30m 2s	remaining: 13m 52s
34300:	learn: 5.9698774	test: 10.9693372	best: 10.9693043 (34294)	total: 30m 7s	remaining: 13m 47s
3440

41700:	learn: 5.6302607	test: 10.8042389	best: 10.8042389 (41700)	total: 36m 28s	remaining: 7m 15s
41800:	learn: 5.6261886	test: 10.8028491	best: 10.8028491 (41800)	total: 36m 33s	remaining: 7m 10s
41900:	learn: 5.6227275	test: 10.8016213	best: 10.8016213 (41900)	total: 36m 39s	remaining: 7m 5s
42000:	learn: 5.6191469	test: 10.8001593	best: 10.8001191 (41989)	total: 36m 44s	remaining: 6m 59s
42100:	learn: 5.6148824	test: 10.7975186	best: 10.7975186 (42100)	total: 36m 49s	remaining: 6m 54s
42200:	learn: 5.6115066	test: 10.7959244	best: 10.7959244 (42200)	total: 36m 54s	remaining: 6m 49s
42300:	learn: 5.6073061	test: 10.7934510	best: 10.7934510 (42300)	total: 36m 59s	remaining: 6m 43s
42400:	learn: 5.6044940	test: 10.7926017	best: 10.7926017 (42400)	total: 37m 4s	remaining: 6m 38s
42500:	learn: 5.6015585	test: 10.7906499	best: 10.7904278 (42484)	total: 37m 9s	remaining: 6m 33s
42600:	learn: 5.5974465	test: 10.7885953	best: 10.7885953 (42600)	total: 37m 14s	remaining: 6m 28s
42700:	learn:

<catboost.core.CatBoostRegressor at 0x17e0ec717c0>

In [433]:
# Predict every row of each test recording. The time column is renamed first
# so it matches the feature name used at training time.
preds = [
    model.predict(test_frame.rename(columns={'Time[s]': 'time'}))
    for test_frame in tqdm(tests)
]

100%|██████████████████████████████████████████████████████████████████████████████| 4048/4048 [01:47<00:00, 37.58it/s]


# preds 평균을 내줄때, sort 하여 상위10%, 하위10% 값을 버림

In [434]:
# Trimmed mean per test file: sort the row-level predictions, drop the lowest
# and highest 10%, and average what is left.
value = []
for pred in preds:
    ordered = np.sort(np.asarray(pred, dtype=float))
    remove_count = len(ordered) // 10
    # Use an explicit end index: `ordered[k:-k]` is empty when k == 0 (fewer
    # than 10 predictions), which made np.mean return NaN for short files.
    kept = ordered[remove_count:len(ordered) - remove_count]
    value.append(np.mean(kept))

In [435]:
# Alias of the per-file averaged predictions; post-processed in the next cell.
final_value = value

In [436]:
# Post-process the results: round each averaged prediction to 0 decimals and
# cast to int.
final_value = np.round(final_value, 0).astype(int)

In [437]:
# Sanity check: there should be one prediction per test file.
final_value.shape

(4048,)

## Submission

In [438]:
# Load the sample submission file; its `weight` column is overwritten below.
submit = pd.read_csv('./sample_submission.csv')

In [439]:
# Store the rounded predictions in the `weight` column.
# assumes sample_submission.csv rows are in the same order as test.csv — TODO confirm
submit['weight'] = final_value

## 후처리
#### train label 의 최대값인 902 보다 크면 902로 처리
#### 음수값이 나오면 절대값 처리

In [440]:
# Take the absolute value first, then cap at 902 (the cap the original author
# chose, stated as the maximum training label). Capping before abs() let a
# prediction below -902 come out above the cap.
submit['weight'] = submit['weight'].abs().clip(upper=902)


In [441]:
# Summary statistics of the final predictions as a quick sanity check.
submit['weight'].describe()

count    4048.000000
mean      418.914032
std       154.685839
min        17.000000
25%       321.000000
50%       402.000000
75%       578.250000
max       779.000000
Name: weight, dtype: float64

In [442]:
# Write the submission file without the index column.
submit.to_csv('./상준_CAT_25호.csv', index=False)