In [3]:
# 라이브러리 한 번에 설치하기
%pip install lightgbm catboost optuna xgboost

Collecting lightgbm
  Using cached lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Collecting catboost
  Using cached catboost-1.2.8-cp312-cp312-win_amd64.whl.metadata (1.5 kB)
Collecting graphviz (from catboost)
  Using cached graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Using cached lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
Using cached catboost-1.2.8-cp312-cp312-win_amd64.whl (102.4 MB)
Using cached graphviz-0.21-py3-none-any.whl (47 kB)
Installing collected packages: graphviz, lightgbm, catboost

   ---------------------------------------- 0/3 [graphviz]
   ---------------------------------------- 0/3 [graphviz]
   ---------------------------------------- 0/3 [graphviz]
   ---------------------------------------- 0/3 [graphviz]
   ---------------------------------------- 0/3 [graphviz]
   ------------- -------------------------- 1/3 [lightgbm]
   ------------- -------------------------- 1/3 [lightgbm]
   ------------- -------------------------- 1/3 [lightgbm]
   ----


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


# Model 2. LightGBM + Optuna Tuning

LightGBM을 사용하여 모델을 학습하고 Optuna로 최적의 하이퍼파라미터를 찾습니다.

### 진행 순서
1. 데이터 로드 (train_enriched.csv, test_enriched.csv)
2. 데이터 인코딩
3. Optuna를 이용한 하이퍼파라미터 최적화
4. 최적의 파라미터로 최종 모델 학습
5. 결과 예측 및 제출 파일 생성 (submission_lgbm_optuna.csv)

In [4]:
import pandas as pd
import numpy as np
import os
import warnings
import lightgbm as lgb
import optuna
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

warnings.filterwarnings('ignore')

# 1. 데이터 로드
PREPROCESS_DIR = './data_preprocess'
SUBMISSION_DIR = './open_track1'

TRAIN_PATH = os.path.join(PREPROCESS_DIR, 'train_enriched.csv')
TEST_PATH = os.path.join(PREPROCESS_DIR, 'test_enriched.csv')
SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, 'sample_submission.csv')

print("데이터 로딩 중...")
train_df = pd.read_csv(TRAIN_PATH)
test_df = pd.read_csv(TEST_PATH)

# 인코딩
cat_features = ['type_name', 'prev_type_name']
le = LabelEncoder()

for col in cat_features:
    all_values = pd.concat([train_df[col].astype(str), test_df[col].astype(str)]).unique()
    le.fit(all_values)
    train_df[col] = le.transform(train_df[col].astype(str))
    test_df[col] = le.transform(test_df[col].astype(str))

# 피처 선택
feature_cols = [
    'start_x', 'start_y', 'type_name', 'team_id', 'time_seconds',
    'prev_type_name', 'prev_end_x', 'prev_end_y',
    'dist_to_goal', 'angle_to_goal', 'dist_to_center'
]
target_cols = ['end_x', 'end_y']

X = train_df[feature_cols]
y = train_df[target_cols]
X_test = test_df[feature_cols]
test_ids = test_df['game_episode']

# 튜닝용 데이터 분리
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Optuna 튜닝
def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'random_state': 42,
        'n_jobs': -1
    }
    
    # X축 모델
    model_x = lgb.LGBMRegressor(**params)
    model_x.fit(X_train, y_train['end_x'])
    pred_x = model_x.predict(X_val)
    
    # Y축 모델
    model_y = lgb.LGBMRegressor(**params)
    model_y.fit(X_train, y_train['end_y'])
    pred_y = model_y.predict(X_val)
    
    rmse_x = np.sqrt(mean_squared_error(y_val['end_x'], pred_x))
    rmse_y = np.sqrt(mean_squared_error(y_val['end_y'], pred_y))
    
    return (rmse_x + rmse_y) / 2

print("Optuna 튜닝 시작...")
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

print(f"Best Trial: {study.best_trial.value:.4f}")
print("Best Params:", study.best_params)

# 3. 최종 학습 및 예측
print("최적 파라미터로 전체 데이터 재학습 중...")

best_params = study.best_params
best_params['random_state'] = 42
best_params['n_jobs'] = -1

final_model_x = lgb.LGBMRegressor(**best_params)
final_model_y = lgb.LGBMRegressor(**best_params)

final_model_x.fit(X, y['end_x'])
final_model_y.fit(X, y['end_y'])

pred_x = final_model_x.predict(X_test)
pred_y = final_model_y.predict(X_test)

# Clipping
pred_x = np.clip(pred_x, 0, 105)
pred_y = np.clip(pred_y, 0, 68)

# 4. 저장
submission_df = pd.DataFrame({
    'game_episode': test_ids,
    'end_x': pred_x,
    'end_y': pred_y
})

sample_submission = pd.read_csv(SUBMISSION_PATH)
final_submission = pd.merge(sample_submission[['game_episode']], submission_df, on='game_episode', how='left')
final_submission.fillna(50.0, inplace=True)

save_path = 'submission_lgbm_optuna.csv'
final_submission.to_csv(save_path, index=False)

print(f"LightGBM 튜닝 및 저장 완료: {save_path}")

Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl.metadata (1.5 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Collecting plotly (from catboost)
  Downloading plotly-6.5.0-py3-none-any.whl.metadata (8.5 kB)
Downloading lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 1.5/1.5 MB 12.8 MB/s  0:00:00
Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl (102.4 MB)
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   - -------------------------------------- 2.9/102.4 MB 14.0 MB/s eta 0:00:08
   -- ------------------------------------- 6.0/102.4 MB 14.2 MB/s eta 0:00:07
   --- ------------------------------------ 9.2/102.4 MB 15.0 MB/s eta 0:00:07
   ----- ---------------------------------- 13.1/1

ERROR: Could not install packages due to an OSError: [WinError 5] 액세스가 거부되었습니다: 'C:\\Users\\jwk72\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\lightgbm\\bin\\lib_lightgbm.dll'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
  from .autonotebook import tqdm as notebook_tqdm
[I 2025-12-02 14:14:52,658] A new study created in memory with name: no-name-0fbcafab-3078-497b-8b36-808c9e43174d


데이터 로딩 중...
Optuna 튜닝 시작...


[I 2025-12-02 14:14:55,443] Trial 0 finished with value: 14.490207321562623 and parameters: {'n_estimators': 409, 'learning_rate': 0.15847876170028088, 'num_leaves': 48, 'max_depth': 10, 'min_child_samples': 19, 'subsample': 0.621613268930054, 'colsample_bytree': 0.9673332208132163}. Best is trial 0 with value: 14.490207321562623.
[I 2025-12-02 14:14:55,933] Trial 1 finished with value: 13.824519637249868 and parameters: {'n_estimators': 110, 'learning_rate': 0.18145282261647253, 'num_leaves': 88, 'max_depth': 11, 'min_child_samples': 95, 'subsample': 0.636467653053686, 'colsample_bytree': 0.6113120769354742}. Best is trial 1 with value: 13.824519637249868.
[I 2025-12-02 14:14:57,973] Trial 2 finished with value: 14.064597972195088 and parameters: {'n_estimators': 722, 'learning_rate': 0.09122704834624966, 'num_leaves': 39, 'max_depth': 9, 'min_child_samples': 88, 'subsample': 0.6815321770288874, 'colsample_bytree': 0.788265921280003}. Best is trial 1 with value: 13.824519637249868.
[I

Best Trial: 13.5002
Best Params: {'n_estimators': 217, 'learning_rate': 0.033617048496180524, 'num_leaves': 20, 'max_depth': 6, 'min_child_samples': 56, 'subsample': 0.6656043120950793, 'colsample_bytree': 0.6897347228206409}
최적 파라미터로 전체 데이터 재학습 중...
LightGBM 튜닝 및 저장 완료: submission_lgbm_optuna.csv
