<a href="https://colab.research.google.com/github/zaku2590/signate/blob/main/%E3%82%B2%E3%83%BC%E3%83%A0%E3%81%AE%E5%8B%9D%E6%95%97%E4%BA%88%E6%B8%ACkford_lightgbm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
# Load the training table, the test table and the submission template.
# The first CSV column is used as the index in every case; the submission
# template is read without a header row.
sample_submit = pd.read_csv("sample_submit.csv", index_col=0, header=None)
test = pd.read_csv("test.csv", index_col=0)
train = pd.read_csv("train.csv", index_col=0)

# Derive the same two extra columns on both tables so that their feature
# sets stay aligned.
for frame in (train, test):
  # blueFirstBlood + blueKills - blueDeaths (note: the column name does not
  # mention the blueFirstBlood term).
  kill_balance = frame["blueFirstBlood"] + frame["blueKills"] - frame["blueDeaths"]
  frame["blueKills-blueDeaths"] = kill_balance

  # blueEliteMonsters + blueDragons.
  monster_total = frame["blueEliteMonsters"] + frame["blueDragons"]
  frame["blueEliteMonsters+blueDragons"] = monster_total

# lightGBMのModelを構築

import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

# Model parameters handed to lgb.train
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'seed': 71,
    'verbose': 0,
    # LightGBM's documented metric name is spelled with an underscore; the
    # hyphenated "binary-logloss" is not one of the listed metric names.
    'metric': 'binary_logloss'
}

# Per-fold validation accuracy and the trained model of each fold
scores = []
models = []

# Split the training data into 4 shuffled folds (fixed seed for repeatability)
kf = KFold(n_splits=4, shuffle=True, random_state=71)

# Separate the target (y) from the explanatory variables (X)
y_train = train["blueWins"]  # 1 if the blue team won, 0 otherwise
X_train = train.drop(["blueWins"], axis=1)  # every column except blueWins


# Train one model per fold
for tr_idx, va_idx in kf.split(X_train):
  # Positional split into this fold's training part and validation part
  tr_x, va_x = X_train.iloc[tr_idx], X_train.iloc[va_idx]
  tr_y, va_y = y_train.iloc[tr_idx], y_train.iloc[va_idx]

  # Wrap both parts as LightGBM datasets; the validation set references the
  # training set
  lgb_train = lgb.Dataset(tr_x, tr_y)
  lgb_eval = lgb.Dataset(va_x, va_y, reference=lgb_train)

  model_gbm = lgb.train(
      params,
      lgb_train,
      num_boost_round=500,
      valid_sets=[lgb_eval]  # documented as a list of Dataset
  )

  # Score the fold: threshold the predictions at 0.5 and compare with the
  # held-out labels (accuracy_score expects y_true first, then y_pred)
  pred_y = model_gbm.predict(va_x)
  pred_y_label = np.where(pred_y > 0.5, 1, 0)
  score = accuracy_score(va_y, pred_y_label)

  # Keep the fold's score and model
  scores.append(score)
  models.append(model_gbm)

# Ensemble prediction helper
def pred(models, test):
  """Return the mean of every model's prediction on ``test``.

  Args:
    models: Non-empty sequence of objects exposing ``predict(data)``.
    test: Data passed unchanged to each model's ``predict``.

  Returns:
    The result of ``np.mean`` over the per-model predictions along axis 0,
    i.e. one averaged prediction per input row.

  Raises:
    ValueError: If ``models`` is empty. Averaging nothing would otherwise
      yield NaN, which a caller could silently threshold into a label.
  """
  if len(models) == 0:
    raise ValueError("models must contain at least one trained model")

  # One prediction array per model, in the order the models were given
  pred_y_summary = [model.predict(test) for model in models]

  # Average across models (axis 0), keeping one value per row
  return np.mean(pred_y_summary, axis=0)

# Run the prediction on the test data (mean over all fold models)
pred_y = pred(models, test)
# Threshold the averaged predictions at 0.5 to obtain 0/1 labels
pred_y_label = np.where(pred_y > 0.5, 1, 0)
# Write the labels into the first column of the submission template and save
# it with its index but without a header row. ``header`` is documented as a
# bool or list of str, so pass False explicitly rather than relying on None
# being falsy.
sample_submit.iloc[:, 0] = pred_y_label
sample_submit.to_csv("submission.csv", index=True, header=False)