<a href="https://colab.research.google.com/github/zaku2590/signate/blob/main/%E3%82%B2%E3%83%BC%E3%83%A0%E3%81%AE%E5%8B%9D%E6%95%97%E4%BA%88%E6%B8%AC%E3%82%A2%E3%83%B3%E3%82%B5%E3%83%B3%E3%83%96%E3%83%AB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm

# Load the competition files; the first CSV column becomes the index.
train, test = (pd.read_csv(path, index_col=0) for path in ("train.csv", "test.csv"))
sample_submit = pd.read_csv("sample_submit.csv", index_col=0, header=None)

# Raw columns that are removed once the derived features have been added.
_RAW_COLUMNS = ["blueAssists", "blueKills", "blueDeaths", "blueDragons", "blueFirstBlood"]


def _build_features(df):
    """Add the derived feature columns to ``df`` and return it without the raw columns.

    The derived columns are written into ``df`` itself; the returned frame is
    the result of dropping ``_RAW_COLUMNS`` from it.
    """
    # NOTE: despite its name, this column also includes blueFirstBlood.
    df["blueKills-blueDeaths"] = df["blueFirstBlood"] + df["blueKills"] - df["blueDeaths"]
    df["blueEliteMonsters+blueDragons"] = df["blueEliteMonsters"] + df["blueDragons"]
    df["group"] = df["blueKills"] * df["blueAssists"]
    return df.drop(columns=_RAW_COLUMNS)


# Feature engineering, applied identically to the training and test frames.
train = _build_features(train)
test = _build_features(test)

# Split the training frame into the target (y) and the explanatory variables (X).
y_train = train["blueWins"]
X_train = train.drop(columns=["blueWins"])

# The test frame is used as-is for prediction.
X_test = test

# クロスバリデーションで予測を実行する関数
def predict(X_train, y_train, X_test, mode):
    """Fit one model per cross-validation fold and collect its predictions.

    The training rows are split into 4 shuffled folds (fixed seed). For each
    fold a fresh model is trained on the other folds, then used to predict
    both the held-out fold and the full test set.

    Args:
        X_train: Training features; indexed positionally with ``.iloc``.
        y_train: Training target; indexed positionally with ``.iloc``.
        X_test: Test features passed to ``model.predict`` in every fold.
        mode: Model to use: ``'LightGBM'``, ``'RandomForest'`` or ``'SVM'``.

    Returns:
        A tuple ``(pred_train, preds_test)``:
        ``pred_train`` holds the out-of-fold predictions rearranged into the
        original row order of ``X_train``; ``preds_test`` is the element-wise
        mean of the per-fold test predictions.

        With ``'LightGBM'`` (binary objective) the values are the booster's
        raw ``predict`` output, i.e. probabilities; with ``'RandomForest'``
        and ``'SVM'`` they are the estimators' predicted class labels, so
        ``preds_test`` is then the mean label across folds.

    Raises:
        ValueError: If ``mode`` is not one of the supported model names.
    """
    supported_modes = ('LightGBM', 'RandomForest', 'SVM')
    # Fail early with a clear message instead of hitting an unbound ``model``
    # inside the fold loop.
    if mode not in supported_modes:
        raise ValueError(
            f"Unsupported mode {mode!r}; expected one of {supported_modes}"
        )

    # LightGBM hyper-parameters; identical for every fold, so defined once.
    lgb_params = {
        'bagging_fraction': 0.4600347572555584,
        'bagging_freq': 5,
        'boosting_type': 'gbdt',
        'feature_fraction': 0.7,
        'feature_pre_filter': False,
        'lambda_l1': 0.004418000666138604,
        'lambda_l2': 8.039538280454251e-06,
        'min_child_samples': 20,
        'num_leaves': 4,
        'objective': 'binary',
        'seed': 71,
        'task': 'train',
        'verbose': 0
    }

    # Per-fold results.
    preds = []
    preds_test = []
    idxes = []

    kf = KFold(n_splits=4, shuffle=True, random_state=71)
    for tr_idx, va_idx in kf.split(X_train):
        # Split into the rows used for fitting and the held-out rows.
        tr_x, va_x = X_train.iloc[tr_idx], X_train.iloc[va_idx]
        tr_y, va_y = y_train.iloc[tr_idx], y_train.iloc[va_idx]

        # Build and fit the model for this fold.
        if mode == 'LightGBM':
            lgb_train = lgb.Dataset(tr_x, tr_y)
            lgb_eval = lgb.Dataset(va_x, va_y, reference=lgb_train)

            # Trained without early stopping; the held-out fold is only
            # registered as a validation set. A copy of the params is passed
            # so each fold starts from the same untouched dict.
            model = lgb.train(
                dict(lgb_params),
                lgb_train,
                valid_sets=[lgb_eval]
            )

        elif mode == 'RandomForest':
            model = RandomForestClassifier(random_state=123, n_estimators=9, criterion='gini', max_depth=25)
            model = model.fit(tr_x, tr_y)

        else:  # mode == 'SVM' (guaranteed by the check above)
            model = svm.LinearSVC()
            model = model.fit(tr_x, tr_y)

        # Predict the held-out fold and the full test set.
        preds.append(model.predict(va_x))
        preds_test.append(model.predict(X_test))
        idxes.append(va_idx)

    # Concatenate the out-of-fold predictions, then undo the fold shuffling:
    # argsort of the concatenated validation indices maps each prediction
    # back to its original row position.
    idxes = np.concatenate(idxes)
    preds = np.concatenate(preds, axis=0)
    pred_train = preds[np.argsort(idxes)]

    # Average the test predictions across folds.
    preds_test = np.mean(preds_test, axis=0)

    return pred_train, preds_test

# Run the cross-validated prediction with the LightGBM model.
pred_train, pred_test = predict(X_train, y_train, X_test, mode='LightGBM')

# Threshold the fold-averaged test predictions at 0.5 to obtain 0/1 labels.
pred_y_label = (pred_test > 0.5).astype(int)

# Write the labels into the first column of the sample submission frame
# and save it as the submission file (index kept, no header row).
sample_submit.iloc[:, 0] = pred_y_label
sample_submit.to_csv("submission.csv", index=True, header=None)


In [11]:
train.head()

gameId,blueEliteMonsters,blueTotalGold,blueTotalExperience,blueWins,blueKills-blueDeaths,blueEliteMonsters+blueDragons,group
0,0,14536,17256,0,-3,0,30
1,0,14536,17863,0,10,0,50
2,0,17409,17256,0,-7,0,6
3,0,19558,18201,0,-2,0,56
4,0,17409,17256,0,-5,0,16
