In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore', category=UserWarning)

from Processing.load_dataset import Load_dataset
from Processing.processing import Split, Valid, Submission
from Engineering.enginnering import engin
from Valid.validation import rmse

from Model.lightgbm import LightGBM
from Model.xgboost import XGBOOST
from Model.nn import NN
from sklearn.metrics import mean_squared_error
import xgboost as xgb

In [39]:
# Load the raw train / test tables.
train, test = Load_dataset()

# Feature engineering / preprocessing: the same engin() step is applied to
# both tables.
train_df = engin(train)
test_df = engin(test)

# Ids of the test rows, kept for the submission files; the "id" column is
# then removed from the test features.
index = np.array(test["id"])
test_df = test_df.drop(columns=["id"])

# Split into train / test parts, then split the training part again into
# train / validation.
# Split() also returns a fifth value; it gets its own name so that it does
# not overwrite the submission ids held in `index`.
# NOTE(review): presumably this is the row index of the split - confirm
# against Processing.processing.Split.
X_train, X_test, y_train, y_test, split_index = Split(train_df)
X_train, X_valid, y_train, y_valid = Valid(X_train, y_train)

## NN

In [None]:
# Training: fit the NN model on the training / validation split.
nn_model = NN(
    X_train,
    X_valid,
    y_train,
    y_valid,
    fig=1,
)

In [6]:
# Evaluation: predict on X_test with the trained NN and pass the predictions
# and y_test to the project's rmse() helper (imported from Valid.validation).
test_predict = nn_model.predict(X_test)
rmse(test_predict, y_test)

RMSE:  22.203138629004858


In [45]:
# Prediction: run the trained NN on the engineered test features.
predict_nn = nn_model.predict(test_df)

# Submission: pair the test ids with the flattened (1-D) predictions.
index = np.array(test["id"])
Submission(
    index,
    np.ravel(predict_nn),
    name='nn_2',
)

## LightGBM

In [None]:
# Training: fit the LightGBM model on the training / validation split.
Light_GBM = LightGBM(X_train, X_valid, y_train, y_valid, fig=1)

# Feature importances. The values are labelled with their feature names
# *before* sorting, so each importance stays attached to the column it
# belongs to (sorting the raw values first would pair the largest values
# with whatever columns happen to come first).
# NOTE(review): assumes feature_importance() returns one value per column of
# X_test, in column order - confirm against the Model.lightgbm wrapper.
importance = (
    pd.DataFrame(
        {'importance': Light_GBM.feature_importance()},
        index=X_test.columns,
    )
    .sort_values('importance', ascending=False)
)

# Bar chart of the ten most important features.
top_importance = importance.head(10)
plt.figure(figsize=(10, 6))
plt.bar(top_importance.index, top_importance['importance'])

In [None]:
# Evaluation: predict on X_test with the trained LightGBM model and pass the
# predictions and y_test to the project's rmse() helper.
test_predict = Light_GBM.predict(X_test)
rmse(test_predict, y_test)

In [None]:
# Prediction: run the trained LightGBM model on the engineered test features.
predict_light_gbm = Light_GBM.predict(data=test_df)

# Submission: the test ids are re-derived here so this cell does not depend
# on which earlier cell last assigned `index`.
index = np.array(test["id"])
Submission(index, predict_light_gbm, name='4')

## XGBoost

In [None]:
# Training: fit the XGBoost model on the training / validation split.
# The result is bound to the name `xgboost`; the library itself is referred
# to as `xgb` elsewhere in this notebook.
xgboost = XGBOOST(
    X_train,
    X_valid,
    y_train,
    y_valid,
    fig=1,
)

In [None]:
# Fetch the per-feature scores (importance_type='weight') once, then split
# the mapping into feature names and values for the bar chart.
weight_scores = xgboost.get_score(importance_type='weight')
col = list(weight_scores)
importance = list(weight_scores.values())

plt.figure(figsize=(10, 6))
plt.bar(col, importance)

In [None]:
# Evaluation: score X_test with the same tree limit that the submission cell
# passes to predict(), so the reported RMSE describes the model whose
# predictions are actually submitted.
dm_test = xgb.DMatrix(X_test)
test_predict = xgboost.predict(dm_test, ntree_limit=xgboost.best_ntree_limit)
rmse(test_predict, y_test)

In [None]:
# Prediction: wrap the engineered test features in a DMatrix under its own
# name, so the raw `test` DataFrame stays available (cells that read
# test["id"] can then be re-run after this one).
dm_submit = xgb.DMatrix(test_df)
predict_xgboost = xgboost.predict(dm_submit, ntree_limit=xgboost.best_ntree_limit)

# Submission: the test ids are re-derived here so this cell does not depend
# on which earlier cell last assigned `index`.
index = np.array(test["id"])
Submission(index, predict_xgboost, name='xgb_1')