<a href="https://colab.research.google.com/github/WooJin1993/lh_meal/blob/main/lh_modeling_wj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Show which Python interpreter version this runtime provides.
!python --version

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive so the notebook can reach files stored on Drive.

from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Make the Drive project folder the working directory; the relative './data/...' paths in this notebook resolve against it.

%cd "/content/gdrive/MyDrive/Dacon_data"

In [None]:
import os
import warnings

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from keras.layers import Activation, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Suppress all Python warnings for the rest of the session; note that this hides deprecation notices too.
warnings.filterwarnings('ignore')

In [None]:
# Read the training and test tables from the data folder

test = pd.read_csv(filepath_or_buffer='./data/test_wj.csv')    # test table
train = pd.read_csv(filepath_or_buffer='./data/train_wj.csv')  # training table

In [None]:
# Discard the two score columns from the training table
train = train.drop(columns=["Lunch_score", "Dinner_score"])

In [None]:
# One-hot encode the weekday column. Both tables are encoded against the weekday
# levels found in the training table, so they always end up with the same
# Dayofweek_* columns and drop_first removes the same baseline level from both.
# (Encoding each table on its own yields mismatched columns whenever the test
# table lacks one of the training weekdays.) A test value that never occurs in
# the training table becomes an all-zero row across the Dayofweek_* columns.

dayofweek_dtype = pd.CategoricalDtype(pd.Categorical(train["Dayofweek"]).categories)

train = pd.get_dummies(train.astype({"Dayofweek": dayofweek_dtype}),
                       columns=["Dayofweek"], prefix="Dayofweek", drop_first=True)
test = pd.get_dummies(test.astype({"Dayofweek": dayofweek_dtype}),
                      columns=["Dayofweek"], prefix="Dayofweek", drop_first=True)

In [None]:
# Separate predictor columns from target columns

y_cols = ["N_lunch", "N_dinner"]  # the two columns the model predicts
X_cols_rm = ["Date", "Breakfast", "Lunch", "Dinner", "N_lunch", "N_dinner", "Rate_lunch", "Rate_dinner"]  # columns kept out of the predictors

train_cols = train.columns.values
X_cols = train_cols[~np.isin(train_cols, X_cols_rm)]  # every other training column is a predictor

X_train = train[X_cols]  # predictors of the training table
y_train = train[y_cols]  # targets of the training table
X_test = test[X_cols]    # predictors of the test table

In [None]:
# Hold out 20% of the training rows as a validation set.
# random_state pins the shuffle so the same rows are held out on every run,
# which keeps validation scores comparable between runs.

train_sample, valid = train_test_split(train, test_size=0.2, random_state=42)

In [None]:
# Pull predictors (X) and targets (y) out of both partitions

X_train_sample, y_train_sample = train_sample[X_cols], train_sample[y_cols]  # fitting partition
X_valid, y_valid = valid[X_cols], valid[y_cols]                              # held-out partition

In [None]:
# Cast every frame handed to Keras to float. One-hot columns can come back from
# pd.get_dummies as bool, and a frame mixing bool and numeric columns converts to
# an object array, which Keras cannot turn into a tensor. The validation and test
# frames are passed to evaluate()/predict(), so they need the same cast as the
# fitting data.

X_train_sample = X_train_sample.astype(float)
y_train_sample = y_train_sample.astype(float)
X_valid = X_valid.astype(float)
y_valid = y_valid.astype(float)
X_test = X_test.astype(float)

In [None]:
# Build the network: three fully connected ReLU layers followed by a linear
# output layer with one unit per target column

model = Sequential([
    Dense(units=128, activation='relu', input_dim=X_train.shape[1]),
    Dense(units=256, activation='relu'),
    Dense(units=128, activation='relu'),
    # Dense(units=32, activation='relu'),
    # Dense(units=16, activation='relu'),
    Dense(units=len(y_cols), activation='linear'),
])

In [None]:
# Configure training: Adam optimizer, mean absolute error as both the loss and the reported metric

lr = 0.001
opt = Adam(learning_rate=lr)
model.compile(optimizer=opt, loss='mae', metrics=['mae'])

In [None]:
# Train the network

epochs = 3000      # upper bound on epochs; early stopping may end training sooner
# batch = None
valid_split = 0.2  # fraction of the fitting data Keras sets aside to compute val_loss

# Stop once val_loss has not improved for 100 epochs. restore_best_weights=True
# rolls the model back to the epoch with the lowest val_loss; without it the
# model keeps the weights of the final epoch, i.e. 100 epochs past its best.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100,
                   restore_best_weights=True)

model.fit(x                = X_train_sample,
          y                = y_train_sample, 
          epochs           = epochs, 
          # batch_size       = batch, 
          callbacks        = [es],
          validation_split = valid_split)

In [None]:
# Score the trained model on the held-out validation partition

model.evaluate(x=X_valid, y=y_valid)

In [None]:
# Show the names of the predictor columns in the validation inputs
X_valid.columns.values

In [None]:
# Run the trained model on the test predictors

y_test = model.predict(x=X_test)

In [None]:
# Write the predictions into the sample submission layout and save them

submission = pd.read_csv('./data/sample_submission.csv')
# The two prediction columns follow y_cols order: lunch count first, dinner count second
submission[["중식계", "석식계"]] = y_test

# to_csv does not create missing folders, so make sure the output folder exists
# rather than losing the predictions to an OSError at the very end of the run
os.makedirs('./data/submission', exist_ok=True)
submission.to_csv('./data/submission/submission_test4.csv', index=False)