In [2]:
# Mount Google Drive at /content/gdrive inside the Colab runtime. The data and
# model paths used later are written as './gdrive/My Drive/...', which
# presumably resolves against Colab's default /content working directory.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import seaborn as sns
from sklearn.model_selection import train_test_split
from datetime import datetime as dt
import warnings
import matplotlib.font_manager as fm
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

  import pandas.util.testing as tm


In [4]:
# Load the modeling table. Later cells filter it by HDONG_NM and index it by
# STD_YMD, so both columns must be present in this CSV.
final_data = pd.read_csv('./gdrive/My Drive/빅콘 대상팀/data/model_data.csv')

In [5]:
# District lookup table, joined to the sales data on HDONG_NM
# (presumably the source of the HDONG_GU column used below; verify).
gu = pd.read_excel('./gdrive/My Drive/빅콘 대상팀/data/지역데이터/구_동.xlsx')

# Sales table with STD_YMD parsed as dates.
gs = pd.read_csv(
    './gdrive/My Drive/빅콘 대상팀/data/all_amt.csv',
    parse_dates=['STD_YMD'],
)

# Keep the first two columns plus every column whose name contains "GS",
# attach the district lookup, then derive CITY from the first two characters
# of HDONG_GU.
gs = (
    pd.concat([gs.iloc[:, [0, 1]], gs.filter(like='GS')], axis=1)
    .merge(gu, on='HDONG_NM')
    .assign(CITY=lambda frame: frame['HDONG_GU'].apply(lambda gu_name: gu_name[0:2]))
)

# Restrict to rows whose CITY is "서울" and whose date compares after "2020".
gs_seoul = gs.query('CITY == "서울" & STD_YMD > "2020"')

# One frame per product category: the two leading columns plus one sales
# column picked by position, ordered by district and then date.
# NOTE(review): positions 3/4/5 are expected to hold GS_식사 / GS_간식 /
# GS_마실거리 (the columns build_data reads); re-check if the CSV layout changes.
gs_eat = gs_seoul.iloc[:, [0, 1, 3]].sort_values(by=['HDONG_NM', 'STD_YMD'])
gs_snack = gs_seoul.iloc[:, [0, 1, 4]].sort_values(by=['HDONG_NM', 'STD_YMD'])
gs_drink = gs_seoul.iloc[:, [0, 1, 5]].sort_values(by=['HDONG_NM', 'STD_YMD'])

In [6]:
# Final dataset used for modeling: final_data minus the columns listed below.
# NOTE(review): 'covid_p1' was listed twice in this drop list; the duplicate is
# removed. If a different column was meant in its place, add it here.
final_data4 = final_data.drop(['COVID_CNT','covid_p1','sc_m1','cj_m1','최저기온','최고기온','일강수량'],axis=1)

In [7]:
# Districts to evaluate: every distinct HDONG_NM value in the modeling data
# except '상계8동'. list.remove raises ValueError if that district is absent.
dong_list = final_data4['HDONG_NM'].unique().tolist()
dong_list.remove('상계8동')

In [8]:
def build_data(data,dong,cat):
  """Assemble the modeling frame for one district and one product category.

  Parameters
  ----------
  data : pandas.DataFrame
      Feature table; must contain HDONG_NM and STD_YMD columns.
  dong : str
      District name matched against HDONG_NM.
  cat : str
      Product category: "식사", "간식" or "마실거리".

  Returns
  -------
  pandas.DataFrame
      The rows of `data` for `dong`, indexed by STD_YMD, with the STD_YMD and
      HDONG_NM columns removed and two columns appended: `self_m7` (the
      category sales shifted down by 7 rows) and `y` (the category sales).

  Raises
  ------
  ValueError
      If `cat` is not one of the supported categories. Previously such a value
      fell through silently and the returned frame had no `self_m7` / `y`.
  """
  target_columns = {"식사": "GS_식사", "간식": "GS_간식", "마실거리": "GS_마실거리"}
  if cat not in target_columns:
    raise ValueError(
        'cat must be one of {}, got {!r}'.format(list(target_columns), cat))

  # Module-level per-category sales frames prepared when the data is loaded.
  sales_frames = {"식사": gs_eat, "간식": gs_snack, "마실거리": gs_drink}

  X = data.query('HDONG_NM==@dong').reset_index(drop=True)
  sales = sales_frames[cat].query('HDONG_NM==@dong').reset_index(drop=True)
  target = sales[target_columns[cat]]

  # Both frames were re-indexed 0..n-1, so these assignments pair rows by
  # position. NOTE(review): this assumes `data` and the sales frame list the
  # same dates in the same order for this district; confirm upstream.
  X['self_m7'] = target.shift(7)
  X['y'] = target

  # Use the date as the index and drop the identifier column.
  return X.set_index('STD_YMD').drop(columns=['HDONG_NM'])

In [9]:
from sklearn.preprocessing import MinMaxScaler 

def minmax_scalar(X):
  """Rescale every column of X with a freshly fitted MinMaxScaler.

  The scaler is fitted on X itself and discarded afterwards. The result is a
  new DataFrame holding the scaled values under X's own index and column
  labels.
  """
  scaled_values = MinMaxScaler().fit_transform(X)
  return pd.DataFrame(scaled_values, index=X.index, columns=X.columns)

In [10]:
def split_xy(dataset, time_steps, y_column):
  """Cut a 2-D table into sliding (input window, target) pairs.

  For each start row i, the input window is rows [i, i + time_steps) across
  all columns, and the target is the LAST column of the following `y_column`
  rows. Windows are produced until the target would run past the end.

  Parameters
  ----------
  dataset : array-like with a length and two dimensions (DataFrame, ndarray, ...)
  time_steps : int
      Number of rows in each input window.
  y_column : int
      Number of rows in each target.

  Returns
  -------
  (numpy.ndarray, numpy.ndarray)
      Stacked windows and stacked targets. Both are empty arrays when
      `dataset` is too short to yield a single pair.
  """
  # Convert once up front instead of rebuilding the array twice per iteration.
  values = np.array(dataset)
  total_rows = len(dataset)

  x, y = [], []
  for start in range(total_rows):
    x_end_number = start + time_steps
    y_end_number = x_end_number + y_column

    # Stop as soon as the target rows would extend beyond the data.
    if y_end_number > total_rows:
      break
    x.append(values[start:x_end_number, :])
    y.append(values[x_end_number:y_end_number, -1])
  return np.array(x), np.array(y)

In [11]:
import numpy as np
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_absolute_error

def RMSLE_fun(origin,pred):
  """Square root of mean_squared_log_error computed on inputs shifted by +1.

  Both `origin` and `pred` have 1 added before being handed to
  sklearn's mean_squared_log_error; the square root of that value is returned.

  NOTE(review): per the scikit-learn documentation, mean_squared_log_error
  already evaluates log(1 + value), so this effectively compares
  log(2 + value) terms rather than the textbook RMSLE. The shift may be
  deliberate (e.g. to tolerate slightly negative scaled predictions, which
  the sklearn function rejects); confirm before changing it.
  """
  shifted_origin = origin + 1
  shifted_pred = pred + 1
  msle = mean_squared_log_error(shifted_origin, shifted_pred)
  return np.sqrt(msle)

In [12]:
def train_test_split(n,X,y):
  """Chronological hold-out split: the last `n` rows become the test set.

  NOTE: this definition rebinds the name `train_test_split`, which the import
  cell also brings in from sklearn.model_selection; after this cell runs, the
  name refers to this function (with a different signature).

  Parameters
  ----------
  n : int
      Number of trailing rows to hold out. With n == 0 the test parts are
      empty (a bare `[-0:]` slice would otherwise select every row).
  X : pandas.DataFrame or numpy.ndarray
      Features; DataFrames are sliced positionally with .iloc.
  y : sequence
      Targets, sliced with the same boundaries as X.

  Returns
  -------
  X_train, X_test, y_train, y_test
  """
  split_at = X.shape[0] - n
  # -0 == 0, so n == 0 needs an explicit empty slice instead of [-n:].
  test_rows = slice(-n, None) if n else slice(0, 0)

  if isinstance(X, pd.DataFrame):
    X_train, X_test = X.iloc[:split_at, :], X.iloc[test_rows, :]
  else:
    X_train, X_test = X[:split_at], X[test_rows]
  y_train, y_test = y[:split_at], y[test_rows]
  return X_train,X_test,y_train,y_test

In [13]:
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, SimpleRNN, LSTM, GRU
from keras.callbacks import EarlyStopping

In [14]:
def data_pipeline(data, dong, cat, time_steps, y_columns, test_size=7):
  """Build scaled, windowed train/test arrays for one district and category.

  Steps: build_data -> min-max scale every column -> drop rows containing NaN
  -> cut into sliding windows with split_xy -> hold out the last `test_size`
  windows as the test set.

  Parameters
  ----------
  data : pandas.DataFrame
      Feature table passed straight to build_data.
  dong, cat : str
      District and product category, passed straight to build_data.
  time_steps : int
      Rows per input window.
  y_columns : int
      Rows per target.
  test_size : int, default 7
      Number of trailing windows held out for testing; must be positive.

  Returns
  -------
  X_train, y_train, X_test, y_test, y_min, y_max
      The window arrays, plus the minimum and maximum of the UNSCALED target
      column so callers can map scaled values back to original units.

  Raises
  ------
  ValueError
      If `test_size` is not positive.

  Notes
  -----
  The scaler is fitted on all rows, including those that end up in the test
  windows.
  """
  if test_size <= 0:
    raise ValueError('test_size must be a positive integer, got {!r}'.format(test_size))

  data = build_data(data,dong,cat)

  # Target range before scaling (named y_min / y_max rather than min / max so
  # the builtins are not shadowed inside this function).
  y_min = data['y'].min()
  y_max = data['y'].max()

  # Rows containing NaN are removed after scaling, e.g. the leading rows where
  # the 7-row lag feature self_m7 has no value yet.
  Xy = minmax_scalar(data).dropna()

  X,y = split_xy(Xy,time_steps,y_columns)

  X_train, y_train = X[:-test_size],y[:-test_size]
  X_test, y_test = X[-test_size:],y[-test_size:]

  # Pin the test arrays to (windows, time_steps, features) and (windows, y_columns).
  X_test=X_test.reshape(-1,time_steps,X_train.shape[2])
  y_test=y_test.reshape(-1,y_columns)

  return X_train,y_train,X_test,y_test,y_min,y_max

In [16]:
# Per-category metric accumulators. Each list receives one value per district,
# in dong_list order, and is read when the result table is assembled.
MSE_eat = []
RMSE_eat = []
MAE_eat = []
RMSLE_eat = []

MSE_snack = []
RMSE_snack = []
MAE_snack = []
RMSLE_snack = []

MSE_drink = []
RMSE_drink = []
MAE_drink = []
RMSLE_drink = []

# Route each category to its four accumulators, ordered (MSE, RMSE, MAE, RMSLE).
metric_lists = {
  '식사': (MSE_eat, RMSE_eat, MAE_eat, RMSLE_eat),
  '간식': (MSE_snack, RMSE_snack, MAE_snack, RMSLE_snack),
  '마실거리': (MSE_drink, RMSE_drink, MAE_drink, RMSLE_drink),
}

for dong in dong_list:
  for cat in ['식사', '간식', '마실거리']:

    # One saved GRU model per (district, category) pair.
    model = load_model('./gdrive/My Drive/빅콘 대상팀/분석 code/GRU/models/{}{} GRU.hdf5'.format(dong, cat))

    # The last two return values are the unscaled target range. They are bound
    # to y_min / y_max because unpacking into `min` / `max` at notebook scope
    # would rebind the builtins for every later cell.
    X_train,y_train,X_test,y_test,y_min,y_max = data_pipeline(final_data4, dong, cat, 7, 1)

    y_pred = model.predict(X_test, batch_size=1)

    # All metrics below are computed on min-max scaled values. To report them
    # in original units, rescale both arrays first:
    #   value * (y_max - y_min) + y_min
    mse = np.mean((y_test-y_pred)**2)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    rmsle = RMSLE_fun(np.array(y_test), np.array(y_pred))

    print('{0}, {1}, {2}, {3}, {4}, {5}'.format(dong, cat, mse, rmse, mae, rmsle))

    for bucket, value in zip(metric_lists[cat], (mse, rmse, mae, rmsle)):
      bucket.append(value)

공릉1동, 식사, 0.0058700416891564935, 0.07661619730289734, 0.06972650640824776, 0.02749350110443856
공릉1동, 간식, 0.0013249165009583306, 0.03639940248078711, 0.028139960288899868, 0.017094969638125165
공릉1동, 마실거리, 0.003323224371711403, 0.057647414267349435, 0.0501048652217707, 0.021245184820358098
공릉2동, 식사, 0.007875647841885783, 0.08874484684693407, 0.06743113582271652, 0.03132397683086139
공릉2동, 간식, 0.002011078743199863, 0.044845052605609274, 0.041926561766662034, 0.02081179842238626
공릉2동, 마실거리, 0.005197786325559614, 0.07209567480480097, 0.054515430757794255, 0.0262380029555401
광희동, 식사, 0.001257736565791812, 0.035464581849950126, 0.03473002868469884, 0.01528019834788693
광희동, 간식, 0.0028125979992173126, 0.05303393252642418, 0.046384116507296, 0.023185499470187006
광희동, 마실거리, 0.005567045858472131, 0.07461263873146513, 0.053583640774087615, 0.026070421241836717
다산동, 식사, 0.000940031450457096, 0.030659932329623562, 0.02528355376456518, 0.01281103876782307
다산동, 간식, 0.002418638850113373, 0.04917965890602

In [18]:
# Result table: one row per district, four metric columns per category.
# Column order follows the pair order below.
gru_result = pd.DataFrame(dict([
    ('동', dong_list),
    ('식사_MSE', MSE_eat),
    ('식사_RMSE', RMSE_eat),
    ('식사_MAE', MAE_eat),
    ('식사_RMSLE', RMSLE_eat),
    ('간식_MSE', MSE_snack),
    ('간식_RMSE', RMSE_snack),
    ('간식_MAE', MAE_snack),
    ('간식_RMSLE', RMSLE_snack),
    ('마실거리_MSE', MSE_drink),
    ('마실거리_RMSE', RMSE_drink),
    ('마실거리_MAE', MAE_drink),
    ('마실거리_RMSLE', RMSLE_drink),
]))

In [19]:
# Column-wise mean of each metric across districts. numeric_only=True skips the
# text column '동'; without it, pandas >= 2.0 raises TypeError on that column
# instead of silently dropping it.
gru_result.mean(numeric_only=True)

식사_MSE        0.012469
식사_RMSE       0.099107
식사_MAE        0.082586
식사_RMSLE      0.038565
간식_MSE        0.004348
간식_RMSE       0.055376
간식_MAE        0.045554
간식_RMSLE      0.024025
마실거리_MSE      0.008879
마실거리_RMSE     0.087182
마실거리_MAE      0.072807
마실거리_RMSLE    0.032501
dtype: float64