In [15]:
from prophet import Prophet
import pandas as pd
from sklearn.metrics import mean_absolute_error

df = pd.read_csv('./data/train.csv')

df['일시'] = pd.to_datetime(df['일시'])
df = df.set_index('일시')
df.index.freq = 'D'
df = df.reset_index()
df = df.rename(columns={'일시': 'ds', '평균기온': 'y'})


# df['최고기온'].fillna(method='bfill', inplace=True) 
# df['최저기온'].fillna(method='bfill', inplace=True)
# df['일교차'].fillna(method='bfill', inplace=True)
# df['강수량'].fillna(method='bfill', inplace=True)
# df['평균풍속'].fillna(method='bfill', inplace=True)
df = df.fillna(df.mean())

df['강수유무'] = (df['강수량'] > 0).astype(int)
df['일사일조비율'] = df['일사합'] / df['일조합']
df['일사일조합계'] = df['일사합'] + df['일조합']
df['일사일조차이'] = df['일사합'] - df['일조합']
#columns_to_keep = ['ds', '최고기온', '최저기온', '일사일조차이', 'y']
#df = df[columns_to_keep]

data = df.copy()
data['month'] = data['ds'].dt.month
vaild_data = data[data['ds'].dt.year.isin([2022])]
train_data = data[data['ds'].dt.year.isin(range(1960,2021))]

print(data.head())

print(train_data.head(), '\n')
print(vaild_data.head())



          ds  최고기온  최저기온   일교차       강수량  평균습도  평균풍속  일조합       일사합  \
0 1960-01-01   2.2  -5.2   7.4  9.593683  68.3   1.7  6.7  11.93317   
1 1960-01-02   1.2  -5.6   6.8  0.400000  87.7   1.3  0.0  11.93317   
2 1960-01-03   8.7  -2.1  10.8  0.000000  81.3   3.0  0.0  11.93317   
3 1960-01-04  10.8   1.2   9.6  0.000000  79.7   4.4  2.6  11.93317   
4 1960-01-05   1.3  -8.2   9.5  9.593683  44.0   5.1  8.2  11.93317   

         일조율    y  강수유무    일사일조비율    일사일조합계    일사일조차이  month  
0  48.653526 -1.6     1  1.781070  18.63317   5.23317      1  
1  48.653526 -1.9     1       inf  11.93317  11.93317      1  
2  48.653526  4.0     0       inf  11.93317  11.93317      1  
3  48.653526  7.5     0  4.589681  14.53317   9.33317      1  
4  48.653526 -4.6     1  1.455265  20.13317   3.73317      1  
          ds  최고기온  최저기온   일교차       강수량  평균습도  평균풍속  일조합       일사합  \
0 1960-01-01   2.2  -5.2   7.4  9.593683  68.3   1.7  6.7  11.93317   
1 1960-01-02   1.2  -5.6   6.8  0.400000  87.7   1.3 

  df = df.fillna(df.mean())


In [16]:
import itertools


param_grid = {
    'growth': ['linear'],
    'changepoint_prior_scale' : [0.01, 0.03, 0.8],
    'changepoint_range' : [0.8, 0.9, 0.95],
    'seasonality_prior_scale': [12.0, 15.0],
    'seasonality_mode': ['additive'],
    'yearly_seasonality' : [15]
    '
}
vaild_size = int(len(vaild_data))

param_grid = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

best = 10000
mlist = []

def get_MAE(model) :
  future_data = model.make_future_dataframe(periods = vaild_size, freq = 'd')
  forecast_data = model.predict(future_data)
  mae = mean_absolute_error(vaild_data.y, forecast_data.yhat[-vaild_size:].values)
  return mae

def is_best(m) :
    global best
    if(best > m) :
        best = m
        return True
    else :
        return False


for params in param_grid :
  model = Prophet(**params).add_seasonality(name='season', period=90, fourier_order=6)
  model.fit(train_data)
  m = get_MAE(model)
  if(is_best(m)) :
      print('\nhyper parameter : ',params,'\nmae : ',m,'\n\n')


03:32:49 - cmdstanpy - INFO - Chain [1] start processing
03:32:54 - cmdstanpy - INFO - Chain [1] done processing



mae :  2.4563849690096546 




In [17]:
best_model = Prophet(growth = 'linear',
                changepoint_prior_scale = 0.8,
                seasonality_prior_scale = 12.0,
                yearly_seasonality = 15,
                seasonality_mode = 'additive').add_seasonality(name = 'season', period = 90, fourier_order=6)
best_model.fit(data)
submission_df = pd.read_csv('./data/sample_submission.csv')



future_data = best_model.make_future_dataframe(periods = 358, freq = 'd')
forecast_data = best_model.predict(future_data)
print(forecast_data[['ds','yhat']].tail(5))
submission_df['평균기온'] = forecast_data.yhat[-358:].values
submission_df
submission_df.to_csv("test22.csv", index=False)

03:33:02 - cmdstanpy - INFO - Chain [1] start processing
03:33:06 - cmdstanpy - INFO - Chain [1] done processing


              ds      yhat
23364 2023-12-20  0.565169
23365 2023-12-21  0.493002
23366 2023-12-22  0.397533
23367 2023-12-23  0.365919
23368 2023-12-24  0.203083
