In [5]:
from prophet import Prophet
import pandas as pd
from sklearn.metrics import mean_absolute_error

df = pd.read_csv('./data/train.csv')

df['일시'] = pd.to_datetime(df['일시'])
df = df.set_index('일시')
df.index.freq = 'D'
df = df.reset_index()
df = df.rename(columns={'일시': 'ds', '평균기온': 'y'})


# df['최고기온'].fillna(method='bfill', inplace=True) 
# df['최저기온'].fillna(method='bfill', inplace=True)
# df['일교차'].fillna(method='bfill', inplace=True)
df['강수량'].fillna(method='bfill', inplace=True)
# df['평균풍속'].fillna(method='bfill', inplace=True)
df = df.fillna(df.mean())

df['강수유무'] = (df['강수량'] > 0).astype(int)
df['일사일조비율'] = df['일사합'] / df['일조합']
df['일사일조합계'] = df['일사합'] + df['일조합']
df['일사일조차이'] = df['일사합'] - df['일조합']
#columns_to_keep = ['ds', '최고기온', '최저기온', '일사일조차이', 'y']
#df = df[columns_to_keep]

data = df.copy()
data['month'] = data['ds'].dt.month
vaild_data = data[data['ds'].dt.year.isin([2022])]
train_data = data[data['ds'].dt.year.isin(range(1960,2021))]

print(data.head())

print(train_data.head(), '\n')
print(vaild_data.head())



          ds  최고기온  최저기온   일교차  강수량  평균습도  평균풍속  일조합       일사합        일조율  \
0 1960-01-01   2.2  -5.2   7.4  0.4  68.3   1.7  6.7  11.93317  48.653526   
1 1960-01-02   1.2  -5.6   6.8  0.4  87.7   1.3  0.0  11.93317  48.653526   
2 1960-01-03   8.7  -2.1  10.8  0.0  81.3   3.0  0.0  11.93317  48.653526   
3 1960-01-04  10.8   1.2   9.6  0.0  79.7   4.4  2.6  11.93317  48.653526   
4 1960-01-05   1.3  -8.2   9.5  0.0  44.0   5.1  8.2  11.93317  48.653526   

     y  강수유무    일사일조비율    일사일조합계    일사일조차이  month  
0 -1.6     1  1.781070  18.63317   5.23317      1  
1 -1.9     1       inf  11.93317  11.93317      1  
2  4.0     0       inf  11.93317  11.93317      1  
3  7.5     0  4.589681  14.53317   9.33317      1  
4 -4.6     0  1.455265  20.13317   3.73317      1  
          ds  최고기온  최저기온   일교차  강수량  평균습도  평균풍속  일조합       일사합        일조율  \
0 1960-01-01   2.2  -5.2   7.4  0.4  68.3   1.7  6.7  11.93317  48.653526   
1 1960-01-02   1.2  -5.6   6.8  0.4  87.7   1.3  0.0  11.93317  48.6535

  df = df.fillna(df.mean())


In [6]:
import itertools


param_grid = {
    'growth': ['linear'],
    'changepoint_prior_scale' : [0.01, 0.03, 0.8],
    'changepoint_range' : [0.8, 0.9, 0.95],
    'seasonality_prior_scale': [12.0, 15.0],
    'seasonality_mode': ['additive'],
    'n_changepoints' : [25, 248, 744],
    'yearly_seasonality' : [15]
}
vaild_size = int(len(vaild_data))

param_grid = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

best = 10000
mlist = []

def get_MAE(model) :
  future_data = model.make_future_dataframe(periods = vaild_size, freq = 'd')
  forecast_data = model.predict(future_data)
  mae = mean_absolute_error(vaild_data.y, forecast_data.yhat[-vaild_size:].values)
  return mae

def is_best(m) :
    global best
    if(best > m) :
        best = m
        return True
    else :
        return False


for params in param_grid :
  model = Prophet(**params).add_seasonality(name='season', period=90, fourier_order=6)
  model.fit(train_data)
  m = get_MAE(model)
  if(is_best(m)) :
      print('\nhyper parameter : ',params,'\nmae : ',m,'\n\n')


03:52:59 - cmdstanpy - INFO - Chain [1] start processing
03:53:00 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.01, 'changepoint_range': 0.8, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 25, 'yearly_seasonality': 15} 
mae :  2.481296748381046 




03:53:02 - cmdstanpy - INFO - Chain [1] start processing
03:53:11 - cmdstanpy - INFO - Chain [1] done processing
03:53:14 - cmdstanpy - INFO - Chain [1] start processing
03:53:49 - cmdstanpy - INFO - Chain [1] done processing
03:53:51 - cmdstanpy - INFO - Chain [1] start processing
03:53:53 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.01, 'changepoint_range': 0.8, 'seasonality_prior_scale': 15.0, 'seasonality_mode': 'additive', 'n_changepoints': 25, 'yearly_seasonality': 15} 
mae :  2.4801413389779556 




03:53:55 - cmdstanpy - INFO - Chain [1] start processing
03:53:58 - cmdstanpy - INFO - Chain [1] done processing
03:54:00 - cmdstanpy - INFO - Chain [1] start processing
03:54:20 - cmdstanpy - INFO - Chain [1] done processing
03:54:22 - cmdstanpy - INFO - Chain [1] start processing
03:54:23 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.01, 'changepoint_range': 0.9, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 25, 'yearly_seasonality': 15} 
mae :  2.472130594381978 




03:54:26 - cmdstanpy - INFO - Chain [1] start processing
03:54:32 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.01, 'changepoint_range': 0.9, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 248, 'yearly_seasonality': 15} 
mae :  2.471044448488378 




03:54:34 - cmdstanpy - INFO - Chain [1] start processing
03:55:37 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.01, 'changepoint_range': 0.9, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 744, 'yearly_seasonality': 15} 
mae :  2.4705015699354025 




03:55:39 - cmdstanpy - INFO - Chain [1] start processing
03:55:40 - cmdstanpy - INFO - Chain [1] done processing
03:55:42 - cmdstanpy - INFO - Chain [1] start processing
03:55:49 - cmdstanpy - INFO - Chain [1] done processing
03:55:51 - cmdstanpy - INFO - Chain [1] start processing
03:56:45 - cmdstanpy - INFO - Chain [1] done processing
03:56:48 - cmdstanpy - INFO - Chain [1] start processing
03:56:49 - cmdstanpy - INFO - Chain [1] done processing
03:56:51 - cmdstanpy - INFO - Chain [1] start processing
03:56:56 - cmdstanpy - INFO - Chain [1] done processing
03:56:58 - cmdstanpy - INFO - Chain [1] start processing
03:57:36 - cmdstanpy - INFO - Chain [1] done processing
03:57:38 - cmdstanpy - INFO - Chain [1] start processing
03:57:39 - cmdstanpy - INFO - Chain [1] done processing
03:57:41 - cmdstanpy - INFO - Chain [1] start processing
03:57:49 - cmdstanpy - INFO - Chain [1] done processing
03:57:52 - cmdstanpy - INFO - Chain [1] start processing
03:58:35 - cmdstanpy - INFO - Chain [1]


hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.01, 'changepoint_range': 0.95, 'seasonality_prior_scale': 15.0, 'seasonality_mode': 'additive', 'n_changepoints': 744, 'yearly_seasonality': 15} 
mae :  2.470424215276023 




03:58:37 - cmdstanpy - INFO - Chain [1] start processing
03:58:39 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.03, 'changepoint_range': 0.8, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 25, 'yearly_seasonality': 15} 
mae :  2.4669489677216645 




03:58:41 - cmdstanpy - INFO - Chain [1] start processing
03:58:46 - cmdstanpy - INFO - Chain [1] done processing
03:58:49 - cmdstanpy - INFO - Chain [1] start processing
03:59:21 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.03, 'changepoint_range': 0.8, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 744, 'yearly_seasonality': 15} 
mae :  2.466586701002489 




03:59:23 - cmdstanpy - INFO - Chain [1] start processing
03:59:25 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.03, 'changepoint_range': 0.8, 'seasonality_prior_scale': 15.0, 'seasonality_mode': 'additive', 'n_changepoints': 25, 'yearly_seasonality': 15} 
mae :  2.465660920562894 




03:59:27 - cmdstanpy - INFO - Chain [1] start processing
03:59:36 - cmdstanpy - INFO - Chain [1] done processing
03:59:39 - cmdstanpy - INFO - Chain [1] start processing
04:00:17 - cmdstanpy - INFO - Chain [1] done processing
04:00:20 - cmdstanpy - INFO - Chain [1] start processing
04:00:21 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.03, 'changepoint_range': 0.9, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 25, 'yearly_seasonality': 15} 
mae :  2.458005948570432 




04:00:23 - cmdstanpy - INFO - Chain [1] start processing
04:00:33 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.03, 'changepoint_range': 0.9, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 248, 'yearly_seasonality': 15} 
mae :  2.4579163704773817 




04:00:35 - cmdstanpy - INFO - Chain [1] start processing
04:01:05 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.03, 'changepoint_range': 0.9, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 744, 'yearly_seasonality': 15} 
mae :  2.4579142143579844 




04:01:08 - cmdstanpy - INFO - Chain [1] start processing
04:01:09 - cmdstanpy - INFO - Chain [1] done processing
04:01:11 - cmdstanpy - INFO - Chain [1] start processing
04:01:17 - cmdstanpy - INFO - Chain [1] done processing
04:01:19 - cmdstanpy - INFO - Chain [1] start processing
04:01:48 - cmdstanpy - INFO - Chain [1] done processing
04:01:50 - cmdstanpy - INFO - Chain [1] start processing
04:01:51 - cmdstanpy - INFO - Chain [1] done processing
04:01:54 - cmdstanpy - INFO - Chain [1] start processing
04:02:00 - cmdstanpy - INFO - Chain [1] done processing
04:02:03 - cmdstanpy - INFO - Chain [1] start processing
04:02:24 - cmdstanpy - INFO - Chain [1] done processing
04:02:27 - cmdstanpy - INFO - Chain [1] start processing
04:02:28 - cmdstanpy - INFO - Chain [1] done processing
04:02:30 - cmdstanpy - INFO - Chain [1] start processing
04:02:37 - cmdstanpy - INFO - Chain [1] done processing
04:02:39 - cmdstanpy - INFO - Chain [1] start processing
04:03:20 - cmdstanpy - INFO - Chain [1]


hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.03, 'changepoint_range': 0.95, 'seasonality_prior_scale': 15.0, 'seasonality_mode': 'additive', 'n_changepoints': 744, 'yearly_seasonality': 15} 
mae :  2.457909199400067 




04:03:22 - cmdstanpy - INFO - Chain [1] start processing
04:03:26 - cmdstanpy - INFO - Chain [1] done processing
04:03:29 - cmdstanpy - INFO - Chain [1] start processing
04:04:13 - cmdstanpy - INFO - Chain [1] done processing



hyper parameter :  {'growth': 'linear', 'changepoint_prior_scale': 0.8, 'changepoint_range': 0.8, 'seasonality_prior_scale': 12.0, 'seasonality_mode': 'additive', 'n_changepoints': 248, 'yearly_seasonality': 15} 
mae :  2.45761713272881 




04:04:15 - cmdstanpy - INFO - Chain [1] start processing
04:06:11 - cmdstanpy - INFO - Chain [1] done processing
04:06:13 - cmdstanpy - INFO - Chain [1] start processing
04:06:17 - cmdstanpy - INFO - Chain [1] done processing
04:06:19 - cmdstanpy - INFO - Chain [1] start processing
04:07:13 - cmdstanpy - INFO - Chain [1] done processing
04:07:15 - cmdstanpy - INFO - Chain [1] start processing
04:08:33 - cmdstanpy - INFO - Chain [1] done processing
04:08:35 - cmdstanpy - INFO - Chain [1] start processing
04:08:39 - cmdstanpy - INFO - Chain [1] done processing
04:08:41 - cmdstanpy - INFO - Chain [1] start processing
04:09:22 - cmdstanpy - INFO - Chain [1] done processing
04:09:24 - cmdstanpy - INFO - Chain [1] start processing
04:11:31 - cmdstanpy - INFO - Chain [1] done processing
04:11:33 - cmdstanpy - INFO - Chain [1] start processing
04:11:37 - cmdstanpy - INFO - Chain [1] done processing
04:11:40 - cmdstanpy - INFO - Chain [1] start processing
04:12:29 - cmdstanpy - INFO - Chain [1]

In [9]:
best_model = Prophet(growth = 'linear',
                changepoint_prior_scale = 0.8,
                changepoint_range = 0.8,
                seasonality_prior_scale = 12.0,
                yearly_seasonality = 15,
                n_changepoints = 744,
                seasonality_mode = 'additive').add_seasonality(name = 'season', period = 90, fourier_order=6)
best_model.fit(data)
submission_df = pd.read_csv('./data/sample_submission.csv')



future_data = best_model.make_future_dataframe(periods = 358, freq = 'd')
forecast_data = best_model.predict(future_data)
print(forecast_data[['ds','yhat']].tail(5))
submission_df['평균기온'] = forecast_data.yhat[-358:].values
submission_df
submission_df.to_csv("test22.csv", index=False)

04:40:09 - cmdstanpy - INFO - Chain [1] start processing
04:41:52 - cmdstanpy - INFO - Chain [1] done processing


              ds      yhat
23364 2023-12-20  0.565680
23365 2023-12-21  0.493538
23366 2023-12-22  0.398061
23367 2023-12-23  0.366494
23368 2023-12-24  0.203767
