In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# 1. Load the raw data and preprocess it
data = pd.read_csv("final_data.csv", encoding="cp949")
print(data.head())

# Both timestamp columns arrive as text; convert them to datetime64.
for ts_col in ("datetime", "reg_date"):
    data[ts_col] = pd.to_datetime(data[ts_col])

# Keep the rows in chronological order for the time-series models below.
data = data.sort_values("datetime")

In [None]:
# Columns removed from the working frame before modelling.
columns_to_drop = [
    'rider_cnt', 'order_cnt', 'temp_c', 'rain_c', 'snow_c',
    'q1', 'q3', 'IQR1.5', 'outlier',
    'rider_cnt_w_1', 'rider_cnt_w_2', 'rider_cnt_w_3', 'rider_cnt_w_4',
    'order_cnt_w_1', 'order_cnt_w_2', 'order_cnt_w_3', 'order_cnt_w_4',
]
data = data.drop(columns=columns_to_drop)
print(data.head())

In [None]:
# Columns that are cast to the pandas "category" dtype.
categorical_columns = ['pick_rgn2_nm', 'hour_reg', 'day_of_reg', 'is_rain', 'month', 'week', 'is_holiday']
data = data.astype({name: 'category' for name in categorical_columns})

print(data.dtypes)

In [None]:
# 4. Performance comparison (MAE, RMSE) -- currently disabled: every statement in this cell is commented out
# actual = data["rider_cnt_2"].tail(24*7)

# arima_predicted = arima_forecast.predicted_mean[-(24*7):]
# prophet_predicted = prophet_forecast["yhat"].tail(24*7)

# mae_arima = mean_absolute_error(actual, arima_predicted)
# mae_prophet = mean_absolute_error(actual, prophet_predicted)

# rmse_arima = np.sqrt(mean_squared_error(actual, arima_predicted))
# rmse_prophet = np.sqrt(mean_squared_error(actual, prophet_predicted))

# print(f"ARIMA - MAE: {mae_arima}  RMSE: {rmse_arima}")
# print(f"PROPHET - MAE: {mae_prophet} RMSE: {rmse_prophet}")

In [None]:
# Train one SARIMA model and one Prophet model per pickup region, forecast the
# held-out period, and write actuals and predictions to "result_total.csv".

# Per-location predictions, keyed by location; each value is a 1-D array with
# one entry per test row of that location.
arima_results = {}
prophet_results = {}

# Rows strictly before this timestamp are training data; rows at or after it
# are held out. Comparing with "<" against the first test timestamp keeps the
# whole of 2022-12-31 in the training set. The previous `<= "2022-12-31"`
# comparison stopped at midnight, so the remaining hours of that day belonged
# to neither split and every step-based forecast was shifted against the test rows.
test_start = pd.Timestamp("2023-01-01")

# One result frame per location, concatenated once after the loop.
location_frames = []

locations = data["pick_rgn2_nm"].unique()
for location in locations:
    data_location = data[data["pick_rgn2_nm"] == location]

    # Train / test split
    train_data = data_location[data_location["datetime"] < test_start]
    test_data = data_location[data_location["datetime"] >= test_start]
    n_test = test_data.shape[0]
    if train_data.empty or n_test == 0:
        # Nothing to fit on, or nothing to forecast, for this location.
        continue

    # SARIMA: fit on the training series and forecast n_test steps ahead.
    # The forecast is step-based, so pairing it with the test rows assumes
    # they directly follow the training rows with no gaps -- TODO confirm.
    # Seasonal period 24 presumably means one day of hourly observations.
    ts_train_data = train_data.set_index("datetime")["rider_cnt_2"]
    arima_model_location = SARIMAX(ts_train_data, order=(1, 1, 1), seasonal_order=(1, 1, 1, 24)).fit()
    arima_forecast_location = arima_model_location.get_forecast(steps=n_test, alpha=0.05)
    arima_results[location] = arima_forecast_location.summary_frame()["mean"].values

    # Prophet: fit on (ds, y) and predict at the actual test timestamps, so
    # each prediction lines up with its test row even if the data has gaps.
    prophet_data_location = train_data[["datetime", "rider_cnt_2"]].rename(
        columns={"datetime": "ds", "rider_cnt_2": "y"}
    )
    prophet_model_location = Prophet()
    prophet_model_location.fit(prophet_data_location)
    future_location = test_data[["datetime"]].rename(columns={"datetime": "ds"})
    prophet_forecast_location = prophet_model_location.predict(future_location)
    prophet_results[location] = prophet_forecast_location["yhat"].values

    # Use plain arrays so the columns align by position, not by index label.
    location_frames.append(
        pd.DataFrame({
            "datetime": test_data["datetime"].to_numpy(),
            "actual": test_data["rider_cnt_2"].to_numpy(),
            "arima_pred": arima_results[location],
            "prophet_pred": prophet_results[location],
            "location": location,
        })
    )

# Actuals and both models' predictions for every location, in long format.
if location_frames:
    result = pd.concat(location_frames, axis=0, ignore_index=True)
else:
    result = pd.DataFrame(columns=["datetime", "actual", "arima_pred", "prophet_pred", "location"])

# Save the combined frame. The previous export selected the "rider_cnt"
# column, which is dropped earlier in the notebook (KeyError), and covered
# only the last location processed by the loop.
# `results` is kept as an alias so cells that refer to that name still work.
results = result

# Export to CSV
results.to_csv("result_total.csv", index=False)

