In [6]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [24]:
# 1. Load the data and preprocess it
data = pd.read_csv("final_data.csv", encoding="cp949")
print(data.head())

# Parse both timestamp columns, then order the rows chronologically.
data = data.assign(
    datetime=pd.to_datetime(data["datetime"]),
    reg_date=pd.to_datetime(data["reg_date"]),
).sort_values(by="datetime")

              datetime pick_rgn2_nm  rider_cnt  order_cnt  hour_reg   
0  2022-01-01 09:00:00          강남구        128        222         9  \
1  2022-01-01 10:00:00          강남구        252        403        10   
2  2022-01-01 11:00:00          강남구        435        829        11   
3  2022-01-01 12:00:00          강남구        489        909        12   
4  2022-01-01 13:00:00          강남구        450        879        13   

     reg_date day_of_reg  temp_c  rain_c  snow_c  is_rain  month  week   
0  2022-01-01        토요일    -9.1     0.0     0.0        0      1     1  \
1  2022-01-01        토요일    -6.8     0.0     0.0        0      1     1   
2  2022-01-01        토요일    -4.3     0.0     0.0        0      1     1   
3  2022-01-01        토요일    -2.3     0.0     0.0        0      1     1   
4  2022-01-01        토요일    -0.1     0.0     0.0        0      1     1   

   is_holiday      q1      q3  IQR1.5  outlier  rider_cnt_2  
0           1  205.00  226.75  32.625        1        215.5  
1   

In [25]:
# Discard the raw rider count, the precipitation amounts and the helper
# columns named after the IQR/outlier step; `rider_cnt_2` is kept.
data = data.drop(
    ["rider_cnt", "rain_c", "snow_c", "q1", "q3", "IQR1.5", "outlier"],
    axis="columns",
)
print(data.head())

                  datetime pick_rgn2_nm  order_cnt  hour_reg   reg_date   
0      2022-01-01 09:00:00          강남구        222         9 2022-01-01  \
11631  2022-01-01 09:00:00          강동구         71         9 2022-01-01   
127941 2022-01-01 09:00:00          동작구         38         9 2022-01-01   
162834 2022-01-01 09:00:00          서초구         65         9 2022-01-01   
58155  2022-01-01 09:00:00          광진구         86         9 2022-01-01   

       day_of_reg  temp_c  is_rain  month  week  is_holiday  rider_cnt_2  
0             토요일    -9.1        0      1     1           1        215.5  
11631         토요일    -9.1        0      1     1           1         45.0  
127941        토요일    -9.1        0      1     1           1         28.0  
162834        토요일    -9.1        0      1     1           1         82.0  
58155         토요일    -9.1        0      1     1           1         56.0  


In [27]:
# Columns treated as categorical:
# pick_rgn2_nm, hour_reg, day_of_reg, is_rain, month, week, is_holiday
category_cols = ["pick_rgn2_nm", "hour_reg", "day_of_reg", "is_rain", "month", "week", "is_holiday"]

# Cast all of them in one pass; every other column keeps its dtype.
data = data.astype({name: "category" for name in category_cols})

print(data.dtypes)

datetime        datetime64[ns]
pick_rgn2_nm          category
order_cnt                int64
hour_reg              category
reg_date        datetime64[ns]
day_of_reg            category
temp_c                 float64
is_rain               category
month                 category
week                  category
is_holiday            category
rider_cnt_2            float64
dtype: object


In [None]:
# 2. Train the SARIMAX model and forecast
# (Drop the `exog` arguments from both calls below to get the plain
# seasonal-ARIMA baseline without the rain regressor.)
#
# NOTE(review): `data` carries one row per district per hour, so after sorting
# by `datetime` the same timestamp repeats across districts and the 24-step
# seasonal lag does not span 24 hours on this pooled series. Consider
# aggregating per hour or modelling each district separately.
forecast_horizon = 24 * 7  # number of trailing rows of `data` held out for evaluation

# Build endog and exog on one shared DatetimeIndex. `data["is_rain"]` still
# carries the frame's original row labels, and statsmodels refuses pandas
# endog/exog whose indexes are not equal. The flag is also cast from category
# to int so the regressor is a plain numeric column. Fresh Series are created
# instead of re-indexing a column taken from `data`.
ts_index = pd.DatetimeIndex(data["datetime"])
ts_data = pd.Series(data["rider_cnt_2"].to_numpy(), index=ts_index, name="rider_cnt_2")
rain_exog = pd.Series(data["is_rain"].astype(int).to_numpy(), index=ts_index, name="is_rain")

# Fit on everything except the last `forecast_horizon` rows. The forecast then
# covers rows the model has not seen, and the exog handed to `get_forecast`
# is the rain flag of exactly the forecast period rather than in-sample data.
arima_model = SARIMAX(
    ts_data.iloc[:-forecast_horizon],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 24),
    exog=rain_exog.iloc[:-forecast_horizon],
).fit()
arima_forecast = arima_model.get_forecast(
    steps=forecast_horizon,
    exog=rain_exog.iloc[-forecast_horizon:],
)

In [None]:
# 3. Train the Prophet model and forecast
# (Construct `Prophet()` without `holidays` to get the baseline that ignores
# the rain effect.)
forecast_horizon = 24 * 7  # number of trailing rows of `data` held out for evaluation

prophet_data = data[["datetime", "rider_cnt_2", "is_rain"]].rename(
    columns={"datetime": "ds", "rider_cnt_2": "y", "is_rain": "event"}
)

# Rain is modelled as a Prophet "holiday". Prophet matches holidays by calendar
# date, so the rainy rows are reduced to one entry per rainy day instead of one
# entry per rainy district-hour. Rainy days inside the held-out window are
# included on purpose: the rain flag is treated as known for the forecast
# period, otherwise the effect could not be applied there.
rainy_days = (
    prophet_data.loc[prophet_data["event"].astype(int) == 1, "ds"]
    .dt.normalize()
    .drop_duplicates()
)
holidays = pd.DataFrame({
    "holiday": "event",
    "ds": rainy_days,
    "lower_window": 0,
    "upper_window": 1  # the effect also covers the day after the rain
})

# Fit without the last `forecast_horizon` rows so the evaluation window is
# genuinely out of sample.
prophet_model = Prophet(holidays=holidays)
prophet_model.fit(prophet_data.drop(columns=["event"]).iloc[:-forecast_horizon])

# Predict on the timestamps of the held-out rows themselves, so each forecast
# row lines up with one observed row even if the series has gaps or repeated
# timestamps.
future = prophet_data[["ds"]].tail(forecast_horizon)
prophet_forecast = prophet_model.predict(future)

In [None]:
# 4. Performance comparison (MAE, RMSE)
# Both models are scored on the final 24*7 entries of the observed series.
actual = data["rider_cnt_2"].iloc[-(24*7):]

prophet_predicted = prophet_forecast["yhat"].iloc[-(24*7):]
arima_predicted = arima_forecast.predicted_mean[-(24*7):]

# Mean absolute error for each model.
mae_arima, mae_prophet = (
    mean_absolute_error(actual, predicted)
    for predicted in (arima_predicted, prophet_predicted)
)

# Root mean squared error for each model.
rmse_arima, rmse_prophet = (
    np.sqrt(mean_squared_error(actual, predicted))
    for predicted in (arima_predicted, prophet_predicted)
)

print(f"ARIMA - MAE: {mae_arima}  RMSE: {rmse_arima}")
print(f"PROPHET - MAE: {mae_prophet} RMSE: {rmse_prophet}")

In [None]:
# Containers for the per-district forecasts, keyed by district name.
arima_results, prophet_results = {}, {}

# Distinct pickup districts present in the data.
locations = data["pick_rgn2_nm"].unique()

# Fit one SARIMA and one Prophet model per district and keep a 24*7-step forecast from each.
for location in locations:
    # Rows belonging to the current district only.
    data_location = data.loc[data["pick_rgn2_nm"] == location]

    # ARIMA model: seasonal ARIMA on the district's series, indexed by timestamp.
    ts_data_location = data_location.set_index("datetime")["rider_cnt_2"]
    arima_model_location = SARIMAX(
        ts_data_location,
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, 24),
    ).fit()
    arima_forecast_location = arima_model_location.get_forecast(steps=24*7)
    arima_results[location] = arima_forecast_location.predicted_mean

    # Prophet model: expects the columns `ds` (timestamp) and `y` (target).
    prophet_data_location = data_location[["datetime", "rider_cnt_2"]].rename(
        columns={"datetime": "ds", "rider_cnt_2": "y"}
    )
    prophet_model_location = Prophet()
    prophet_model_location.fit(prophet_data_location)
    future_location = prophet_model_location.make_future_dataframe(periods=24*7, freq="H")
    prophet_forecast_location = prophet_model_location.predict(future_location)
    # Only the final 24*7 predictions are stored.
    prophet_results[location] = prophet_forecast_location["yhat"].tail(24*7)
