In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
    # Load the dataset from the CSV file and show it as the cell output.
    # NOTE(review): this cell carries a stray 4-space indent. IPython appears to
    # strip a cell's common leading indentation before running it, but the same
    # lines would raise IndentationError in a plain .py script -- confirm and dedent.
    df = pd.read_csv('df_used_1.csv')
    df

In [None]:
# Convert the 'Date' column to datetimes using the year-month format '%Y-%m',
# then write the converted values back onto df.
date_as_datetime = pd.to_datetime(df['Date'], format='%Y-%m')
df['Date'] = date_as_datetime

In [None]:
# Print the DataFrame summary (columns, non-null counts, dtypes) to check the
# result of the 'Date' conversion in the previous cell.
df.info()

In [None]:
# Build a date-indexed copy for the time-series tools; df itself keeps 'Date'
# as a regular column, which the final plot cell uses as its x axis.
df_ix = df.copy().set_index('Date')
df_ix.head()

In [None]:
from matplotlib import rcParams
from statsmodels.tsa.seasonal import seasonal_decompose

# Enlarge the default figure size. This is a global matplotlib setting, so it
# also applies to later figures that do not pass their own figsize.
rcParams.update({'figure.figsize': (20, 10)})

# Decompose the date-indexed 'Penumpang Datang' series and plot the components.
# The trailing semicolon suppresses the figure repr in the cell output.
decomposed = seasonal_decompose(df_ix['Penumpang Datang'])
decomposed.plot();

In [None]:
from statsmodels.tsa.stattools import adfuller

# Run the Augmented Dickey-Fuller test on the 'Penumpang Datang' column.
test_adf = adfuller(df['Penumpang Datang'])

# The first four entries of the result are labelled below; the fifth entry is
# a mapping of critical values, appended to the summary one row per entry.
adf_labels = ['Test statistic', 'p-value', '# of lags', '# of observations']
output = pd.Series(test_adf[:4], index=adf_labels)

critical_values = test_adf[4]
for level in critical_values:
    output['Critical Value ({})'.format(level)] = critical_values[level]

output

In [None]:
# Compare the ADF p-value (second entry of the result) with the significance
# level and print the corresponding verdict.
alpha = 0.05
adf_rejects_null = test_adf[1] <= alpha
print(
    'Reject null hypothesis; The series doesn\'t have a unit root'
    if adf_rejects_null
    else 'Failed to reject null hypothesis; The series has a unit root'
)

In [None]:
from statsmodels.tsa.stattools import kpss

# Run the KPSS test on the 'Penumpang Datang' column.
test_kpps = kpss(df['Penumpang Datang'])

# The first three entries of the result are labelled below; the fourth entry
# is a mapping of critical values, appended to the summary one row per entry.
kpss_labels = ['Test statistic', 'p-value', '# of lags']
output = pd.Series(test_kpps[:3], index=kpss_labels)

critical_values = test_kpps[3]
for level in critical_values:
    output['Critical Value ({})'.format(level)] = critical_values[level]
output

In [None]:
# Compare the KPSS p-value (second entry of the result) with the significance
# level and print the corresponding verdict.
alpha = 0.05
kpss_rejects_null = test_kpps[1] <= alpha
print(
    'Reject null hypothesis; The series has a unit root'
    if kpss_rejects_null
    else 'Failed to reject null hypothesis; The series is stationary'
)

In [None]:
pip install pmdarima

In [None]:
import pmdarima as pmd

# Let auto_arima search for a model order on the date-indexed series. The
# series is declared stationary with no differencing (d=0), the search starts
# from p=0 and q=0, and trace=True asks for the search progress to be reported.
auto_arima_options = dict(
    start_p=0,
    start_q=0,
    d=0,
    stationary=True,
    trace=True,
)
arima_model = pmd.auto_arima(df_ix['Penumpang Datang'], **auto_arima_options)

In [None]:
from statsmodels.tsa.arima_model import ARIMA

model = ARIMA(df_ix['Penumpang Datang'], order=(2,0,0))
model_fit = model.fit()
print(model_fit.summary())

In [None]:
forecast_length = 12
forecast_result = model_fit.forecast(forecast_length)

In [None]:
# Display the raw forecast output; later cells index it as [0], [1] and [2].
forecast_result

In [None]:
# Show the first element of the forecast output, which the df_forecast cell
# stores in its 'Penumpang Datang' column.
print('Forecast result')
forecast_result[0]

In [None]:
# Year-month labels ('YYYY-M') for each forecast step, starting at January 2020.
# Steps past December roll over into the following year(s) rather than
# producing unparseable labels such as '2020-13' when forecast_length > 12.
# For forecast_length <= 12 the labels are exactly '2020-1' ... '2020-12'.
forecasted_month = [
    '{}-{}'.format(2020 + step // 12, step % 12 + 1)
    for step in range(forecast_length)
]

forecasted_month

In [None]:
# Assemble the forecast table: one row per forecast month, holding the point
# forecast, its standard error and the two confidence-interval bounds.
confidence_bounds = forecast_result[2]
forecast_columns = {
    'Date': forecasted_month,
    'Penumpang Datang': forecast_result[0],
    'Std. Error': forecast_result[1],
    'Lower Bound CI': confidence_bounds[:, 0],
    'Upper Bound CI': confidence_bounds[:, 1],
}
df_forecast = pd.DataFrame(forecast_columns)

# Parse the year-month labels into datetimes, matching the dtype of df['Date'].
forecast_dates = pd.to_datetime(df_forecast['Date'], format='%Y-%m')
df_forecast['Date'] = forecast_dates
df_forecast

In [None]:
# Print the forecast table summary (columns, non-null counts, dtypes).
df_forecast.info()

In [None]:
# Plot the observed series together with the forecast and its confidence
# bounds on a single axis.
fig, ax = plt.subplots(figsize=(9,5))
# The observed series gets a label too; without one it has no legend entry and
# the legend would list only the forecast and the bounds.
sns.lineplot(x='Date', y='Penumpang Datang', data=df, ax=ax, label='Actual Penumpang Datang')
sns.lineplot(x='Date', y='Penumpang Datang', data=df_forecast, ax=ax, label='Forecasted Penumpang Datang')
sns.lineplot(x='Date', y='Lower Bound CI', data=df_forecast, ax=ax, label='Lower Bound')
sns.lineplot(x='Date', y='Upper Bound CI', data=df_forecast, ax=ax, label='Upper Bound')
# The trailing semicolon suppresses the Legend repr in the cell output.
plt.legend();