In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from matplotlib import pyplot
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

In [None]:
# Load the raw CSV without any date parsing for a first look at its contents.
df = pd.read_csv('AirPassengers.csv')
df.head()

In [None]:
# Show the column dtypes of the unparsed frame.
print(df.dtypes)

In [None]:
# The data contains a particular month and the number of passengers travelling in that month.
# To read it as a time series, the 'Month' column is parsed as dates (format '%Y-%m')
# and used as the index of the frame.
data = pd.read_csv(
    'AirPassengers.csv',
    parse_dates=['Month'],
    index_col='Month',
    date_format='%Y-%m',
)

print('\n Parsed Data:')
print(data.head())

In [None]:
# Inspect the index built from the 'Month' column.
data.index

In [None]:
# Convert to a time series: select the '#Passengers' column as `ts`,
# the series the following cells work on.
ts = data['#Passengers']
ts.head()

In [None]:
# Indexing in time series data: look up an entry using a date string as the label.
ts['1949-03-01']

In [None]:
# Import the `datetime` class and index the series with a datetime object
# instead of a date string.
from datetime import datetime
ts[datetime(1949, 1, 1)]

In [None]:
# Check stationarity visually: plot the untransformed series.
plt.plot(ts)

## Stationarity Test

In [None]:
from statsmodels.tsa.stattools import adfuller

# STATIONARITY CHECK
# Ho: TS is non-Stationary
# H1: TS is stationary

def test_stationarity(timeseries, window=12):
    """Plot rolling statistics and print the Augmented Dickey-Fuller test results.

    Hypotheses of the test:
        H0: the series is non-stationary.
        H1: the series is stationary.

    Parameters
    ----------
    timeseries : pandas.Series or array-like
        Observations to check.
    window : int, default 12
        Size of the rolling window used for the mean and standard deviation.

    Returns
    -------
    None
        The plot is shown and the test summary is printed.
    """
    series = pd.Series(timeseries)

    # Determining rolling statistics
    rolmean = series.rolling(window=window).mean()
    rolstd = series.rolling(window=window).std()

    # Plot rolling statistics:
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')

    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)

    # Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    # Missing values (e.g. left over from rolling or shift operations) are
    # dropped first because the test needs complete observations.
    dftest = adfuller(series.dropna(), autolag='AIC')
    # The first four entries of the result are the summary statistics;
    # the fifth is a dict of critical values keyed by significance level.
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

In [None]:
# Display the '#Passengers' series.
ts

In [None]:
# Rolling statistics plot and Dickey-Fuller test on the untransformed series.
test_stationarity(ts)

# Making Time Series Stationary
* Log Transformation
* Differencing
* Double log
* Double Differencing
* Log + Differencing
###### After forecasting is done, always apply the inverse transformation to the forecasts

In [None]:
# Preview the untransformed series before applying any transformation.
ts.head()

In [None]:
# Log transformation: natural log of every observation.
ts_log = np.log(ts)

In [None]:
# Preview the log-transformed series.
ts_log.head()

In [None]:
# Plot the log-transformed series.
plt.plot(ts_log)

In [None]:
# Stationarity check on the log-transformed series.
test_stationarity(ts_log)

In [None]:
# Double log: take the natural log of the already log-transformed series.
ts_double_log = np.log(ts_log)

In [None]:
# First rows of the double-log series.
ts_double_log.head()

In [None]:
# Last rows of the double-log series.
ts_double_log.tail()

In [None]:
# Stationarity check on the double-log series.
test_stationarity(ts_double_log)

In [None]:
# Smoothing: 12-observation rolling mean of the log series, drawn in red
# on top of the log series itself.
moving_avg = ts_log.rolling(window=12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')

# Subtract the rolling mean from the log series. Positions without a full
# window have no rolling mean (NaN), so those rows are dropped from the result.
ts_log_moving_avg_diff = (ts_log - moving_avg).dropna()
ts_log_moving_avg_diff.head(5)

In [None]:
# Stationarity test again, this time on the log series minus its rolling mean.
test_stationarity(ts_log_moving_avg_diff)

In [None]:
# Exponentially weighted mean of the log series (span=12), drawn in red
# on top of the log series itself.
exponential_weighted_average = ts_log.ewm(span=12).mean()
plt.plot(ts_log)
plt.plot(exponential_weighted_average, color='red')

In [None]:
# Subtract the exponentially weighted mean from the log series and
# test the difference for stationarity.
ts_log_ewma_diff = ts_log - exponential_weighted_average
test_stationarity(ts_log_ewma_diff)

In [None]:
# Differencing: subtract the shifted log series (shift() with its default
# arguments) from the log series, then plot the differences.
ts_log_diff = ts_log - ts_log.shift()
plt.plot(ts_log_diff)

In [None]:
# Drop the missing values left by the shift (modifies ts_log_diff in place),
# then test the differenced series for stationarity.
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)

In [None]:
# DECOMPOSITION
# Split the log series into trend, seasonal and residual components.
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(ts_log)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# One stacked panel per series (4 rows x 1 column), in this order.
panels = [
    (ts_log, 'Original'),
    (trend, 'Trend'),
    (seasonal, 'Seasonality'),
    (residual, 'Residuals'),
]
for position, (component, label) in enumerate(panels, start=1):
    plt.subplot(4, 1, position)
    plt.plot(component, label=label)
    plt.legend(loc='best')
plt.tight_layout()
# Finish this figure before running the stationarity check; otherwise the
# check's plt.plot / plt.legend / plt.title calls would target the last
# (residual) panel, which is still the current axes.
plt.show()

# Test the residuals on a NaN-free copy so that `residual` and
# `decomposition.resid` are not modified in place.
ts_log_decompose = residual.dropna()
test_stationarity(ts_log_decompose)

In [None]:
# Print the current timestamp.
from datetime import datetime
print(datetime.now())

In [None]:
import pandas as pd
import numpy as np
# Import the class (not the module) so the notebook-wide name `datetime`
# keeps meaning the same thing as in the earlier cells that call
# `datetime(...)` / `datetime.now()`.
from datetime import datetime
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import warnings

# Silence all warnings for the rest of the session (the fits below are noisy).
warnings.filterwarnings("ignore")

# --- 1. DATA LOADING ---
# Synthetic random walk (cumulative sum of standard-normal draws).
# A fixed seed makes the grid-search results reproducible across runs.
SEED = 42
series = np.random.default_rng(SEED).standard_normal(100).cumsum()

# --- 2. TRAIN-TEST SPLIT ---
# First 80% of the observations for training, the remainder for testing.
size = int(len(series) * 0.8)
train, test = series[0:size], series[size:]

# --- 3. GRID SEARCH ---
p_values = [0, 1, 2]
d_values = [0, 1]
q_values = [0, 1, 2]

best_rmse, best_cfg = float("inf"), None

print("Starting Grid Search...")

for p in p_values:
    for d in d_values:
        for q in q_values:
            order = (p, d, q)
            predictions = list()
            history = list(train)

            try:
                # Walk-forward validation: refit on everything seen so far,
                # forecast one step, then append the true observation.
                for t in range(len(test)):
                    model = ARIMA(history, order=order)
                    model_fit = model.fit()
                    yhat = model_fit.forecast()[0]
                    predictions.append(yhat)
                    history.append(test[t])

                rmse = np.sqrt(mean_squared_error(test, predictions))
                if rmse < best_rmse:
                    best_rmse, best_cfg = rmse, order
                print(f'ARIMA{order} RMSE: {rmse:.3f}')
            except Exception as exc:
                # Best effort: a configuration that fails is skipped, but the
                # reason is reported instead of being silently swallowed.
                # KeyboardInterrupt is deliberately not caught so the search
                # can still be interrupted.
                print(f'ARIMA{order} skipped: {exc}')
                continue

print("-" * 30)
if best_cfg is None:
    # Every configuration failed; there is no winner to report.
    print("No ARIMA configuration could be evaluated.")
else:
    print(f"WINNER: ARIMA{best_cfg} with RMSE: {best_rmse:.3f}")
print(f"Finished at: {datetime.now()}")