In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt

%matplotlib inline

### Little function to plot the ACF and PACF on one figure

In [None]:
def acf_pacf(ts, lags):
    """Draw the ACF and PACF of a series, stacked on one 12x8 figure.

    Parameters
    ----------
    ts
        Observations to analyse; forwarded unchanged to statsmodels'
        ``plot_acf`` and ``plot_pacf``.
    lags
        Lag specification forwarded unchanged to both plotting calls.

    Returns
    -------
    None
        The figure is only drawn, not returned.
    """
    canvas = plt.figure(figsize=(12, 8))
    upper_axis = canvas.add_subplot(2, 1, 1)
    lower_axis = canvas.add_subplot(2, 1, 2)
    # Autocorrelation on top, partial autocorrelation underneath.
    sm.graphics.tsa.plot_acf(ts, lags=lags, ax=upper_axis)
    sm.graphics.tsa.plot_pacf(ts, lags=lags, ax=lower_axis)
    

### Global temperature series: the data are a combination of land-air yearly average temperature anomalies for the years 1900-1997.

In [None]:
# Load the global temperature anomalies and skip the first 44 rows.
# NOTE(review): presumably those rows precede 1900, the start of the window
# analysed in this notebook -- verify against data/globtemp.txt.
# .copy() makes the subset an independent frame, so adding a column to it in a
# later cell does not trigger pandas' SettingWithCopyWarning on a slice of the
# full table.
df = pd.read_csv('data/globtemp.txt').iloc[44:].copy()
df.shape

In [None]:
# One year-end timestamp per row (1900 through 1997), promoted to the index.
# NOTE(review): 'A' is the legacy year-end alias; recent pandas releases spell
# it 'YE' -- confirm against the pandas version in use.
year_ends = pd.date_range(start='1900', end='1998', freq='A')
df['dates'] = year_ends
df = df.set_index(keys='dates')
df.head()

In [None]:
# Pull the anomaly column out as the working series and plot it.
temp_delta = df['temp_delta']
ts = pd.Series(temp_delta)
ts.plot(figsize=(12, 8))

Clearly increasing trend with possibly 2 inflection points

In [None]:
# Correlograms of the raw (undifferenced) series, out to 20 lags.
acf_pacf(ts, lags=20)

Nonstationary data, the ACF shows very slow decay, try first difference

In [None]:
# First difference; the leading element is NaN because it has no predecessor.
ts_diff = ts.diff(periods=1)
ts_diff.plot(figsize=(12, 8))

Clearly removed trend and the following ACF cuts off quickly

In [None]:
# Skip the leading NaN left by diff() before computing the correlograms.
acf_pacf(ts_diff.iloc[1:], lags=20)

Strong peak at lag 3 in the PACF indicating an AR(3) component, try an ARIMA(3,1,0) on the original undifferenced data...the I in the ARIMA refers to number of differences needed

In [None]:
# ARIMA(3,1,0) on the undifferenced series; d=1 asks SARIMAX to apply the
# first difference itself. `model` holds the fitted results object.
arima_spec = sm.tsa.SARIMAX(ts, order=(3, 1, 0))
model = arima_spec.fit()
model.summary()

In [None]:
# Residual diagnostics for the fitted model held in `model`.
# `resid` is an attribute of the fitted results object, not a method, so it is
# read rather than called. np.asarray(...).ravel() yields a flat array whether
# statsmodels returns an ndarray or a Series, and the residuals are then
# re-indexed by date.
model_resid = pd.Series(np.asarray(model.resid).ravel(), index=ts.index)
acf_pacf(model_resid, 20)

ACF and PACF show no strong lags

### Seasonal quarterly financial data from Johnson and Johnson, 1960-1980

In [None]:
# Load the Johnson and Johnson data; `df` is rebound to this data set
# from here on.
jj_path = 'data/jj.txt'
df = pd.read_csv(jj_path)
df.head()

In [None]:
# Quarter-end timestamps from 1960Q1 up to (not including) 1981, one per row,
# plus the quarter number (1-4) of each date.
# NOTE(review): 'Q' is the legacy quarter-end alias; recent pandas releases
# spell it 'QE' -- confirm against the pandas version in use.
dates = pd.date_range(start='1960', end='1981', freq='Q')
df['dates'] = dates
df['quarters'] = dates.quarter
df.head()

In [None]:
# Promote the quarter-end dates to the index.
df = df.set_index(keys='dates')
df.head()

In [None]:
# Use the earnings column as the working series and plot it.
earnings = df['earnings']
ts = pd.Series(earnings)
ts.plot(figsize=(12, 8))

In [None]:
# Correlograms of the raw earnings series, out to 16 lags.
acf_pacf(ts, lags=16)

Clearly nonstationary in both mean (upward trend) and variance (spread grows over time), hence the very slow decay in the ACF

In [None]:
# Plot the first difference to see what remains once the trend is removed.
first_diff = ts.diff(1)
first_diff.plot(figsize=(12, 8))

After removing trend, the variance is obviously changing with time so we need to explore a variance stabilizing transform

In [None]:
# Log-transform earnings to stabilise the variance; `ts` now holds log earnings.
log_earnings = np.log(df['earnings'])
ts = pd.Series(log_earnings)
ts.plot(figsize=(12, 8))

In [None]:
# Correlograms of the log-earnings series, out to 16 lags.
acf_pacf(ts, lags=16)

Back to the slow decay in ACF due to the trend, so need to difference

In [None]:
# First difference of the log series, with the leading NaN already removed.
ts_diff = ts.diff(1).iloc[1:]
ts_diff.plot(figsize=(12, 8))

In [None]:
# `ts_diff` was built as ts.diff(1)[1:], so its leading NaN is already gone.
# Pass it whole; slicing off another element would drop a valid observation.
acf_pacf(ts_diff, 16)

Concentrating on the ACF, very slow decay in the seasonal lags indicating a seasonal difference could be needed

In [None]:
# Seasonal (lag-4, i.e. year-over-year for quarterly data) difference of the
# already first-differenced log series; the first four values are NaN.
ts_sdiff = ts_diff.diff(periods=4)
ts_sdiff.plot(figsize=(12, 8))
ts_sdiff.head()

In [None]:
# Skip the four leading NaNs left by the lag-4 difference.
acf_pacf(ts_sdiff.iloc[4:], lags=16)

Data appears stationary. Significant peak at the 1st seasonal lag in the PACF, indicating a possible seasonal AR component...let's fit an ARIMA(0,1,0)x(1,1,0)x4 model

In [None]:
# ARIMA(0,1,0)x(1,1,0) with seasonal period 4, fitted to the log series;
# SARIMAX applies both the regular and the seasonal difference itself.
sarima_spec = sm.tsa.SARIMAX(ts, order=(0, 1, 0), seasonal_order=(1, 1, 0, 4))
model = sarima_spec.fit()
model.summary()

In [None]:
# Residual diagnostics for the fitted model held in `model`.
# `resid` is an attribute of the fitted results object, not a method, so it is
# read rather than called. np.asarray(...).ravel() yields a flat array whether
# statsmodels returns an ndarray or a Series, and the residuals are then
# re-indexed by date.
model_resid = pd.Series(np.asarray(model.resid).ravel(), index=ts.index)
acf_pacf(model_resid, 16)

Here the ACF/PACF indicates either a potential nonseasonal AR(1) or nonseasonal MA(1) component...we will check both

In [None]:
# Add a non-seasonal AR(1) term: ARIMA(1,1,0)x(1,1,0) with seasonal period 4.
sarima_spec = sm.tsa.SARIMAX(ts, order=(1, 1, 0), seasonal_order=(1, 1, 0, 4))
model = sarima_spec.fit()
model.summary()

In [None]:
# Residual diagnostics for the fitted model held in `model`.
# `resid` is an attribute of the fitted results object, not a method, so it is
# read rather than called. np.asarray(...).ravel() yields a flat array whether
# statsmodels returns an ndarray or a Series, and the residuals are then
# re-indexed by date.
model_resid = pd.Series(np.asarray(model.resid).ravel(), index=ts.index)
acf_pacf(model_resid, 16)

An AIC of -146.03 is the number to beat; now let's check the MA(1) model

In [None]:
# Swap the AR(1) term for a non-seasonal MA(1): ARIMA(0,1,1)x(1,1,0) with
# seasonal period 4.
sarima_spec = sm.tsa.SARIMAX(ts, order=(0, 1, 1), seasonal_order=(1, 1, 0, 4))
model = sarima_spec.fit()
model.summary()

Judging by AIC (lower is better), this is an improvement over the ARIMA(1,1,0)x(1,1,0)x4 model

In [None]:
# Residual diagnostics for the fitted model held in `model`.
# `resid` is an attribute of the fitted results object, not a method, so it is
# read rather than called. np.asarray(...).ravel() yields a flat array whether
# statsmodels returns an ndarray or a Series, and the residuals are then
# re-indexed by date.
model_resid = pd.Series(np.asarray(model.resid).ravel(), index=ts.index)
acf_pacf(model_resid, 16)

There is still a 'barely' significant lag 1, so let's try both AR(1) and MA(1) components together and see if that can beat the AIC of -150.9

In [None]:
# Both non-seasonal terms together: ARIMA(1,1,1)x(1,1,0) with seasonal period 4.
sarima_spec = sm.tsa.SARIMAX(ts, order=(1, 1, 1), seasonal_order=(1, 1, 0, 4))
model = sarima_spec.fit()
model.summary()

Looking at minimizing AIC, not an improvement over the ARIMA(0,1,1)x(1,1,0)x4 model so that would be our final model.

In [None]:
# Residual diagnostics for the fitted model held in `model`.
# `resid` is an attribute of the fitted results object, not a method, so it is
# read rather than called. np.asarray(...).ravel() yields a flat array whether
# statsmodels returns an ndarray or a Series, and the residuals are then
# re-indexed by date.
model_resid = pd.Series(np.asarray(model.resid).ravel(), index=ts.index)
acf_pacf(model_resid, 16)