In [1]:
import pandas as pd
url='https://raw.githubusercontent.com/ubaid-shah/datasets/main/airline_passengers.csv'
df=pd.read_csv(url,index_col='Month',parse_dates=True)
df.index.freq='MS'
df.dropna(inplace=True)

In [2]:
df.head()

Unnamed: 0_level_0,Thousands of Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121


In [3]:
from statsmodels.tsa.stattools import adfuller

In [4]:
print('Dicky-Fuller Result')
result=adfuller(df['Thousands of Passengers'])
result

Dicky-Fuller Result


(0.8153688792060543,
 0.9918802434376411,
 13,
 130,
 {'1%': -3.4816817173418295,
  '5%': -2.8840418343195267,
  '10%': -2.578770059171598},
 996.692930839019)

In [5]:
dfout=pd.Series(result[0:4],index=['Adf test static','P-value','# lags used','#observations'])
dfout

Adf test static      0.815369
P-value              0.991880
# lags used         13.000000
#observations      130.000000
dtype: float64

In [6]:
df['d1']=df["Thousands of Passengers"].diff()


In [7]:
df.head()

Unnamed: 0_level_0,Thousands of Passengers,d1
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
1949-01-01,112,
1949-02-01,118,6.0
1949-03-01,132,14.0
1949-04-01,129,-3.0
1949-05-01,121,-8.0


In [8]:
df.dropna(inplace=True)

In [9]:
print('Dicky-Fuller Result with differencing')
result1=adfuller(df['d1'])
result1

Dicky-Fuller Result with differencing


(-2.8292668241700034,
 0.054213290283824954,
 12,
 130,
 {'1%': -3.4816817173418295,
  '5%': -2.8840418343195267,
  '10%': -2.578770059171598},
 988.5069317854084)

In [10]:
dfout=pd.Series(result1[0:4],index=['Adf test static','P-value','# lags used','#observations'])
dfout

Adf test static     -2.829267
P-value              0.054213
# lags used         12.000000
#observations      130.000000
dtype: float64

In [11]:
df['d2']=df['d1'].diff()

In [12]:
print('Dicky-Fuller Result with differencing of order 2')
result2=adfuller(df['d2'].dropna())
result2

Dicky-Fuller Result with differencing of order 2


(-16.384231542468488,
 2.7328918500143186e-29,
 11,
 130,
 {'1%': -3.4816817173418295,
  '5%': -2.8840418343195267,
  '10%': -2.578770059171598},
 988.6020417275604)

In [13]:
dfout2=pd.Series(result2[0:4],index=['Adf test static','P-value','# lags used','#observations'])
dfout2

Adf test static   -1.638423e+01
P-value            2.732892e-29
# lags used        1.100000e+01
#observations      1.300000e+02
dtype: float64

In [14]:
from statsmodels.tsa.stattools import adfuller
def adf_test(series,title=''):
    print(f'Augmented Dickey-Fuller test: {title}')
    print()
    print('H0: Data has a  unit root and is non-stationary')
    print('Ha: Data has no unit root and is stationary')
    print('Reference P value:0.05')
    print()
    result=adfuller(series.dropna())
    index=['Adf test static','P-value','# lags used','#observations']
    
    out=pd.Series(result[0:4],index=index)
    
    if result[1]<=0.05:
        print('Strong evidence against the null hypothesis')
        print('Reject the null hypothesis')
        print('Data has no unit root and is stationary')
        
    else:
        print('Weak evidence against the null hypothesis')
        print('Accept the null hypothesis')
        print('Data has unit root and is non-stationary')

In [17]:
adf_test(df['d2'],'Second Differencing')

Augmented Dickey-Fuller test: Second Differencing

H0: Data has a  unit root and is non-stationary
Ha: Data has no unit root and is stationary
Reference P value:0.05

Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no unit root and is stationary
