In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader.data as web
import datetime

In [24]:
# Sample window requested from FRED.
start = datetime.datetime(1959, 1, 1)
end = datetime.datetime(2018, 8, 1)

# FRED series to download (all data are for all commercial banks).
fred_series = [
    'LOANINV',   # bank credit
    'INVEST',    # securities in bank credit
    'USGSEC',    # Treasury and agency securities
    'OTHSEC',    # other securities
    'LOANS',     # loans and leases in bank credit
    'BUSLOANS',  # commercial and business loans
    'REALLN',    # real estate loans
    'CONSUMER',  # consumer loans
]

# One request for every series; the result has one column per series id.
dataset = web.DataReader(fred_series, 'fred', start, end)
dataset.head(15)

Unnamed: 0_level_0,LOANINV,INVEST,USGSEC,OTHSEC,LOANS,BUSLOANS,REALLN,CONSUMER
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1959-01-01,184.1034,84.2043,69.3427,14.8616,99.8991,35.213,24.9242,20.7282
1959-02-01,183.6988,83.528,68.68,14.848,100.1708,35.2201,25.227,20.9603
1959-03-01,182.2348,81.6405,66.6329,15.0076,100.5943,35.1304,25.4218,21.209
1959-04-01,183.8576,81.8099,66.5471,15.2628,102.0477,35.5581,25.7261,21.4893
1959-05-01,184.2517,80.7315,65.6099,15.1216,103.5202,36.3064,26.0187,21.7188
1959-06-01,184.0247,78.6972,63.5965,15.1007,105.3275,37.1404,26.2668,21.9859
1959-07-01,186.547,78.9984,63.8898,15.1086,107.5485,37.6645,26.4697,22.4256
1959-08-01,186.14,77.3045,62.2434,15.0612,108.8354,38.089,26.6349,22.8187
1959-09-01,186.5529,76.8926,61.7223,15.1703,109.6602,38.2605,26.7848,23.2035
1959-10-01,186.3566,76.2235,61.0661,15.1574,110.1332,38.4113,26.9735,23.5847


In [25]:
# Write the downloaded frame to disk as-is (relative to the working directory).
raw_csv_path = 'commercial bank data.csv'
dataset.to_csv(raw_csv_path)

In [26]:
from statsmodels.tsa.stattools import adfuller #to check unit root in time series

In [27]:
# Augmented Dickey-Fuller test on every column, followed by a first difference
# of each column whose p-value exceeds the significance level.
# NOTE: this cell overwrites columns of `dataset` in place, so it is not
# idempotent -- running it again differences the already-differenced data
# (and .diff() leaves a NaN in the first row of each differenced column).
threshold = 0.01  # significance level
for name in dataset.columns:
    adf_stat, p_value = adfuller(dataset[name])[:2]
    print(name)
    print('ADF Statistic: %f' % adf_stat)
    print('p-value: %f' % p_value)
    if p_value > threshold:
        print('not stationary')
        print('----------')
        # Replace the level series with its first difference.
        dataset[name] = dataset[name].diff()

LOANINV
ADF Statistic: 2.483492
p-value: 0.999044
not stationary
----------
INVEST
ADF Statistic: 4.669790
p-value: 1.000000
not stationary
----------
USGSEC
ADF Statistic: 3.610944
p-value: 1.000000
not stationary
----------
OTHSEC
ADF Statistic: -0.153148
p-value: 0.943881
not stationary
----------
LOANS
ADF Statistic: 2.150368
p-value: 0.998839
not stationary
----------
BUSLOANS
ADF Statistic: 2.007221
p-value: 0.998682
not stationary
----------
REALLN
ADF Statistic: 0.714541
p-value: 0.990124
not stationary
----------
CONSUMER
ADF Statistic: 4.160506
p-value: 1.000000
not stationary
----------


In [28]:
# Remove rows that contain NaN (e.g. the leading row produced by .diff())
# before re-testing; dropna is kept in place so `dataset` stays the same object.
dataset.dropna(inplace=True)

# Re-run the ADF test and difference once more any column whose p-value is
# still above `threshold`. Like the previous pass, this mutates `dataset`.
for name in dataset.columns:
    p_value = adfuller(dataset[name])[1]
    if p_value > threshold:
        dataset[name] = dataset[name].diff()

# Differencing introduces a new leading NaN; dropping it removes that row
# for every column so all series stay aligned on the same dates.
dataset.dropna(inplace=True)

display(dataset.head())
print(dataset.shape)

Unnamed: 0_level_0,LOANINV,INVEST,USGSEC,OTHSEC,LOANS,BUSLOANS,REALLN,CONSUMER
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1959-03-01,-1.0594,-1.8875,-2.0471,0.1596,0.1518,-0.0897,-0.108,0.2487
1959-04-01,3.0868,0.1694,-0.0858,0.2552,1.0299,0.4277,0.1095,0.2803
1959-05-01,-1.2287,-1.0784,-0.9372,-0.1412,0.0191,0.7483,-0.0117,0.2295
1959-06-01,-0.6211,-2.0343,-2.0134,-0.0209,0.3348,0.834,-0.0445,0.2671
1959-07-01,2.7493,0.3012,0.2933,0.0079,0.4137,0.5241,-0.0452,0.4397


(714, 8)


In [29]:
# Final check: print each column name, followed by 'not stationary' when the
# ADF p-value is still above the significance level. Nothing is modified here.
threshold = 0.01  # significance level
for name in dataset.columns:
    p_value = adfuller(dataset[name])[1]
    print(name)
    if p_value > threshold:
        print('not stationary')
LOANINV
INVEST
USGSEC
OTHSEC
LOANS
BUSLOANS
REALLN
CONSUMER


In [30]:
# Write the differenced frame to disk (relative to the working directory).
stationary_csv_path = 'stationary commercial bank data.csv'
dataset.to_csv(stationary_csv_path)