In [None]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import grangercausalitytests


In [None]:
# Load the variant data and keep only the Delta rows.
# .copy() makes the filtered frame independent of the raw frame, so the
# column assignment below does not write into a slice (SettingWithCopyWarning).
dfVariant = pd.read_csv('covid-variants.csv')
dfVariant = dfVariant[dfVariant['variant'] == 'Delta'].copy()

# Parse the dates, keep rows after 2021-01-01 up to and including 2021-10-16,
# then index by date. Passing the Series (not the label) to set_index keeps
# the 'date' column in the frame as well.
dfVariant['date'] = pd.to_datetime(dfVariant['date'])
in_window = (dfVariant['date'] > '2021-1-1') & (dfVariant['date'] <= '2021-10-16')
dfVariant = dfVariant[in_window]
dfVariant = dfVariant.set_index(dfVariant['date'])

print(dfVariant.columns)

In [None]:
# Load the OWID COVID-19 dataset.
df = pd.read_csv("owid-covid-data (1).csv")

# Convert the date strings to timestamps, restrict to rows after 2021-01-01
# up to and including 2021-10-16, and use the dates as the index. The 'date'
# column itself stays in the frame because a Series is passed to set_index.
df['date'] = pd.to_datetime(df['date'])
after_start = df['date'] > '2021-1-1'
up_to_end = df['date'] <= '2021-10-16'
df = df[after_start & up_to_end]
df = df.set_index(df['date'])

print(df.columns)

In [None]:
# Countries iterated over by the per-country analysis cells.
location = [
    'Austria',
    'Bulgaria',
    'France',
    'Germany',
    'Italy',
    'Netherlands',
    'Portugal',
    'Spain',
    'United States',
]

In [None]:
# Semi-month-end resampling rule. An explicit offset object is used so the
# cell does not depend on how a given pandas version parses the lowercase
# 'sm' alias string.
semi_month = pd.offsets.SemiMonthEnd()

for ctry in location:
    print(ctry)

    # Only numeric columns can be averaged; recent pandas raises on the text
    # columns (e.g. 'location') instead of silently dropping them.
    # NOTE(review): dfVariTemp is computed but not used anywhere in this cell.
    dfVariTemp = (
        dfVariant[dfVariant['location'] == ctry]
        .select_dtypes(include='number')
        .resample(semi_month)
        .mean()
    )

    dfTemp = (
        df[df['location'] == ctry]
        .select_dtypes(include='number')
        .resample(semi_month)
        .mean()
    )
    dfTemp["death_percentage"] = dfTemp["new_deaths"] / dfTemp["new_cases"]

    # Series.corr computes the same pairwise-complete Pearson r as the matching
    # cell of DataFrame.corr(), without building the full matrix twice.
    vaccinated = dfTemp['people_fully_vaccinated_per_hundred']

    overall_pearson_cases = dfTemp['new_cases'].corr(vaccinated)
    print(f"Pandas computed Pearson r (new_cases vs people_fully_vaccinated_per_hundred): {overall_pearson_cases}")

    overall_pearson_deaths = dfTemp['new_deaths'].corr(vaccinated)
    print(f"Pandas computed Pearson r (new_deaths vs people_fully_vaccinated_per_hundred): {overall_pearson_deaths}")

In [None]:
def pt_result(input, alpha=0.05):
    """Print a stationarity verdict for a unit-root test p-value.

    Parameters
    ----------
    input : float
        The p-value to judge (callers in this notebook pass ``result[1]`` of
        an ``adfuller`` result).
    alpha : float, optional
        Significance level. A p-value less than or equal to ``alpha`` is
        reported as stationary. Defaults to 0.05.

    Notes
    -----
    A NaN p-value compares false against everything, so it is reported as
    undetermined instead of falling through to the "stationary" branch.
    """
    if input <= alpha:
        print("Series is stationary")
    elif input > alpha:
        print("Series is not stationary")
    else:
        # Only reached when the comparison is undefined (e.g. NaN).
        print("p-value is NaN; stationarity could not be determined")

def pt_details(result):
    """Print the test statistic, p-value and critical values of a test result.

    ``result`` is indexed positionally: element 0 is printed as the test
    statistic, element 1 as the p-value and element 4 as the critical values.
    """
    fields = (
        ('Test Statistics', 0),
        ('p-value', 1),
        ('critical_values', 4),
    )
    for label, position in fields:
        print(f'{label}: {result[position]}')

In [None]:
col_names1 = ['total_vaccinations', 'new_cases', 'new_deaths']
col_names2 = ['hosp_patients', 'icu_patients']

# Each group of columns is tested on its own resampling interval.
adf_groups = [(col_names1, '10D'), (col_names2, '18D')]

for ctry in location:
    country_rows = df[df['location'] == ctry]
    # Only numeric columns can be averaged; recent pandas raises on text
    # columns instead of silently dropping them.
    numeric_cols = country_rows.select_dtypes(include='number').columns

    for cols, freq in adf_groups:
        # The resample does not depend on the tested column, so do it once
        # per (country, interval) rather than once per column.
        dfTemp = (
            country_rows.resample(freq, on='date')[numeric_cols]
            .mean()
            .reset_index()
            .ffill()
        )

        for col in cols:
            print('\n', ctry, dfTemp.shape, freq)
            print(col + " stationary test (ADF test):")

            # ffill() cannot fill leading gaps (or a column with no data at
            # all), and adfuller rejects NaN input, so drop what is left.
            series = dfTemp[col].dropna()
            if series.nunique() < 2:
                print("Skipped: no usable (non-constant) observations")
                continue

            try:
                result = adfuller(series)
            except ValueError as err:
                # e.g. too few observations for the selected lag order.
                print(f"Skipped: {err}")
                continue
            pt_result(result[1])


In [None]:
# Column pairs for the Granger tests, in both directions. statsmodels tests
# whether the SECOND column Granger-causes the FIRST column of the input.
tup_ls1 = [('new_cases', 'total_vaccinations'),
           ('total_vaccinations', 'new_cases')]
tup_ls2 = [('new_deaths', 'total_vaccinations'),
           ('total_vaccinations', 'new_deaths')]
tup_ls3 = [('hosp_patients', 'total_vaccinations'),
           ('total_vaccinations', 'hosp_patients')]
tup_ls4 = [('icu_patients', 'total_vaccinations'),
           ('total_vaccinations', 'icu_patients')]

# Pairs actually tested. Only the ICU pairs are run; point this at another
# tup_ls* list (or their concatenation) to test other outcomes.
granger_pairs = tup_ls4
granger_maxlag = 4

# NOTE(review): the ADF cell resamples icu_patients at 18D while this cell
# uses 10D — confirm the difference is intentional.
for ctry in location:
    country_rows = df[df['location'] == ctry]
    # Only numeric columns can be averaged; recent pandas raises on text
    # columns instead of silently dropping them.
    numeric_cols = country_rows.select_dtypes(include='number').columns
    # The resample is the same for every pair of a country: compute it once.
    dfTemp = country_rows.resample('10D', on='date')[numeric_cols].mean().ffill()

    for tup in granger_pairs:
        print('\n', ctry, tup)

        # ffill() leaves leading gaps (and fully missing columns) as NaN;
        # the test cannot run on NaN, so keep only complete rows.
        pair_data = dfTemp[[tup[0], tup[1]]].dropna()
        if pair_data.empty or (pair_data.nunique() < 2).any():
            print("Skipped: no usable (non-constant) observations for this pair")
            continue

        try:
            grangercausalitytests(pair_data, maxlag=granger_maxlag)
        except ValueError as err:
            # e.g. too few observations for the requested maximum lag.
            print(f"Skipped: {err}")