In [1]:
#importing useful libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy.stats as s
from datetime import datetime, date, timedelta
import math
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')

In [2]:
#read COVID-19 data 
Covid_Data = pd.read_csv('merged_df.csv',usecols=[0,2,4,17,18,19,20,21])
Covid_Data.rename(columns={'Difference_Yesterday':'PCR_Postive_Count','Hospitalized_diff':'PCR_Positive_Hospitalised','Death_diff':'Deceased'}, inplace = True)
Covid_Data['PCR_Positive_Hospitalised'] = Covid_Data['PCR_Positive_Hospitalised'].fillna(0).astype('int64')

In [3]:
Covid_Data1 = Covid_Data
Covid_Data1['Date']=pd.to_datetime(Covid_Data['Date'].astype(str), format='%d-%m-%Y')

In [4]:
#Autumn
data_autumn = Covid_Data1.loc[(Covid_Data1['Date'] >= pd.to_datetime('31-08-2021', format='%d-%m-%Y')) & (Covid_Data['Date'] <= pd.to_datetime('30-11-2021', format='%d-%m-%Y'))]

#Winter
data_winter = Covid_Data1.loc[(Covid_Data1['Date'] >= pd.to_datetime('01-12-2021', format='%d-%m-%Y')) & (Covid_Data1['Date'] <= pd.to_datetime('28-02-2022', format='%d-%m-%Y'))]

#Spring
data_spring = Covid_Data1.loc[(Covid_Data1['Date'] >= pd.to_datetime('01-03-2022', format='%d-%m-%Y')) & (Covid_Data1['Date'] <= pd.to_datetime('31-05-2022', format='%d-%m-%Y'))]

#Summer
data_summer = Covid_Data1.loc[Covid_Data['Date'] >= pd.to_datetime('01-06-2022', format='%d-%m-%Y')]
data_autumn

Unnamed: 0,Date,District,PCR_Postive_Count,PCR_Positive_Hospitalised,Deceased,temperature,wind_speed,relative_humidity
870,2021-08-31,KS Frankenthal,21,0,2,16.264,8.600,77.16
871,2021-08-31,KS Speyer,7,0,0,16.568,8.600,77.24
872,2021-08-31,KS Trier,20,1,1,15.108,9.532,79.84
873,2021-08-31,KS Worms,15,3,0,16.264,8.600,77.16
874,2021-08-31,KS Ludwigshafen,44,2,0,15.824,8.600,80.52
...,...,...,...,...,...,...,...,...
3625,2021-11-30,KS Pirmasens,9,0,0,1.444,29.892,95.60
3626,2021-11-30,Vulkaneifel,39,2,0,3.056,21.988,78.40
3627,2021-11-30,KS Trier,38,2,0,3.560,30.828,92.24
3628,2021-11-30,KS Worms,60,2,0,4.808,19.264,80.64


In [5]:
"""
Correlation:
1. Positive: when one variable moves higher or lower, the other variable moves in the same direction with the same magnitude.
2. Negative: if one variable increases, the other variable decreases with the same magnitude (and vice versa).
"""

'\nCorrelation:\n1. Positive: when one variable moves higher or lower, the other variable moves in the same direction with the same magnitude.\n2. Negative: if one variable increases, the other variable decreases with the same magnitude (and vice versa).\n'

In [6]:
#correlation matrix --- Kendall

def find_cor_sig(data,final_list,season_name):
    covid_param = ['PCR_Postive_Count','PCR_Positive_Hospitalised','Deceased']
    weather_param = ['temperature','wind_speed','relative_humidity']
    return_list = []
    for i in range(0,len(covid_param)):
        for j in range(0,len(weather_param)):
            corr_kendall= s.kendalltau(data[covid_param[i]],data[weather_param[j]])
            corr = round(corr_kendall[0],4)
            sig = corr_kendall[1] <= 0.05
            return_list.append(('Kendall-Tau',season_name,covid_param[i],weather_param[j],corr,corr_kendall[1],sig))
    for i in range(0,len(covid_param)):
        for j in range(0,len(weather_param)):
            corr_spear= s.spearmanr(data[covid_param[i]],data[weather_param[j]])
            corr = round(corr_spear[0],4)
            sig = corr_spear[1] <= 0.05
            return_list.append(('Spearmann',season_name,covid_param[i],weather_param[j],corr,corr_spear[1],sig))        
        
    for val in range(0,len(return_list)):
        final_list.append(return_list[val])
    return final_list

In [7]:
#corr_kendall_aut1.style.background_gradient(cmap='Oranges')
#----------autumn months correlation--------------  
final_list = []
final_list1 = find_cor_sig(data_autumn,final_list,'Autumn')
final_list2 = find_cor_sig(data_winter,final_list1,'Winter')
final_list3 = find_cor_sig(data_spring,final_list2,'Spring')
final_list4 = find_cor_sig(data_summer,final_list2,'Summer')

corr_sig_data = pd.DataFrame (final_list3, columns = ['CorrelationMethod','Season','Covid_Column','Weather_Column','Correlation','Significance_value','Significance'])
#corr_sig_data.to_csv('corr_sig_data.csv')
corr_sig_data

Unnamed: 0,CorrelationMethod,Season,Covid_Column,Weather_Column,Correlation,Significance_value,Significance
0,Kendall-Tau,Autumn,PCR_Postive_Count,temperature,-0.1884,2.349725e-48,True
1,Kendall-Tau,Autumn,PCR_Postive_Count,wind_speed,,,False
2,Kendall-Tau,Autumn,PCR_Postive_Count,relative_humidity,0.0931,5.441251e-13,True
3,Kendall-Tau,Autumn,PCR_Positive_Hospitalised,temperature,-0.0989,1.009573e-11,True
4,Kendall-Tau,Autumn,PCR_Positive_Hospitalised,wind_speed,,,False
...,...,...,...,...,...,...,...
67,Spearmann,Summer,PCR_Positive_Hospitalised,wind_speed,-0.0016,9.549007e-01,False
68,Spearmann,Summer,PCR_Positive_Hospitalised,relative_humidity,-0.0375,1.882778e-01,False
69,Spearmann,Summer,Deceased,temperature,0.0865,2.391335e-03,True
70,Spearmann,Summer,Deceased,wind_speed,0.0140,6.230029e-01,False


In [8]:
#combined multiple correlations
def find_mul_cor(AB,CB,AC, n):
    value =((AB**2) + (CB**2) - (2*AB*CB*AC))/(1 -(AC**2))

    r = math.sqrt(value)

    r_squared = r**2

    correlation_C_WRT_AB = (1- (((1-r_squared)*(n-1))/(n-2-1)))

    return round(correlation_C_WRT_AB,4)

In [9]:
mul_corr_list = []
#-----------fetching rows only with significance value=TRUE--------------
mul_corr_data = corr_sig_data.loc[(corr_sig_data["Significance"]==True)]

#-----correlation between temperature and relative humidity--------------
corr_kendall_aut= s.kendalltau(data_autumn['temperature'],data_autumn['relative_humidity'])
corr_TR_aut = round(corr_kendall_aut[0],4)
corr_kendall_win= s.kendalltau(data_winter['temperature'],data_winter['relative_humidity'])
corr_TR_win = round(corr_kendall_win[0],4)
corr_kendall_spr= s.kendalltau(data_spring['temperature'],data_spring['relative_humidity'])
corr_TR_spr = round(corr_kendall_spr[0],4)
corr_kendall_sum= s.kendalltau(data_summer['temperature'],data_summer['relative_humidity'])
corr_TR_sum = round(corr_kendall_sum[0],4)

corr_spear_aut= s.spearmanr(data_autumn['temperature'],data_autumn['relative_humidity'])
corr_sp_TR_aut = round(corr_spear_aut[0],4)
corr_spear_win= s.spearmanr(data_winter['temperature'],data_winter['relative_humidity'])
corr_sp_TR_win = round(corr_spear_win[0],4)
corr_spear_spr= s.spearmanr(data_spring['temperature'],data_spring['relative_humidity'])
corr_sp_TR_spr = round(corr_spear_spr[0],4)
corr_spear_sum= s.spearmanr(data_summer['temperature'],data_summer['relative_humidity'])
corr_sp_TR_sum = round(corr_spear_sum[0],4)

aut_n = 2760
spr_n = 2430
win_n = 2700
sum_n = 1230


In [10]:
covid_param = ['PCR_Postive_Count','PCR_Positive_Hospitalised','Deceased']
weather_param = ['temperature','relative_humidity']
weather = ['Autumn','Spring','Winter','Summer']
correlation = ['Kendall-Tau','Spearmann']
corr_mul_list=[]

for i in correlation:
    for j in weather:
        for k in covid_param:
            result_list=[]
            for l in weather_param:
                temp = mul_corr_data.loc[(mul_corr_data["Covid_Column"]== k) & (mul_corr_data['CorrelationMethod']==i) & (mul_corr_data["Season"]== j) & (mul_corr_data['Weather_Column']==l)]
                new_data = []
                new_data = temp['Correlation'].array
                if (len(new_data)>0):
                    result_list.append(new_data[0])

            if(len(result_list)==2):
                if(i=='Kendall-Tau'):
                    if(j=='Autumn'):
                        result = find_mul_cor(corr_TR_aut,result_list[0],result_list[1], aut_n)
                        corr_mul_list.append(('Kendall-Tau','Autumn',k,result))
                    elif(j=='Winter'):
                        result = find_mul_cor(corr_TR_win,result_list[0],result_list[1], win_n)
                        corr_mul_list.append(('Kendall-Tau','Winter',k,result))
                    elif(j=='Spring'):
                        result = find_mul_cor(corr_TR_spr,result_list[0],result_list[1], spr_n)
                        corr_mul_list.append(('Kendall-Tau','Spring',k,result))
                    else:
                        result = find_mul_cor(corr_TR_sum,result_list[0],result_list[1], sum_n)
                        corr_mul_list.append(('Kendall-Tau','Summer',k,result))
                else:
                    if(j=='Autumn'):
                        result = find_mul_cor(corr_sp_TR_aut,result_list[0],result_list[1], aut_n)
                        corr_mul_list.append(('Spearmann','Autumn',k,result))
                    elif(j=='Winter'):
                        result = find_mul_cor(corr_sp_TR_win,result_list[0],result_list[1], win_n)
                        corr_mul_list.append(('Spearmann','Winter',k,result))
                    elif(j=='Spring'):
                        result = find_mul_cor(corr_sp_TR_spr,result_list[0],result_list[1], spr_n)
                        corr_mul_list.append(('Spearmann','Spring',k,result))
                    else:
                        result = find_mul_cor(corr_sp_TR_sum,result_list[0],result_list[1], sum_n)
                        corr_mul_list.append(('Spearmann','Summer',k,result))
          


In [11]:
corr_mul_TH = pd.DataFrame (corr_mul_list, columns = ['CorrelationMethod','Season','Covid_Column','Correlation'])
#corr_mul_TH.to_csv('corr_mul_TH.csv')

In [12]:
#Aggregating the Covid Data for the whole state RLP
Covid_Data_avg = Covid_Data1.groupby(['Date']).agg({'PCR_Postive_Count' : 'sum', 'PCR_Positive_Hospitalised' : 'sum',
                                                     'Deceased' : 'sum',
                                             'temperature' : 'mean', 'wind_speed' : 'mean', 'relative_humidity': 'mean',
                                            }).reset_index()
#Covid_Data_avg

In [13]:
#lowest value weeks in all seasons
#Autumn: 22.11.2021 to 05.12.2021
low_data_aut = Covid_Data_avg.loc[(Covid_Data_avg['Date'] >= pd.to_datetime('22-11-2021', format='%d-%m-%Y')) & (Covid_Data_avg['Date'] <= pd.to_datetime('05-12-2021', format='%d-%m-%Y'))]
low_data_aut= low_data_aut.reset_index()

#Winter: 14.02.2022 to 27.02.2022
low_data_win = Covid_Data_avg.loc[(Covid_Data_avg['Date'] >= pd.to_datetime('14-02-2022', format='%d-%m-%Y')) & (Covid_Data_avg['Date'] <= pd.to_datetime('27-02-2022', format='%d-%m-%Y'))]
low_data_win= low_data_win.reset_index()

#Spring: 28.02.2022 to 13.03.2022
low_data_spr = Covid_Data_avg.loc[(Covid_Data_avg['Date'] >= pd.to_datetime('28-02-2022', format='%d-%m-%Y')) & (Covid_Data_avg['Date'] <= pd.to_datetime('13-03-2022', format='%d-%m-%Y'))]
low_data_spr= low_data_spr.reset_index()

#Summer: 04.06.2022 to 17.06.2022
low_data_sum = Covid_Data_avg.loc[(Covid_Data_avg['Date'] >= pd.to_datetime('04-06-2022', format='%d-%m-%Y')) & (Covid_Data_avg['Date'] <= pd.to_datetime('17-06-2022', format='%d-%m-%Y'))]
low_data_sum= low_data_sum.reset_index()


In [14]:
def cross_correlation(df,range_i):
    temp_val=[]
    pcr_val = []
    cr_corr = []
    cr_corr_ar = []
    """
    for i in range(0,range_i):
    #fetching temperature value for Day 0
        for j in range(1,8):
        #fetching temperature value for Day 1-8
            tp = round(df.loc[[i]].temperature.values[0],4)
            pcr = df.loc[[i+j]].PCR_Postive_Count.values[0]
            temp_val.append(tp)
            pcr_val.append(pcr)
        cr_corr_ar = sm.tsa.stattools.ccf(temp_val,pcr_val,adjusted=False)
        for l in range(0,len(cr_corr_ar)):
            cr_corr.append((temp_val[l],pcr_val[l],cr_corr_ar[l]))
        temp_val=[]
        pcr_val = []
        cr_corr_ar = []
    """
    cr_corr_ar = sm.tsa.stattools.ccf(df['temperature'],df['PCR_Postive_Count'],adjusted=False)
    for l in range(0,len(cr_corr_ar)):
        cr_corr.append((df.loc[[l]].temperature.values[0],df.loc[[l]].PCR_Postive_Count.values[0],cr_corr_ar[l]))
    return cr_corr
 
        

In [15]:
#Cross correlation values with lag
#Autumn
cr_cor_list=[]
cr_cor_list = cross_correlation(low_data_aut,7)
aut_cr_corr_df = pd.DataFrame (cr_cor_list, columns = ['temperature','PCR_Positive_Count','Cross Correlation'])
aut_cr_corr_df['Cross Correlation'] = aut_cr_corr_df['Cross Correlation'].fillna(0)
#aut_cr_corr_df.to_csv('aut_cr_corr_df.csv')

#Winter
cr_cor_list=[]
cr_cor_list = cross_correlation(low_data_win,7)
win_cr_corr_df = pd.DataFrame (cr_cor_list, columns = ['temperature','PCR_Positive_Count','Cross Correlation'])
win_cr_corr_df['Cross Correlation'] = win_cr_corr_df['Cross Correlation'].fillna(0)
#win_cr_corr_df.to_csv('win_cr_corr_df.csv')

#Spring
cr_cor_list=[]
cr_cor_list = cross_correlation(low_data_spr,7)
spr_cr_corr_df = pd.DataFrame (cr_cor_list, columns = ['temperature','PCR_Positive_Count','Cross Correlation'])
spr_cr_corr_df['Cross Correlation'] = spr_cr_corr_df['Cross Correlation'].fillna(0)
#spr_cr_corr_df.to_csv('spr_cr_corr_df.csv')

#Summer
cr_cor_list=[]
cr_cor_list = cross_correlation(low_data_sum,5)
sum_cr_corr_df = pd.DataFrame (cr_cor_list, columns = ['temperature','PCR_Positive_Count','Cross Correlation'])
sum_cr_corr_df['Cross Correlation'] = sum_cr_corr_df['Cross Correlation'].fillna(0)
#sum_cr_corr_df.to_csv('sum_cr_corr_df.csv')



In [16]:
low_data_aut

Unnamed: 0,index,Date,PCR_Postive_Count,PCR_Positive_Hospitalised,Deceased,temperature,wind_speed,relative_humidity
0,112,2021-11-22,1666,30,5,4.044667,16.137733,83.305333
1,113,2021-11-23,1812,38,9,1.7752,10.454133,84.190667
2,114,2021-11-24,2151,35,12,2.8272,10.162667,84.764
3,115,2021-11-25,2064,73,2,1.598533,5.603333,82.734667
4,116,2021-11-26,2212,45,4,1.8872,15.7944,86.692
5,117,2021-11-27,595,11,2,1.940933,14.093733,87.816
6,118,2021-11-28,925,27,3,1.455467,8.4724,90.604
7,119,2021-11-29,2035,47,14,1.293067,14.578086,86.973333
8,120,2021-11-30,1986,71,9,3.8296,25.308,87.649333
9,121,2021-12-01,2228,47,7,5.973333,27.384305,86.784


In [17]:
aut_cr_corr_df

Unnamed: 0,temperature,PCR_Positive_Count,Cross Correlation
0,4.044667,1666,0.045349
1,1.7752,1812,0.147435
2,2.8272,2151,-0.007611
3,1.598533,2064,-0.46426
4,1.8872,2212,-0.383996
5,1.940933,595,0.260489
6,1.455467,925,0.294736
7,1.293067,2035,0.005097
8,3.8296,1986,-0.030836
9,5.973333,2228,-0.054724


In [18]:
corr_kendall= s.kendalltau(15666,16.960667)
corr_kendall
#corr = round(corr_kendall[0],4)
#sig = corr_kendall[1] <= 0.05

KendalltauResult(correlation=nan, pvalue=nan)