In [None]:
import json
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import pearsonr, spearmanr
from statsmodels.stats.multitest import multipletests

In [None]:
# Read the tab-separated corona case file and the JSON metadata that sits next to it.
corona_df = pd.read_csv('../data/raw/corona/de_corona.csv', sep='\t')
with open('../data/raw/metadata/de_metadata.json', 'r', encoding="utf8") as f:
    country_metadata = json.load(f)

# Lookup table: covid_region_code -> iso3166-2_code, one entry per metadata record.
region_map = {
    entry["covid_region_code"]: entry["iso3166-2_code"]
    for entry in country_metadata["country_metadata"]
}

# Add an "iso3166-2" column by translating each row's region_code through the lookup.
# Region codes missing from the lookup become NaN (Series.map behaviour).
corona_df["iso3166-2"] = corona_df["region_code"].map(region_map)

corona_df

In [None]:
weather_df = pd.read_csv("../data/raw/weather/weather.csv")

# Shift the temperature column by -273.15 (presumably Kelvin -> Celsius; confirm the source units).
weather_df["TemperatureAboveGround"] = weather_df["TemperatureAboveGround"].sub(273.15)

# Keep only the rows whose ISO 3166-2 code begins with "DE".
weather_df = weather_df.loc[weather_df["iso3166-2"].str.startswith("DE")]

weather_df

In [None]:
# No join keys are given, so pandas performs an inner join on every column name
# that the two frames have in common.
merged_df = pd.merge(corona_df, weather_df)

merged_df

In [None]:
# Row-count difference between each input frame and the merged result.
weather_lost = len(weather_df) - len(merged_df)
corona_lost = len(corona_df) - len(merged_df)

# Emit the three report lines in one call; sep="\n" yields the same output as three prints.
print(
    f"We lost {weather_lost} rows from the weather dataset.",
    f"We lost {corona_lost} rows from the corona datset",
    f"We lost {weather_lost+corona_lost} rows in total, because presumeably the one of the datasets does not have the same amount of dates and so some rows are dropped",
    sep="\n",
)


In [None]:
# Candidate explanatory variables: the merged frame's columns at positions 6 through 11.
Xs = merged_df.columns[6:12].tolist()

# p-value cut-off below which a correlation is reported as significant.
significance_threshold = 0.005

# Pearson correlation of the raw confirmed_addition counts against each candidate column.
target = merged_df["confirmed_addition"]
for var in Xs:
    corr, pvalue = pearsonr(target, merged_df[var])
    print(f"{var}\n{corr:.3f}\t{pvalue}\t{pvalue < significance_threshold}\n")


In [None]:
# Spearman rank correlation of confirmed_addition against each candidate column.
target = merged_df["confirmed_addition"]
for var in Xs:
    corr, pvalue = spearmanr(target, merged_df[var])
    print(f"{var}\n{corr:.3f}\t{pvalue}\t{pvalue < significance_threshold}\n")

In [None]:
# Pearson correlation on log-transformed counts; the +1 offset keeps log() finite for zero counts.
log_target = np.log(merged_df["confirmed_addition"] + 1)
for var in Xs:
    corr, pvalue = pearsonr(log_target, merged_df[var])
    print(f"{var}\n{corr:.3f}\t{pvalue}\t{pvalue < significance_threshold}\n")

In [None]:
# Gather the p-value of every (test, variable) pair into one flat list for
# multiple-testing correction. The list is test-major: the first len(Xs) entries
# belong to "Linear", the next len(Xs) to "Spearman", the last len(Xs) to "Log",
# i.e. the entry for tests[i] / Xs[j] sits at index i * len(Xs) + j.
pvalues = []
# Fix: a trailing comma here previously turned `tests` into a 1-tuple wrapping the list.
tests = ["Linear", "Spearman", "Log"]

# Linear: Pearson correlation on the raw counts.
for var in Xs:
    corr, pvalue = pearsonr(merged_df["confirmed_addition"], merged_df[var])
    pvalues.append(pvalue)

# Spearman: rank correlation on the raw counts.
for var in Xs:
    corr, pvalue = spearmanr(merged_df["confirmed_addition"], merged_df[var])
    pvalues.append(pvalue)

# Log: Pearson correlation on log(count + 1).
# Fix: the +1 offset was missing here, so zero counts produced -inf and the
# p-values disagreed with the standalone log-correlation cell.
for var in Xs:
    corr, pvalue = pearsonr(np.log(merged_df["confirmed_addition"] + 1), merged_df[var])
    pvalues.append(pvalue)


In [None]:
# Holm step-down correction over all collected p-values, at the same threshold
# used for the uncorrected tests (previously a duplicated literal 0.005).
# Only the first two return values (reject flags, corrected p-values) are kept;
# slicing avoids binding `_`, which IPython uses for the last cell output.
significant, pholmcorrected = multipletests(
    pvalues, alpha=significance_threshold, method="holm"
)[:2]

# `pvalues` is laid out test-major, so the flag for tests[i] / Xs[j] lives at
# i * len(Xs) + j. Fix: the previous index `i + j` reported overlapping,
# wrong entries for every test after the first.
for i in range(len(tests)):
    for j in range(len(Xs)):
        print(f"{tests[i]}\t{Xs[j]}\t{significant[i * len(Xs) + j]}")