In [1]:
import datetime
import numpy as np
import os 
import pandas as pd
from pytz import timezone

In [2]:
# --- Germany ---
# Source: https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0
# Column glossary:
#   Datenstand      --> date when the record was last updated
#   Meldedatum      --> date when the health department became aware of the case
#   AnzahlFall      --> number of cases in the corresponding group
#   AnzahlTodesfall --> number of deaths in the corresponding group
german_covid = pd.read_csv("table_indicateurs_german_dep.csv")

# --- France ---
# Source: https://www.data.gouv.fr/en/datasets/synthese-des-indicateurs-de-suivi-de-lepidemie-covid-19/
france_covid = pd.read_csv("table_indicateurs_france.csv")

# --- United Kingdom ---
# Source: https://coronavirus.data.gov.uk/details/download
uk_covid = pd.read_csv("table_indicateurs_uk.csv")

In [None]:
def set_name(df):
    """Return a copy of ``df`` whose first three columns carry standard names.

    The columns at positions 0, 1 and 2 are renamed to ``"date"``,
    ``"daily_case_count"`` and ``"daily_death_count"`` respectively; any
    further columns keep their names. The input frame is not modified.
    """
    date_col = df.columns[0]
    case_col = df.columns[1]
    death_col = df.columns[2]
    return df.rename(
        columns={
            date_col: "date",
            case_col: "daily_case_count",
            death_col: "daily_death_count",
        }
    )

def date_crop(df, start="2020-03-18", end="2021-04-15"):
    """Keep only the rows whose ``"date"`` lies within ``[start, end]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``"date"`` column.
    start, end : str, optional
        Inclusive bounds of the window. The defaults reproduce the study
        period that was previously hard-coded in the function body.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` inside the window, in their original order.

    Notes
    -----
    The bounds are compared directly against the column values.
    NOTE(review): when ``"date"`` holds strings this is a lexicographic
    comparison, which matches chronological order only for ISO
    ``YYYY-MM-DD`` text -- confirm every source uses that format.
    """
    in_window = (df["date"] >= start) & (df["date"] <= end)
    return df[in_window]

def sort_float_to_int(df):
    """Cast the two count columns to ``int`` and order the rows by date.

    The columns at positions 1 and 2 are converted with ``astype(int)``,
    then the frame is sorted ascending on its ``"date"`` column. A new
    frame is returned; the original index labels are kept.

    NOTE(review): ``astype(int)`` cannot represent missing values, so
    this presumably relies on both count columns being NaN-free --
    confirm against the input files.
    """
    case_col, death_col = df.columns[1], df.columns[2]
    as_ints = df.astype({case_col: int, death_col: int})
    return as_ints.sort_values(by="date", ascending=True)


def tidy(df):
    """Run the full cleaning pipeline on one country's table.

    Steps, in order: rename the first three columns (``set_name``),
    restrict to the study window (``date_crop``), then cast the count
    columns to int and sort by date (``sort_float_to_int``).
    """
    renamed = set_name(df)
    cropped = date_crop(renamed)
    return sort_float_to_int(cropped)

In [None]:
# Build one (date, cases, deaths) table per country.

# Parse Meldedatum (date of confirmed case or death) so it can be
# formatted as a YYYY-MM-DD day key below.
german_covid["Meldedatum"] = pd.to_datetime(german_covid["Meldedatum"])

# Columns are selected with a *list* (double brackets). The previous
# tuple-style selection, groupby(...)["a", "b"], was deprecated and raises
# ValueError in pandas 2.x.
agg_fr_covid = (
    france_covid
    .groupby("date")[["conf_j1", "incid_dchosp"]]
    .sum()
    .reset_index()
)
agg_ger_covid = (
    german_covid
    .groupby(german_covid["Meldedatum"].dt.strftime("%Y-%m-%d"))[["AnzahlFall", "AnzahlTodesfall"]]
    .sum()
    .reset_index()
)
# The UK table is used as-is: only the three needed columns are selected.
agg_uk_covid = uk_covid[["date", "newCasesByPublishDate", "newDailyNsoDeathsByDeathDate"]]

In [None]:
# Apply the shared cleaning pipeline to each country's aggregated table.
France_Covid = tidy(agg_fr_covid)
Germany_Covid = tidy(agg_ger_covid)
UK_Covid = tidy(agg_uk_covid)

# EXPORT


In [None]:
# Write each cleaned table to country_covid/file_<i>.csv, where <i> is the
# table's position in `files` (0 = France, 1 = Germany, 2 = UK).
files = [France_Covid, Germany_Covid, UK_Covid]

# exist_ok=True makes directory creation idempotent in a single call,
# replacing the separate exists-check followed by mkdir.
os.makedirs("country_covid", exist_ok=True)

# NOTE: `country` is the enumerate index, not a country name.
for country, df in enumerate(files):
    df.to_csv(f"country_covid/file_{country}.csv", index=False)

# Notes

In [None]:
# https://medium.com/@bewerunge.franz/google-trends-how-to-acquire-daily-data-for-broad-time-frames-b6c6dfe200e6
# trend scaling = take daily data and multiply by monthly metric (daily* month /100)

# https://www.youtube.com/watch?v=cuTUbPQk2R4&ab_channel=KostadinRistovski


# https://www.medrxiv.org/content/10.1101/2021.02.18.21251966v1.full

# study suggests that Google trends is biased :
#     quote: However, search activity may, for example, 
#         be triggered by curiosity following news reports of suicide deaths,
#         particularly celebrity deaths

#INFORMATION TO GET FOR CROSS ANALYSIS --> What are we trying to monitor 

# Political support : Emmanuel Macron polls
# trust in political party

# Suicidal thoughts / depression :
#number of visits to therapists
#drug intake ( Psychoactive)
# alcohol and tobacco intake evolution (addictions)
#pornography (evolution)
#job loss
# salary / compensation package evolution
# unemployment
#belief of a better future / optimism
#isolation rate ? 
# sleep quality


#Divide the population in segments and compare evolutions.
#age groups / socio economic groups

# insight : it is very difficult to find a continuous stream of data over time.
# solution : Google API , Google Trends

# https://bmjopen.bmj.com/content/10/9/e040620
# paper in the UK: survey conducted in April 2020
#     result : women, young people and those at risk from COVID reported more anxiety and depression

# https://www-statista-com.ezp.em-lyon.com/statistics/1203423/state-mental-health-covid-19-france/
# france evolution of A, D, S thoughts (only 21 entries of data)