<a href="https://colab.research.google.com/github/vincnardelli/covstat/blob/master/covstat_vaccini.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
import pandas as pd
import numpy as np

## Confronto Italia - Germania

In [64]:
vaccinations = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")

In [65]:
stati = ["Germany", "Italy"]
df = vaccinations[vaccinations.location.isin(stati)]
df = df.pivot(index='date', columns='location', values=['total_vaccinations', 'total_vaccinations_per_hundred'])
df.columns = df.columns.map('_'.join).str.strip('')
df.reset_index(inplace=True)
df.date = pd.to_datetime(df.date).dt.strftime('%d-%m-%Y')
df['spread'] = df['total_vaccinations_per_hundred_Germany'] - df['total_vaccinations_per_hundred_Italy']
df.dropna(axis=0, inplace = True)
df['total_vaccinations_Germany'] = df['total_vaccinations_Germany'].astype(int)
df['total_vaccinations_Italy'] = df['total_vaccinations_Italy'].astype(int)
df['spread'] = round(df['spread'], 2)
df.tail(1).to_csv("confronto_italia_germania_last.csv")

In [66]:
df = vaccinations[vaccinations.location.isin(stati)]
df.drop(columns=['iso_code', 'total_vaccinations', 'daily_vaccinations', 'daily_vaccinations_per_million', 
                 'people_fully_vaccinated', 'daily_vaccinations_raw', 'people_vaccinated', 'people_vaccinated_per_hundred', 
                 'people_fully_vaccinated_per_hundred'], inplace=True)
df = df.melt(id_vars = ['location', 'date'])
df.reset_index(inplace=True)
df.date = pd.to_datetime(df.date).dt.strftime('%d-%m-%Y')
df.to_csv("confronto_italia_germania.csv")
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,index,location,date,variable,value
0,0,Germany,27-12-2020,total_vaccinations_per_hundred,0.03
1,1,Germany,28-12-2020,total_vaccinations_per_hundred,0.05
2,2,Germany,29-12-2020,total_vaccinations_per_hundred,0.1
3,3,Germany,30-12-2020,total_vaccinations_per_hundred,0.17
4,4,Germany,31-12-2020,total_vaccinations_per_hundred,0.21
5,5,Germany,01-01-2021,total_vaccinations_per_hundred,0.25
6,6,Germany,02-01-2021,total_vaccinations_per_hundred,0.3
7,7,Germany,03-01-2021,total_vaccinations_per_hundred,0.33
8,8,Germany,04-01-2021,total_vaccinations_per_hundred,0.39
9,9,Germany,05-01-2021,total_vaccinations_per_hundred,0.45


## Confronto internazionale

In [69]:
stati = ['Italy', 'Germany', 'United Kingdom', 'Spain', 'United States']
df = vaccinations[vaccinations.location.isin(stati)]
df.drop(columns=['iso_code',
                 'people_fully_vaccinated', 'daily_vaccinations_raw', 'people_vaccinated', 'people_vaccinated_per_hundred', 
                 'people_fully_vaccinated_per_hundred'], inplace=True)
df = df.melt(id_vars = ['location', 'date'])
df = df.pivot(index = ['date', 'variable'], columns = 'location', values= 'value')
df.reset_index(inplace=True)
df.date = pd.to_datetime(df.date).dt.strftime('%d-%m-%Y')
df.loc[df['variable'] == 'daily_vaccinations', ['variable']] = "Vaccinazioni giornaliere"
df.loc[df['variable'] == 'daily_vaccinations_per_million', ['variable']] = "Vaccinazioni giornaliere ogni milione ab"
df.loc[df['variable'] == 'total_vaccinations', ['variable']] = "Totale vaccinazioni"
df.loc[df['variable'] == 'total_vaccinations_per_hundred', ['variable']] = "Totale vaccinazioni ogni cento abitanti"

df.to_csv("confronto_stati.csv")
df


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


location,date,variable,Germany,Italy,Spain,United Kingdom,United States
0,20-12-2020,Vaccinazioni giornaliere,,,,,
1,20-12-2020,Vaccinazioni giornaliere ogni milione ab,,,,,
2,20-12-2020,Totale vaccinazioni,,,,650714.00,556208.00
3,20-12-2020,Totale vaccinazioni ogni cento abitanti,,,,0.96,0.17
4,21-12-2020,Vaccinazioni giornaliere,,,,44642.00,57909.00
...,...,...,...,...,...,...,...
91,11-01-2021,Totale vaccinazioni ogni cento abitanti,0.82,1.21,0.87,4.19,2.72
92,12-01-2021,Vaccinazioni giornaliere,,76059.00,49826.00,,641524.00
93,12-01-2021,Vaccinazioni giornaliere ogni milione ab,,1258.00,1066.00,,1938.00
94,12-01-2021,Totale vaccinazioni,,800730.00,488122.00,,9327138.00


## Analisi vaccini Italia

In [None]:
anagrafica = pd.read_csv("https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/anagrafica-vaccini-summary-latest.csv")
dati_istat = pd.read_csv("https://raw.githubusercontent.com/vincnardelli/covstat/master/vaccini/dati_istat.csv", sep=";")
dati_istat

Unnamed: 0,fascia_anagrafica,pop_maschile,pop_femminile
0,20-29,3192324,2964859
1,30-39,3500589,3452131
2,40-49,4495422,4544377
3,50-59,4656253,4844927
4,60-69,3554434,3870741
5,70-79,2753864,3252966
6,80-89,1446283,2201193
7,90+,217463,577109


In [None]:
anagrafica = anagrafica.merge(dati_istat, on="fascia_anagrafica")
anagrafica['Maschi'] = anagrafica['sesso_maschile']/anagrafica['pop_maschile']
anagrafica['Femmine'] = anagrafica['sesso_femminile']/anagrafica['pop_femminile']

In [None]:
anagrafica_graph = anagrafica[['fascia_anagrafica','Maschi', 'Femmine']]
anagrafica_graph.set_index('fascia_anagrafica', inplace = True)
anagrafica_graph['Femmine'] = anagrafica_graph['Femmine']*-100
anagrafica_graph['Maschi'] = anagrafica_graph['Maschi']*100
anagrafica_graph = round(anagrafica_graph, 2)
anagrafica_graph.to_csv("italia_anagrafica.csv")
anagrafica_graph

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,Maschi,Femmine
fascia_anagrafica,Unnamed: 1_level_1,Unnamed: 2_level_1
20-29,0.45,-0.94
30-39,0.82,-1.23
40-49,0.67,-1.34
50-59,0.88,-1.59
60-69,1.03,-0.73
70-79,0.17,-0.09
80-89,0.19,-0.33
90+,0.52,-1.17


## Regioni italiane

In [None]:
popolation_regions = np.array([ 1304970,      559084,        533050,   1947131,   5801692,         
                               4459477,        1215220,5879082, 1550640,    
                               10060574,  1525271,  305617,    4356406, 4029053, 1639591,  
                               4999891,  3729641,       541380,  882015,          125666, 4905854])
name_regions       = np.array(['Abruzzo','Basilicata','P.A. Bolzano','Calabria','Campania',
                               'Emilia-Romagna','Friuli Venezia Giulia','Lazio','Liguria',
                               'Lombardia','Marche','Molise','Piemonte','Puglia','Sardegna',
                               'Sicilia','Toscana','P.A. Trento','Umbria','Valle d\'Aosta','Veneto'])
area       = np.array(['ABR','BAS','PAB','CAL','CAM',
                               'EMR','FVG','LAZ','LIG',
                               'LOM','MAR','MOL','PIE','PUG','SAR',
                               'SIC','TOS','PAT','UMB','VDA','VEN'])

popolation = pd.DataFrame([name_regions, popolation_regions, area]).transpose()
popolation.columns = ['regione', 'popolazione', 'area'] 

In [None]:
regioni = pd.read_csv("https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/somministrazioni-vaccini-summary-latest.csv")
regioni = regioni.merge(popolation, on='area')

regioni['Totale vaccinazioni'] = regioni.groupby(['regione'])['totale'].apply(lambda x: x.cumsum())
regioni['Totale vaccinazioni ogni cento ab'] = regioni['Totale vaccinazioni']/regioni['popolazione']*100
regioni['Totale vaccinazioni ogni cento ab'] = round(regioni['Totale vaccinazioni ogni cento ab'].astype(float), 2)


regioni['Vaccinazioni giornaliere ogni cento ab'] = regioni['totale']/regioni['popolazione']*100
regioni['Vaccinazioni giornaliere ogni cento ab'] = round(regioni['Vaccinazioni giornaliere ogni cento ab'].astype(float), 2)

regioni = regioni.rename(columns={'data_somministrazione': 'data', 
                        'totale': 'Vaccinazioni giornaliere', 
                        'categoria_operatori_sanitari_sociosanitari': 'Operatori sanitari - sociosanitari', 
                        'categoria_personale_non_sanitario': 'Personale non sanitario', 
                        'categoria_ospiti_rsa': 'Ospiti RSA', 
                        'regione': 'Regione'})
#regioni.reset_index(inplace=True)
#regioni.data = pd.to_datetime(regioni.data).dt.strftime('%d-%m-%Y')
regioni.to_csv("regioni.csv")

In [None]:
confronto_regioni = regioni[['data', 'Regione', 'Totale vaccinazioni', 'Totale vaccinazioni ogni cento ab', 'Vaccinazioni giornaliere', 'Vaccinazioni giornaliere ogni cento ab']]
confronto_regioni = confronto_regioni.melt(id_vars = ['Regione', 'data'])
confronto_regioni = confronto_regioni.pivot(index = ['data', 'variable'], columns = 'Regione', values= 'value')
confronto_regioni.reset_index(inplace=True)
confronto_regioni.data = pd.to_datetime(confronto_regioni.data).dt.strftime('%m-%d-%Y')
confronto_regioni.to_csv("confronto_regioni.csv")

In [None]:
confronto_regioni

Regione,data,variable,Abruzzo,Basilicata,Calabria,Campania,Emilia-Romagna,Friuli Venezia Giulia,Lazio,Liguria,Lombardia,Marche,Molise,P.A. Bolzano,P.A. Trento,Piemonte,Puglia,Sardegna,Sicilia,Toscana,Umbria,Valle d'Aosta,Veneto
0,12-27-2020,Totale vaccinazioni,135.0,208.0,274.0,720.0,962.0,265.0,179.0,135.0,766.0,200.0,50.0,144.0,100.0,908.0,555.0,65.0,132.0,620.0,144.0,20.0,880.0
1,12-27-2020,Totale vaccinazioni ogni cento ab,0.01,0.04,0.01,0.01,0.02,0.02,0.0,0.01,0.01,0.01,0.02,0.03,0.02,0.02,0.01,0.0,0.0,0.02,0.02,0.02,0.02
2,12-27-2020,Vaccinazioni giornaliere,135.0,208.0,274.0,720.0,962.0,265.0,179.0,135.0,766.0,200.0,50.0,144.0,100.0,908.0,555.0,65.0,132.0,620.0,144.0,20.0,880.0
3,12-27-2020,Vaccinazioni giornaliere ogni cento ab,0.01,0.04,0.01,0.01,0.02,0.02,0.0,0.01,0.01,0.01,0.02,0.03,0.02,0.02,0.01,0.0,0.0,0.02,0.02,0.02,0.02
4,12-28-2020,Totale vaccinazioni,,,279.0,,,,746.0,215.0,1157.0,,,144.0,,,,130.0,346.0,,,,
5,12-28-2020,Totale vaccinazioni ogni cento ab,,,0.01,,,,0.01,0.01,0.01,,,0.03,,,,0.01,0.01,,,,
6,12-28-2020,Vaccinazioni giornaliere,,,5.0,,,,567.0,80.0,391.0,,,0.0,,,,65.0,214.0,,,,
7,12-28-2020,Vaccinazioni giornaliere ogni cento ab,,,0.0,,,,0.01,0.01,0.0,,,0.0,,,,0.0,0.0,,,,
8,12-29-2020,Totale vaccinazioni,,,,,,,1097.0,340.0,1573.0,,,,,,,180.0,581.0,,,,
9,12-29-2020,Totale vaccinazioni ogni cento ab,,,,,,,0.02,0.02,0.02,,,,,,,0.01,0.01,,,,


# Piano vaccinale

In [70]:
dfIT = vaccinations[vaccinations['location']=='Italy'].iloc[2:,:]
dati_ITALIA = np.array(dfIT.iloc[3:,3])/(10**6)
x_ITALIA = np.arange(0,len(dati_ITALIA))/7

# requisiti per scenario minimo e immunità di gregge

over65 = 14*10**6
operatori_sanitari = 6.5*10**5
periodo = 30*9
soglia_immunita = 70/100
n_dosi = 2
popolazione = 60*10**6
obj_minimo = (over65+operatori_sanitari)*2
obj_ideale = (70/100*60*10**6)*2
days = np.arange(270,0,-1)[0:len(dfIT_soglie)]

dfIT_soglie = dfIT[['date','total_vaccinations']]
dfIT_soglie = dfIT_soglie.iloc[2:,:]
dfIT_soglie.iloc[6,1] = 321077
ll = len(dfIT_soglie)
 
dfIT_soglie['minimo'] = minimo_daily*np.arange(1,ll+1)
dfIT_soglie['immunità'] = immunità_daily*np.arange(1,ll+1)
 
dfIT_soglie['daily'] = np.diff(np.append(14613,np.array(dfIT_soglie['total_vaccinations'])))

dfIT_soglie['nane_min']=(obj_minimo-np.array(dfIT_soglie['total_vaccinations']))/days
dfIT_soglie['nane_ideal']=(obj_ideale-np.array(dfIT_soglie['total_vaccinations']))/days


minimo_daily = n_dosi*(over65+operatori_sanitari)/periodo
immunità_daily = n_dosi*(soglia_immunita*popolazione)/periodo

print(minimo_daily)
print(immunità_daily)

n_week = 3/4*52

week_min = obj_minimo/n_week/10**6
week_ideal = obj_ideale/n_week/10**6

week_grid = np.arange(0,n_week+0.1)

108518.51851851853
311111.1111111111


In [71]:
# piano vaccini
week_vaccini = [0,13,26,39,52,65,78]

piano_vaccini_diff = np.array([0,28.269*10**6,57.202*10**6,53.84*10**6,
                 14.806*10**6,28.266*10**6,20.19*10**6])/10**6

piano_vaccini = np.cumsum(piano_vaccini_diff)

In [73]:
pianovaccini = pd.DataFrame({"week": week_vaccini, "Numero di dosi piano vaccini": piano_vaccini})
pianovaccini

Unnamed: 0,week,Numero di dosi piano vaccini
0,0,0.0
1,13,28.269
2,26,85.471
3,39,139.311
4,52,154.117
5,65,182.383
6,78,202.573


In [74]:
vaccinazioni = pd.DataFrame({"week":x_ITALIA, "Vaccini effettuati": dati_ITALIA})
vaccinazioni

Unnamed: 0,week,Vaccini effettuati
0,0.0,0.04941
1,0.142857,0.087833
2,0.285714,0.12251
3,0.428571,0.190144
4,0.571429,0.268319
5,0.714286,0.332247
6,0.857143,0.422492
7,1.0,0.512868
8,1.142857,0.597017
9,1.285714,0.655941


In [75]:
projection = pd.DataFrame({"week": week_grid, 
                          "70% di vaccinati entro 30/09/21": week_grid*week_ideal, 
                           "Over 65 e op.sanitari vaccinati entro 30/09/21": week_grid*week_min})
projection

Unnamed: 0,week,70% di vaccinati entro 30/09/21,Over 65 e op.sanitari vaccinati entro 30/09/21
0,0.0,0.0,0.0
1,1.0,2.153846,0.751282
2,2.0,4.307692,1.502564
3,3.0,6.461538,2.253846
4,4.0,8.615385,3.005128
5,5.0,10.769231,3.75641
6,6.0,12.923077,4.507692
7,7.0,15.076923,5.258974
8,8.0,17.230769,6.010256
9,9.0,19.384615,6.761538


In [76]:
projection = projection.merge(pianovaccini, how="left")
projection = projection.merge(vaccinazioni, how="left")
projection.to_csv("projection.csv")
projection

Unnamed: 0,week,70% di vaccinati entro 30/09/21,Over 65 e op.sanitari vaccinati entro 30/09/21,Numero di dosi piano vaccini,Vaccini effettuati
0,0.0,0.0,0.0,0.0,0.04941
1,1.0,2.153846,0.751282,,0.512868
2,2.0,4.307692,1.502564,,
3,3.0,6.461538,2.253846,,
4,4.0,8.615385,3.005128,,
5,5.0,10.769231,3.75641,,
6,6.0,12.923077,4.507692,,
7,7.0,15.076923,5.258974,,
8,8.0,17.230769,6.010256,,
9,9.0,19.384615,6.761538,,


In [77]:
vaccinazioni = pd.DataFrame({"week":x_ITALIA, "Vaccini effettuati": dati_ITALIA, 
                                                       "70% di vaccinati entro 30/09/21": x_ITALIA*week_ideal,
                             "Over 65 e op.sanitari vaccinati entro 30/09/21": x_ITALIA*week_min 
})
vaccinazioni.to_csv("projection_vaccinazioni.csv")