In [13]:
import pandas as pd
import os
import sys
import pandarallel
# On macOS, switch pandarallel to the 'spawn' multiprocessing context. This must
# happen before initialize() is called below.
# see https://github.com/nalepae/pandarallel/issues/225
if sys.platform == 'darwin':
    pandarallel.core.CONTEXT = pandarallel.core.multiprocessing.get_context('spawn')
# Initialise pandarallel with a progress bar; the download cells below rely on
# the `.parallel_map` / `.parallel_apply` methods.
pandarallel.pandarallel.initialize(progress_bar=True)

# Project-local scraper package that provides the PNE downloaders used below.
import agenas_pne_scraper

INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

https://nalepae.github.io/pandarallel/troubleshooting/


In [2]:
# Record which scraper version produced the outputs of this run.
print(f'agenas-pne-scraper version: {agenas_pne_scraper.__version__}')

agenas-pne-scraper version: 0.1.0


In [3]:
# Year passed to the table downloaders below; also used in the name of the
# single-year CSV export.
YEAR: int = 2021

In [4]:
# Hospital table from the scraper; it provides at least the `hospital_id` and
# `hospital_name` columns used on the next line.
hospitals_df = agenas_pne_scraper.get_hospital_id_hospital_name_hospitals_df()

# Series of hospital names indexed by hospital id. Its index is what the
# per-hospital download cells below iterate over.
hospital_code_legend = hospitals_df.set_index('hospital_id')['hospital_name']

### Volume indicators
Table
- `strutture/stru_frequenza.php` + `cod_struttura` as querystring

Graph with historical data
- `strutture/grafico1Str5_HC.php` + `cod_struttura` and `ind` as querystring

### Outcome indicators
Table
- `strutture/stru_indicatori.php` + `cod_struttura` and `conf["reg", "prec"]` as querystring. `conf` stands for _confronto_, Italian for "comparison": "reg" compares against the regional mean, "prec" against the previous year.


Graphs

### Waiting time indicators
Table
- `strutture/stru_tempi.php` + `cod_struttura` as querystring

Graphs


### Volume indicators

In [5]:
# Apply the volume-indicator mapper for YEAR to every hospital id in parallel,
# then stack the results into one table with a fresh 0..n-1 index.
# NOTE(review): presumably the mapper returns one DataFrame per hospital id — confirm.
hospital_ids = hospital_code_legend.index.to_series()
volume_mapper = agenas_pne_scraper.PNEVolumeIndicatorsDownloader.generate_pandas_mapper(year=YEAR)
volume_frames = hospital_ids.parallel_map(volume_mapper).to_list()

volume_indicators = pd.concat(volume_frames, axis=0, ignore_index=True)
volume_indicators

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=349), Label(value='0 / 349'))), HB…

Unnamed: 0,hospital_code,year,indicator_id,indicator_type,description,value
0,01090401,2021,222.0,volume,AIDS: volume di ricoveri,3.0
1,01090401,2021,139.0,volume,Appendicectomia laparoscopica: volume di ricov...,3.0
2,01090401,2021,114.0,volume,Artroscopia di ginocchio: volume di ricoveri,43.0
3,01090401,2021,123.0,volume,BPCO: volume di ricoveri in day hospital,1.0
4,01090401,2021,126.0,volume,BPCO: volume di ricoveri ordinari,213.0
...,...,...,...,...,...,...
35086,20090402,2021,130.0,volume,Scompenso cardiaco congestizio: volume di rico...,3.0
35087,20090402,2021,133.0,volume,Scompenso cardiaco: volume di ricoveri,4.0
35088,20090402,2021,98.0,volume,STEMI: volume di ricoveri,3.0
35089,20090402,2021,407.0,volume,Volume di ricoveri per embolia polmonare,6.0


### Outcome indicators

In [6]:
# Apply the outcome-indicator mapper for YEAR to every hospital id in parallel,
# requesting both comparisons (compare='both'; the output carries `reg_*` and
# `prec_*` columns), then stack the results into one table with a fresh index.
# NOTE(review): presumably the mapper returns one DataFrame per hospital id — confirm.
hospital_ids = hospital_code_legend.index.to_series()
outcome_mapper = agenas_pne_scraper.PNEOutcomeIndicatorsDownloader.generate_pandas_mapper(
    year=YEAR,
    compare='both',
)
outcome_frames = hospital_ids.parallel_map(outcome_mapper).to_list()

outcome_indicators = pd.concat(outcome_frames, axis=0, ignore_index=True)
outcome_indicators

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=349), Label(value='0 / 349'))), HB…

Unnamed: 0,hospital_code,year,indicator_id,indicator_type,description,population,cases,pct_value,adj_pct_value,reg_adj_RR,reg_p_value,prec_adj_RR,prec_p_value
0,01090401,2021,1.0,outcome,Infarto Miocardico Acuto: mortalita' a 30 giorni,197,17.0,8.63,7.58,0.98,0.931,1.03,0.931
1,01090401,2021,15.0,outcome,Scompenso cardiaco congestizio: mortalita' a 3...,86,10.0,11.63,11.33,1.01,0.973,1.4,0.493
2,01090401,2021,201.0,outcome,Scompenso cardiaco congestizio: riammissioni o...,73,12.0,16.44,15.39,1.14,0.619,1.32,0.541
3,01090401,2021,202.0,outcome,Colecistectomia laparoscopica: proporzione di ...,78,40.0,51.28,60.74,2.49,0.0,1.12,0.428
4,01090401,2021,205.0,outcome,Intervento chirurgico per TM prostata: riammis...,200,3.0,1.50,1.46,0.4,0.109,1.32,0.76
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38038,20090402,2021,662.0,outcome,Proporzione di nuovi interventi di resezione e...,245,29.0,11.84,7.82,1.39,0.081,0.9,0.674
38039,20090402,2021,556.0,outcome,Proporzione di interventi per tumore maligno d...,490,490.0,100.00,,,,,
38040,20090402,2021,,outcome,Proporzione di parti con taglio cesareo primario,2,0.0,0.00,,,,,
38041,20090402,2021,63.0,outcome,Intervento chirurgico per TM polmone: mortalit...,224,2.0,0.89,0.97,0.95,0.938,,


### Waiting time indicators

In [7]:
# Apply the waiting-time-indicator mapper for YEAR to every hospital id in
# parallel, then stack the results into one table with a fresh 0..n-1 index.
# NOTE(review): presumably the mapper returns one DataFrame per hospital id — confirm.
hospital_ids = hospital_code_legend.index.to_series()
wt_mapper = agenas_pne_scraper.PNEWaitingTimeIndicatorsDownloader.generate_pandas_mapper(year=YEAR)
wt_frames = hospital_ids.parallel_map(wt_mapper).to_list()

wt_indicators = pd.concat(wt_frames, axis=0, ignore_index=True)
wt_indicators

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=349), Label(value='0 / 349'))), HB…

Unnamed: 0,hospital_code,year,indicator_id,indicator_type,description,cases,intervention_pct,median,adj_median
0,01090401,2021,,wt,Frattura della Tibia e Perone: tempi di attesa...,16.0,100.0,1.0,
1,01090401,2021,80.0,wt,Intervento chirurgico per TM colon in laparosc...,32.0,100.0,6.0,6.0
2,01003001,2021,,wt,Frattura della Tibia e Perone: tempi di attesa...,20.0,100.0,2.0,
3,01003001,2021,,wt,Intervento chirurgico per TM colon in laparosc...,29.0,100.0,6.0,
4,01007901,2021,,wt,Frattura della Tibia e Perone: tempi di attesa...,19.0,100.0,4.0,
...,...,...,...,...,...,...,...,...,...
1085,20004501,2021,,wt,Intervento chirurgico per TM colon in laparosc...,5.0,100.0,8.0,
1086,20090401,2021,68.0,wt,Frattura della Tibia e Perone: tempi di attesa...,50.0,100.0,5.0,5.0
1087,20090401,2021,80.0,wt,Intervento chirurgico per TM colon in laparosc...,40.0,100.0,7.0,7.0
1088,20090402,2021,,wt,Frattura della Tibia e Perone: tempi di attesa...,1.0,100.0,5.0,


### Graphs

In [8]:
# Historical outcome series: apply the graph mapper to each distinct
# (hospital_code, indicator_id) pair, skipping pairs with a missing value, and
# stack the results into one table with a fresh index.
# NOTE(review): presumably the mapper returns one DataFrame per row — confirm.
outcome_pairs = (
    outcome_indicators[['hospital_code', 'indicator_id']]
    .drop_duplicates()
    .dropna(how='any')
)
outcome_graph_mapper = agenas_pne_scraper.PNEOutcomeGraphsDownloader.generate_pandas_mapper()
outcome_graph_frames = outcome_pairs.parallel_apply(outcome_graph_mapper, axis=1).to_list()

outcome_graphs_indicators = pd.concat(outcome_graph_frames, axis=0, ignore_index=True)
outcome_graphs_indicators

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=4760), Label(value='0 / 4760'))), …

Unnamed: 0,hospital_code,indicator_id,indicator_type,year,value,ci95_lower,ci95_upper
0,01090401,1,outcome,2015,8.77,5.83,12.64
1,01090401,1,outcome,2016,4.00,2.30,6.84
2,01090401,1,outcome,2017,4.82,3.47,8.66
3,01090401,1,outcome,2018,8.44,5.70,12.18
4,01090401,1,outcome,2019,8.73,5.23,11.57
...,...,...,...,...,...,...,...
111419,20090402,63,outcome,2017,0.00,0.00,6.76
111420,20090402,63,outcome,2018,1.67,0.23,7.08
111421,20090402,63,outcome,2019,1.46,0.25,7.66
111422,20090402,63,outcome,2020,0.00,0.00,4.69


In [9]:
# Historical volume series: apply the graph mapper to each distinct
# (hospital_code, indicator_id) pair, skipping pairs with a missing value, and
# stack the results into one table with a fresh index.
# NOTE(review): presumably the mapper returns one DataFrame per row — confirm.
volume_pairs = (
    volume_indicators[['hospital_code', 'indicator_id']]
    .drop_duplicates()
    .dropna(how='any')
)
volume_graph_mapper = agenas_pne_scraper.PNEVolumeGraphsDownloader.generate_pandas_mapper()
volume_graph_frames = volume_pairs.parallel_apply(volume_graph_mapper, axis=1).to_list()

volume_graphs_indicators = pd.concat(volume_graph_frames, axis=0, ignore_index=True)
volume_graphs_indicators

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=8773), Label(value='0 / 8773'))), …

Unnamed: 0,hospital_code,indicator_id,indicator_type,year,value
0,01090401,222,volume,2020,2
1,01090401,222,volume,2021,3
2,01090401,139,volume,2015,3
3,01090401,139,volume,2016,6
4,01090401,139,volume,2017,6
...,...,...,...,...,...
222072,20090402,405,volume,2017,2
222073,20090402,405,volume,2018,1
222074,20090402,405,volume,2019,3
222075,20090402,405,volume,2020,3


In [10]:
# Historical waiting-time series: apply the graph mapper to each distinct
# (hospital_code, indicator_id) pair, skipping pairs with a missing value, and
# stack the results into one table with a fresh index.
# NOTE(review): presumably the mapper returns one DataFrame per row — confirm.
wt_pairs = (
    wt_indicators[['hospital_code', 'indicator_id']]
    .drop_duplicates()
    .dropna(how='any')
)
wt_graph_mapper = agenas_pne_scraper.PNEWaitingTimeGraphsDownloader.generate_pandas_mapper()
wt_graph_frames = wt_pairs.parallel_apply(wt_graph_mapper, axis=1).to_list()

wt_graphs_indicators = pd.concat(wt_graph_frames, axis=0, ignore_index=True)
wt_graphs_indicators

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=64), Label(value='0 / 64'))), HBox…

Unnamed: 0,hospital_code,indicator_id,indicator_type,year,adj_median
0,01090401,80,wt,2015,12
1,01090401,80,wt,2016,9
2,01090401,80,wt,2017,8
3,01090401,80,wt,2018,7
4,01090401,80,wt,2019,7
...,...,...,...,...,...
1740,20090401,80,wt,2017,8
1741,20090401,80,wt,2018,8
1742,20090401,80,wt,2019,8
1743,20090401,80,wt,2020,10


### Merge indicator dfs

In [18]:
# Ensure the export directory exists; exist_ok=True makes re-running this cell a no-op.
os.makedirs(name='out', exist_ok=True)

In [19]:
# Build the single-year export by stacking the volume, outcome and waiting-time
# tables. The outcome and waiting-time tables name their column 'cases'; it is
# renamed to 'value' so that it lines up with the volume table's 'value' column.
# The rename is applied inline to temporary copies, so `outcome_indicators` and
# `wt_indicators` keep the column names shown by the cells that created them.
df = pd.concat(
    [
        volume_indicators,
        outcome_indicators.rename(columns={'cases': 'value'}),
        wt_indicators.rename(columns={'cases': 'value'}),
    ],
    axis=0,
)
# index=False: the stacked index only repeats each table's own row numbers.
df.to_csv(os.path.join('out', f'PNE_{YEAR}.csv'), index=False)

In [20]:
# Stack the three historical (graph) tables into one long table and export it.
# The tables do not share all columns, so pandas fills the missing ones with NaN.
historical_frames = [volume_graphs_indicators, outcome_graphs_indicators, wt_graphs_indicators]
df = pd.concat(historical_frames, axis=0)

historical_csv_path = os.path.join('out', 'PNE_historical.csv')
df.to_csv(historical_csv_path, index=False)

In [21]:
# Export the hospital table so the codes in the CSVs above can be mapped to names.
legend_csv_path = os.path.join('out', 'hospitals_legend.csv')
hospitals_df.to_csv(legend_csv_path, index=False)