In [103]:
import pandas as pd
import numpy as np
from ast import literal_eval

In [104]:
df_city = pd.read_csv('./data/deaths_by_city.csv', index_col=0)
df_born_region = pd.read_csv('./data/born_deaths_region.csv', index_col=0)

In [105]:
df_born_region = df_born_region.sum(axis=1).reset_index(name='accumulation')
accumulation_values = df_born_region.set_index('index')['accumulation'].reindex(df_city.columns, fill_value=np.nan).values

In [106]:
df_city.loc['0 a 4 anos'] = accumulation_values

In [107]:
# df_new_born = df_city.loc['0 a 4 anos']
# df_young = df_city.loc['5 a 9 anos':'15 a 19 anos']
# df_adult = df_city.loc['20 a 29 anos':'50 a 59 anos']
# df_elderly = df_city.loc['60 a 69 anos':]

In [108]:
def get_dataframe(df_name: str) -> pd.DataFrame():
    return pd.read_csv(f'./data/valid/{df_name}.csv')

In [109]:
df_group_age = get_dataframe('df_group_age')
df_group_age.shape

(187, 10)

In [110]:
df_population = get_dataframe('df_population')
df_population.shape

(185, 2)

In [111]:
df_merged = pd.merge(df_population, df_group_age, on='municipio', how='left')
df_merged.head()

Unnamed: 0,municipio,populacao,5 a 9 anos,10 a 14 anos,15 a 19 anos,20 a 29 anos,30 a 39 anos,40 a 49 anos,50 a 59 anos,60 a 69 anos,70 a 74 anos
0,abreu e lima,100699,1.0,1.0,8.0,33.0,41.0,68.0,156.0,176.0,114.0
1,afogados da ingazeira,37545,,1.0,4.0,8.0,12.0,26.0,50.0,43.0,42.0
2,afranio,19978,1.0,,3.0,5.0,8.0,10.0,9.0,17.0,13.0
3,agrestina,25240,,,5.0,8.0,20.0,32.0,15.0,36.0,25.0
4,agua preta,37385,1.0,,2.0,11.0,12.0,26.0,23.0,39.0,30.0


In [112]:
df_micro_region_map = get_dataframe('df_micro_region_map')
df_micro_region_map.head()

Unnamed: 0,microrregiao,municipios
0,alto capibaribe,"['casinhas', 'frei miguelinho', 'santa cruz do..."
1,araripina,"['araripina', 'bodoco', 'exu', 'granito', 'ipu..."
2,brejo pernambucano,"['agrestina', 'altinho', 'barra de guabiraba',..."
3,garanhuns,"['angelim', 'bom conselho', 'brejao', 'caetes'..."
4,itamaraca,"['aracoiaba', 'igarassu', 'ilha de itamaraca',..."


In [113]:
df_micro_region = pd.DataFrame(columns=['municipio', 'microrregiao'])

for index, row in df_micro_region_map.iterrows():
    cities = literal_eval(row['municipios'])
    microregion = row['microrregiao']
    
    for city in cities:
        df_micro_region = df_micro_region.append({'municipio': city, 'microrregiao': microregion}, ignore_index=True)


In [114]:
df_micro_region.head()

Unnamed: 0,municipio,microrregiao
0,casinhas,alto capibaribe
1,frei miguelinho,alto capibaribe
2,santa cruz do capibaribe,alto capibaribe
3,santa maria do cambuca,alto capibaribe
4,surubim,alto capibaribe


In [115]:
df_micro_region.shape

(181, 2)

In [116]:
df_merged = pd.merge(df_merged, df_micro_region, on='municipio', how='left')
df_merged.head()

Unnamed: 0,municipio,populacao,5 a 9 anos,10 a 14 anos,15 a 19 anos,20 a 29 anos,30 a 39 anos,40 a 49 anos,50 a 59 anos,60 a 69 anos,70 a 74 anos,microrregiao
0,abreu e lima,100699,1.0,1.0,8.0,33.0,41.0,68.0,156.0,176.0,114.0,recife
1,afogados da ingazeira,37545,,1.0,4.0,8.0,12.0,26.0,50.0,43.0,42.0,pajeu
2,afranio,19978,1.0,,3.0,5.0,8.0,10.0,9.0,17.0,13.0,petrolina
3,agrestina,25240,,,5.0,8.0,20.0,32.0,15.0,36.0,25.0,brejo pernambucano
4,agua preta,37385,1.0,,2.0,11.0,12.0,26.0,23.0,39.0,30.0,mata meridional pernambucana


In [117]:
df_health_centers = get_dataframe('df_health_centers')
df_health_centers.head()

Unnamed: 0,municipio,posto de saude,centro de saude/unidade basica,unidade mista
0,abreu e lima,4.0,26.0,
1,afogados da ingazeira,,16.0,
2,afranio,1.0,12.0,
3,agrestina,16.0,2.0,
4,agua preta,,15.0,1.0


In [118]:
df_merged = pd.merge(df_merged, df_health_centers, on='municipio', how='left')
df_merged.head()

Unnamed: 0,municipio,populacao,5 a 9 anos,10 a 14 anos,15 a 19 anos,20 a 29 anos,30 a 39 anos,40 a 49 anos,50 a 59 anos,60 a 69 anos,70 a 74 anos,microrregiao,posto de saude,centro de saude/unidade basica,unidade mista
0,abreu e lima,100699,1.0,1.0,8.0,33.0,41.0,68.0,156.0,176.0,114.0,recife,4.0,26.0,
1,afogados da ingazeira,37545,,1.0,4.0,8.0,12.0,26.0,50.0,43.0,42.0,pajeu,,16.0,
2,afranio,19978,1.0,,3.0,5.0,8.0,10.0,9.0,17.0,13.0,petrolina,1.0,12.0,
3,agrestina,25240,,,5.0,8.0,20.0,32.0,15.0,36.0,25.0,brejo pernambucano,16.0,2.0,
4,agua preta,37385,1.0,,2.0,11.0,12.0,26.0,23.0,39.0,30.0,mata meridional pernambucana,,15.0,1.0


In [119]:
df_idh = get_dataframe('df_idh')
df_idh.head()

Unnamed: 0,municipio,idh
0,fernandode noronha,0.788
1,recife,0.772
2,olinda,0.735
3,paulista,0.732
4,jaboataodos guararapes,0.717


In [120]:
df_merged = pd.merge(df_merged, df_idh, on='municipio', how='left')
df_merged.head()

Unnamed: 0,municipio,populacao,5 a 9 anos,10 a 14 anos,15 a 19 anos,20 a 29 anos,30 a 39 anos,40 a 49 anos,50 a 59 anos,60 a 69 anos,70 a 74 anos,microrregiao,posto de saude,centro de saude/unidade basica,unidade mista,idh
0,abreu e lima,100699,1.0,1.0,8.0,33.0,41.0,68.0,156.0,176.0,114.0,recife,4.0,26.0,,
1,afogados da ingazeira,37545,,1.0,4.0,8.0,12.0,26.0,50.0,43.0,42.0,pajeu,,16.0,,
2,afranio,19978,1.0,,3.0,5.0,8.0,10.0,9.0,17.0,13.0,petrolina,1.0,12.0,,0.588
3,agrestina,25240,,,5.0,8.0,20.0,32.0,15.0,36.0,25.0,brejo pernambucano,16.0,2.0,,0.592
4,agua preta,37385,1.0,,2.0,11.0,12.0,26.0,23.0,39.0,30.0,mata meridional pernambucana,,15.0,1.0,


In [121]:
df_merged.to_csv('./data/valid/df_merged.csv', index=False)