In [1]:
import pandas as pd
import numpy as np
import datetime

from app.home import vups

In [19]:
COLUMNS = ['Municipio', 'Classificacao', 'DataDiagnostico', 'DataCadastro', 'DataEncerramento']
# --------- FILTRANDO DF PARA CIDADES DO ES ---------
filtro_es = ['AGUIA BRANCA', 'ALTO RIO NOVO', 'ARACRUZ', 'BAIXO GUANDU',
    'COLATINA', 'GOVERNADOR LINDENBERG', 'IBIRACU', 'JOAO NEIVA',
    'LINHARES', 'MANTENOPOLIS', 'MARILANDIA', 'PANCAS', 'RIO BANANAL',
    'SAO DOMINGOS DO NORTE', 'SAO GABRIEL DA PALHA',
    'SAO ROQUE DO CANAA', 'SOORETAMA', 'VILA VALERIO',
    'AFONSO CLAUDIO', 'BREJETUBA', 'CARIACICA', 'CONCEICAO DO CASTELO',
    'DOMINGOS MARTINS', 'FUNDAO', 'GUARAPARI', 'IBATIBA', 'ITAGUACU',
    'ITARANA', 'LARANJA DA TERRA', 'MARECHAL FLORIANO',
    'SANTA LEOPOLDINA', 'SANTA MARIA DE JETIBA', 'SANTA TERESA',
    'SERRA', 'VENDA NOVA DO IMIGRANTE', 'VIANA', 'VILA VELHA',
    'VITORIA', 'AGUA DOCE DO NORTE', 'BARRA DE SAO FRANCISCO',
    'BOA ESPERANCA', 'CONCEICAO DA BARRA', 'ECOPORANGA', 'JAGUARE',
    'MONTANHA', 'MUCURICI', 'NOVA VENECIA', 'PEDRO CANARIO',
    'PINHEIROS', 'PONTO BELO', 'SAO MATEUS', 'VILA PAVAO', 'ALEGRE',
    'ALFREDO CHAVES', 'ANCHIETA', 'APIACA', 'ATILIO VIVACQUA',
    'BOM JESUS DO NORTE', 'CACHOEIRO DE ITAPEMIRIM', 'CASTELO',
    'DIVINO DE SAO LOURENCO', 'DORES DO RIO PRETO', 'GUACUI',
    'IBITIRAMA', 'ICONHA', 'IRUPI', 'ITAPEMIRIM', 'IUNA',
    'JERONIMO MONTEIRO', 'MARATAIZES', 'MIMOSO DO SUL', 'MUNIZ FREIRE',
    'MUQUI', 'PIUMA', 'PRESIDENTE KENNEDY', 'RIO NOVO DO SUL',
    'SAO JOSE DO CALCADO', 'VARGEM ALTA']

# --------- BUSCANDO DF ---------
df = vups.datasets.microdados(columns=COLUMNS)
# df = datasets.microdados(columns=COLUMNS)
df = df[df['Municipio'].isin(filtro_es)]

# --------- CRIANDO DF_CALENDAR_NEW(CASOS NOVOS) E DF_CALENDAR_CLOSED(CASOS FECHADOS) ---------
#df_calendar_new -> filtrar pacientes com covid confirmados; groupby(Municipio, DataDiagnostico); contar ocorrencias
df_calendar_new = df[df['Classificacao']=='Confirmados'].groupby(['Municipio','DataDiagnostico'])['DataCadastro'].size().reset_index(name='count_new')

#renomendo coluna DataDiagnostico
df_calendar_new.rename(columns={'DataDiagnostico': 'date'}, inplace=True)

#transformando o dtype na coluna 'date' para datetime
df_calendar_new['date'] = df_calendar_new['date'].astype('datetime64[ns]')

#df_calendar_closed -> filtrar pacientes com covid confirmados; groupby(Municipio, DataEncerramento); contar ocorrencias
df_calendar_closed = df[df['Classificacao']=='Confirmados'].groupby(['Municipio','DataEncerramento'])['DataCadastro'].size().reset_index(name='count_closed')

#--- ATENCAO!!! --- DataEncerramento NAO esta sendo transformada automaticamente, por isso vamos forçar essa transformação aqui
#após corrigido, retirar esse pedaço de código
#transformando DataEncerramento em datatype
#df_calendar_closed['DataEncerramento'] = pd.to_datetime(df_calendar_closed['DataEncerramento'], format='%Y-%m-%d')

#transformando valores de casos fechados em negativo
df_calendar_closed['count_closed'] = df_calendar_closed['count_closed']*-1

#renomendo coluna DataEncerramento
df_calendar_closed.rename(columns={'DataEncerramento': 'date'}, inplace=True)

#transformando o dtype na coluna 'date' para datetime
df_calendar_closed['date'] = df_calendar_closed['date'].astype('datetime64[ns]')

# --------- MERGE ENTRE OS DOIS DFs CRIADOS ---------
df_calendar = pd.merge(df_calendar_new, df_calendar_closed, how='outer', left_on=['Municipio', 'date'], right_on=['Municipio', 'date'])

# --------- TRABLHANDO O NOVO DF ---------
#organizando por cidade/data
df_calendar = df_calendar.sort_values(["Municipio", "date"]).reset_index()

#preenchendo Nan com zero(0)
for i in ['count_new', 'count_closed']:
    df_calendar[i] = df_calendar[i].fillna(0)

#criando coluna acumulado por cidade
municipios = df_calendar['Municipio'].unique()
acum = []
for idx, i in enumerate(df_calendar['date']):
    if df_calendar['Municipio'].iloc[idx] == df_calendar['Municipio'].iloc[idx-1]:
        try:
            acum.append(acum[idx-1] + df_calendar['count_new'].iloc[idx] + df_calendar['count_closed'].iloc[idx])
        except:
            acum.append(df_calendar['count_new'].iloc[idx] + df_calendar['count_closed'].iloc[idx])
    else:
        acum.append(0 + df_calendar['count_new'].iloc[idx] + df_calendar['count_closed'].iloc[idx])

df_calendar['acum'] = acum

#criando colunas de dia/semana/dia_da_semana/mes/ano
df_calendar['day'] = [i.day for i in df_calendar['date']]
df_calendar['week'] = [i.week for i in df_calendar['date']]
df_calendar['weekday'] = [i.weekday() for i in df_calendar['date']]
df_calendar['month'] = [i.month for i in df_calendar['date']]
df_calendar['year'] = [i.year for i in df_calendar['date']]

In [15]:
# df_calendar = df_calendar[(df_calendar['Municipio']=='SERRA') & (df_calendar['year'].isin([2020, 2021]))]

In [31]:
df_calendar2 = df_calendar[(df_calendar['Municipio'].isin(filtro_es)) & (df_calendar['year'].isin([2020, 2021]))].reset_index()

In [32]:
df_calendar2.head()

Unnamed: 0,level_0,index,Municipio,date,count_new,count_closed,acum,day,week,weekday,month,year
0,12274,11287,AFONSO CLAUDIO,2020-01-01,0.0,0.0,0.0,1,1,2,1,2020
1,12275,684051,AFONSO CLAUDIO,2020-01-02,0.0,0.0,0.0,2,1,3,1,2020
2,12276,11288,AFONSO CLAUDIO,2020-01-03,0.0,0.0,0.0,3,1,4,1,2020
3,12277,684052,AFONSO CLAUDIO,2020-01-04,0.0,0.0,0.0,4,1,5,1,2020
4,12278,684053,AFONSO CLAUDIO,2020-01-05,0.0,0.0,0.0,5,1,6,1,2020


In [35]:
acum_ = df_calendar2.groupby(['Municipio', 'week'])['acum'].sum().reset_index()
acum_[acum_['Municipio'].isin(filtro_es)]

Unnamed: 0,Municipio,week,acum
1113,AFONSO CLAUDIO,1,2303.0
1114,AFONSO CLAUDIO,2,2081.0
1115,AFONSO CLAUDIO,3,2239.0
1116,AFONSO CLAUDIO,4,2183.0
1117,AFONSO CLAUDIO,5,2372.0
...,...,...,...
66722,VITORIA,49,30056.0
66723,VITORIA,50,30852.0
66724,VITORIA,51,28437.0
66725,VITORIA,52,26397.0


In [25]:
df_acum_por_semana = df_calendar.groupby(['Municipio', 'week'])['acum'].sum()
df_acum_por_semana

Municipio        week
ABADIA DE GOIAS  1       0.0
                 2       0.0
                 3       0.0
                 4       0.0
                 5       0.0
                        ... 
ZE DOCA          49      0.0
                 50      0.0
                 51      0.0
                 52      0.0
                 53      0.0
Name: acum, Length: 67416, dtype: float64