In [7]:
import os
from datetime import date
from datetime import datetime, timedelta

import numpy as np
import pandas
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [8]:
# Query template for the COVID occurrence table; '{0}' is the inclusive lower
# bound and '{1}' the exclusive upper bound on date_occurrence. It is filled in
# with str.format() by get_occs_df.
sql_general = """SELECT gridid_mun, tipo_paciente, neumonia, intubado, uci, fecha_def FROM occurrence
WHERE covariable_id in (5, 2, 3, 7) and date_occurrence >= '{0}' and date_occurrence < '{1}' """

# Database credentials and location come from the environment; a missing
# variable raises KeyError here rather than later inside a query helper.
dbuser, dbpass, dbport, dbhost = (
    os.environ[env_name] for env_name in ('DBUSER', 'DBPASS', 'DBPORT', 'DBHOST')
)

In [9]:
def get_occs_df(initial_date, final_date):
    """Load occurrences from the ``epi_puma_covid19`` database as binary flags.

    Runs ``sql_general`` with ``initial_date`` as the inclusive lower bound and
    ``final_date`` as the exclusive upper bound on ``date_occurrence``.

    Parameters
    ----------
    initial_date, final_date
        Values interpolated into the SQL text with ``str.format``; they must
        render as date literals the database accepts. They are pasted into the
        statement verbatim, so only pass trusted values.

    Returns
    -------
    pandas.DataFrame
        Columns ``gridid_mun``, ``neumonia``, ``intubado``, ``uci``,
        ``hospitalizado``, ``fallecido``; every column except ``gridid_mun``
        is a 0/1 integer flag.
    """
    dbname = 'epi_puma_covid19'
    conn_string = 'postgresql+psycopg2://{dbuser}:{dbpass}@{dbhost}:{dbport}/{dbname}'\
        .format(dbuser=dbuser, dbpass=dbpass, dbhost=dbhost, dbport=dbport, dbname=dbname)
    engine = create_engine(conn_string)
    sql_statement = sql_general.format(initial_date, final_date)
    try:
        df_occs = pd.read_sql(sql_statement, engine)
    finally:
        # The engine is private to this call: release its pooled connections
        # instead of leaving them open until garbage collection.
        engine.dispose()

    # Vectorized comparisons replace the row-wise apply(lambda ...) calls.
    # A missing value compares unequal to every literal, so it yields 0 for the
    # "== literal" flags and 1 for ``fallecido`` (same as the element-wise test).
    return (
        df_occs
        .assign(
            hospitalizado=lambda d: d['tipo_paciente'].eq('HOSPITALIZADO').astype('int64'),
            fallecido=lambda d: d['fecha_def'].ne('9999-99-99').astype('int64'),
            neumonia=lambda d: d['neumonia'].eq('SI').astype('int64'),
            intubado=lambda d: d['intubado'].eq('SI').astype('int64'),
            uci=lambda d: d['uci'].eq('SI').astype('int64'),
        )
        .drop(columns=['tipo_paciente', 'fecha_def'])
    )

def get_occs_file_df(initial_date):
    """Load one occurrence CSV from ``../reports/`` as binary flags.

    Reads ``../reports/occurrences_<initial_date>.csv`` and keeps only the
    columns needed for the flags; any other column in the file is ignored.

    Parameters
    ----------
    initial_date
        Value whose ``str()`` form is the date part of the file name.

    Returns
    -------
    pandas.DataFrame
        Columns ``gridid_mun``, ``neumonia``, ``intubado``, ``uci``,
        ``hospitalizado``, ``fallecido``; every column except ``gridid_mun``
        is a 0/1 integer flag.

    Raises
    ------
    FileNotFoundError
        If the CSV for ``initial_date`` does not exist.
    KeyError
        If the CSV lacks one of the required columns.
    """
    source_columns = ['gridid_mun', 'tipo_paciente', 'neumonia', 'intubado', 'uci', 'fecha_def']
    df_raw = pd.read_csv('../reports/occurrences_' + str(initial_date) + '.csv')

    # assign() builds a new frame, so nothing is written onto the column-subset
    # slice (which triggers SettingWithCopyWarning on older pandas).
    # A missing value compares unequal to every literal, so it yields 0 for the
    # "== literal" flags and 1 for ``fallecido`` (same as the element-wise test).
    return (
        df_raw[source_columns]
        .assign(
            hospitalizado=lambda d: d['tipo_paciente'].eq('HOSPITALIZADO').astype('int64'),
            fallecido=lambda d: d['fecha_def'].ne('9999-99-99').astype('int64'),
            neumonia=lambda d: d['neumonia'].eq('SI').astype('int64'),
            intubado=lambda d: d['intubado'].eq('SI').astype('int64'),
            uci=lambda d: d['uci'].eq('SI').astype('int64'),
        )
        .drop(columns=['tipo_paciente', 'fecha_def'])
    )

def get_inegi_occ():
    """Load every row of ``occurrence`` from ``epi_puma_censo_inegi_2020``.

    Returns
    -------
    pandas.DataFrame
        Columns ``covariable_id`` and ``gridid_mun``, one row per record in
        the ``occurrence`` table.
    """
    dbname = 'epi_puma_censo_inegi_2020'
    conn_string = 'postgresql+psycopg2://{dbuser}:{dbpass}@{dbhost}:{dbport}/{dbname}'\
        .format(dbuser=dbuser, dbpass=dbpass, dbhost=dbhost, dbport=dbport, dbname=dbname)
    engine = create_engine(conn_string)
    sql_statement = """select  covariable_id, gridid_mun from occurrence;"""
    try:
        df_inegi = pd.read_sql(sql_statement, engine)
    finally:
        # The engine is private to this call: release its pooled connections
        # instead of leaving them open until garbage collection.
        engine.dispose()
    return df_inegi

def get_inegi_cov():
    """Load the ``covariable`` catalogue from ``epi_puma_censo_inegi_2020``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``variable`` (the table's ``name`` column) and
        ``value`` (the table's ``interval`` column).
    """
    dbname = 'epi_puma_censo_inegi_2020'
    conn_string = 'postgresql+psycopg2://{dbuser}:{dbpass}@{dbhost}:{dbport}/{dbname}'\
        .format(dbuser=dbuser, dbpass=dbpass, dbhost=dbhost, dbport=dbport, dbname=dbname)
    engine = create_engine(conn_string)
    sql_statement = """select id, name, interval from covariable;"""
    try:
        df_inegi = pd.read_sql(sql_statement, engine)
    finally:
        # The engine is private to this call: release its pooled connections
        # instead of leaving them open until garbage collection.
        engine.dispose()
    return df_inegi.rename(columns={'name': 'variable', 'interval': 'value'})

In [10]:
# Load the (covariable_id, gridid_mun) pairs from the INEGI census database
# once; the cells below reuse df_inegi. The trailing expression displays the
# frame's (rows, columns) shape as a quick sanity check.
df_inegi = get_inegi_occ()
df_inegi.shape

<IPython.core.display.Javascript object>

(545649, 2)

In [11]:
# Distinct gridid_mun values present in df_inegi, in ascending order.
# Only the one column is sorted; the resulting list is the same as sorting the
# whole frame by that column first.
grid_ids_sorted = df_inegi['gridid_mun'].sort_values()
cells = grid_ids_sorted.unique().tolist()
print(len(cells))

2469


In [12]:
# For every 30-day window starting on 2020-02-01 and every clinical target,
# write two CSV reports under ../reports/ (each one is skipped if it exists):
#   inegi_covariables-<target>-<window start>.csv : per-covariable counts/scores
#   inegi_occurrences-<target>-<window start>.csv : per-cell sum of those scores
today_date = date.today() + timedelta(days=-30)  # a window must start > 30 days ago
targets = ['HOSPITALIZADO', 'INTUBADO', 'UCI', 'NEUMONIA', 'FALLECIDO']
initial_date = date(2020, 2, 1)
alpha = 0.0005  # additive smoothing constant used in the score's log-ratio
reports_dir = '../reports/'

# Order of the statistic columns appended to the covariable catalogue.
stat_columns = ['Nx', 'Ncx', 'PCX', 'PC', 'Nc', 'N', 'epsilon',
                'Nc_', 'Nc_x', 'P_C', 'P_CX', 's0', 'score']

# Loop-invariant lookups, built once instead of re-scanning df_inegi for every
# covariable and every cell of every target/window.
cells_by_cov = df_inegi.groupby('covariable_id')['gridid_mun'].unique().to_dict()
covs_by_cell = df_inegi.groupby('gridid_mun')['covariable_id'].unique().to_dict()
df_cov_base = None  # covariable catalogue; fetched lazily, only if a report is missing

while initial_date < today_date:

    final_date = initial_date + timedelta(days=30)
    df_occs = None  # occurrences of this window; loaded lazily, shared by all targets

    for target in targets:

        report_suffix = target + '-' + str(initial_date) + '.csv'
        cov_report_path = reports_dir + 'inegi_covariables-' + report_suffix
        occ_report_path = reports_dir + 'inegi_occurrences-' + report_suffix

        if not os.path.exists(cov_report_path):

            print('inegi_covariables-' + report_suffix)

            if df_occs is None:
                df_occs = get_occs_file_df(initial_date)
            if df_cov_base is None:
                df_cov_base = get_inegi_cov()
                # Keep only covariables that occur in at least one cell.
                df_cov_base = df_cov_base[df_cov_base['id'].isin(df_inegi['covariable_id'].unique())]

            is_case = df_occs[target.lower()] == 1
            # Plain Python ints: a zero denominator must raise ZeroDivisionError
            # (numpy scalars would silently produce inf/nan instead).
            N = df_occs.shape[0]
            Nc = int(is_case.sum())
            Nc_ = N - Nc

            kept_rows = []   # positions in df_cov_base that received statistics
            stat_rows = []   # one tuple per kept covariable, ordered as stat_columns

            for row_pos, cov_id in enumerate(df_cov_base['id']):

                cells_x = cells_by_cov.get(cov_id)
                if cells_x is None:
                    continue
                in_x = df_occs['gridid_mun'].isin(cells_x)
                Nx = int(in_x.sum())
                if Nx == 0:
                    # No occurrence falls in this covariable's cells: leave it
                    # out of the report. (Skipping only the appends used to
                    # break the column assignment with a length mismatch.)
                    continue

                Ncx = int((in_x & is_case).sum())
                Nc_x = Nx - Ncx
                PCX = Ncx / Nx
                P_CX = Nc_x / Nx
                P_C = Nc_ / N
                PC = Nc / N

                try:
                    s0 = np.log(PC / P_C)
                    epsilon = (Nx * (PCX - PC)) / ((Nx * PC * (1 - PC)) ** 0.5)
                    score = np.log(((Ncx + alpha) / (Nc + 2 * alpha)) / ((Nc_x + alpha) / (Nc_ + 2 * alpha)))
                except (ZeroDivisionError, FloatingPointError) as e:
                    # Degenerate window (PC is 0 or 1): all three statistics
                    # are reported as 0.
                    print(str(e))
                    s0 = 0
                    epsilon = 0
                    score = 0

                kept_rows.append(row_pos)
                stat_rows.append((Nx, Ncx, PCX, PC, Nc, N, epsilon,
                                  Nc_, Nc_x, P_C, P_CX, s0, score))

            df_stats = pd.DataFrame(stat_rows, columns=stat_columns)
            df_cov = pd.concat(
                [df_cov_base.iloc[kept_rows].reset_index(drop=True), df_stats],
                axis=1,
            )
            df_cov.to_csv(cov_report_path, index=False)
            df_cov = None

        if not os.path.exists(occ_report_path):

            print('inegi_occurrences-' + report_suffix)
            # Always read the scores back from disk so the result is the same
            # whether the covariable report was just written or already existed.
            df_cov = pd.read_csv(cov_report_path)

            arr_score_occ = []
            for cell in cells:
                cov_ids = covs_by_cell.get(cell, [])
                # Sum of the scores of every covariable present in this cell.
                arr_score_occ.append(df_cov.loc[df_cov['id'].isin(cov_ids), 'score'].sum())

            df_occ = pd.DataFrame({'gridid_mun': cells, 'scores': arr_score_occ})
            df_occ.to_csv(occ_report_path, index=False)
            df_occ = None
            df_cov = None

        print('=========================================================')

    initial_date = final_date



