In [None]:
import pathlib
import sys

sys.path.append("..")
from util.extract import extract_df_from_csv_path


# Maps each Brazilian federative unit (UF) abbreviation to its macro-region.
# All 27 units must be present: a UF missing from this dict would be turned
# into a missing value (NaN) when the dict is used with `Series.map`.
region_mapping = {
    "AC": "Norte",
    "AL": "Nordeste",
    "AM": "Norte",
    "AP": "Norte",
    "BA": "Nordeste",
    "CE": "Nordeste",
    "DF": "Centro-Oeste",
    "ES": "Sudeste",
    "GO": "Centro-Oeste",
    "MA": "Nordeste",
    "MT": "Centro-Oeste",
    "MS": "Centro-Oeste",
    "MG": "Sudeste",
    "PA": "Norte",
    "PB": "Nordeste",
    "PE": "Nordeste",
    "PI": "Nordeste",
    "PR": "Sul",
    "RJ": "Sudeste",
    "RN": "Nordeste",
    # Rondônia was previously absent from the mapping.
    "RO": "Norte",
    "RR": "Norte",
    "RS": "Sul",
    "SC": "Sul",
    "SE": "Nordeste",
    "SP": "Sudeste",
    "TO": "Norte",
}
# Years whose first-edition regular-call CSV gets a REGIAO_IES column.
anos = list(range(2017, 2023))

# Path(".").parent is still ".", so this is the absolute working directory.
FILE_PATH = pathlib.Path(".").parent.absolute()

# Per-year folders live under "<parent of working directory>/transformado".
pasta_transformado = FILE_PATH.parent / "transformado"

# Output format shared by every year: pipe-separated, UTF-8, no index column.
opcoes_csv = {"sep": "|", "encoding": "utf-8", "index": False}

for ano in anos:
    csv_parent_path = pasta_transformado / str(ano)
    nome_arquivo_sem_regiao = f"chamada_regular_sisu_{ano}_1.csv"
    caminho_com_regiao = (
        csv_parent_path / f"chamada_regular_sisu_{ano}_1_com_regiao.csv"
    )

    df = extract_df_from_csv_path(csv_parent_path / nome_arquivo_sem_regiao)

    # Translate the institution's UF into its region via region_mapping.
    regioes = df["UF_IES"].map(region_mapping)
    df["REGIAO_IES"] = regioes

    # Written next to the input file, with a "_com_regiao" suffix.
    df.to_csv(caminho_com_regiao, **opcoes_csv)

In [1]:
import pathlib
import sys

sys.path.append("..")
from db.settings import PgConnector


# Years to load; each year has exactly one destination table.
anos = list(range(2017, 2023))

# year -> table name, in the same order as `anos`.
mapa_tabela_ano = {ano: f"regular_{ano}_1_com_regiao" for ano in anos}

# Table names alone, kept as a list in year order.
nomes_tabelas = list(mapa_tabela_ano.values())


# SQL template for one "com_regiao" table in the `public` schema.
# The only placeholder is {nome_tabela}; it appears twice (CREATE and ALTER)
# and is filled in with str.format. The statement is safe to re-run because
# it uses CREATE TABLE IF NOT EXISTS / ALTER TABLE IF EXISTS.
# The last column, "REGIAO_IES", is the only varchar(15); all other text
# columns are unbounded `text`. After creation the table owner is set to
# `postgres`.
# NOTE(review): the column order presumably mirrors the CSV that is loaded
# into the table - confirm against the loader.
create_query_template = """
CREATE TABLE IF NOT EXISTS public.{nome_tabela}
(
    "ANO" bigint,
    "EDICAO" bigint,
    "ETAPA" bigint,
    "DS_ETAPA" text COLLATE pg_catalog."default",
    "CODIGO_IES" bigint,
    "NOME_IES" text COLLATE pg_catalog."default",
    "SIGLA_IES" text COLLATE pg_catalog."default",
    "UF_IES" text COLLATE pg_catalog."default",
    "CODIGO_CAMPUS" bigint,
    "NOME_CAMPUS" text COLLATE pg_catalog."default",
    "UF_CAMPUS" text COLLATE pg_catalog."default",
    "MUNICIPIO_CAMPUS" text COLLATE pg_catalog."default",
    "CODIGO_CURSO" bigint,
    "NOME_CURSO" text COLLATE pg_catalog."default",
    "GRAU" text COLLATE pg_catalog."default",
    "TURNO" text COLLATE pg_catalog."default",
    "TIPO_MOD_CONCORRENCIA" text COLLATE pg_catalog."default",
    "MOD_CONCORRENCIA" text COLLATE pg_catalog."default",
    "PESO_L" double precision,
    "PESO_CH" double precision,
    "PESO_CN" double precision,
    "PESO_M" double precision,
    "PESO_R" double precision,
    "NOTA_MINIMA_L" double precision,
    "NOTA_MINIMA_CH" double precision,
    "NOTA_MINIMA_CN" double precision,
    "NOTA_MINIMA_M" double precision,
    "NOTA_MINIMA_R" double precision,
    "MEDIA_MINIMA" double precision,
    "CPF" text COLLATE pg_catalog."default",
    "INSCRICAO_ENEM" text COLLATE pg_catalog."default",
    "INSCRITO" text COLLATE pg_catalog."default",
    "SEXO" text COLLATE pg_catalog."default",
    "DATA_NASCIMENTO" text COLLATE pg_catalog."default",
    "UF_CANDIDATO" text COLLATE pg_catalog."default",
    "MUNICIPIO_CANDIDATO" text COLLATE pg_catalog."default",
    "OPCAO" bigint,
    "NOTA_L" double precision,
    "NOTA_CH" double precision,
    "NOTA_CN" double precision,
    "NOTA_M" double precision,
    "NOTA_R" double precision,
    "NOTA_L_COM_PESO" double precision,
    "NOTA_CH_COM_PESO" double precision,
    "NOTA_CN_COM_PESO" double precision,
    "NOTA_M_COM_PESO" double precision,
    "NOTA_R_COM_PESO" double precision,
    "NOTA_CANDIDATO" double precision,
    "NOTA_CORTE" double precision,
    "CLASSIFICACAO" bigint,
    "APROVADO" text COLLATE pg_catalog."default",
    "MATRICULA" text COLLATE pg_catalog."default",
    "REGIAO_IES" varchar(15) COLLATE pg_catalog."default"
)


TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.{nome_tabela}
    OWNER to postgres;
"""

# Path(".").parent is still ".", so this is the absolute working directory.
FILE_PATH = pathlib.Path(".").parent.absolute()

connector = PgConnector()
try:
    for ano, tabela in mapa_tabela_ano.items():
        # Create the destination table (the template uses IF NOT EXISTS).
        create_query_com_regiao = create_query_template.format(nome_tabela=tabela)
        connector.execute(create_query_com_regiao)

        # Load that year's region-enriched CSV into the table.
        csv_parent_path = FILE_PATH.parent / "transformado" / str(ano)
        csv_path = csv_parent_path / f"chamada_regular_sisu_{ano}_1_com_regiao.csv"
        print("csv_path: ", csv_path, "\ntabela: ", tabela)
        connector.load_csv_pgsql(csv_path=csv_path, nome_tabela=tabela)
finally:
    # Close the connection even if a create/load step raised, so a failure
    # part-way through the years does not leak it.
    connector.close_connection()




CREATE TABLE IF NOT EXISTS public.regular_2017_1_com_regiao
(
    "ANO" bigint,
    "EDICAO" bigint,
    "ETAPA" bigint,
    "DS_ETAPA" text COLLATE pg_catalog."default",
    "CODIGO_IES" bigint,
    "NOME_IES" text COLLATE pg_catalog."default",
    "SIGLA_IES" text COLLATE pg_catalog."default",
    "UF_IES" text COLLATE pg_catalog."default",
    "CODIGO_CAMPUS" bigint,
    "NOME_CAMPUS" text COLLATE pg_catalog."default",
    "UF_CAMPUS" text COLLATE pg_catalog."default",
    "MUNICIPIO_CAMPUS" text COLLATE pg_catalog."default",
    "CODIGO_CURSO" bigint,
    "NOME_CURSO" text COLLATE pg_catalog."default",
    "GRAU" text COLLATE pg_catalog."default",
    "TURNO" text COLLATE pg_catalog."default",
    "TIPO_MOD_CONCORRENCIA" text COLLATE pg_catalog."default",
    "MOD_CONCORRENCIA" text COLLATE pg_catalog."default",
    "PESO_L" double precision,
    "PESO_CH" double precision,
    "PESO_CN" double precision,
    "PESO_M" double precision,
    "PESO_R" double precision,
    "NOTA_MINI