## **EXTRACT**

In [8]:
# Rode só uma vez
import requests
import os

### Fazendo requisição na API da prefeitura

In [9]:
arquivos_extracao = {
    "2024": "http://dados.recife.pe.gov.br/dataset/ce5168d4-d925-48f5-a193-03d4e0f587c7/resource/96f8a467-12b1-4340-b19c-281907fabaae/download/situacaofinal2024.csv",
    "2023": "http://dados.recife.pe.gov.br/dataset/ce5168d4-d925-48f5-a193-03d4e0f587c7/resource/854da2d7-c34b-457f-97b9-ba217d489621/download/situacaofinal2023.csv",
    "2022": "http://dados.recife.pe.gov.br/dataset/ce5168d4-d925-48f5-a193-03d4e0f587c7/resource/9e22fc25-716f-4454-8d95-998894b6ce01/download/situacaofinal2022.csv",
}


for ano, url in arquivos_extracao.items():
  try:
    nome_arquivo = f"situacaofinal{ano}.csv"
    response = requests.get(url)
    response.raise_for_status()

    # Open file in binary mode by adding a b.
    with open("/content/"+nome_arquivo, "wb") as f:
      f.write(response.content)
      print(f"Arquivo {nome_arquivo} baixado com sucesso")

  except Exception as e:
    print(f"Erro ao baixar o arquivo {ano}: {e}")
    continue


Erro ao baixar o arquivo 2024: [Errno 2] No such file or directory: '/content/situacaofinal2024.csv'
Erro ao baixar o arquivo 2023: [Errno 2] No such file or directory: '/content/situacaofinal2023.csv'
Erro ao baixar o arquivo 2022: [Errno 2] No such file or directory: '/content/situacaofinal2022.csv'


##**Estabelecer uma conexão com o dl que será utilizado**

In [10]:
# Rode só uma vez
!pip install psycopg2




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
url_db = "postgresql://postgres.jkgdzhpqywydnucbdfvp:postgres@aws-1-us-east-1.pooler.supabase.com:6543/postgres"

import psycopg2

try:
  conn = psycopg2.connect(url_db)
  cursor = conn.cursor()

  cursor.execute("SELECT version();")
  print(cursor.fetchone())

except Exception as e:
  print("Erro ao se conectar: ", e)

('PostgreSQL 17.6 on aarch64-unknown-linux-gnu, compiled by gcc (GCC) 13.2.0, 64-bit',)


## **Preparando o dl para a estrutura do Load**

In [14]:
sql = """

CREATE SCHEMA IF NOT EXISTS staging;

CREATE TABLE IF NOT EXISTS staging.alunos_raw (
    id SERIAL PRIMARY KEY,
    matricula VARCHAR(50),
    nome_aluno VARCHAR(255),
    data_nascimento DATE,
    genero VARCHAR(20),
    codigo_escola VARCHAR(50),
    nome_escola VARCHAR(255),
    regional VARCHAR(100),
    tipo_escola VARCHAR(50),
    ano_letivo INTEGER,
    periodo_letivo VARCHAR(50),
    data_inicio DATE,
    data_fim DATE,
    situacao_final VARCHAR(100),
    nota_final DECIMAL(5,2),
    faltas INTEGER,
    total_aulas INTEGER,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_staging_matricula ON staging.alunos_raw(matricula);
CREATE INDEX idx_staging_escola ON staging.alunos_raw(codigo_escola);
CREATE INDEX idx_staging_ano ON staging.alunos_raw(ano_letivo);
CREATE INDEX idx_staging_loaded ON staging.alunos_raw(loaded_at);

CREATE TABLE IF NOT EXISTS staging.load_metadata (
    id SERIAL PRIMARY KEY,
    load_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    source_file VARCHAR(255),
    records_loaded INTEGER,
    load_status VARCHAR(50),
    error_message TEXT,
    load_duration_seconds INTEGER
);

COMMENT ON TABLE staging.alunos_raw IS 'Tabela de staging para dados brutos de alunos';
COMMENT ON TABLE staging.load_metadata IS 'Metadados das cargas realizadas';
"""

try:
  cursor.execute(sql)
  conn.commit()
except Exception as e:
  print("Erro ao criar tabela: ", e)
  conn.rollback()

# **LOAD**

In [15]:
cursor.execute("DELETE FROM staging.alunos_raw")
conn.commit()

In [16]:
load_2024_sql = """
COPY staging.alunos_raw
FROM STDIN
WITH (FORMAT CSV, HEADER, DELIMITER ';')
"""
load_2023_2022_sql = """
COPY staging.alunos_raw
FROM STDIN
WITH (FORMAT CSV, HEADER, DELIMITER ',')
"""

situacaofinal2022 = "/content/situacaofinal2022.csv"
situacaofinal2023 = "/content/situacaofinal2023.csv"
situacaofinal2024 = "/content/situacaofinal2024.csv"

try:
  with open(file=situacaofinal2022, mode="r") as f:
    cursor.copy_expert(sql=load_2023_2022_sql, file=f)

  with open(file=situacaofinal2023, mode="r") as f:
    cursor.copy_expert(sql=load_2023_2022_sql, file=f)

  with open(file=situacaofinal2024, mode="r") as f:
    cursor.copy_expert(sql=load_2024_sql, file=f)

  conn.commit()
  print("Dados carregados com sucesso!")

except Exception as e:
  print("Erro ao carregar dados: ", e)
  conn.rollback()


Erro ao carregar dados:  [Errno 2] No such file or directory: '/content/situacaofinal2022.csv'


# **Fechamento de Conexão**

In [17]:
if 'cursor' in locals() and cursor is not None:
    cursor.close()
if 'conn' in locals() and conn is not None:
    conn.close()
    print("Conexão com o banco de dados fechada.")

Conexão com o banco de dados fechada.


#### Referências:
https://www.psycopg.org/docs/usage.html

https://www.postgresql.org/docs/current/sql-copy.html

https://book.pythontips.com/en/latest/open_function.html

https://requests.readthedocs.io/en/latest/user/quickstart/