In [1]:
# Algumas bibliotecas necessárias 
#!pip install psycopg[binary]

In [1]:
# Pull the apache/age image. No tag is given, so Docker resolves it to `latest`.
!docker pull apache/age

Using default tag: latest
latest: Pulling from apache/age
Digest: sha256:9c88caf778914db658f7da8e54ba94c767aed03270b7bce9f7880efd42bd13fd
Status: Image is up to date for apache/age:latest
docker.io/apache/age:latest


# 1. Subindo o container

In [2]:
# Tear down whatever is left from a previous run, then bring the compose stack
# back up in detached mode (-d), forcing the containers to be recreated.
!docker compose down
!docker compose up -d --force-recreate

 Container postgres-age  Stopping
 Container postgres-age  Stopped
 Container postgres-age  Removing
 Container postgres-age  Removed
 Network atividade9_default  Removing
 Network atividade9_default  Removed
 Network atividade9_default  Creating
 Network atividade9_default  Created
 Container postgres-age  Creating
 Container postgres-age  Created
 Container postgres-age  Starting
 Container postgres-age  Started


In [3]:
# Check that the container was created (lists the running containers).
!docker ps

CONTAINER ID   IMAGE               COMMAND                  CREATED         STATUS         PORTS                    NAMES
a59afd4a9c63   apache/age:latest   "docker-entrypoint.s…"   4 seconds ago   Up 3 seconds   0.0.0.0:5433->5432/tcp   postgres-age


# 2. Conectando ao banco

In [10]:
import os
import time

import psycopg

# Connection settings. Every default is the value this notebook originally
# hard-coded; each one can be overridden through an environment variable so
# that real credentials never have to be written into the notebook.
DB_SETTINGS = {
    "host": os.environ.get("OPENFLIGHTS_DB_HOST", "localhost"),
    "port": int(os.environ.get("OPENFLIGHTS_DB_PORT", "5433")),
    "user": os.environ.get("OPENFLIGHTS_DB_USER", "postgres"),
    "password": os.environ.get("OPENFLIGHTS_DB_PASSWORD", "postgres"),
    "dbname": os.environ.get("OPENFLIGHTS_DB_NAME", "openflights"),
}

# `docker compose up -d` returns before the server inside the container is
# necessarily accepting connections, so on a fresh "Run All" the first attempts
# may be refused. Retry for a bounded time instead of failing immediately.
CONNECT_MAX_ATTEMPTS = 30
CONNECT_RETRY_DELAY_S = 1.0

for attempt in range(1, CONNECT_MAX_ATTEMPTS + 1):
    try:
        conn = psycopg.connect(**DB_SETTINGS)
        break
    except psycopg.OperationalError:
        # Out of attempts: surface the driver's own error to the reader.
        if attempt == CONNECT_MAX_ATTEMPTS:
            raise
        time.sleep(CONNECT_RETRY_DELAY_S)

# Single cursor shared by the rest of the notebook.
cur = conn.cursor()

# Smoke test: print the server version to confirm the connection works.
cur.execute("SELECT version();")
print(cur.fetchone())

('PostgreSQL 16.10 (Debian 16.10-1.pgdg13+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 14.2.0-19) 14.2.0, 64-bit',)


# 3. Criando as tabelas SQL

In [11]:
# Airports staging table: drop it if present, recreate it, then bulk-load it
# from the CSV file.
create_command_airports = """
    DROP TABLE IF EXISTS airports;

    CREATE TABLE airports(
        airport_id            INTEGER,            -- ID único
        name                  TEXT,               -- Nome do aeroporto
        city                  TEXT,               -- Cidade
        country               TEXT,               -- País
        iata                  TEXT,               -- Código IATA
        icao                  TEXT,               -- Código ICAO
        latitude              DOUBLE PRECISION,   -- Lat
        longitude             DOUBLE PRECISION,   -- Long
        height                INTEGER,            -- Altitude em pés
        timezone              DOUBLE PRECISION,   -- Offset UTC
        dst                   TEXT,               -- Tipo DST
        tz_database_timezone  TEXT,               -- Timezone Olson
        type                  TEXT,               -- airport/station/etc
        source                TEXT                -- Origem do dado
    );
"""

# COPY ... FROM '<file>' makes the PostgreSQL server read the file, so
# /data/airports.csv must exist on the server side (presumably a volume mounted
# by the compose file -- confirm). The file has no header row, and the Python
# literal '\\N' reaches SQL as \N, the marker loaded as NULL.
copy_command_airports = """
COPY airports
FROM '/data/airports.csv'
WITH (FORMAT csv, HEADER false, NULL '\\N');
"""

# Discard any transaction left open (or failed) by a previous cell, then run
# the DDL and the load and commit them together.
conn.rollback()
cur.execute(create_command_airports)
cur.execute(copy_command_airports)
conn.commit()

In [12]:
# Routes staging table: drop it if present, recreate it, then bulk-load it
# from the CSV file. (The IF NOT EXISTS on the CREATE is redundant right after
# the DROP, but harmless.)
create_command_routes = """
    DROP TABLE IF EXISTS routes;

    CREATE TABLE IF NOT EXISTS routes (
        company                 TEXT,    -- Código da companhia (IATA/ICAO)
        airline_id              INTEGER, -- ID da companhia no OpenFlights
        source_airport          TEXT,    -- Aeroporto de origem (IATA/ICAO)
        source_airport_id       INTEGER, -- ID OpenFlights do aeroporto de origem
        destination_airport     TEXT,    -- Aeroporto de destino (IATA/ICAO)
        destination_airport_id  INTEGER, -- ID OpenFlights do aeroporto de destino
        codeshare               TEXT,    -- 'Y' para codeshare
        stops                   INTEGER, -- Número de escalas
        equipment               TEXT     -- Tipos de aviões, separados por espaço
    );
"""

# COPY ... FROM '<file>' makes the PostgreSQL server read the file, so
# /data/routes.csv must exist on the server side (presumably a volume mounted
# by the compose file -- confirm). The file has no header row, and the Python
# literal '\\N' reaches SQL as \N, the marker loaded as NULL.
copy_command_routes = """
COPY routes
FROM '/data/routes.csv'
WITH (FORMAT csv, HEADER false, NULL '\\N');
"""

# Discard any transaction left open (or failed) by a previous cell, then run
# the DDL and the load and commit them together.
conn.rollback()
cur.execute(create_command_routes)
cur.execute(copy_command_routes)
conn.commit()

In [13]:
# Start from a clean transaction state.
conn.rollback()

# Drop the AGE extension (CASCADE also drops objects that depend on it) and
# create it again so the notebook starts from a fresh extension.
cur.execute("DROP EXTENSION IF EXISTS age CASCADE;")
cur.execute("CREATE EXTENSION age;")
# Load the AGE library into this session and put ag_catalog first in the
# search path so AGE objects can be referenced without schema qualification.
cur.execute("LOAD 'age';")
cur.execute("SET search_path = ag_catalog, \"$user\", public;")
conn.commit()

print("Extensão AGE recriada.")

Extensão AGE recriada.


In [14]:
# Drop the graph if it already exists, then create it again.
conn.rollback()
# The anonymous DO block looks the graph up in ag_catalog.ag_graph and only
# calls drop_graph when it is registered there. The second argument `true` is
# presumably the cascade flag -- confirm against the AGE documentation.
# drop_graph / create_graph are referenced unqualified, so this cell relies on
# ag_catalog being on the session's search_path.
cur.execute("""
DO $$
BEGIN
    IF EXISTS (
        SELECT 1 FROM ag_catalog.ag_graph WHERE name = 'openflights_graph'
    ) THEN
        PERFORM drop_graph('openflights_graph', true);
    END IF;
END$$;
""")

cur.execute("SELECT create_graph('openflights_graph');")
conn.commit()

print("Grafo openflights_graph criado.")


Grafo openflights_graph criado.


In [15]:
# Reorder the session search_path so "$user" and public are searched before
# ag_catalog (ag_catalog stays on the path, now last).
# NOTE(review): presumably done so unqualified names such as `airports` and
# `routes` resolve to the public schema first -- confirm.
conn.rollback()
cur.execute("""
    SET search_path = "$user", public, ag_catalog;
""")
conn.commit()

print("search_path corrigido.")


search_path corrigido.


In [16]:
# Funções utilitárias
import json
from math import ceil

def cy_escape(s):
    """
    Render a value as a single-quoted Cypher string literal.

    ``None`` becomes the bare literal ``null``. Any other value is converted
    with ``str()``; every backslash is doubled, every single quote is prefixed
    with a backslash, and the result is wrapped in single quotes.
    """
    if s is None:
        return 'null'

    # One pass over the characters: mapping each special character directly is
    # equivalent to replacing backslashes first and quotes second.
    escapes = {ord("\\"): "\\\\", ord("'"): "\\'"}
    body = str(s).translate(escapes)
    return "'" + body + "'"

import json

def chunked(seq, size):
    """Lazily yield consecutive slices of ``seq``, each holding at most ``size`` items."""
    offsets = range(0, len(seq), size)
    yield from (seq[lo:lo + size] for lo in offsets)

def run_cypher(cypher_text, params=None):
    """
    Execute a Cypher statement on ``openflights_graph`` via AGE's ``cypher()``.

    Args:
        cypher_text: Cypher source. It is pasted between ``$$`` dollar quotes,
            so it must not itself contain ``$$``.
        params: Optional dict of Cypher parameters, referenced as ``$name``
            inside ``cypher_text``. It is serialised with ``json.dumps`` and
            bound as the third argument of ``cypher()``.

    Returns:
        The rows from ``cur.fetchall()``; each row has the single
        ``result agtype`` column declared in the wrapping SELECT.

    Raises:
        psycopg.Error: any error raised by the driver while executing or
            fetching. The connection is rolled back before the error is
            re-raised, so the next statement is not rejected because the
            transaction is still in a failed state.
    """
    if params is None:
        # No arguments are handed to execute(), so `%` in the text is sent as is.
        q = f"""
            SELECT * FROM cypher(
                'openflights_graph',
                $${cypher_text}$$
            ) AS (result agtype);
        """
        args = None
    else:
        # With arguments, psycopg scans the query for `%` placeholders; a
        # literal `%` in the Cypher text (e.g. the modulo operator) must be
        # doubled or the driver would misread it.
        escaped_text = cypher_text.replace("%", "%%")
        q = f"""
            SELECT * FROM cypher(
                'openflights_graph',
                $${escaped_text}$$,
                %s
            ) AS (result agtype);
        """
        args = (json.dumps(params),)

    # Errors surface from execute() as well as fetchall(), so both are guarded.
    try:
        cur.execute(q, args)
        return cur.fetchall()
    except psycopg.Error:
        conn.rollback()
        raise


# 4. Criando o grafo

In [17]:
conn.rollback()

# Read every airport from the staging table.
cur.execute("""
    SELECT airport_id, name, city, country, iata, icao,
           latitude, longitude, height, timezone, dst,
           tz_database_timezone, type, source
    FROM airports;
""")
rows = cur.fetchall()

print("Total airports:", len(rows))

batch_size = 1000

# Vertex property names, in the same order as the SELECT list above.
airport_fields = (
    "airport_id", "name", "city", "country", "iata", "icao",
    "latitude", "longitude", "height", "timezone", "dst",
    "tz_database_timezone", "type", "source",
)

# One Airport vertex is created per element of $rows; the element's keys
# become the vertex properties.
cypher = """
        UNWIND $rows AS r
        CREATE (a:Airport)
        SET a = r
        RETURN count(a)
    """

# Send the airports in batches so each statement carries a bounded payload.
for batch in chunked(rows, batch_size):
    param_rows = [dict(zip(airport_fields, record)) for record in batch]
    out = run_cypher(cypher, params={"rows": param_rows})

conn.commit()
print("Inserção de Airports concluída.")

Total airports: 7698
Inserção de Airports concluída.


In [18]:
# Cria tabela de mapeamento de ids do aeroporto e nó
# Torna a criação das arestas mais eficiente
# Mapping table from the OpenFlights airport id to the graph node id.
# It is (re)created empty here and filled by a later cell; the author's stated
# purpose is to make edge creation more efficient.
conn.rollback()

cur.execute("""
    DROP TABLE IF EXISTS airport_node_map;

    CREATE TABLE airport_node_map (
        airport_id INTEGER PRIMARY KEY,
        node_id BIGINT
    );
""")

conn.commit()
print("Tabela airport_node_map criada.")


Tabela airport_node_map criada.


In [19]:
# Populando a tabela de node map
import json

conn.rollback()

# Fetch (airport_id property, internal node id) for every Airport vertex.
cur.execute("""
    SELECT airport_id, node_id FROM cypher(
        'openflights_graph',
        $$ MATCH (a:Airport) RETURN a.airport_id AS airport_id, id(a) AS node_id $$
    ) AS (airport_id agtype, node_id agtype);
""")
rows = cur.fetchall()

print("Total nodes:", len(rows))

# Both columns come back as agtype; decode each one from its JSON text form
# into a plain Python value before inserting.
parsed_rows = [
    (json.loads(str(airport_id_raw)), json.loads(str(node_id_raw)))
    for airport_id_raw, node_id_raw in rows
]

# Store the (airport id, node id) pairs.
cur.executemany(
    "INSERT INTO airport_node_map (airport_id, node_id) VALUES (%s, %s)",
    parsed_rows
)
conn.commit()

print("Tabela airport_node_map populada.")



Total nodes: 7698
Tabela airport_node_map populada.


In [20]:
# Join nos ids dos nós e nos ids dos aeroportos
# Resolve each route's source and destination airport ids to graph node ids.
# Both joins are inner joins, so a route whose source or destination id has no
# row in airport_node_map (including a NULL id) is left out of the result.
conn.rollback()
cur.execute("""
    SELECT r.company, r.airline_id,
           src.node_id AS src_node,
           dst.node_id AS dst_node,
           r.codeshare, r.stops, r.equipment
    FROM routes r
    JOIN airport_node_map src ON src.airport_id = r.source_airport_id
    JOIN airport_node_map dst ON dst.airport_id = r.destination_airport_id;
""")

# Kept in memory; the edge-creation cell iterates over route_rows.
route_rows = cur.fetchall()
print("Total rotas com nodes:", len(route_rows))


Total rotas com nodes: 66771


In [21]:
batch_size = 2000

# Parameter keys, in the same column order as the rows held in route_rows.
route_fields = (
    "company", "airline_id", "src", "dst", "codeshare", "stops", "equipment",
)

# For every element of $rows: look both endpoints up by internal node id and
# create a ROUTE edge between them carrying the route attributes.
cypher = """
        UNWIND $rows AS r
        MATCH (src) WHERE id(src) = r.src
        MATCH (dst) WHERE id(dst) = r.dst
        CREATE (src)-[e:ROUTE]->(dst)
        SET e = {
            company: r.company,
            airline_id: r.airline_id,
            codeshare: r.codeshare,
            stops: r.stops,
            equipment: r.equipment
        }
        RETURN count(e)
    """

# Send the routes in batches so each statement carries a bounded payload.
for batch in chunked(route_rows, batch_size):
    param_rows = [dict(zip(route_fields, record)) for record in batch]
    out = run_cypher(cypher, params={"rows": param_rows})

conn.commit()
print("Inserção de rotas concluída!")


Inserção de rotas concluída!


In [22]:
# Sanity check on the loaded graph: list up to 20 ROUTE edges that leave an
# Airport whose country is 'Brazil', as (origin name, destination name) pairs.
conn.rollback()
cur.execute("""
    SELECT origem, destino FROM cypher(
        'openflights_graph',
        $$
            MATCH (br:Airport {country:'Brazil'})-[r:ROUTE]->(dest)
            RETURN br.name AS origem, dest.name AS destino
            LIMIT 20
        $$
    ) AS (origem text, destino text);
""")

rows = cur.fetchall()
print(rows)


[('Pampulha - Carlos Drummond de Andrade Airport', 'Leite Lopes Airport'), ('Presidente Juscelino Kubistschek International Airport', 'Leite Lopes Airport'), ('Presidente Juscelino Kubistschek International Airport', 'Presidente João Batista Figueiredo Airport'), ('Presidente Juscelino Kubistschek International Airport', 'Barreiras Airport'), ('Cascavel Airport', 'Guarulhos - Governador André Franco Montoro International Airport'), ('Campo Grande Airport', 'Dourados Airport'), ('Afonso Pena Airport', 'Carrasco International /General C L Berisso Airport'), ('Marechal Rondon Airport', 'Presidente João Batista Figueiredo Airport'), ('Marechal Rondon Airport', 'Santa Genoveva Airport'), ('Rio Galeão – Tom Jobim International Airport', 'Leite Lopes Airport'), ('Rio Galeão – Tom Jobim International Airport', 'Carrasco International /General C L Berisso Airport'), ('Santa Genoveva Airport', 'Ten. Cel. Aviador César Bombonato Airport'), ('Santa Genoveva Airport', 'Leite Lopes Airport'), ('Sant

In [34]:
import json
import re

# Matches either a complete JSON string literal (group "string") or an agtype
# type annotation such as ::vertex, ::edge or ::path. Matching strings as a
# unit lets the substitution skip any "::" that appears inside a value.
_AGTYPE_TOKEN = re.compile(r'(?P<string>"(?:[^"\\]|\\.)*")|::[a-zA-Z_]+')


def parse_agtype(value: str):
    """
    Convert the text form of an agtype value into plain Python objects.

    Type annotations (``::vertex``, ``::edge``, ``::path``, ...) are stripped
    and the remaining text is decoded with ``json.loads``. Annotations are only
    removed outside string literals, so a property value that happens to
    contain ``::word`` is preserved.

    Args:
        value: agtype text as returned by the driver, or ``None`` for SQL NULL.

    Returns:
        The decoded value (dict, list, str, number, ...), or ``None`` when
        ``value`` is ``None``.

    Raises:
        json.JSONDecodeError: if the cleaned text is not valid JSON.
    """
    if value is None:
        return None

    # Keep string literals untouched; drop everything else the pattern matched.
    cleaned = _AGTYPE_TOKEN.sub(lambda m: m.group("string") or "", value)

    return json.loads(cleaned)


In [35]:
# Query do exercício
# Exercise query: paths of 1 to 3 ROUTE hops from the São Paulo airports to
# Brasília, ordered by hop count and limited to 50 results.
conn.rollback()
# São Paulo airports (GRU, CGH, VCP)
src_iata = ["GRU", "CGH", "VCP"]
dst_iata = "BSB"  # Brasília

# The statement is built with an f-string: `{{` / `}}` produce literal braces
# for the Cypher maps, json.dumps renders the Python list as a double-quoted
# list literal, and dst_iata is pasted between double quotes.
# NOTE(review): the values are interpolated into the query text rather than
# bound as parameters; that is only safe while they stay hard-coded as above.
cypher_query = f"""
    SELECT * FROM cypher(
        'openflights_graph',
        $$
        MATCH (src:Airport)
        WHERE src.iata IN {json.dumps(src_iata)}

        MATCH (dst:Airport {{iata: "{dst_iata}"}})

        MATCH p = (src)-[:ROUTE*1..3]->(dst)
        RETURN {{
            airports: nodes(p),
            legs: relationships(p),
            hops: length(p)
        }} AS result
        ORDER BY length(p)
        LIMIT 50
        $$
    ) AS (result agtype);
"""

cur.execute(cypher_query)
# Each row holds one agtype map with the keys airports, legs and hops.
rows = cur.fetchall()



In [36]:
# Pretty-print every result: the first column of each row holds the agtype
# text, which is decoded and then re-serialised with indentation.
print("Rotas:")
for record in rows:
    print(json.dumps(parse_agtype(record[0]), indent=4))

Rotas:
{
    "hops": 1,
    "legs": [
        {
            "id": 1125899906871295,
            "label": "ROUTE",
            "end_id": 844424930134373,
            "start_id": 844424930134455,
            "properties": {
                "stops": 0,
                "company": "G3",
                "codeshare": "",
                "equipment": "738 73G",
                "airline_id": 1790
            }
        }
    ],
    "airports": [
        {
            "id": 844424930134455,
            "label": "Airport",
            "properties": {
                "dst": "S",
                "city": "Sao Paulo",
                "iata": "CGH",
                "icao": "SBSP",
                "name": "Congonhas Airport",
                "type": "airport",
                "height": 2631,
                "source": "OurAirports",
                "country": "Brazil",
                "latitude": -23.626110076904297,
                "timezone": -3.0,
                "longitude": -46.65638732910156,
     

# 5. Fechando a conexão e derrubando o container

In [37]:
# Commit whatever is still pending, then release the cursor and the connection.
conn.commit()
cur.close()
conn.close()

In [38]:
# Stop and remove the compose stack (container and network) started earlier.
!docker compose down

 Container postgres-age  Stopping
 Container postgres-age  Stopped
 Container postgres-age  Removing
 Container postgres-age  Removed
 Network atividade9_default  Removing
 Network atividade9_default  Removed
