In [1]:
from math import isnan

from avro.datafile import DataFileReader
from avro.io import DatumReader
from avro.schema import parse
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import (
    MessageField,
    SerializationContext,
    StringSerializer,
)
from libs.configuration import configure
from libs.kafka import MessageProducer
from libs.models.flight_signal import RawFlightSignal

# Load the runtime settings object; later cells read the schema-registry URL
# and the Kafka topic names from its attributes.
# NOTE(review): running this cell appears to print every setting, credentials
# included — clear or redact the cell output before sharing the notebook.
env = configure()

{'DATASTORAGE_AZURE_ACCESSKEY': '<REDACTED>',
 'DATASTORAGE_AZURE_ACCOUNTNAME': 'icebergpe6t5cug7x9f45p8',
 'KAFKA_BOOTSTRAP_SERVERS': 'localhost:9092',
 'KAFKA_SCHEMA_REGISTRY_URL': 'http://localhost:8081',
 'KAFKA_SESSION_TIMEOUT_MS': '45000',
 'KAFKA_TOPIC_RAW_AIRCRAFT': 'raw.aircraft',
 'KAFKA_TOPIC_RAW_AIRLINE': 'raw.airline',
 'KAFKA_TOPIC_RAW_AIRPORT': 'raw.airport',
 'KAFKA_TOPIC_RAW_FLIGHT_SIGNAL': 'raw.flight_signal',
 'KAFKA_TOPIC_T1_AIRCRAFT': 't1.aircraft',
 'KAFKA_TOPIC_T1_AIRLINE': 't1.airline',
 'KAFKA_TOPIC_T1_AIRPORT': 't1.airport',
 'KAFKA_TOPIC_T1_FLIGHT_SIGNAL': 't1.flight_signal',
 'KAFKA_TOPIC_T2_AIRCRAFT': 't2.aircraft',
 'KAFKA_TOPIC_T2_AIRLINE': 't2.airline',
 'KAFKA_TOPIC_T2_AIRPORT': 't2.airport',
 'KAFKA_TOPIC_T2_FLIGHT_SIGNAL': 't2.flight_signal',
 'POSTGRE_JDBC_URL': 'jdbc:postgresql://localhost:5432/iceberg-catalog',
 'POSTGRE_PASSWORD': '<REDACTED>',
 'POSTGRE_USER': 'iceberg',
 

In [2]:
# Connect to the schema registry configured in `env`.
registry_conf = {"url": env.KAFKA_SCHEMA_REGISTRY_URL}
sr_client = SchemaRegistryClient(registry_conf)

# Fetch the latest registered version under the "<topic>-value" subject of the
# raw flight-signal topic.
value_subject = f"{env.KAFKA_TOPIC_RAW_FLIGHT_SIGNAL}-value"
out_schema = sr_client.get_latest_version(value_subject)

# Last expression of the cell: show the schema definition string as output.
out_schema.schema.schema_str

'{"type":"record","name":"RawFlightSignal","namespace":"neot.de.rbearthgaze","doc":"","fields":[{"name":"time","type":{"type":"long","logicalType":"timestamp-millis"},"doc":"This column contains the unix (aka POSIX or epoch) timestamp for which the state vector was valid. You\'ll find one state vector per second for each aircraft which was active within the coverage of OpenSky at that particular second."},{"name":"icao24","type":"string","doc":"This column contains the 24-bit ICAO transponder ID which can be used to track specific airframes over different flights."},{"name":"lat","type":["double","null"],"doc":"These column contain the last known latitude and longitude of the aircraft. Coordinates are stored as decimal WGS84 coordinates."},{"name":"lon","type":["double","null"],"doc":"These column contain the last known latitude and longitude of the aircraft. Coordinates are stored as decimal WGS84 coordinates."},{"name":"velocity","type":["double","null"],"doc":"This column contains t

In [3]:
# Serialization context identifying the payload as the VALUE field of the raw
# flight-signal topic.
value_serialization_context = SerializationContext(
    env.KAFKA_TOPIC_RAW_FLIGHT_SIGNAL, MessageField.VALUE
)

# Avro serializer bound to the schema fetched from the registry; automatic
# schema registration is switched off.
avro_serializer = AvroSerializer(
    schema_registry_client=sr_client,
    schema_str=out_schema.schema.schema_str,
    conf={"auto.register.schemas": False},
)

# Serializer handed to the producer for message keys.
string_serializer = StringSerializer("utf_8")


def value_serializer(x):
    """Avro-encode ``x`` using the value serialization context defined above."""
    return avro_serializer(x, value_serialization_context)


# Producer for the raw flight-signal topic, wired with the key and value
# serializers defined above.
p = MessageProducer[RawFlightSignal](
    topic=env.KAFKA_TOPIC_RAW_FLIGHT_SIGNAL,
    key_serializer=string_serializer,
    value_serializer=value_serializer,
)

In [14]:
# Replay a bounded sample of records from a local Avro file onto the raw
# flight-signal topic via the producer `p`, cleaning each record first.

# Upper bound on the number of records produced by one run of this cell.
MAX_RECORDS = 200
# Numeric fields that are multiplied by 1000 before producing (presumably
# seconds -> milliseconds; the registered schema declares `time` as
# timestamp-millis — TODO confirm for the other two fields).
TIMESTAMP_FIELDS = ("time", "lastcontact", "lastposupdate")

schema = parse(out_schema.schema.schema_str)

# Number of records produced so far; stays 0 if the file holds no records.
i = 0

# `with` guarantees the file handle is released even if constructing the
# reader or producing a record raises.
with open("../../assets/states_2022-01-03-00.avro", "rb") as avro_file:
    reader = DataFileReader(avro_file, DatumReader(schema))
    try:
        for i, record in enumerate(reader, start=1):
            # Only values of existing keys are reassigned, so the dict's size
            # never changes while it is being iterated.
            for k, v in record.items():
                if isinstance(v, float | int):
                    if isnan(v):
                        # NaN is replaced by None before the record is produced.
                        record[k] = None
                    elif k in TIMESTAMP_FIELDS:
                        record[k] = int(v * 1000)
                elif isinstance(v, str):
                    # Drop surrounding whitespace from string values.
                    record[k] = v.strip()

            p.produce(record)

            if i == MAX_RECORDS:
                break
    finally:
        reader.close()

INFO:MessageProducer#raw.flight_signal:{'topic': 'raw.flight_signal', 'key': b'bb2174b9-eb3b-4354-80c9-af35d79ca846', 'value': b'\x00\x00\x00\x00\x08\xa0\xec\xe3\xd3\xc3_\x0ca88e2e\x00\x08\x1aN\xea\xc8\xa3E@\x00\xdc\xb6m\xdbg\xb3^\xc0\x00\xd6\x93\xbe\xae;\xedf@\x02sC\xf2O\xaa>h@\x02\xfd\x87\xf4\xdb\xd7\x81#\xc0\x02\x10QXE2508 \x00\x01\x01\x00\x087146\x00]\x8f\xc2\xf5\xa8\x17\xb9@\x00*\\\x8f\xc2u\xcb\xb8@\x00\xbe\xbf\xe2\xd3\xc3_\x00\xf4\xe3\xe2\xd3\xc3_'}
INFO:MessageProducer#raw.flight_signal:{'topic': 'raw.flight_signal', 'key': b'5a524eb7-13b1-4195-9a4b-854a5525d6b8', 'value': b'\x00\x00\x00\x00\x08\xa0\xec\xe3\xd3\xc3_\x0ca5251f\x00\x00\x00\x00\x00\xbf\xe3=@\x00<\xb1\x13;\xd3xW\xc0\x00\xec>s,\xdc\xc8r@\x02[7\x1c\x95$PY@\x02\x00\x00\x00\x00\x00\x00\x00\x00\x02\x10SWA1227 \x00\x00\x00\x00\x082562\x00\xcd\xcc\xcc\xcc\xcc\x06\xc6@\x00\xaeG\xe1z\x94\x97\xc6@\x00\xa8\xeb\xe3\xd3\xc3_\x00\xf8\xeb\xe3\xd3\xc3_'}
INFO:MessageProducer#raw.flight_signal:{'topic': 'raw.flight_signal', 'key': b