In [6]:
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import (
    MessageField,
    SerializationContext,
    StringSerializer,
)
from libs.configuration import configure
from libs.kafka import MessageProducer
from libs.models.aircraft import RawAircraft

from aircraft import doc8643

# Load the runtime settings object. Later cells read the schema-registry URL
# and the raw-aircraft topic name from its attributes.
# NOTE(review): the saved output of this cell echoes the loaded settings,
# which include credential fields — clear the cell output before committing
# or sharing the notebook.
env = configure()

{'DATASTORAGE_AZURE_ACCESSKEY': '<REDACTED>',
 'DATASTORAGE_AZURE_ACCOUNTNAME': 'icebergpe6t5cug7x9f45p8',
 'KAFKA_BOOTSTRAP_SERVERS': 'localhost:9092',
 'KAFKA_SCHEMA_REGISTRY_URL': 'http://localhost:8081',
 'KAFKA_SESSION_TIMEOUT_MS': '45000',
 'KAFKA_TOPIC_RAW_AIRCRAFT': 'raw.aircraft',
 'KAFKA_TOPIC_RAW_AIRLINE': 'raw.airline',
 'KAFKA_TOPIC_RAW_AIRPORT': 'raw.airport',
 'KAFKA_TOPIC_RAW_FLIGHT': 'raw.flight',
 'KAFKA_TOPIC_T1_AIRCRAFT': 't1.aircraft',
 'KAFKA_TOPIC_T1_AIRLINE': 't1.airline',
 'KAFKA_TOPIC_T1_AIRPORT': 't1.airport',
 'KAFKA_TOPIC_T1_FLIGHT': 't1.flight',
 'KAFKA_TOPIC_T2_AIRCRAFT': 't2.aircraft',
 'KAFKA_TOPIC_T2_AIRLINE': 't2.airline',
 'KAFKA_TOPIC_T2_AIRPORT': 't2.airport',
 'KAFKA_TOPIC_T2_FLIGHT': 't2.flight',
 'POSTGRE_JDBC_URL': 'jdbc:postgresql://localhost:5432/iceberg-catalog',
 'POSTGRE_PASSWORD': '<REDACTED>',
 'POSTGRE_USER': 'iceberg',
 'SPARK_MASTER_URL': 'local[1]',
 'file': '

In [7]:
# Connect to the schema registry configured for this environment.
registry_conf = {"url": env.KAFKA_SCHEMA_REGISTRY_URL}
sr_client = SchemaRegistryClient(registry_conf)

# Look up the newest schema version registered under "<topic>-value"
# for the raw-aircraft topic.
value_subject = f"{env.KAFKA_TOPIC_RAW_AIRCRAFT}-value"
out_schema = sr_client.get_latest_version(value_subject)

# Display the schema definition as the cell output.
out_schema.schema.schema_str

'{"type":"record","name":"RawAircraft","namespace":"neot.de.rbearthgaze","doc":"Typical Aircraft message after crawling from https://doc8643.com.","fields":[{"name":"icao","type":"string","doc":"ICAO 4 letters"},{"name":"classification","type":"string","doc":""},{"name":"category","type":"string","doc":""},{"name":"manufacturers","type":{"type":"array","items":"string"},"doc":""},{"name":"wing_span","type":["string","null"],"doc":"float: Wing Span (m)"},{"name":"length","type":["string","null"],"doc":"float: Length (m)"},{"name":"height","type":["string","null"],"doc":"float: Height (m)"},{"name":"mtow","type":["string","null"],"doc":"float: MTOW (t)"},{"name":"fuel_capacity","type":["string","null"],"doc":"int: Fuel Capacity (ltr)"},{"name":"maximum_range","type":["string","null"],"doc":"int: Maximum Range (Nm)"},{"name":"persons_on_board","type":["string","null"],"doc":"int: Persons On Board"},{"name":"take_off_distance","type":["string","null"],"doc":"int: Take Off Distance (m)"},{"

In [8]:
# Message keys are serialized as UTF-8 strings.
string_serializer = StringSerializer("utf_8")

# Message values are Avro-encoded with the schema fetched from the registry;
# automatic schema registration is switched off in the serializer config.
avro_serializer = AvroSerializer(
    schema_registry_client=sr_client,
    schema_str=out_schema.schema.schema_str,
    conf={"auto.register.schemas": False},
)

# Serialization context identifying the payload as the VALUE field of the
# raw-aircraft topic.
value_serialization_context = SerializationContext(
    env.KAFKA_TOPIC_RAW_AIRCRAFT,
    MessageField.VALUE,
)


def value_serializer(x):
    """Avro-serialize ``x`` using the raw-aircraft value context."""
    return avro_serializer(x, value_serialization_context)


# Producer bound to the raw-aircraft topic with the serializers defined above.
p = MessageProducer[RawAircraft](
    topic=env.KAFKA_TOPIC_RAW_AIRCRAFT,
    key_serializer=string_serializer,
    value_serializer=value_serializer,
)

In [3]:
# Read the ICAO designators to scrape, one entry per line of the text file.
# The context manager closes the file even if reading raises, which the
# previous manual open()/close() pair did not guarantee.
# NOTE(review): this cell's execution count is out of sequence with the
# neighbouring cells — re-run the notebook top to bottom on a fresh kernel
# to confirm it does not rely on stale state.
with open('../../assets/aircraft_icao.txt') as f:
    icao_list = f.read().splitlines()

In [9]:
# Scrape each ICAO entry via doc8643 and hand every result to the producer.
for raw_aircraft in doc8643.scrape(icao_list):
    p.produce(raw_aircraft)

INFO:MessageProducer#raw.aircraft:{"topic": "raw.aircraft", "key": "cb5aab17-15df-41f6-8555-48d7dd8aef41", "value": "\u0000\u0000\u0000\u0000\u0005\bA002\u0006G1P\u0006L/G\u0002\u0016IRKUT A-002\u0000\u0000\u0002-\u0000\u00065.9\u0000\u00063.3\u0000\u00060.9\u0000\u0000\u0000\u0006270\u0000\u00023\u0000\u000490\u0000\u000410\u0000\u0006100\u0000\u000480\u0000\u0006113\u0000\u0000\u0000\b1100"}
INFO:MessageProducer#raw.aircraft:{"topic": "raw.aircraft", "key": "51a9d980-bec8-49a0-ab4c-073ae7e2233f", "value": "\u0000\u0000\u0000\u0000\u0005\u0004A1\u0006L1P\u0006M/G\u0004(DOUGLAS AD Skyraider,DOUGLAS EA-1 Skyraider\u0000\u0000\b15.4\u0000\b11.8\u0000\u00064.7\u0000\b11.3\u0000\b3150\u0000\b2500\u0000\u00021\u0000\u0006600\u0000\u0006600\u0000\u0006310\u0000\u0006215\u0000\u0006280\u0000\u0006163\u0000\b3700"}
INFO:MessageProducer#raw.aircraft:{"topic": "raw.aircraft", "key": "de0d61f3-e2f6-4157-b038-ab4bfbcbb296", "value": "\u0000\u0000\u0000\u0000\u0005\u0006A10\u0006L2J\u0006M/F\u0004@