In [1]:
import polars as pl
import altair as alt
from vega_datasets import data

alt.theme.enable('latimes')

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## load airport data

In [18]:
#file = 'https://davidmegginson.github.io/ourairports-data/airports.csv'
file = 'https://github.com/usefulmove/code/raw/refs/heads/main/eda/airports/airports.parquet'

airports = pl.read_parquet(file)

airports.sample(8)

id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,icao_code,iata_code,gps_code,local_code,home_link,wikipedia_link,keywords
i64,str,str,str,f64,f64,i64,str,str,str,str,str,str,str,str,str,str,str,str
1467,"""CPW2""","""heliport""","""London (Victoria Hospital) Hel…",42.959067,-81.225572,875.0,"""NA""","""CA""","""CA-ON""","""London""","""no""",,,"""CPW2""","""CPW2""",,,"""PW2"""
342025,"""VU-0004""","""closed""","""Luganville Seaplane Base""",-15.555,167.147,0.0,"""OC""","""VU""","""VU-SAM""","""Espiritu Santo Is.""","""no""",,,,,,"""https://en.wikipedia.org/wiki/…",
349551,"""PH-0432""","""small_airport""","""Sagpangan Airport""",9.51026,118.55405,187.0,"""AS""","""PH""","""PH-PLW""","""Aborlan""","""no""",,,,,,,
5263,"""OPMR""","""medium_airport""","""Masroor Air Base""",24.8936,66.938797,35.0,"""AS""","""PK""","""PK-SD""","""Karachi""","""no""",,,"""OPMR""",,,"""https://en.wikipedia.org/wiki/…",
12014,"""54NJ""","""seaplane_base""","""Highlands Seaplane Base""",40.416801,-73.999603,,"""NA""","""US""","""US-NJ""","""Highlands""","""no""",,,"""54NJ""","""54NJ""",,,
325784,"""LV-0017""","""small_airport""","""Mežāre Airstrip""",56.515676,26.257966,,"""EU""","""LV""","""LV-042""","""Mežāre""","""no""",,,,,,,
11172,"""46NE""","""small_airport""","""Jantzen Airport""",40.893842,-101.50076,3375.0,"""NA""","""US""","""US-NE""","""Madrid""","""no""",,,"""46NE""","""46NE""",,,
595288,"""AU-0754""","""small_airport""","""Lynton Airfield""",-28.21283,114.30641,39.0,"""OC""","""AU""","""AU-WA""","""Yallabatharra""","""no""",,,,,,,"""Lynton Station"""


In [19]:
airports_us = airports.filter(
    pl.col('iso_country') == 'US',
    pl.col('type') != 'closed',
)

display(airports_us.shape)

display(airports_us.group_by('type').agg(pl.col('type').len().alias('count')))

(25015, 19)

type,count
str,u32
"""small_airport""",15250
"""large_airport""",71
"""medium_airport""",832
"""balloonport""",29
"""seaplane_base""",659
"""heliport""",8174


## u.s. airport locations

In [30]:
fig_width, fig_height = 1300, 900

# read in polygons from topojson
states = alt.topo_feature(data.us_10m.url, feature='states')

# US states background
states_background = alt.Chart(
    states,
    title='u.s. airports',
).mark_geoshape(
    fill='#faf6e4',
    stroke='#ff991c',
    strokeWidth=0.5,
).properties(
    width=fig_width,
    height=fig_height,
).project('albersUsa')

airports_chart = alt.Chart(
    airports_us.filter(
        pl.col('type').str.contains('airport'),
    ).with_columns(
        pl.when(pl.col('type') == 'large_airport').then(3)
            .when(pl.col('type') == 'medium_airport').then(2)
            .when(pl.col('type') == 'small_airport').then(1)
            .otherwise(0)
        .alias('airport_size'),
        pl.col('type').str.replace('_', ' '),
    ),
).mark_circle().encode(
    latitude='latitude_deg:Q',
    longitude='longitude_deg:Q',
    size=alt.Size(
        'type:O',
        title='',
        sort=alt.SortField(field='airport_size', order='ascending'),
        scale=alt.Scale(
            domain=['small airport', 'medium airport', 'large airport'],
            range=[10, 300],
        ),
    ),
    opacity=alt.Opacity(
        'type:O',
        scale=alt.Scale(
            domain=['medium airport', 'large airport', 'small airport'],
            range=[0.5, 1.0],
        ),
        legend=None,
    ),
    color=alt.Color(
        'type:N',
        title='',
        scale=alt.Scale(
            domain=['small airport', 'medium airport', 'large airport'],
            range=['#ef9d6e', '#ef9d6e', '#0060ff'],
        ),
    ),
    tooltip='custom_tooltip:N',
).transform_calculate(
    custom_tooltip='datum.name + " - " + datum.ident + " - " + datum.municipality',
).project(
    type='albersUsa',
).properties(
    width=fig_width,
    height=fig_height,
)

(states_background + airports_chart).show()

#(states_background + airports_chart).save('airports.html')

## load flight data

In [7]:
flights = pl.read_csv(data.flights_airport.base_url + data.flights_airport.filename)

print(flights.shape)

display(flights.sample(4))

(5366, 3)


origin,destination,count
str,str,i64
"""CAE""","""ORD""",1696
"""LAX""","""TPA""",516
"""JFK""","""IND""",125
"""DEN""","""BHM""",651


In [8]:
flights_info_us = flights.join(
    airports,
    left_on='origin',
    right_on='iata_code',
    how='left',
).select(
    pl.col('origin'),
    pl.col('type').alias('origin_type'),
    pl.col('name').alias('origin_name'),
    pl.col('latitude_deg').alias('origin_latitude'),
    pl.col('longitude_deg').alias('origin_longitude'),
    pl.col('elevation_ft').alias('origin_elevation'),
    pl.col('continent').alias('origin_continent'),
    pl.col('iso_country').alias('origin_country'),
    pl.col('iso_region').alias('origin_region'),
    pl.col('municipality').alias('origin_municipality'),
    pl.col('destination'),
    pl.col('count').alias('flights'),
).join(
    airports,
    left_on='destination',
    right_on='iata_code',
    how='left',
).select(
    pl.col('origin'),
    pl.col('origin_type'),
    pl.col('origin_name'),
    pl.col('origin_latitude'),
    pl.col('origin_longitude'),
    pl.col('origin_elevation'),
    pl.col('origin_continent'),
    pl.col('origin_country'),
    pl.col('origin_region'),
    pl.col('origin_municipality'),
    pl.col('destination'),
    pl.col('type').alias('dest_type'),
    pl.col('name').alias('dest_name'),
    pl.col('latitude_deg').alias('dest_latitude'),
    pl.col('longitude_deg').alias('dest_longitude'),
    pl.col('elevation_ft').alias('dest_elevation'),
    pl.col('continent').alias('dest_continent'),
    pl.col('iso_country').alias('dest_country'),
    pl.col('iso_region').alias('dest_region'),
    pl.col('municipality').alias('dest_municipality'),
    pl.col('flights'),
).filter(
    (pl.col('origin_country') == 'US') & (pl.col('dest_country') == 'US'),
)

flights_info_us.head(10)

origin,origin_type,origin_name,origin_latitude,origin_longitude,origin_elevation,origin_continent,origin_country,origin_region,origin_municipality,destination,dest_type,dest_name,dest_latitude,dest_longitude,dest_elevation,dest_continent,dest_country,dest_region,dest_municipality,flights
str,str,str,f64,f64,i64,str,str,str,str,str,str,str,f64,f64,i64,str,str,str,str,i64
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""ATL""","""large_airport""","""Hartsfield Jackson Atlanta Int…",33.6367,-84.428101,1026,"""NA""","""US""","""US-GA""","""Atlanta""",853
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""BHM""","""medium_airport""","""Birmingham-Shuttlesworth Inter…",33.562901,-86.753502,650,"""NA""","""US""","""US-AL""","""Birmingham""",1
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""CLE""","""large_airport""","""Cleveland Hopkins Internationa…",41.411701,-81.8498,791,"""NA""","""US""","""US-OH""","""Cleveland""",805
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""CLT""","""large_airport""","""Charlotte Douglas Internationa…",35.214001,-80.9431,748,"""NA""","""US""","""US-NC""","""Charlotte""",465
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""CVG""","""large_airport""","""Cincinnati Northern Kentucky I…",39.048801,-84.667801,896,"""NA""","""US""","""US-KY""","""Cincinnati / Covington""",247
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""DTW""","""large_airport""","""Detroit Metropolitan Wayne Cou…",42.21377,-83.353786,645,"""NA""","""US""","""US-MI""","""Detroit""",997
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""JFK""","""large_airport""","""John F Kennedy International A…",40.639447,-73.779317,13,"""NA""","""US""","""US-NY""","""New York""",3
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""LGA""","""large_airport""","""LaGuardia Airport""",40.777199,-73.872597,21,"""NA""","""US""","""US-NY""","""New York""",9
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""ORD""","""large_airport""","""Chicago O'Hare International A…",41.9786,-87.9048,680,"""NA""","""US""","""US-IL""","""Chicago""",1425
"""ABE""","""medium_airport""","""Lehigh Valley International Ai…",40.651773,-75.442797,393,"""NA""","""US""","""US-PA""","""Allentown""","""PHL""","""large_airport""","""Philadelphia International Air…",39.871899,-75.241096,36,"""NA""","""US""","""US-PA""","""Philadelphia""",2


## most frequent flights

In [9]:
flights_info_us.sort('flights', descending=True).select(
    pl.col('origin'),
    pl.col('origin_name'),
    pl.col('origin_municipality'),
    pl.col('destination'),
    pl.col('dest_name'),
    pl.col('dest_municipality'),
    pl.col('flights'),
).head(10)

origin,origin_name,origin_municipality,destination,dest_name,dest_municipality,flights
str,str,str,str,str,str,i64
"""SFO""","""San Francisco International Ai…","""San Francisco""","""LAX""","""Los Angeles International Airp…","""Los Angeles""",13788
"""LAX""","""Los Angeles International Airp…","""Los Angeles""","""SFO""","""San Francisco International Ai…","""San Francisco""",13390
"""OGG""","""Kahului International Airport""","""Kahului""","""HNL""","""Daniel K Inouye International …","""Honolulu, Oahu""",12383
"""LGA""","""LaGuardia Airport""","""New York""","""BOS""","""Logan International Airport""","""Boston""",12035
"""BOS""","""Logan International Airport""","""Boston""","""LGA""","""LaGuardia Airport""","""New York""",12029
"""HNL""","""Daniel K Inouye International …","""Honolulu, Oahu""","""OGG""","""Kahului International Airport""","""Kahului""",12014
"""LAX""","""Los Angeles International Airp…","""Los Angeles""","""LAS""","""Harry Reid International Airpo…","""Las Vegas""",11773
"""LAS""","""Harry Reid International Airpo…","""Las Vegas""","""LAX""","""Los Angeles International Airp…","""Los Angeles""",11729
"""LAX""","""Los Angeles International Airp…","""Los Angeles""","""SAN""","""San Diego International Airpor…","""San Diego""",11257
"""SAN""","""San Diego International Airpor…","""San Diego""","""LAX""","""Los Angeles International Airp…","""Los Angeles""",11224


In [23]:
num_bars = 30

alt.Chart(
    flights_info_us.sort('flights', descending=True).select(
        (pl.col('origin') + " to " + pl.col('destination')).alias('flight'),
        pl.col('origin'),
        pl.col('origin_name'),
        pl.col('origin_municipality'),
        pl.col('destination'),
        pl.col('dest_name'),
        pl.col('dest_municipality'),
        pl.col('flights'),
    ).head(num_bars),
    title=f'most frequent flights (u.s.) - top {num_bars}'
).transform_calculate(
    custom_tooltip='datum.origin_name + " (" + datum.origin_municipality + ") " + " to " + datum.dest_name + " (" + datum.dest_municipality + ")"',
).mark_bar(
    color='#f9ebc0',
    opacity=0.75,
    stroke='black',
).encode(
    x=alt.X('flights:Q', title='number of flights'),
    y=alt.Y('flight:N', title='', sort=alt.SortField('flights', order='descending')),
    tooltip='custom_tooltip:N',
).properties(
    width=800,
    height=700,
)