In [105]:
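# Find the most common airport 1-, 2- and 3-grams (runs of consecutive
# airports in each aircraft's time-ordered visits), restricted to call signs
# matching a pattern, from ADS-B “event” files like the sample below.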
#
# Each input file contains one JSON object per line:
#
# {"time":"2025-04-21T00:00:25.445Z", "callSign":"N1872J  ",
#  "eventDetail":{"airport":{"airportIdent":"KLGB"}}, ... }
#

from __future__ import annotations

import glob
import json
import re
from collections import Counter, defaultdict
from typing import Iterable, List

In [117]:


def ngrams(seq: List[str], n: int) -> Iterable[str]:
    """Lazily produce every contiguous window of ``n`` items from ``seq``.

    Each window is rendered as a single string with its items joined by
    ``"-"`` (e.g. ``"A-B-C"`` for ``n == 3``). When ``seq`` holds fewer than
    ``n`` items nothing is produced.
    """
    window_count = len(seq) - n + 1
    yield from (
        "-".join(seq[start:start + n])
        for start in range(window_count)
    )

def read_files(files: list[str], cs_re: re.Pattern) -> list:
    """Collect events whose call sign matches ``cs_re`` from JSON-Lines files.

    Every non-blank line of every file is parsed as one JSON object. A record
    is kept only when its ``callSign`` (with surrounding whitespace removed)
    is non-empty and ``cs_re.match`` succeeds on it.

    Args:
        files: Paths of the files to read; each is opened as UTF-8 text.
        cs_re: Compiled pattern applied with ``match`` to the stripped
            call sign.

    Returns:
        A list of ``(time, icao, airportId, callSign)`` tuples in file and
        line order. ``airportId`` is ``'OFFAIRPORT'`` when the record's
        ``eventDetail.airport`` is null.

    Raises:
        json.JSONDecodeError: A non-blank line is not valid JSON.
        KeyError: A matching record lacks ``icao``, ``eventDetail``,
            ``airport``, ``airportIdent`` or ``time``.
    """
    visits = []
    for path in files:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(path, encoding="utf-8") as fh:
            for line in fh:
                # Blank lines (e.g. a trailing newline at end of file) are
                # not records; json.loads() would raise on them.
                if not line.strip():
                    continue
                rec = json.loads(line)
                # A missing callSign is treated like an empty/null one.
                call_sign = (rec.get("callSign") or "").strip()
                if not call_sign or not cs_re.match(call_sign):
                    continue
                icao24 = rec["icao"]
                airport = rec["eventDetail"]["airport"]
                airport_id = "OFFAIRPORT" if airport is None else airport["airportIdent"]
                visits.append((rec["time"], icao24, airport_id, call_sign))
    return visits


In [119]:
# Find landings-*.json in this directory and load the visits whose call sign
# matches TYSON_RE.
#
# NOTE(review): the recorded output of the following cell lists GXA6… call
# signs, which the active pattern cannot match — it was presumably produced
# with the GXA6 alternative below; re-run to refresh.
TYSON_RE = re.compile(r"^(TYSON|TYS|ON)[0-9]+")
# Alternative call-sign filters, kept for quick switching:
# TYSON_RE = re.compile(r"^GXA6[0-9]+")
# TYSON_RE = re.compile(r"^TYSON[0-9]+")

# glob.glob() returns paths in arbitrary order; sort them so VISITS comes out
# in the same order on every run and on every machine.
FILES = sorted(glob.glob("landings-*.json"))
print(f"Found {len(FILES)} files")
VISITS = read_files(FILES, TYSON_RE)
print(f"Found {len(VISITS)} visits")


Found 21 files
Found 54 visits


In [120]:
# Dump VISITS to stdout as CSV: a header row, then one row per visit tuple.
print("time,icao24,airportId,callSign")
for ts, icao24, airport_id, call_sign in VISITS:
    print(f"{ts},{icao24},{airport_id},{call_sign}")


time,icao24,airportId,callSign
2025-04-28T00:07:35.824Z,a2e13a,KDTW,GXA605
2025-04-28T18:36:05.407Z,a2d615,KLAS,GXA610
2025-04-12T02:28:49.261Z,a83f98,KIWA,GXA6139
2025-04-12T11:17:55.029Z,a83f98,KDAL,GXA6130
2025-04-12T15:13:38.824Z,a2c129,MYNN,GXA601
2025-04-12T17:36:46.524Z,ad7274,TJSJ,GXA6161
2025-04-12T19:00:54.062Z,a83f98,KMIA,GXA6132
2025-04-12T23:49:38.438Z,ad7274,KAEX,GXA6161
2025-04-09T00:32:09.451Z,a83f98,KIWA,GXA6133
2025-04-09T00:38:39.448Z,a2d25e,KELP,GXA6121
2025-04-09T03:03:43.437Z,a2d25e,KHRL,GXA6112
2025-04-09T12:23:37.503Z,a75013,KELP,GXA6167
2025-04-09T14:08:52.809Z,ad7274,KMSP,GXA6153
2025-04-09T15:04:08.033Z,a6b0f5,KELP,GXA6133
2025-04-09T16:11:55.083Z,ad7274,KOMA,GXA6154
2025-04-09T17:09:31.335Z,a835d1,KHRL,GXA6123
2025-04-09T17:21:50.406Z,a2bd72,MGGT,GXA6150
2025-04-09T18:12:51.336Z,a75013,MHLM,GXA6168
2025-04-09T19:19:11.748Z,a6b0f5,KAEX,GXA6134
2025-04-09T19:28:47.634Z,a2d25e,KCLT,GXA6113
2025-04-09T19:31:21.531Z,ad7274,KHRL,GXA6155
2025-04-09T20:39:41.815Z,a8

In [108]:

# For each call-sign, sort & create 1-,2-,3-gram frequency tables
#
# NOTE(review): everything below the TOP_N assignment is commented out and
# never runs. It iterates VISITS.items() and unpacks two-element tuples,
# whereas read_files() returns a flat list of four-element
# (time, icao24, airportId, callSign) tuples — presumably left over from an
# earlier VISITS layout; adapt it before re-enabling.

# Number of n-grams shown per n; a later cell assigns TOP_N again.
TOP_N = 10
# for cs, tuples in VISITS.items():
#     tuples.sort(key=lambda t: t[0])                  # sort by time
#     airports = [ap for _, ap in tuples]
#     counters = {
#         n: Counter(ngrams(airports, n))
#         for n in (1, 2, 3)
#     }
#     print(f"\n=== {cs} ===")
#     for n in (1, 2, 3):
#         common = counters[n].most_common(TOP_N)
#         label = "-".join(["gram"] * n) if TOP_N == 1 else f"top {n}-grams"
#         for gram, cnt in common:
#             print(f"{n}-gram: {gram:<20}  ({cnt}×)")
#         if not common:
#             print(f"{n}-gram: <none recorded>")


In [109]:
# Print the TOP-N 1-,2-,3-grams.
TOP_N = 12

# read_files() returns a flat list of (time, icao24, airportId, callSign)
# tuples, so first group the (time, airport) pairs by aircraft (icao24).
visits_by_icao: dict[str, list[tuple[str, str]]] = defaultdict(list)
for ts, icao24, airport_id, _call_sign in VISITS:
    visits_by_icao[icao24].append((ts, airport_id))

# counters[n] maps each n-gram string to how often it occurs across all
# aircraft; n-grams never span two different aircraft.
counters: dict[int, Counter[str]] = defaultdict(Counter)
for tuples in visits_by_icao.values():
    # Order each aircraft's visits by timestamp string without mutating the
    # grouped data, so re-running the cell is idempotent.
    # NOTE(review): string ordering equals chronological ordering only while
    # every timestamp uses the same ISO-8601 UTC format — confirm for new data.
    airports = [ap for _, ap in sorted(tuples, key=lambda t: t[0])]
    for n in (1, 2, 3):
        counters[n].update(ngrams(airports, n))

for n in (1, 2, 3):
    common = counters[n].most_common(TOP_N)
    for gram, cnt in common:
        print(f"{n}-gram: {gram:<20}  ({cnt}×)")
    if not common:
        print(f"{n}-gram: <none recorded>")

1-gram: KHRL                  (6×)
1-gram: KELP                  (6×)
1-gram: KIWA                  (5×)
1-gram: MYNN                  (4×)
1-gram: KJFK                  (4×)
1-gram: MHLM                  (3×)
1-gram: KLAS                  (2×)
1-gram: KIAH                  (2×)
1-gram: MGGT                  (2×)
1-gram: KDAL                  (2×)
1-gram: TJSJ                  (2×)
1-gram: KAEX                  (2×)
2-gram: KELP-KHRL             (3×)
2-gram: KIWA-KIAH             (2×)
2-gram: MYNN-MYNN             (2×)
2-gram: MYNN-KJFK             (2×)
2-gram: KJFK-KJFK             (2×)
2-gram: KHRL-KCLT             (2×)
2-gram: KCLT-KCSG             (2×)
2-gram: KBUR-CYVR             (1×)
2-gram: CYVR-KLAS             (1×)
2-gram: KLAS-KDTW             (1×)
2-gram: KIWA-KHRL             (1×)
2-gram: KHRL-KIWA             (1×)
3-gram: MYNN-MYNN-KJFK        (2×)
3-gram: MYNN-KJFK-KJFK        (2×)
3-gram: KELP-KHRL-KCLT        (2×)
3-gram: KHRL-KCLT-KCSG        (2×)
3-gram: KBUR-CYVR-KL

In [110]:
# Write a csv of the form origin,destination,count.
# Only write bigrams, and only the top 15.
import csv


def write_ngrams_csv(counters: dict[int, Counter[str]], filename: str, top_n: int = 15):
    """Save the ``top_n`` most frequent bigrams as ``origin,destination,count`` rows.

    Only the counter stored under key ``2`` is consulted. If that entry is
    missing or empty, a notice is printed and no file is created. Otherwise
    ``filename`` is (over)written with a header row followed by one row per
    bigram, most frequent first.

    A bigram key that does not split into exactly two parts on ``"-"`` is
    reported on stdout and left out of the file.
    """
    pair_counts = counters.get(2)
    if pair_counts:
        ranked = pair_counts.most_common(top_n)
        with open(filename, "w", newline="") as out:
            emit_row = csv.writer(out).writerow
            emit_row(["origin", "destination", "count"])
            for key, freq in ranked:
                fields = key.split("-")
                if len(fields) != 2:
                    # An airport code containing a hyphen, or any other
                    # unexpected key shape, cannot be split unambiguously.
                    print(f"Skipping potentially malformed bigram: {key}")
                    continue
                emit_row([fields[0], fields[1], freq])
    else:
        print("No bigrams found to write.")


# Write the top bigrams to a CSV file.
# Relies on `counters` having been populated by the n-gram counting cell.
csv_filename = "top_bigrams.csv"
csv_top_n = 15  # used for both the call and the message so they cannot drift
write_ngrams_csv(counters, csv_filename, top_n=csv_top_n)
# write_ngrams_csv() creates no file when there are no bigrams, so only
# report success when there was something to write.
if counters.get(2):
    print(f"Wrote top {csv_top_n} bigrams to {csv_filename}")

Wrote top 15 bigrams to top_bigrams.csv


In [111]:
# Write the bigram table again under a second filename, using
# write_ngrams_csv()'s default top_n of 15.
write_ngrams_csv(counters, "bigrams.csv")