In [1]:
# find the most common airport 1-, 2- and 3-grams
# per call-sign from ADS-B “event” files like the sample below.
#
# Each input file contains one JSON object per line:
#
# {"time":"2025-04-21T00:00:25.445Z", "callSign":"N1872J  ",
#  "eventDetail":{"airport":{"airportIdent":"KLGB"}}, ... }
#

from __future__ import annotations

import glob
import json
import re
from collections import Counter, defaultdict
from typing import Iterable, List

import pandas as pd

In [2]:
# $ head landings-takeoffs-2025-05-01.json | head
# {"time":"2025-05-01T09:42:44.829Z","icao":"4050dc","callSign":"GCEGU   ","typeCode":"P28A","typeDesc":null,"reg":"G-CEGU","lon":-0.806467,"lat":51.493347,"eventDetail":{"type":"takeoff","heading":321.20697,"airport":{"airportIdent":"EGLM","runwayIdent":"25L","runwayScore":-3960801.2},"confidence":"medium"},"reverseGeo":null,"url":"https://globe.adsbexchange.com/?icao=4050dc&zoom=13&lat=51.493347&lon=-0.806467&showTrace=2025-05-01&trackLabels&startTime=09:41&endTime=09:44"}
# {"time":"2025-05-01T09:42:54.829Z","icao":"39280f","callSign":"FGKAP   ","typeCode":"P28A","typeDesc":null,"reg":"F-GKAP","lon":7.455943,"lat":47.73506,"eventDetail":{"type":"takeoff","heading":28.28523,"airport":{"airportIdent":"LFGB","runwayIdent":"02R","runwayScore":-24712820.0},"confidence":"medium"},"reverseGeo":null,"url":"https://globe.adsbexchange.com/?icao=39280f&zoom=13&lat=47.73506&lon=7.455943&showTrace=2025-05-01&trackLabels&startTime=09:41&endTime=09:44"}
# {"time":"2025-05-01T09:42:57.829Z","icao":"04019e","callSign":"ETH204  ","typeCode":"DH8D","typeDesc":null,"reg":"ET-AYH","lon":38.823174,"lat":8.98393,"eventDetail":{"type":"takeoff","heading":74.57696,"airport":{"airportIdent":"HAAB","runwayIdent":"07R","runwayScore":-205.102},"confidence":"medium"},"reverseGeo":null,"url":"https://globe.adsbexchange.com/?icao=04019e&zoom=13&lat=8.98393&lon=38.823174&showTrace=2025-05-01&trackLabels&startTime=09:41&endTime=09:44"}

def read_landings_takeoffs(path: str) -> pd.DataFrame:
    """Reads a JSONL file line by line and returns a pandas DataFrame."""
    data = []
    with open(path, 'r') as f:
        for line in f:
            try:
                # Skip empty lines
                if line.strip():
                    data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Skipping invalid JSON line in {path}: {e}")
                print(f"Problematic line: {line.strip()}") # Print the line causing trouble
    if not data:
        # Return an empty DataFrame if no valid JSON was found
        return pd.DataFrame()
    return pd.DataFrame(data)


def read_all_landings_takeoffs()-> pd.DataFrame:
    # Read all "landings-takeoffs-*.json" files using the new function.
    files = glob.glob("landings-takeoffs-*.json")
    if not files:
        print("No 'landings-takeoffs-*.json' files found.")
        df = pd.DataFrame() # Create an empty DataFrame
    else:
        df = pd.DataFrame()
        for f in files:
            print(f"Reading file: {f}")
            df = pd.concat([df, read_landings_takeoffs(f)], ignore_index=True)

    # Convert the 'time' column to datetime objects
    df['time'] = pd.to_datetime(df['time'], format='ISO8601')
    # Sort the DataFrame by the 'time' column
    df = df.sort_values(by='time').reset_index(drop=True)

    event_details_normalized = pd.json_normalize(df['eventDetail'])

    # Drop the original eventDetail column from df
    df = df.drop(columns=['eventDetail'])

    # Concatenate the original DataFrame (without eventDetail) and the normalized details
    # Ensure indices align correctly, especially after sorting df earlier
    df = pd.concat([df.reset_index(drop=True),
                            event_details_normalized.reset_index(drop=True)], axis=1)
    print(f"Successfully loaded {len(df)} records.")
    return df

df = read_all_landings_takeoffs()
df    

Reading file: landings-takeoffs-2025-04-17.json
Reading file: landings-takeoffs-2025-04-21.json
Reading file: landings-takeoffs-2025-04-20.json
Reading file: landings-takeoffs-2025-04-16.json
Reading file: landings-takeoffs-2025-05-01.json
Reading file: landings-takeoffs-2025-04-27.json
Reading file: landings-takeoffs-2025-04-30.json
Reading file: landings-takeoffs-2025-04-26.json
Reading file: landings-takeoffs-2025-04-25.json
Reading file: landings-takeoffs-2025-04-13.json
Reading file: landings-takeoffs-2025-04-29.json
Reading file: landings-takeoffs-2025-04-28.json
Reading file: landings-takeoffs-2025-04-24.json
Reading file: landings-takeoffs-2025-04-15.json
Reading file: landings-takeoffs-2025-04-19.json
Reading file: landings-takeoffs-2025-04-23.json
Reading file: landings-takeoffs-2025-04-22.json
Reading file: landings-takeoffs-2025-04-18.json
Reading file: landings-takeoffs-2025-04-14.json
Successfully loaded 4470052 records.


Unnamed: 0,time,icao,callSign,typeCode,typeDesc,reg,lon,lat,reverseGeo,url,type,heading,confidence,airport.airportIdent,airport.runwayIdent,airport.runwayScore,airport
0,2025-04-12 23:59:24.438000+00:00,adb1fd,,P28A,,N98152,-85.090050,37.315247,,https://globe.adsbexchange.com/?icao=adb1fd&zo...,takeoff,325.965300,medium,KI53,01,-3.110837e+08,
1,2025-04-12 23:59:24.438000+00:00,aa9c10,N783EA,C172,,N783EA,-89.027900,42.630165,,https://globe.adsbexchange.com/?icao=aa9c10&zo...,takeoff,221.310400,medium,KJVL,22,-2.027289e+00,
2,2025-04-12 23:59:34.438000+00:00,894076,GFA281,A321,,A9C-CC,50.613754,26.282318,,https://globe.adsbexchange.com/?icao=894076&zo...,takeoff,298.799220,medium,OBBI,30R,-1.350891e+03,
3,2025-04-12 23:59:47.438000+00:00,aafb07,N80636,C172,,N80636,-78.396545,35.529190,,https://globe.adsbexchange.com/?icao=aafb07&zo...,takeoff,24.284212,medium,KJNX,03,-1.376409e+05,
4,2025-04-12 23:59:51.438000+00:00,c80336,ZKDNT,C172,,ZK-DNT,176.319340,-38.102077,,https://globe.adsbexchange.com/?icao=c80336&zo...,takeoff,204.765580,medium,NZRO,18R,-1.560780e+02,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4470047,2025-05-01 23:59:48.461000+00:00,a6b10c,SKW5790,CRJ7,,N530GJ,-87.887314,41.963562,,https://globe.adsbexchange.com/?icao=a6b10c&zo...,takeoff,221.201580,high,KORD,22L,-1.037997e+01,
4470048,2025-05-01 23:59:48.461000+00:00,a3e55f,N350FW,B350,,N350FW,-93.617744,42.000183,,https://globe.adsbexchange.com/?icao=a3e55f&zo...,takeoff,18.187530,high,KAMW,01,-2.651932e+02,
4470049,2025-05-01 23:59:48.461000+00:00,a0b2a4,AVL214,C172,,N144ME,-78.143310,39.142660,,https://globe.adsbexchange.com/?icao=a0b2a4&zo...,takeoff,129.632500,high,KOKV,14,-8.272781e+01,
4470050,2025-05-01 23:59:48.461000+00:00,7c6dd4,QFA578,B738,,VH-VZA,151.179490,-33.959858,,https://globe.adsbexchange.com/?icao=7c6dd4&zo...,takeoff,169.363600,high,YSSY,16R,-3.058764e+01,


In [3]:
def count_sequence_violations(df: pd.DataFrame) -> tuple[int, float]:
    """
    Counts the number of times the takeoff/landing sequence is violated for each aircraft
    and calculates the percentage of violations relative to total consecutive pairs.

    A violation occurs when two consecutive events (sorted by time) for the same aircraft
    have the same type ('takeoff' or 'landing').

    Args:
        df: DataFrame containing flight events, must include 'icao', 'time', and 'type' columns.
            The DataFrame should ideally be pre-sorted by time globally, but the function
            will sort within each group for safety.

    Returns:
        A tuple containing:
          - The total number of sequence violations (int).
          - The percentage of consecutive pairs that are violations (float).
    """
    if not {'icao', 'time', 'type'}.issubset(df.columns):
        raise ValueError("DataFrame must contain 'icao', 'time', and 'type' columns.")

    if df.empty:
        return 0, 0.0

    total_violations = 0
    total_pairs = 0
    # Ensure 'type' column exists after flattening. It should be derived from eventDetail.type
    if 'type' not in df.columns:
         print("Warning: 'type' column not found. Returning (0, 0.0).")
         return 0, 0.0

    # Filter out rows where 'type' is NaN before grouping if necessary
    df_filtered = df.dropna(subset=['type'])

    # Group by aircraft
    grouped = df_filtered.groupby('icao')

    for name, group in grouped:
        if len(group) < 2:
            continue # Need at least two events to form a pair

        # Sort events for the aircraft by time (important for sequence checking)
        # Make sure NaT times don't cause issues, though sorting should handle them
        group_sorted = group.sort_values(by='time')

        # Calculate pairs for this group
        num_pairs_in_group = len(group_sorted) - 1
        total_pairs += num_pairs_in_group

        # Get the event type of the previous record within the group
        previous_type = group_sorted['type'].shift(1)

        # Find rows where the current type is the same as the previous type
        # This automatically handles the first row (previous_type is NaN)
        violations = group_sorted['type'] == previous_type

        # Sum the violations (True counts as 1)
        total_violations += violations.sum()

    # Calculate percentage
    if total_pairs == 0:
        percentage = 0.0
    else:
        percentage = (total_violations / total_pairs) * 100

    return int(total_violations), percentage # Cast count to int for clarity

# --- Now, let's use the function in a new cell ---

# Assuming 'df' is your flattened and sorted DataFrame from the previous steps
violation_count, violation_percentage = count_sequence_violations(df.copy()) # Use .copy() if you might modify df later

print(f"Total takeoff/landing sequence violations found: {violation_count}")
print(f"Percentage of consecutive pairs violating sequence: {violation_percentage:.2f}%")

# Example check for a specific aircraft (optional)
# try:
#     # Filter out NaNs in 'icao' before selecting unique values
#     valid_icaos = df['icao'].dropna().unique()
#     if len(valid_icaos) > 0:
#         specific_icao = valid_icaos[0] # Get the first valid ICAO
#         print(f"\\nChecking sequence for ICAO: {specific_icao}")
#         # Filter for the specific ICAO and ensure it's sorted
#         specific_df = df[df['icao'] == specific_icao].sort_values('time')
#         # Display only relevant columns and non-NaN types if possible
#         print(specific_df[['time', 'type']].dropna(subset=['type']).head(10))
#         specific_violations, specific_perc = count_sequence_violations(specific_df)
#         print(f"Violations for {specific_icao}: {specific_violations} ({specific_perc:.2f}%)")
#     else:
#          print("\\nCould not find any valid ICAOs to check.")
# except Exception as e: # Catch potential errors during the example check
#      print(f"\\nError during example check: {e}")


Total takeoff/landing sequence violations found: 843298
Percentage of consecutive pairs violating sequence: 19.75%


In [11]:
import re
import pandas as pd
# from typing import Optional, Pattern, Iterable, Tuple # Old
from typing import Optional, Pattern, Iterable, Tuple, Any # New
from tqdm.notebook import tqdm # Import tqdm for notebooks
# from itertools import pairwise # Can use if Python >= 3.10

def generate_airport_pairs(
    df: pd.DataFrame,
    callsign_filter: Optional[str | Pattern] = None
# ) -> Iterable[Tuple[str, str]]: # Old
) -> Iterable[Tuple[Any, str, str, str, Optional[str]]]: # New (Using Any for Timestamp type flexibility)
    """
    Generates (timestamp, icao, origin, destination, call_sign) tuples from flight event data, # Modified docstring
    handling sequence violations and optional callsign filtering.

    Handles takeoff/landing sequence violations by inferring intermediate events:
    - T(A) -> T(B) implies landing at B, yields flight (A -> B).
    - L(A) -> L(B) implies takeoff at A, yields flight (A -> B).
    - T(A) -> L(B) is a standard flight (A -> B).
    - L(A) -> T(B) marks the end of one leg and start of another, doesn't yield a pair itself.

    Filters flights based on a callsign regex: A pair (A -> B) is generated if
    EITHER the event determining A as origin OR the event determining B as
    destination matches the filter (if a filter is provided).

    Args:
        df: DataFrame with flight events. Must include 'icao', 'time', 'type',
            'callSign', and 'airport.airportIdent' columns. The DataFrame
            should be sorted by 'time' for correct sequencing.
        callsign_filter: An optional regex string or compiled pattern to filter
                         callsigns.

    Yields:
        # Tuples of (origin_airport_id, destination_airport_id). # Old
        Tuples of (timestamp, icao, origin_airport_id, destination_airport_id, call_sign). # New
        The timestamp and call_sign are typically from the event determining the destination. # New
    """
    required_cols = {'icao', 'time', 'type', 'callSign', 'airport.airportIdent'}
    if not required_cols.issubset(df.columns):
        missing = required_cols - set(df.columns)
        raise ValueError(f"DataFrame is missing required columns: {missing}")

    # --- Pre-compile regex if it's a string ---
    compiled_filter: Optional[Pattern] = None
    if isinstance(callsign_filter, str):
        try:
            compiled_filter = re.compile(callsign_filter)
        except re.error as e:
            raise ValueError(f"Invalid regex provided for callsign_filter: {e}")
    elif isinstance(callsign_filter, re.Pattern):
        compiled_filter = callsign_filter
    elif callsign_filter is not None:
        raise TypeError("callsign_filter must be a string or compiled regex pattern.")

    # --- Group by aircraft ---
    # Drop rows with NaN ICAO or NaN type as they break sequences
    df_filtered = df.dropna(subset=['icao', 'type'])
    grouped = df_filtered.groupby('icao')

    print(f"Processing {len(grouped)} aircraft groups...") # Initial message
    count = 0

    # Wrap the main loop with tqdm for progress bar
    # Use total=len(grouped) for accurate progress display
    for icao, group in tqdm(grouped, desc="Processing Aircraft Groups", total=len(grouped), unit="group"):
        if len(group) < 2:
            continue # Need at least two events to form a potential pair

        # Ensure group is sorted by time
        group_sorted = group.sort_values(by='time').reset_index(drop=True)

        # Iterate through consecutive pairs of events
        # Using manual zip for broader Python compatibility vs itertools.pairwise
        for i in range(len(group_sorted) - 1):
            event_prev = group_sorted.iloc[i]
            event_curr = group_sorted.iloc[i+1]

            prev_airport = event_prev['airport.airportIdent']
            prev_type = event_prev['type']
            prev_callsign = event_prev['callSign'] # Keep for filtering check
            curr_airport = event_curr['airport.airportIdent']
            curr_type = event_curr['type']
            curr_callsign = event_curr['callSign'] # Keep for filtering check
            curr_time = event_curr['time'] # Get the timestamp of the current (destination) event

            # --- Skip if airport data is missing for the potential pair ---
            # Use pd.isna() which is more robust for different missing value types
            if pd.isna(prev_airport) or pd.isna(curr_airport):
                continue

            # --- Determine potential origin/destination based on sequence logic ---
            origin: Optional[str] = None
            destination: Optional[str] = None
            # Store callsigns associated with the events determining origin/dest
            origin_event_cs = None
            dest_event_cs = None

            if prev_type == 'takeoff':
                origin = prev_airport
                origin_event_cs = prev_callsign
                if curr_type == 'landing': # T(A) -> L(B)
                    destination = curr_airport
                    dest_event_cs = curr_callsign
                elif curr_type == 'takeoff': # T(A) -> T(B) implies landing at B
                    destination = curr_airport
                    dest_event_cs = curr_callsign # Destination info comes from current event
            elif prev_type == 'landing':
                if curr_type == 'landing': # L(A) -> L(B) implies takeoff at A
                    origin = prev_airport
                    # origin_event_cs = curr_callsign # ERROR IN PREVIOUS VERSION - This was assigning the *current* callsign to the origin event
                    origin_event_cs = prev_callsign # Origin info comes from previous event
                    destination = curr_airport
                    dest_event_cs = curr_callsign
                # L(A) -> T(B) is start of next leg, no pair formed by this step

            # --- Yield the pair if valid and filter matches (or no filter) ---
            if origin and destination:
                filter_match = False
                if compiled_filter:
                    # Check if *either* callsign matches the filter (handle NaNs)
                    # Convert to string explicitly before match for safety
                    origin_match = pd.notna(origin_event_cs) and bool(compiled_filter.match(str(origin_event_cs)))
                    dest_match = pd.notna(dest_event_cs) and bool(compiled_filter.match(str(dest_event_cs)))
                    filter_match = origin_match or dest_match
                else:
                    # If no filter is active, all valid pairs pass
                    filter_match = True

                if filter_match:
                    # yield (origin, destination) # Old yield
                    yield (curr_time, icao, origin, destination, dest_event_cs) # New yield
                    count += 1
                    # Removed the print statement for periodic progress here

    print(f"Finished processing. Total pairs yielded: {count}") # Final count message

In [12]:
tyson_filter = r"^(TYSON|TYS|ON)[0-9]+"
pair_counts = Counter(generate_airport_pairs(df.copy(), callsign_filter=tyson_filter))
print("\nTop 20 most frequent pairs:")
for pair, count in pair_counts.most_common(20):
   print(f"  {pair[0]} -> {pair[1]} : {count}")


Processing 199518 aircraft groups...


Processing Aircraft Groups:   0%|          | 0/199518 [00:00<?, ?group/s]

KeyboardInterrupt: 

In [13]:
import csv
import datetime


def write_origin_destination_csv(
    tuples: Iterable[tuple[datetime, str, str, str, Optional[str]]], filename: str
):
    with open(filename, "w") as f:
        writer = csv.writer(f)
        writer.writerow(["timestamp", "icao", "origin", "destination", "callsign"])
        for timestamp, icao, origin, destination, callsign in tuples:
            if callsign is None:
                callsign = ""
            writer.writerow([timestamp, icao, origin, destination, callsign])


write_origin_destination_csv(
    generate_airport_pairs(df, callsign_filter=tyson_filter), "ice_air.csv"
)

NameError: name 'csv' is not defined

In [6]:


def ngrams(seq: List[str], n: int) -> Iterable[str]:
    """Yield sliding n-grams as 'A-B-C' strings."""
    for i in range(len(seq) - n + 1):
        yield "-".join(seq[i:i + n])

def read_files(files: list[str], cs_re: re.Pattern) -> list:
    visits = []
    for f in files:
        with open(f) as fh:
            for line in fh:
                rec = json.loads(line)
                cs = rec["callSign"]
                if not cs:
                    continue
                cs_stripped = cs.strip()
                if not cs_stripped:
                    continue
                if not cs_re.match(cs_stripped):
                    continue
                icao24 = rec["icao"]
                airport = rec["eventDetail"]["airport"]
                if airport is None:
                    airportId = 'OFFAIRPORT'
                else:
                    airportId = rec["eventDetail"]["airport"]["airportIdent"]
                ts = rec["time"]
                visits.append((ts, icao24, airportId, cs_stripped))
    return visits


In [7]:
# Find landings-*.json in this directory.
TYSON_RE = re.compile(r"^(TYSON|TYS|ON)[0-9]+")
# TYSON_RE = re.compile(r"^GXA6[0-9]+")
# TYSON_RE = re.compile(r"^TYSON[0-9]+")
FILES = glob.glob("landings-*.json")
print(f"Found {len(FILES)} files")
VISITS = read_files(FILES, TYSON_RE)
print(f"Found {len(VISITS)} visits")


Found 40 files
Found 968 visits


In [8]:
# Print VISITS as csv.
print("time,icao24,airportId,callSign")
for visit in VISITS:
    print(f"{visit[0]},{visit[1]},{visit[2]},{visit[3]}")


time,icao24,airportId,callSign
2025-04-28T11:45:28.348Z,a8d231,KHRL,TYSON51
2025-04-28T12:42:49.005Z,a2f9dd,KAEX,TYSON46
2025-04-28T13:03:32.882Z,a835d1,KELP,TYSON45
2025-04-28T14:39:34.929Z,a2c4e0,KAEX,TYSON44
2025-04-28T15:06:16.486Z,a2f9dd,KCSG,TYSON46
2025-04-28T15:09:02.760Z,a2bd72,KNYL,TYSON48
2025-04-28T15:42:07.947Z,a8b479,KMIA,TYSON52
2025-04-28T15:39:02.689Z,a2f016,KCSG,TYSON41
2025-04-28T17:12:22.765Z,a8d231,MGGT,TYSON51
2025-04-28T17:41:16.212Z,a2c4e0,KHRL,TYSON44
2025-04-28T19:01:49.661Z,a2bd72,KHRL,TYSON48
2025-04-28T19:04:09.881Z,a2f016,KHRL,TYSON41
2025-04-28T20:14:19.514Z,a8b479,KHRL,TYSON52
2025-04-28T21:08:42.361Z,a2f016,KAEX,TYSON41
2025-04-28T21:24:13.865Z,a8d231,KSAT,TYSON51
2025-04-28T21:44:20.897Z,a835d1,KHRL,TYSON45
2025-04-28T22:17:17.475Z,a75013,KAEX,TYSON42
2025-04-28T22:24:50.345Z,ad7274,KAEX,TYSON43
2025-04-28T22:31:00.472Z,a8b479,KSAT,TYSON52
2025-04-28T23:56:04.328Z,a6b0f5,KIWA,TYSON47
2025-04-12T01:35:58.767Z,a2d25e,KELP,TYSON45
2025-04-12T04:29:40.906Z

In [9]:

# For each call-sign, sort & create 1-,2-,3-gram frequency tables

TOP_N = 10
# for cs, tuples in VISITS.items():
#     tuples.sort(key=lambda t: t[0])                  # sort by time
#     airports = [ap for _, ap in tuples]
#     counters = {
#         n: Counter(ngrams(airports, n))
#         for n in (1, 2, 3)
#     }
#     print(f"\n=== {cs} ===")
#     for n in (1, 2, 3):
#         common = counters[n].most_common(TOP_N)
#         label = "-".join(["gram"] * n) if TOP_N == 1 else f"top {n}-grams"
#         for gram, cnt in common:
#             print(f"{n}-gram: {gram:<20}  ({cnt}×)")
#         if not common:
#             print(f"{n}-gram: <none recorded>")


In [10]:
# Print the TOP-N 1-,2-,3-grams.
counters: dict[int, Counter[str]] = defaultdict(Counter)

for icao, tuples in VISITS.items():
    tuples.sort(key=lambda t: t[0])
    airports = [ap for _, ap in tuples]
    for n in (1, 2, 3):
        for ngram in ngrams(airports, n):
            counters[n][ngram] += 1

TOP_N = 12
for n in (1, 2, 3):
    common = counters[n].most_common(TOP_N)
    label = "-".join(["gram"] * n) if TOP_N == 1 else f"top {n}-grams"
    for gram, cnt in common:
        print(f"{n}-gram: {gram:<20}  ({cnt}×)")
    if not common:
        print(f"{n}-gram: <none recorded>")

AttributeError: 'list' object has no attribute 'items'

In [110]:
# Write a csv of the form origin,destination,count.
# Only write bigrams, and only the top 15.
import csv


def write_ngrams_csv(counters: dict[int, Counter[str]], filename: str, top_n: int = 15):
    """Write the top N bigrams to a CSV file."""
    bigram_counter = counters.get(2)
    if not bigram_counter:
        print("No bigrams found to write.")
        return

    top_bigrams = bigram_counter.most_common(top_n)

    with open(filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["origin", "destination", "count"])  # Write header
        for gram, count in top_bigrams:
            parts = gram.split("-")
            if len(parts) == 2:
                origin, destination = parts
                writer.writerow([origin, destination, count])
            else:
                # This case might happen if an airport code itself contains a hyphen
                # or if something unexpected occurred during ngram generation.
                print(f"Skipping potentially malformed bigram: {gram}")


# Call the function to write the CSV
# Ensure the 'counters' variable is defined and populated from the previous cell
csv_filename = "top_bigrams.csv"
write_ngrams_csv(counters, csv_filename)
print(f"Wrote top {15} bigrams to {csv_filename}")

Wrote top 15 bigrams to top_bigrams.csv


In [111]:
write_ngrams_csv(counters, "bigrams.csv")