In [2]:
import pandas as pd

# Step 1: Read the cleaned AIS records, parsing the Timestamp column as datetimes
df = pd.read_csv(
    "../data/ais_cleaned.csv",
    parse_dates=["Timestamp"],
)


In [3]:
# Step 2: Derive the MID as the first three characters of the MMSI's string form, cast to int
# NOTE(review): this assumes every MMSI is a 9-digit ship-station number whose leading
# three digits are the MID — TODO confirm the cleaned dataset guarantees that.
df['MID'] = df['MMSI'].astype(str).str.slice(stop=3).astype(int)

In [11]:
# MID (Maritime Identification Digits) -> country code, following the ITU Table of
# Maritime Identification Digits. Values are ISO 3166-1 alpha-2 codes, except that the
# United Kingdom keeps this notebook's existing 'UK' label (the ISO code is 'GB').
#
# Many entries previously named the wrong administration (for example 636 is Liberia,
# 538 is the Marshall Islands and 370-374 belong to Panama); they are corrected below.
# 105, 191 and 499 are not allocated MIDs (allocated MIDs start with a digit 2-7 and
# 499 is unassigned), so they are intentionally absent from the table.
mid_country = {
    210: 'CY',  # Cyprus
    212: 'CY',  # Cyprus
    215: 'MT',  # Malta
    220: 'DK',  # Denmark
    229: 'MT',  # Malta
    232: 'UK',  # United Kingdom
    241: 'GR',  # Greece
    246: 'NL',  # Netherlands
    248: 'MT',  # Malta
    249: 'MT',  # Malta
    255: 'PT',  # Portugal (Madeira)
    259: 'NO',  # Norway
    303: 'US',  # United States (Alaska)
    309: 'BS',  # Bahamas
    311: 'BS',  # Bahamas
    314: 'BB',  # Barbados
    316: 'CA',  # Canada
    319: 'KY',  # Cayman Islands
    338: 'US',  # United States
    339: 'JM',  # Jamaica
    345: 'MX',  # Mexico
    354: 'PA',  # Panama
    366: 'US',  # United States
    367: 'US',  # United States
    368: 'US',  # United States
    369: 'US',  # United States
    372: 'PA',  # Panama
    373: 'PA',  # Panama
    374: 'PA',  # Panama
    414: 'CN',  # China
    431: 'JP',  # Japan
    441: 'KR',  # South Korea
    463: 'PK',  # Pakistan
    477: 'HK',  # Hong Kong
    503: 'AU',  # Australia
    538: 'MH',  # Marshall Islands
    563: 'SG',  # Singapore
    565: 'SG',  # Singapore
    566: 'SG',  # Singapore
    567: 'TH',  # Thailand
    577: 'VU',  # Vanuatu
    636: 'LR',  # Liberia
}


In [5]:
# Translate each MID to its country code; MIDs absent from mid_country become 'Unknown'
df['Country'] = (
    df['MID']
    .map(mid_country)
    .fillna('Unknown')
)

In [6]:
# Step 3: Order rows by vessel (MMSI) and then by time, with a fresh 0..n-1 index
df = df.sort_values(by=['MMSI', 'Timestamp'], ignore_index=True)

# Step 4: Country seen on the previous row of the same MMSI (NaN on each MMSI's first row)
# NOTE(review): Country is mapped from MID, and MID is taken from the MMSI itself, so all
# rows of one MMSI share one Country and PrevCountry cannot differ from it. Detecting a
# flag change likely needs grouping by an identifier that outlives an MMSI change —
# TODO confirm which column (if any) of the dataset provides that.
df['PrevCountry'] = df['Country'].groupby(df['MMSI']).shift(1)

In [7]:
# Step 5: FlagChange is 1 when a previous country exists and differs from the current one
df['FlagChange'] = (
    df['PrevCountry'].notna() & df['Country'].ne(df['PrevCountry'])
).astype(int)

# Step 6: Hours elapsed since the preceding row of the same MMSI (NaN where there is none)
df['TimeDelta_hrs'] = (
    df.groupby('MMSI')['Timestamp']
    .diff()
    .dt.total_seconds()
    .div(3600.0)
)

In [8]:
# Step 7: Mark suspicious hops (flag change within 24 hours)
# Built as a vectorised boolean mask rather than a row-wise df.apply: it yields the same
# 0/1 values without invoking a Python lambda for every row. A missing TimeDelta_hrs
# (NaN) compares False, so such rows are never marked.
HOP_WINDOW_HRS = 24  # a flag change counts as a hop only if it occurs in under this many hours
df['SuspiciousHop'] = (
    df['FlagChange'].eq(1) & df['TimeDelta_hrs'].lt(HOP_WINDOW_HRS)
).astype(int)

In [9]:
# Step 8: Extract and display flag-hopping events (nothing is written to disk here)
hop_columns = ['MMSI', 'Timestamp', 'PrevCountry', 'Country', 'TimeDelta_hrs']
flag_hops = df.loc[df['SuspiciousHop'].eq(1), hop_columns]

print("=== Flag-Hop Events (Country Changed within 24 hrs) ===")
print(flag_hops)

=== Flag-Hop Events (Country Changed within 24 hrs) ===
Empty DataFrame
Columns: [MMSI, Timestamp, PrevCountry, Country, TimeDelta_hrs]
Index: []


In [10]:
# Sanity check: number of rows per MMSI, showing the five most frequent
df['MMSI'].value_counts().head(5)

MMSI
367593050    2
367102610    2
368024740    2
368083770    2
366811310    2
Name: count, dtype: int64