In [None]:
import pandas as pd
import main
from conversion import points_to_smooth_lines
from conversion_v2 import points_to_smooth_lines_fast
import plotly.graph_objects as go
from buffer import identify_matching

These are the imports required for running the algorithm and creating the map. If a user does not want to create the map, only the pandas and main modules are needed. The user also does not have to use any of these conversion modules if they have their own.

In [None]:
# Preprocessed ASTD dataset (the output of the original conditioning step)
astd_conditioned_csv = '/Users/willponczak/Desktop/QC2025/data/grouped-months/all_months_1day.csv'
df_astd_original = pd.read_csv(astd_conditioned_csv, sep=',')

# Feed the conditioned DataFrame to the main algorithm to obtain the track table
track_table = main.build_track_table(df_astd_original)

This is the preprocessed ASTD DataFrame. It is then run through the main algorithm, build_track_table. build_track_table outputs a table with the month, segment_id, and track_id.

In [None]:
# Raw monthly ASTD exports for July-December 2019 (semicolon-delimited, not preprocessed)
raw_astd_csv_paths = [
    '/Users/willponczak/Desktop/QC2025/data/astd/complete_data/2019/ASTD_area_level3_201907.csv',
    '/Users/willponczak/Desktop/QC2025/data/astd/complete_data/2019/ASTD_area_level3_201908.csv',
    '/Users/willponczak/Desktop/QC2025/data/astd/complete_data/2019/ASTD_area_level3_201909.csv',
    '/Users/willponczak/Desktop/QC2025/data/astd/complete_data/2019/ASTD_area_level3_201910.csv',
    '/Users/willponczak/Desktop/QC2025/data/astd/complete_data/2019/ASTD_area_level3_201911.csv',
    '/Users/willponczak/Desktop/QC2025/data/astd/complete_data/2019/ASTD_area_level3_201912.csv',
]

# Read the files in calendar order and bind each one to its month-specific name
df_jul_astd, df_aug_astd, df_sep_astd, df_oct_astd, df_nov_astd, df_dec_astd = (
    pd.read_csv(csv_path, sep=';') for csv_path in raw_astd_csv_paths
)

These are the original ASTD datasets. They have not been preprocessed. These are required to find the ships for creating a map.

In [None]:
# GFW DATA

import pandas as pd
import glob

def load_month_data(year: int, month: int, base_path: str) -> pd.DataFrame:
    """Load every daily GFW CSV of one month into a single DataFrame.

    Files are matched with the glob pattern
    ``{base_path}/mmsi-daily-csvs-10-v3-{year}-{month:02d}-*.csv``.

    Args:
        year: Year as it appears in the file names.
        month: Month number; zero-padded to two digits in the pattern.
        base_path: Directory that contains the daily CSV files.

    Returns:
        All matching files concatenated in file-name order with a fresh
        0..n-1 index, or an empty DataFrame (after printing a notice) when
        no file matches.
    """
    pattern = f"{base_path}/mmsi-daily-csvs-10-v3-{year}-{month:02d}-*.csv"
    # glob.glob returns matches in arbitrary (filesystem) order; sort them so
    # the concatenated row order is reproducible across runs and machines.
    files = sorted(glob.glob(pattern))
    if not files:
        print(f"No files found for {year}-{month:02d}")
        return pd.DataFrame()
    return pd.concat([pd.read_csv(file) for file in files], ignore_index=True)

# Directory holding the 2019 daily GFW fishing-effort CSVs
base_path = "/Users/willponczak/Desktop/QC2025/data/gfw/ais-apparent-fishing-effort/mmsi-daily-csvs-10-v3-2019"

# Load January through December in order, one DataFrame per month
(
    df_jan_gfw,
    df_feb_gfw,
    df_mar_gfw,
    df_apr_gfw,
    df_may_gfw,
    df_jun_gfw,
    df_jul_gfw,
    df_aug_gfw,
    df_sep_gfw,
    df_oct_gfw,
    df_nov_gfw,
    df_dec_gfw,
) = (load_month_data(2019, month_number, base_path) for month_number in range(1, 13))

This is the GFW data. It was only a part of our verification process, so a user does not need to import these datasets unless they want to.

In [None]:
# Select the rows of the track table that belong to the example track
is_example_track = track_table['track_id'].eq('track_3566')
track_info = track_table.loc[is_example_track]
track_info

This is the track being used in the example. track_info becomes a table with only rows containing track_3566 in the track_id column. I then print it out to see which months and segment_id I need to get from the ASTD data.

In [None]:
# Per-month rows for the ship behind track_3566. The shipid differs from month
# to month; rows containing any missing value are dropped.
df_jul_filtered_astd_3566 = df_jul_astd.loc[df_jul_astd['shipid'].eq(403)].dropna()
df_aug_filtered_astd_3566 = df_aug_astd.loc[df_aug_astd['shipid'].eq(626)].dropna()
df_sep_filtered_astd_3566 = df_sep_astd.loc[df_sep_astd['shipid'].eq(742)].dropna()
df_oct_filtered_astd_3566 = df_oct_astd.loc[df_oct_astd['shipid'].eq(229)].dropna()
df_nov_filtered_astd_3566 = df_nov_astd.loc[df_nov_astd['shipid'].eq(216)].dropna()
df_dec_filtered_astd_3566 = df_dec_astd.loc[df_dec_astd['shipid'].eq(193)].dropna()

# Month label -> filtered frame, in calendar order
monthly_frames = {
    'Jul': df_jul_filtered_astd_3566,
    'Aug': df_aug_filtered_astd_3566,
    'Sep': df_sep_filtered_astd_3566,
    'Oct': df_oct_filtered_astd_3566,
    'Nov': df_nov_filtered_astd_3566,
    'Dec': df_dec_filtered_astd_3566,
}

# Report how many rows each month contributes
for month_label, month_frame in monthly_frames.items():
    print(f"{month_label}: {len(month_frame)}")

# Stack the six months into one DataFrame with a fresh index
df_filtered = pd.concat(list(monthly_frames.values()), ignore_index=True)

In the first part of this, I filter each ASTD dataset so they only contain the specific ship that we want, and I assign it to a new variable. I then print how many rows of data each ship has. This step is not necessary for running the algorithm, or creating the map. It only helps the user get an idea of how much data they are working with. The last part of this cell is concatenating all of the filtered datasets into a single DataFrame that can be used to create a map.

In [None]:
# Stack the GFW months that overlap the track (Jul-Dec). Each monthly frame
# carries its own 0..n-1 index, so renumber the rows here; otherwise index
# labels repeat across months and any later index-based sort or lookup mixes
# rows from different months.
track_3566_gfw = pd.concat(
    [df_jul_gfw, df_aug_gfw, df_sep_gfw, df_oct_gfw, df_nov_gfw, df_dec_gfw],
    ignore_index=True,
)

Here, the GFW data for the corresponding months is also concatenated. This is a must for our verification process, but a user can completely skip this part.

In [None]:
# Search the GFW data for MMSI(s) whose positions match the filtered ASTD track.
# NOTE(review): the meaning and units of the third argument (1000) are not
# visible here — presumably a buffer/matching distance; confirm in buffer.py.
mmsi_list = identify_matching(df_filtered, track_3566_gfw, 1000)

This cell runs a function from another file to automatically search for mmsi(s) which match the identified track_id. It uses both the filtered ASTD data, and the GFW data. This step is not necessary to create a map solely based on the ASTD data. It is only required for the verification process.

In [None]:
# Keep only the GFW rows whose MMSI was flagged as a candidate match
is_candidate_mmsi = track_3566_gfw['mmsi'].isin(mmsi_list)
track_3566_df = track_3566_gfw.loc[is_candidate_mmsi]

# Run the candidate rows through the points-to-lines conversion
track_3566_specific = points_to_smooth_lines(track_3566_df)

This cell takes all of the potential matching mmsi(s) from the mmsi_list and then runs them through a point-to-lines algorithm so we can visually identify which mmsi is a match. This is not a necessary step for creating a map with only ASTD data. It is only necessary for the verification process.

In [None]:
# Create the map figure
fig = go.Figure()

# ASTD positions (precise coordinates): one marker trace per shipid, with the
# points ordered by timestamp.
for ship_id, ship_rows in df_filtered.groupby('shipid', sort=False):
    ship_data = ship_rows.sort_values('date_time_utc')
    fig.add_trace(go.Scattermapbox(
        lat=ship_data['latitude'],
        lon=ship_data['longitude'],
        mode='markers',
        name=f'ASTD - {ship_id}',
        line=dict(width=2),
        customdata=ship_data[["date_time_utc", "shipid"]].values,
        hovertemplate="date: %{customdata[0]} id:%{customdata[1]}",
    ))

# GFW candidate tracks (grid-cell coordinates): one line trace per MMSI.
for mmsi, mmsi_rows in track_3566_df.groupby('mmsi', sort=False):
    # Order the points by date rather than by index: the index of the
    # concatenated monthly frames is not guaranteed to be unique or
    # chronological, so sorting on it can interleave months and make the line
    # zigzag. A stable sort keeps the existing row order within a single day.
    # NOTE(review): assumes 'date' sorts chronologically (e.g. ISO
    # YYYY-MM-DD strings or datetimes) — confirm against the GFW files.
    mmsi_data = mmsi_rows.sort_values('date', kind='stable')
    fig.add_trace(go.Scattermapbox(
        lat=mmsi_data['cell_ll_lat'],
        lon=mmsi_data['cell_ll_lon'],
        mode='lines',
        name=f'GFW - {mmsi}',
        line=dict(width=2),
        customdata=mmsi_data[["date", "mmsi"]].values,
        hovertemplate="date: %{customdata[0]} id:%{customdata[1]}",
    ))

# Update layout
fig.update_layout(
    mapbox_style="open-street-map",
    height=600,
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    legend=dict(
        x=1,              # Horizontal position (0 = far left, 1 = far right)
        y=0.5,            # Vertical position (0 = bottom, 1 = top); 0.5 = middle
        xanchor='right',  # x refers to the legend box's right edge
        yanchor='bottom'  # y refers to the legend box's bottom edge
    )
)

fig.show()

This is where the map is created. The first block takes the filtered ASTD data and shows that on the map, which shows a visualization of the ship track identified by the main algorithm. The second block is for GFW data, and is only required for the verification process. A user could completely delete this block of code if they were not using the GFW dataset. The last block is for configuration of the map. This can be edited and played around with by the user.