## Imports

In [6]:
from typing import List, Dict
import numpy as np
from numpy.linalg import inv
import pandas as pd
import glob
from dataclasses import dataclass
import plotly.express as px
import plotly.graph_objects as go
import re

## Functions

In [5]:
@dataclass
class AISColumnNames:
    """Names of the DataFrame columns used for AIS trajectory data.

    Every field is a plain string holding a column label, so code can refer
    to ``cols.Latitude`` instead of repeating the literal ``"Latitude"``.
    """

    # Labels of the timestamp, position and motion columns.
    Date: str = "Date"
    Sampled_Date: str = "Sampled_Date"
    Latitude: str = "Latitude"
    Longitude: str = "Longitude"
    Pseudo_Longitude: str = "Pseudo_Longitude"
    SOG: str = "SOG"
    COG: str = "COG"
    Heading: str = "Heading"

    # Labels of the "norm"-prefixed counterparts
    # (presumably the normalized versions of the columns above -- TODO confirm).
    n_Latitude: str = "norm Latitude"
    n_Longitude: str = "norm Longitude"
    n_SOG: str = "norm SOG"
    n_COG: str = "norm COG"
    n_Heading: str = "norm Heading"

    # Labels of the flag columns.
    is_synthetic: str = "is_synthetic"
    to_predict: str = "to_predict"

# Shared instance through which the functions below look up column labels.
cols: AISColumnNames = AISColumnNames()
# Sampling period, kept in three equivalent forms: minutes as an int, a
# pandas frequency string ("10min"), and a Timedelta for timestamp arithmetic.
target_freq_in_minutes: int = 10
target_freq: str = f"{target_freq_in_minutes}min"
sample_T: pd.Timedelta = pd.Timedelta(minutes=target_freq_in_minutes)


def get_trajectory_sequences(
    trajectory_sampled: pd.DataFrame, time_column_name=None
) -> List[pd.DataFrame]:
    """Split a sampled trajectory into runs of evenly spaced rows.

    Two neighbouring rows belong to the same run when their timestamps differ
    by exactly ``sample_T``. A row with no such neighbour on either side is
    dropped, so every returned frame holds at least two rows.

    Rows are addressed by position, not by index label, so the frame may
    carry any index (for example one left over from filtering).

    Args:
        trajectory_sampled: Trajectory rows, compared in the order given.
        time_column_name: Name of the timestamp column. Defaults to
            ``cols.Sampled_Date``.

    Returns:
        The runs in their original order, each as a DataFrame with a fresh
        ``0..n-1`` index. Empty when the input has fewer than two rows or
        no pair of neighbouring rows is exactly ``sample_T`` apart.
    """
    if time_column_name is None:
        time_column_name = cols.Sampled_Date

    n_rows = len(trajectory_sampled)
    if n_rows < 2:
        return []

    # follows_previous[k] is True when row k comes exactly one sampling
    # period after row k - 1. np.array(...) makes a writable copy.
    gaps = trajectory_sampled[time_column_name].diff()
    follows_previous = np.array(gaps == sample_T, dtype=bool)
    follows_previous[0] = False  # the first row has no predecessor

    # Every row that does not continue its predecessor opens a new run;
    # a run ends where the next one starts (or at the end of the frame).
    run_starts = np.flatnonzero(~follows_previous)
    run_ends = np.append(run_starts[1:], n_rows)

    return [
        trajectory_sampled.iloc[start:end].reset_index(drop=True)
        for start, end in zip(run_starts, run_ends)
        if end - start >= 2  # a lone row is not a sequence
    ]


def restore_missing_timestamps(df, freq='10min', interpolation_method='linear', noise_level=0.0):
    """Put a trajectory on a regular time grid and interpolate the gaps.

    The frame is reindexed on ``cols.Sampled_Date`` to every timestamp from
    its earliest to its latest value in steps of ``freq``. Numeric columns
    are then interpolated; non-numeric columns stay NaN on inserted rows.
    The input frame is not modified.

    Args:
        df: Trajectory with a ``cols.Sampled_Date`` column convertible by
            ``pd.to_datetime``.
        freq: pandas frequency string of the target grid.
        interpolation_method: Passed to ``DataFrame.interpolate(method=...)``.
        noise_level: Standard deviation of the random-walk steps added to the
            interpolated ``cols.Longitude`` / ``cols.Latitude`` values. ``0``
            (the default) disables the noise.

    Returns:
        A new DataFrame with ``cols.Sampled_Date`` as its first column and
        one row per grid timestamp. An empty input yields an empty copy.
    """
    # Work on a copy so the caller's frame does not get its time column
    # overwritten as a side effect.
    df = df.copy()
    df[cols.Sampled_Date] = pd.to_datetime(df[cols.Sampled_Date])

    # min()/max() of an empty index are NaT, which date_range rejects.
    if len(df) == 0:
        return df

    df = df.set_index(cols.Sampled_Date)

    full_range = pd.date_range(
        start=df.index.min(),
        end=df.index.max(),
        freq=freq
    )

    # Timestamps absent from the input come back as all-NaN rows. Naming the
    # index first makes reset_index() emit the time column under its real name.
    df = df.reindex(full_range).rename_axis(cols.Sampled_Date).reset_index()

    numeric_cols = df.select_dtypes(include=[np.number]).columns

    # Remember which cells held data before interpolation: these are the
    # anchors between which noise may be added.
    original_mask = df[numeric_cols].notna()

    df[numeric_cols] = df[numeric_cols].interpolate(method=interpolation_method)

    if noise_level > 0:
        # Local generator: same stream as np.random.seed(42) followed by
        # np.random.normal, but the global NumPy RNG state is left untouched.
        rng = np.random.RandomState(42)

        for col in [cols.Longitude, cols.Latitude]:
            anchor_indices = np.flatnonzero(original_mask[col].to_numpy())

            # Walk over each gap between two consecutive anchors.
            for start_idx, end_idx in zip(anchor_indices[:-1], anchor_indices[1:]):
                segment_length = end_idx - start_idx - 1
                if segment_length <= 0:
                    continue

                # Random walk: cumulative sum of independent normal steps.
                # It is not pulled back to zero at the closing anchor.
                steps = rng.normal(scale=noise_level, size=segment_length)
                # .loc slicing is label-based and inclusive; after
                # reset_index() the labels equal the row positions.
                df.loc[start_idx + 1 : end_idx - 1, col] += np.cumsum(steps)

    return df

# Example usage:
# df = pd.read_csv('your_data.csv')  # Load your data
# df = restore_missing_timestamps(df)
# filtered_df = apply_aekf_to_dataframe(df)


def plot_plotly_trajectory(dfs: List[pd.DataFrame],
                           color_sequence=None,
                           line_width=2,
                           marker_size=4):
    """Draw trajectory segments as lines with markers on a Mapbox map.

    All frames are concatenated and tagged with a ``segment_id`` column
    (their position in ``dfs``); each segment becomes its own line.

    Args:
        dfs: Segments to plot. Each frame needs the columns ``Latitude``,
            ``Longitude``, ``Sampled_Date``, ``SOG``, ``COG`` and ``MMSI``.
        color_sequence: Colors assigned to the segments. ``None`` draws every
            segment in blue.
        line_width: Line width of the traces.
        marker_size: Marker size of the traces.

    Returns:
        The Plotly figure. The legend is hidden.

    Raises:
        ValueError: If ``dfs`` is empty.
    """
    if not dfs:
        raise ValueError("Empty list of DataFrames provided")

    # Blue for every segment is the long-standing default look; a
    # caller-supplied sequence is now actually used instead of being ignored.
    if color_sequence is None:
        color_sequence = ["blue"]

    # Combine all segments with a segment ID
    combined_df = pd.concat(
        [df.assign(segment_id=i) for i, df in enumerate(dfs)],
        ignore_index=True
    )

    fig = px.line_mapbox(
        combined_df,
        lat="Latitude",
        lon="Longitude",
        color="segment_id",
        color_discrete_sequence=color_sequence,
        hover_name="Sampled_Date",
        hover_data=["SOG", "COG", "MMSI"],
        zoom=10,
        height=600,
        title="Vessel Trajectory Segments"
    )

    # Show the sample points on top of the connecting lines
    fig.update_traces(
        mode="lines+markers",
        line=dict(width=line_width),
        marker=dict(size=marker_size)
    )

    fig.update_layout(
        mapbox_style="carto-positron",
        margin={"r":0,"t":40,"l":0,"b":0},
        showlegend=False,
        legend_title_text="Trajectory Segment"
    )

    return fig

def plot_plotly_trajectory_groups(df_groups: List[List[pd.DataFrame]],
                         group_names,
                         color_sequence=None,
                         line_width=2,
                         marker_size=4):
    """Draw several groups of trajectory segments on one Mapbox map.

    Every group gets one color (cycling through ``color_sequence``) and one
    legend entry; all segments of a group share that entry. Empty frames are
    skipped. The map is centred on the midpoint of the bounding box of all
    plotted points.

    Args:
        df_groups: One list of segment frames per group. Each non-empty frame
            needs the columns ``Latitude``, ``Longitude``, ``Sampled_Date``,
            ``SOG``, ``COG`` and ``MMSI``.
        group_names: Legend label per group, indexed like ``df_groups``.
        color_sequence: Colors for the groups. Defaults to
            ``px.colors.qualitative.Plotly``.
        line_width: Line width of the traces.
        marker_size: Marker size of the traces.

    Returns:
        The Plotly figure.

    Raises:
        ValueError: If ``df_groups`` is empty.
    """
    if not df_groups:
        raise ValueError("Empty list of DataFrame groups provided")

    if color_sequence is None:
        color_sequence = px.colors.qualitative.Plotly

    # Create empty figure with proper mapbox setup
    fig = px.scatter_mapbox(lat=[None], lon=[None]).update_layout(
        mapbox_style="open-street-map",
        mapbox_zoom=8,
        height=600
    )

    plotted_frames = []  # every frame that produced a trace, for centring

    for group_id, df_group in enumerate(df_groups):
        group_color = color_sequence[group_id % len(color_sequence)]
        # The legend entry goes on the first segment that is actually drawn,
        # so a group whose leading segment is empty still appears in the legend.
        legend_pending = True

        for df in df_group:
            if len(df) == 0:
                continue  # Skip empty dataframes

            group_label = f"{group_names[group_id]}"

            # Add line trace for this segment
            fig.add_trace(
                px.line_mapbox(
                    df,
                    lat="Latitude",
                    lon="Longitude",
                    color_discrete_sequence=[group_color]
                ).data[0].update(
                    mode="lines+markers",
                    line=dict(width=line_width),
                    marker=dict(size=marker_size),
                    name=group_label,
                    showlegend=legend_pending,
                    legendgroup=group_label,
                    hoverinfo="text",
                    customdata=df[["Sampled_Date", "SOG", "COG", "MMSI"]],
                    hovertemplate=(
                        "Latitude: %{lat}<br>"
                        "Longitude: %{lon}<br>"
                        "Date: %{customdata[0]}<br>"
                        "SOG: %{customdata[1]}<br>"
                        "COG: %{customdata[2]}<br>"
                        "MMSI: %{customdata[3]}<extra></extra>"
                    )
                )
            )
            legend_pending = False
            plotted_frames.append(df)

    fig.update_layout(
        margin={"r":0,"t":40,"l":0,"b":0},
        showlegend=True,
        legend_title_text="Trajectory Groups",
        title="Vessel Trajectory"
    )

    # Centre on all plotted data rather than on the first frame only, which
    # may be empty or unrepresentative of the other groups.
    if plotted_frames:
        latitudes = pd.concat([frame["Latitude"] for frame in plotted_frames])
        longitudes = pd.concat([frame["Longitude"] for frame in plotted_frames])
        fig.update_mapboxes(
            center=dict(
                lat=(latitudes.min() + latitudes.max()) / 2,
                lon=(longitudes.min() + longitudes.max()) / 2
            )
        )

    return fig

## Plots

In [4]:
def trajectory_length_from_path(path: str) -> int:
    """Return the length encoded in a ``len_<N>_mmsi_<M>`` file path.

    Returns -1 when the path does not contain that pattern, so such files
    sort after every real trajectory instead of aborting the sort.
    """
    match = re.search(r'len_(\d+)_mmsi_(\d+)', path)
    return int(match.group(1)) if match else -1


# Trajectories keyed by MMSI; filled in by the loading cell.
all_boats_trajectories = {}
dataset_path = "../../data/FishingKoreaAIS_sampled_new/*.csv"
dynamic_data_files = glob.glob(dataset_path)
# Longest trajectories first.
dynamic_data_files.sort(key=trajectory_length_from_path, reverse=True)


In [None]:

# Load the first ten files of the (already sorted) list. A file is kept only
# if its name carries the len_<N>_mmsi_<M> pattern and N is at least 20.
# NOTE(review): entries are keyed by MMSI alone, so a second file for the same
# MMSI replaces the first -- confirm this is intended.
trajectory_name_pattern = re.compile(r'len_(\d+)_mmsi_(\d+)')

for file in dynamic_data_files[:10]:
    len_match = trajectory_name_pattern.search(file)
    if not len_match:
        continue  # file name does not follow the expected pattern

    length = int(len_match.group(1))
    mmsi = len_match.group(2)
    if length < 20:
        continue  # trajectory too short to be worth plotting

    df = pd.read_csv(file)
    all_boats_trajectories[mmsi] = {'length': length, 'dataframe': df}

# Start from an empty Mapbox figure; one trace per boat is appended below.
fig = px.scatter_mapbox(lat=[None], lon=[None])
fig.update_layout(
    mapbox_style="open-street-map",
    mapbox_zoom=8,
    height=600,
)

# Bounding box of all plotted points. The start values lie outside any real
# coordinate, so the first trajectory replaces them.
min_lat, max_lat = 360, -360
min_lon, max_lon = 360, -360

# One color per boat, cycling through the palette.
color_sequence = px.colors.qualitative.Plotly

# Hover text shared by every trace; customdata holds timestamp, SOG and COG.
hover_template = (
    "Latitude: %{lat:.6f}<br>"
    "Longitude: %{lon:.6f}<br>"
    "Date: %{customdata[0]}<br>"
    "SOG: %{customdata[1]}<br>"
    "COG: %{customdata[2]}<br>"
)

for idx, (mmsi, data) in enumerate(all_boats_trajectories.items()):
    df = data['dataframe']
    group_color = color_sequence[idx % len(color_sequence)]

    latitudes = df['latitude']
    longitudes = df['longitude']

    # Grow the bounding box with this boat's extent.
    min_lat = min(min_lat, latitudes.min())
    max_lat = max(max_lat, latitudes.max())
    min_lon = min(min_lon, longitudes.min())
    max_lon = max(max_lon, longitudes.max())

    boat_trace = go.Scattermapbox(
        lat=latitudes,
        lon=longitudes,
        mode='lines+markers',
        line=dict(width=2),
        marker=dict(size=4),
        name=f"MMSI: {mmsi} (Len: {data['length']})",
        legendgroup=f"MMSI_{mmsi}",
        hoverinfo="text",
        customdata=df[['sampled_timestamp', 'sog', 'cog']],
        hovertemplate=hover_template,
        line_color=group_color,
    )
    fig.add_trace(boat_trace)

# Centre the map on the midpoint of the bounding box and apply final styling.
map_center = dict(
    lat=(min_lat + max_lat) / 2,
    lon=(min_lon + max_lon) / 2,
)
fig.update_layout(
    margin={"r":0, "t":40, "l":0, "b":0},
    showlegend=True,
    legend_title_text="Boat Trajectories",
    title="Vessel Trajectories ",
    mapbox=dict(
        style="open-street-map",
        center=map_center,
        zoom=7,
    ),
)

# Export as a static image and as an interactive page.
# fig.show()
vesrion = 1  # (sic) name kept unchanged in case other cells refer to it
fig.write_image(f"../../results/sampled_trajectoryies_Jun9/All_trajectories_v{vesrion}.png")
fig.write_html(f"../../results/sampled_trajectoryies_Jun9/All_trajectories_v{vesrion}.html")