In [1]:
from typing import List, Dict
import numpy as np
from numpy.linalg import inv
import pandas as pd
import glob
from dataclasses import dataclass
import folium
import plotly.express as px

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

import pydeck as pdk





In [2]:
all_boats_trajectories = {}
dataset_path = "../../data/FishingKoreaAIS/Dynamic_*.csv"
dynamic_data_files = glob.glob(dataset_path)

for dynamic_data_file in dynamic_data_files:
    print(f"Reading {dynamic_data_file}...")
    df_dynamic = pd.read_csv(dynamic_data_file)
    data_grouped = df_dynamic.groupby("MMSI")
    for mmsi, data in data_grouped:
        if mmsi not in all_boats_trajectories:
            all_boats_trajectories[mmsi] = (
                data.copy()
            )  # Create a copy to avoid SettingWithCopyWarning
        else:
            all_boats_trajectories[mmsi] = pd.concat(
                [all_boats_trajectories[mmsi], data], ignore_index=True
            )
    print("Done!")

Reading ../../data/FishingKoreaAIS/Dynamic_20230502_fishing_boats.csv...
Done!
Reading ../../data/FishingKoreaAIS/Dynamic_20230501_fishing_boats.csv...
Done!


In [17]:
len(all_boats_trajectories)

663

In [3]:
@dataclass
class AISColumnNames:
    Date: str = "Date"
    Sampled_Date: str = "Sampled_Date"
    Latitude: str = "Latitude"
    Longitude: str = "Longitude"
    Pseudo_Longitude: str = "Pseudo_Longitude"
    SOG: str = "SOG"
    COG: str = "COG"
    Heading: str = "Heading"

    n_Latitude: str = "norm Latitude"
    n_Longitude: str = "norm Longitude"
    n_SOG: str = "norm SOG"
    n_COG: str = "norm COG"
    n_Heading: str = "norm Heading"

    is_synthetic: str = "is_synthetic"
    to_predict: str = "to_predict"


In [4]:
cols: AISColumnNames = AISColumnNames()
target_freq_in_minutes = 10
target_freq: str = f"{target_freq_in_minutes}min"
sample_T: pd.Timedelta = pd.Timedelta(minutes=target_freq_in_minutes)

def get_sampled_trajectory(trajectory: pd.DataFrame) -> pd.DataFrame:
        trajectory[cols.Date] = pd.to_datetime(trajectory[cols.Date])
        trajectory = trajectory.set_index(cols.Date)
        trajectory = trajectory.sort_index()

        # add first and last steps of trajectory which are divisible by 10 minutes
        first = trajectory.iloc[:1].copy()
        first.index = [trajectory.index.min().floor(target_freq)]
        last = trajectory.iloc[-1:].copy()
        last.index = [trajectory.index.max().ceil(target_freq)]
        trajectory = pd.concat([first, trajectory, last])

        # Define exact 10-minute sampling times
        start_time = trajectory.index.min().floor("h")  # Round down to the nearest hour
        end_time = trajectory.index.max().ceil("h")  # Round up to the nearest hour
        sampling_times = pd.date_range(start_time, end_time, freq=target_freq)

        # Filter only timestamps where at least one real record exists within ±10 minutes
        valid_sampling_times = [
            t
            for t in sampling_times
            if any(abs(trajectory.index - t) <= sample_T)
        ]

        trajectory = trajectory[~trajectory.index.duplicated(keep="first")]
        trajectory_interpolated = trajectory.reindex(
            trajectory.index.union(valid_sampling_times)
        ).sort_index()

        # Perform linear interpolation
        trajectory_interpolated = trajectory_interpolated.interpolate(method="time")

        # Keep only the sampled timestamps and drop any remaining NaNs
        trajectory_sampled = (
            trajectory_interpolated.loc[valid_sampling_times].dropna().reset_index()
        )
        trajectory_sampled.rename(
            columns={"index": cols.Sampled_Date}, inplace=True
        )
        return trajectory_sampled

In [5]:
list(all_boats_trajectories.values())[0]

Unnamed: 0,MMSI,Date,Latitude,Longitude,SOG,COG,Heading
0,41215086,2023-05-02 01:31:43,36.729807,123.20306,1.1,32.1,32
1,41215086,2023-05-01 22:53:52,36.700318,123.18686,4.8,274.9,274


In [30]:
# sample_traj = get_sampled_trajectory(list(all_boats_trajectories.values())[1])
# sample_traj

In [31]:
def get_trajectory_sequences(trajectory_sampled: pd.DataFrame, time_column_name=None
    ) -> List[pd.DataFrame]:
        if time_column_name is None:
            time_column_name = cols.Sampled_Date
        trajectory_sequences: List[pd.DataFrame] = []  # To store the sequences
        current_sequence = pd.DataFrame(
            columns=trajectory_sampled.columns
        )  # DF To track the current sequence

        # Iterate through the timestamps
        for i in range(len(trajectory_sampled) - 1):
            if (
                trajectory_sampled[time_column_name][i + 1]
                - trajectory_sampled[time_column_name][i]
                == sample_T
            ):
                # If the difference is 10 minutes, add the current timestamp to the sequence
                if len(current_sequence) == 0:
                    current_sequence = trajectory_sampled.iloc[
                        [i]
                    ]  # Add the first timestamp of the sequence
                current_sequence = pd.concat(
                    [current_sequence, trajectory_sampled.iloc[[i + 1]]],
                    ignore_index=True,
                )  # Add the next timestamp
            else:
                # If the difference is not 10 minutes, end the current sequence
                if len(current_sequence) != 0:
                    trajectory_sequences.append(
                        current_sequence
                    )  # Store the completed sequence
                    current_sequence = pd.DataFrame(
                        columns=trajectory_sampled.columns
                    )  # Reset the current sequence

        # Handle the last sequence if it ends at the last timestamp
        if len(current_sequence) != 0:
            trajectory_sequences.append(current_sequence)

        return trajectory_sequences

In [69]:
class AdaptiveExtendedKalmanFilter:
    def __init__(self, initial_state, initial_covariance, process_noise, measurement_noise, alpha=0.7):
        """
        Initialize the Adaptive Extended Kalman Filter.
        
        Parameters:
        - initial_state: Initial state vector [lat, lon, sog, cog]
        - initial_covariance: Initial covariance matrix
        - process_noise: Process noise covariance matrix
        - measurement_noise: Measurement noise covariance matrix
        """
        self.state = initial_state
        self.covariance = initial_covariance
        self.Q = process_noise  # Process noise covariance
        self.R = measurement_noise  # Measurement noise covariance
        self.innovation_history = []
        self.window_size = 5  # Window size for adaptive estimation
        self.alpha = alpha  # Forgetting factor
        
    def predict(self, dt):
        """
        Prediction step of the AEKF.
        
        Parameters:
        - dt: Time step in hours (since coordinates are in degrees)
        """
        # State transition matrix (simple constant velocity model)
        F = np.eye(4)
        F[0, 2] = dt * np.cos(np.radians(self.state[3])) / 60  # latitude change from SOG/COG
        F[1, 2] = dt * np.sin(np.radians(self.state[3])) / 60  # longitude change from SOG/COG
        
        # Predict state
        self.state = F @ self.state
        
        # Predict covariance
        self.covariance = F @ self.covariance @ F.T + self.Q
        
        return self.state
    
    def update(self, measurement):
        """
        Update step of the AEKF with adaptive noise estimation.
        
        Parameters:
        - measurement: [lat, lon, sog, cog]
        """
        # Measurement matrix (we directly observe all states)
        H = np.eye(4)
        
        # Calculate innovation
        innovation = measurement - H @ self.state
        self.innovation_history.append(innovation)
        
        # Keep only the most recent innovations
        if len(self.innovation_history) > self.window_size:
            self.innovation_history.pop(0)
        
        # Adaptive estimation of measurement noise
        if len(self.innovation_history) >= 2:
            innovation_cov = np.cov(np.array(self.innovation_history).T)
            self.R = self.alpha * innovation_cov + (1 - self.alpha) * self.R
        
        # Kalman gain
        S = H @ self.covariance @ H.T + self.R
        K = self.covariance @ H.T @ inv(S)
        
        # Update state
        self.state = self.state + K @ innovation
        
        # Update covariance
        self.covariance = (np.eye(4) - K @ H) @ self.covariance
        
        return self.state
    
    def get_state(self):
        """Return the current state estimate."""
        return self.state


def AEKF_traj(df, alpha = 0.7):
    """
    Apply Adaptive Extended Kalman Filter to the vessel tracking dataframe.
    
    Parameters:
    - df: Pandas DataFrame with columns ['Sampled_Date', 'MMSI', 'Latitude', 'Longitude', 'SOG', 'COG', 'Heading']
    
    Returns:
    - DataFrame with filtered positions and additional AEKF output
    """
    # Sort by MMSI and timestamp
    df = df.sort_values(['MMSI', 'Sampled_Date'])
    
    # Convert datetime to seconds for delta time calculation
    df['time_seconds'] = pd.to_datetime(df['Sampled_Date']).astype('int64') // 10**9
    
    # Initialize output columns
    df['filtered_lat'] = np.nan
    df['filtered_lon'] = np.nan
    df['filtered_sog'] = np.nan
    df['filtered_cog'] = np.nan
    
    # Group by vessel (MMSI)
    for mmsi, group in df.groupby('MMSI'):
        if len(group) < 2:
            continue  # Need at least 2 points for filtering
            
        # Initialize AEKF with first measurement
        initial_state = np.array([
            group.iloc[0]['Latitude'],
            group.iloc[0]['Longitude'],
            group.iloc[0]['SOG'],
            group.iloc[0]['COG']
        ])
        
        # Initial covariance (tune these based on your application)
        initial_covariance = np.diag([1e-4, 1e-4, 0.1, 1.0])
        
        # Process noise covariance (tune these)
        process_noise = np.diag([1e-6, 1e-6, 0.01, 0.1])
        
        # Measurement noise covariance (tune these)
        measurement_noise = np.diag([1e-5, 1e-5, 0.1, 1.0])
        
        aekf = AdaptiveExtendedKalmanFilter(
            initial_state, initial_covariance, process_noise, measurement_noise, alpha
        )
        
        # Store first filtered values (same as measurement)
        # df.loc[group.index[0], 'filtered_lat'] = initial_state[0]
        # df.loc[group.index[0], 'filtered_lon'] = initial_state[1]
        # df.loc[group.index[0], 'filtered_sog'] = initial_state[2]
        # df.loc[group.index[0], 'filtered_cog'] = initial_state[3]
        
        # Iterate through remaining points
        for i in range(1, len(group)):
            prev_time = group.iloc[i-1]['time_seconds']
            curr_time = group.iloc[i]['time_seconds']
            dt = (curr_time - prev_time) / 3600  # hours
            
            # Prediction step
            aekf.predict(dt)
            
            # Update step with current measurement
            measurement = np.array([
                group.iloc[i]['Latitude'],
                group.iloc[i]['Longitude'],
                group.iloc[i]['SOG'],
                group.iloc[i]['COG']
            ])
            
            filtered_state = aekf.update(measurement)
            
            # Store filtered values
            df.loc[group.index[i], cols.Latitude] = filtered_state[0]
            df.loc[group.index[i], cols.Longitude] = filtered_state[1]
            df.loc[group.index[i], cols.SOG] = filtered_state[2]
            df.loc[group.index[i], cols.COG] = filtered_state[3]
    
    return df


# Example usage:
# df = pd.read_csv('your_data.csv')  # Load your data
# filtered_df = apply_aekf_to_dataframe(df)

In [None]:
def restore_missing_timestamps(df, freq='10T', interpolation_method='linear', noise_level=0.0):
    np.random.seed(42)
    
    # Make sure the time column is in datetime format
    df[cols.Sampled_Date] = pd.to_datetime(df[cols.Sampled_Date])
    
    # Set the timestamp as index
    df = df.set_index(cols.Sampled_Date)

    full_range = pd.date_range(
        start=df.index.min(),
        end=df.index.max(),
        freq=freq
    )
        
    # Reindex to the complete time range
    df = df.reindex(full_range)
        
    # Reset index to make Sampled_Date a column again
    df = df.reset_index().rename(columns={'index': cols.Sampled_Date})
            
    # Interpolate numeric columns
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    
    # Store original non-null values to only add noise to interpolated points
    original_mask = df[numeric_cols].notna()
    
    # Perform interpolation
    df[numeric_cols] = df[numeric_cols].interpolate(method=interpolation_method)
    
    # Add noise (either independent or random walk)
    if noise_level > 0:
        # Apply random walk noise between known points
        for col in [cols.Longitude, cols.Latitude]:
            # Find where original data exists (anchor points)
            anchors = original_mask[col]
            anchor_indices = np.where(anchors)[0]
            
            # Iterate through each segment between anchors
            for i in range(len(anchor_indices) - 1):
                start_idx = anchor_indices[i]
                end_idx = anchor_indices[i + 1]
                segment_length = end_idx - start_idx - 1
                
                if segment_length > 0:
                    # Generate random steps (Brownian motion)
                    steps = np.random.normal(
                        scale=noise_level, 
                        size=segment_length
                    )
                    # Accumulate noise (cumulative sum)
                    noise = np.cumsum(steps)
                    # Apply noise to the interpolated segment
                    df.loc[start_idx + 1 : end_idx - 1, col] += noise
        
    return df

# Example usage:
# df = restore_missing_timestamps(your_dataframe)

In [62]:
def plot_plotly_trajectory_groups(df_groups: List[List[pd.DataFrame]],
                         group_names, 
                         color_sequence=None,
                         line_width=2,
                         marker_size=4):
    if not df_groups:
        raise ValueError("Empty list of DataFrame groups provided")
    
    if color_sequence is None:
        color_sequence = px.colors.qualitative.Plotly
    
    # Create empty figure with proper mapbox setup
    fig = px.scatter_mapbox(lat=[None], lon=[None]).update_layout(
        mapbox_style="open-street-map",
        mapbox_zoom=8,
        height=600
    )
    
    for group_id, df_group in enumerate(df_groups):
        group_color = color_sequence[group_id % len(color_sequence)]
        
        for segment_id, df in enumerate(df_group):
            if len(df) == 0:
                continue  # Skip empty dataframes
                
            # Add line trace for this segment
            fig.add_trace(
                px.line_mapbox(
                    df,
                    lat="Latitude",
                    lon="Longitude",
                    color_discrete_sequence=[group_color]
                ).data[0].update(
                    mode="lines+markers",
                    line=dict(width=line_width),
                    marker=dict(size=marker_size),
                    name=f"{group_names[group_id]}",
                    showlegend=(segment_id == 0),  # Only show legend for first segment
                    legendgroup=f"{group_names[group_id]}",
                    hoverinfo="text",
                    customdata=df[["Sampled_Date", "SOG", "COG", "MMSI"]],
                    hovertemplate=(
                        "Latitude: %{lat}<br>"
                        "Longitude: %{lon}<br>"
                        "Date: %{customdata[0]}<br>"
                        "SOG: %{customdata[1]}<br>"
                        "COG: %{customdata[2]}<br>"
                        "MMSI: %{customdata[3]}<extra></extra>"
                    )
                )
            )
    
    fig.update_layout(
        margin={"r":0,"t":40,"l":0,"b":0},
        showlegend=True,
        legend_title_text="Trajectory Groups",
        title="Vessel Trajectory"
    )
    
    # Auto-zoom to the data
    if len(df_groups) > 0 and len(df_groups[0]) > 0:
        first_df = df_groups[0][0]
        fig.update_mapboxes(
            center=dict(
                lat=first_df["Latitude"].mean(),
                lon=first_df["Longitude"].mean()
            )
        )
    
    return fig

In [49]:
all_boats_trajectories_list = list(all_boats_trajectories.values())

In [70]:
count = 0
for i, traj in enumerate(all_boats_trajectories_list):
    print(f"{i}, {100.0*i/len(all_boats_trajectories_list)}%")
    sample_traj = get_sampled_trajectory(traj)
    if len(sample_traj) < 40 or len(sample_traj) > 100:
        continue
    sample_traj_sequences = get_trajectory_sequences(sample_traj)
    
    png = True
    for alpha in [0.2, 0.5, 0.8, 1.0]:
        # AEKF_sample_traj = AEKF_traj(sample_traj, alpha)
        # AEKF_sample_traj_sequences = get_trajectory_sequences(AEKF_sample_traj)

        ### STABLE DIFFUSION
        interpolated_sample_traj = restore_missing_timestamps(sample_traj, noise_level=0.0012)
        interpolated_sample_traj_sequences = get_trajectory_sequences(interpolated_sample_traj)
        AEKF_interpolated_sample_traj= AEKF_traj(interpolated_sample_traj, alpha)
        AEKF_interpolated_sample_traj_sequences = get_trajectory_sequences(AEKF_interpolated_sample_traj)


        fig = plot_plotly_trajectory_groups([AEKF_interpolated_sample_traj_sequences, sample_traj_sequences], group_names=["Stable Diffusion + AEKF", "Initial trajectory"])
        fig.write_image(f"results/png_{i}_sd_traj_alpha_{alpha}.png")
        fig.write_html(f"results/html_{i}_sd_traj_alpha_{alpha}.html")
        
        ### JUST AEKF
        interpolated_sample_traj = restore_missing_timestamps(sample_traj, noise_level=0.0000)
        interpolated_sample_traj_sequences = get_trajectory_sequences(interpolated_sample_traj)
        AEKF_interpolated_sample_traj= AEKF_traj(interpolated_sample_traj, alpha)
        AEKF_interpolated_sample_traj_sequences = get_trajectory_sequences(AEKF_interpolated_sample_traj)

        fig = plot_plotly_trajectory_groups([AEKF_interpolated_sample_traj_sequences, sample_traj_sequences], group_names=["Interpolated + AEKF", "Initial trajectory"])
        fig.write_image(f"results/png_{i}_interpolated_traj_alpha_{alpha}.png")
        fig.write_html(f"results/html_{i}_interpolated_traj_alpha_{alpha}.html")
    # break
    count += 1
    if count > 10:
        break

0, 0.0%
1, 0.15082956259426847%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



2, 0.30165912518853694%
3, 0.45248868778280543%
4, 0.6033182503770739%
5, 0.7541478129713424%
6, 0.9049773755656109%
7, 1.0558069381598794%
8, 1.2066365007541477%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



9, 1.3574660633484164%
10, 1.5082956259426847%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



11, 1.6591251885369533%
12, 1.8099547511312217%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



13, 1.9607843137254901%
14, 2.1116138763197587%
15, 2.262443438914027%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



16, 2.4132730015082955%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



17, 2.5641025641025643%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



18, 2.7149321266968327%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



19, 2.865761689291101%
20, 3.0165912518853695%
21, 3.167420814479638%
22, 3.3182503770739067%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



23, 3.469079939668175%
24, 3.6199095022624435%
25, 3.770739064856712%
26, 3.9215686274509802%
27, 4.072398190045249%
28, 4.223227752639517%
29, 4.374057315233785%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



30, 4.524886877828054%
31, 4.675716440422323%
32, 4.826546003016591%



'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.


'T' is deprecated and will be removed in a future version, please use 'min' instead.



In [9]:
# sample_traj_sequences[1]

In [None]:
# def plot_folium_trajectory(dfs: List[pd.DataFrame]):    
#     # Create map centered on first point
#     m = folium.Map(location=[dfs[0].iloc[0]['Latitude'], dfs[0].iloc[0]['Longitude']], zoom_start=10)

#     for vessel_df in dfs:
#         # Add points with popup info
#         for idx, row in vessel_df.iterrows():
#             popup = f"MMSI: {row['MMSI']}<br>Time: {row['Sampled_Date']}<br>SOG: {row['SOG']} kn<br>COG: {row['COG']}°"
#             folium.CircleMarker(
#                 location=[row['Latitude'], row['Longitude']],
#                 radius=3,
#                 popup=popup,
#                 color='blue',
#                 fill=True
#             ).add_to(m)
        
#         # Add line connecting points
#         folium.PolyLine(
#             locations=vessel_df[['Latitude', 'Longitude']].values,
#             color='green',
#             weight=2,
#             opacity=0.7
#         ).add_to(m)
    
#     return m

# Example usage:
# plot_folium_trajectory(your_df, 123456789)

In [32]:
# plot_folium_trajectory(sample_traj_sequences)

In [None]:
# def plot_plotly_trajectory(dfs: List[pd.DataFrame], 
#                                    color_sequence=None,
#                                    line_width=2,
#                                    marker_size=4):
#     if not dfs:
#         raise ValueError("Empty list of DataFrames provided")
    
#     # Combine all segments with a segment ID
#     combined_df = pd.concat(
#         [df.assign(segment_id=i) for i, df in enumerate(dfs)],
#         ignore_index=True
#     )
    
#     if color_sequence is None:
#         color_sequence = px.colors.qualitative.Plotly
    
#     fig = px.line_mapbox(
#         combined_df,
#         lat="Latitude",
#         lon="Longitude",
#         color="segment_id",
#         color_discrete_sequence=["blue"],
#         hover_name="Sampled_Date",
#         hover_data=["SOG", "COG", "MMSI"],
#         zoom=10,
#         height=600,
#         title="Vessel Trajectory Segments"
#     )
    
#     # Update marker appearance
#     fig.update_traces(
#         mode="lines+markers",
#         line=dict(width=line_width),
#         marker=dict(size=marker_size)
#     )
    
#     fig.update_layout(
#         mapbox_style="carto-positron",
#         margin={"r":0,"t":40,"l":0,"b":0},
#         showlegend=False,
#         legend_title_text="Trajectory Segment"
#     )
    
#     return fig

In [33]:
# plot_plotly_trajectory(sample_traj_sequences)

In [None]:
# def plot_cartopy_trajectory_segments(dfs: List[pd.DataFrame]):
#     if not dfs:
#         raise ValueError("Empty list of DataFrames provided")
    
#     # Set default colors if not provided
#     colors = ["blue"]*len(dfs)
    
#     # Create figure
#     fig = plt.figure(figsize=(12, 8))
#     ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
    
#     # Add map features
#     ax.add_feature(cfeature.LAND, facecolor='lightgray')
#     ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
#     ax.add_feature(cfeature.COASTLINE, edgecolor='black')
#     ax.add_feature(cfeature.BORDERS, linestyle=':', edgecolor='gray')
    
#     # Plot each segment
#     for i, df in enumerate(dfs):
#         # Handle line width and marker size (single value or list)
#         lw = 2
#         ms = 4
        
#         ax.plot(
#             df['Longitude'],
#             df['Latitude'],
#             color=colors[i],
#             linewidth=lw,
#             marker='o',
#             markersize=ms,
#             transform=ccrs.PlateCarree(),
#             label=f'Segment {i+1}'
#         )
        
#         # Mark start and end points
#         ax.plot(
#             df['Longitude'].iloc[0],
#             df['Latitude'].iloc[0],
#             'o',
#             color=colors[i],
#             markersize=ms+2,
#             transform=ccrs.PlateCarree()
#         )
#         ax.plot(
#             df['Longitude'].iloc[-1],
#             df['Latitude'].iloc[-1],
#             's',
#             color=colors[i],
#             markersize=ms+2,
#             transform=ccrs.PlateCarree()
#         )
    
#     # Add legend and title
#     # ax.legend(loc='upper right')
#     plt.title('Vessel Trajectory Segments')
#     plt.tight_layout()
    
#     return fig

In [34]:
# plot_cartopy_trajectory_segments(sample_traj_sequences)

In [None]:

# def plot_pydeck_trajectory_segments(dfs: List[pd.DataFrame]):
#     if not dfs:
#         raise ValueError("Empty list of DataFrames provided")
    
#     # Combine all segments with segment IDs
#     combined_df = pd.concat(
#         [df.assign(segment_id=i) for i, df in enumerate(dfs)],
#         ignore_index=True
#     )
    
#     # Default colors if not provided
#     # colors = ["blue"]*len(dfs)
    
#     # Create a view centered on the first point
#     view_state = pdk.ViewState(
#         latitude=dfs[0].iloc[0]['Latitude'],
#         longitude=dfs[0].iloc[0]['Longitude'],
#         zoom=10
#     )
    
#     # Create line layer
#     line_layer = pdk.Layer(
#         "PathLayer",
#         data=combined_df.groupby('segment_id')
#             .apply(lambda x: x[['Longitude', 'Latitude']].values.tolist())
#             .reset_index()
#             .rename(columns={0: 'path'}),
#         get_path='path',
#         get_color='[50, 200, 150]',
#         get_width=100,
#         pickable=True
#     )
    
#     # Create scatter plot layer for points
#     scatter_layer = pdk.Layer(
#         "ScatterplotLayer",
#         data=combined_df,
#         get_position=['Longitude', 'Latitude'],
#         get_color='[0, 0, 0, 160]',
#         get_radius=100,
#         pickable=True
#     )
    
#     # Create tooltip
#     tooltip = {
#         "html": "<b>MMSI:</b> {MMSI}<br/>"
#                 "<b>Time:</b> {Sampled_Date}<br/>"
#                 "<b>SOG:</b> {SOG} kn<br/>"
#                 "<b>COG:</b> {COG}°",
#         "style": {
#             "backgroundColor": "white",
#             "color": "black"
#         }
#     }
    
#     # Render
#     r = pdk.Deck(
#         layers=[line_layer, scatter_layer],
#         initial_view_state=view_state,
#         tooltip=tooltip
#     )
    
#     return r

In [35]:
# plot_pydeck_trajectory_segments(sample_traj_sequences)

In [18]:
# def ssa(eps):
#     """
#     Convert latitude and longitude residuals to the standard range of -pi to pi.
    
#     Args:
#         eps (np.array): Residuals.
        
#     Returns:
#         np.array: Standardized residuals.
#     """
#     # Normalize latitude to the range [-pi/2, pi/2]
#     eps[0] = (eps[0] + np.pi/2) % (np.pi) - np.pi/2
#     # Normalize longitude to the range [-pi, pi]
#     eps[1] = (eps[1] + np.pi) % (2 * np.pi) - np.pi
#     return eps

# def AEKF_traj(traj: pd.DataFrame, alpha: float = 0.2):
#     """
#     Compute a trajectory correction using an adaptive Extened Kalman filter for maritime vessel data.
#     If alpha = 0: Extended Kalman Filter
    
#     Args:
#         traj pd.DataFrame: The input trajectory data.
#         alpha (float): Weighting factor for process and measurement noise adjustments.
    
#     Returns:
#         mpd.Trajectory: Corrected trajectory.
#     """
#     target_vessel = traj.copy()  # Extract trajectory data as a DataFrame
#     mmsi = int(target_vessel["MMSI"].unique()[0])  # Extract unique vessel identifier (MMSI)
#     a = 6378137  # Semi-major axis (equatorial radius) in meters
#     f = 1 / 298.257223563  # Flattening factor of Earth
#     e = np.sqrt(2 * f - f**2)  # Earth eccentricity
#     alpha_1 = 0.01  # Damping coefficient for speed
#     alpha_2 = 0.01  # Damping coefficient for angular velocity

#     # Convert latitude and longitude to radians
#     lat = np.deg2rad(target_vessel[cols.Latitude].values).tolist()
#     lon = np.deg2rad(target_vessel[cols.Longitude].values).tolist()

#     # Initial process and measurement noise covariance matrices
#     Q11 = 1e8
#     Q22 = 1e2
#     R11 = R22 = 36
#     Qd = np.diag([Q11, Q22])  # Process noise covariance
#     Rd = np.diag([R11, R22])  # Measurement noise covariance

#     frame = 'LL'  # Coordinate frame ('LL' for Latitude/Longitude)
#     P_prd = np.eye(5)  # Initial prediction covariance matrix
#     I5 = np.eye(5)  # 5x5 Identity matrix
#     Cd = np.array([[1, 0, 0, 0, 0],  # Observation matrix for position
#                    [0, 1, 0, 0, 0]])
#     coord_ls = []  # List to store corrected positions
#     sog = []  # List to store corrected speed over ground
#     cog = []  # List to store corrected course over ground

#     # Initialize state vector (latitude, longitude, speed, angle, angular velocity)
#     x_hat = np.array([lat[0], lon[0], 0, 0, 0], dtype=float)

#     # Calculate time steps in seconds
#     dt_ = np.diff(target_vessel.index.view(int) // 1e9)
#     dt_min = dt_.min()  # Minimum time step
#     dts = np.hstack([0, dt_])  # Append 0 for initial time step
    
#     delta_q = 0
#     delta_r = 0
#     try:

#         for i in range(len(lat)):  # Main loop over all data points
#             h_samp = dts[i]  # Sampling interval for the current step

#             # Adjust process and measurement noise adaptively based on sampling interval
#             if alpha != 0:
#                 if h_samp != 0:
#                     Qd = (alpha) * (h_samp / dt_min) * np.diag([Q11, Q22]) + (1 - alpha) * delta_q
#                     Rd = (alpha) * (dt_min / h_samp) * np.diag([R11, R22]) + (1 - alpha) * delta_r
            
#             # GNSS measurement for current step
#             mu = lat[i]
#             l = lon[i]
#             y = np.array([mu, l])  # Observation vector

#             # Sampling matrix
#             Ed = h_samp * np.array([[0, 0],
#                                     [0, 0],
#                                     [1, 0],
#                                     [0, 0],
#                                     [0, 1]])
            
#             # Calculate Earth radius values for the current latitude
#             Rn = a / np.sqrt(1 - e**2 * np.sin(x_hat[0])**2)  # Radius of curvature in the prime vertical
#             Rm = Rn * ((1 - e**2) / (1 - e**2 * np.sin(x_hat[0])**2))  # Radius of curvature in the meridian

#             # State transition function
#             f = np.array([(1 / Rm) * x_hat[2] * np.cos(x_hat[3]),  # Latitude rate
#                         (1 / (Rn * np.cos(x_hat[0]))) * x_hat[2] * np.sin(x_hat[3]),  # Longitude rate
#                         -alpha_1 * x_hat[2],  # Speed damping
#                         x_hat[4],  # Angle rate
#                         -alpha_2 * x_hat[4]])  # Angular velocity damping

#             # State transition matrix
#             A21 = (x_hat[2] * np.sin(x_hat[3]) * np.tan(x_hat[0])) / (Rn * np.cos(x_hat[0]))
#             Ad = I5 + h_samp * np.array([
#                 [0, 0, (1 / Rm) * np.cos(x_hat[3]), -(1 / Rm) * x_hat[2] * np.sin(x_hat[3]), 0],
#                 [A21, 0, (1 / (Rn * np.cos(x_hat[0]))) * np.sin(x_hat[3]), (1 / (Rn * np.cos(x_hat[0]))) * x_hat[2] * np.cos(x_hat[3]), 0],
#                 [0, 0, -alpha_1, 0, 0],
#                 [0, 0, 0, 0, 1],
#                 [0, 0, 0, 0, -alpha_2]])

#             # Predict state and covariance
#             x_prd = x_hat + h_samp * f
#             P_hat = Ad @ P_prd @ Ad.T + Ed @ Qd @ Ed.T

#             # Compute observation residual
#             d = y - Cd @ x_prd
#             if frame == 'LL':  # Normalize residuals if in latitude/longitude frame
#                 d = ssa(d)

#             # Compute Kalman gain
#             S = Cd @ P_hat @ Cd.T + Rd
#             K = P_hat @ Cd.T @ np.linalg.inv(S)
#             IKC = I5 - K @ Cd  # Update factor for covariance matrix

#             # Update state and covariance
#             P_prd = IKC @ P_hat @ IKC.T + K @ Rd @ K.T
#             x_hat = x_prd + K @ d
#             x_hat[:2] = ssa(x_hat[:2])  # Normalize latitude and longitude

#             # Update process noise covariance (adaptive filtering)
#             delta_q = (Cd @ K @ np.expand_dims(d, 1) @ np.expand_dims(d, 1).T @ K.T @ Cd.T) * np.eye(2) % 1e8
            
#             esp = y - Cd @ x_hat
#             esp = np.expand_dims(esp, 1)
#             delta_r = (esp @ esp.T + Cd @ P_hat @ Cd.T) *np.eye(2) / R11
#             # Store corrected state values
#             coord_ls.append(np.rad2deg(x_hat[:2]))  # Convert latitude/longitude back to degrees
#             sog.append(x_hat[2] / 0.51444444)  # Convert speed to knots
#             cog.append(np.rad2deg(x_hat[3]) % 360)  # Normalize angle to [0, 360]
#             # print(sog)
            

#         # Create corrected trajectory DataFrame
#         states = [i.tolist() for i in coord_ls]
#         lat, lon = list(zip(*states))
#         traj = pd.DataFrame({cols.Sampled_Date: target_vessel[cols.Sampled_Date], "MMSI": [mmsi]*len(lat), cols.Latitude: lat, cols.Longitude: lon, cols.SOG: sog, cols.COG: cog}, index=target_vessel.index)
#         # df_geo = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=CRS_METRIC)
#         # traj = mpd.Trajectory(df_geo, traj_id=mmsi)
#         return traj
#     except Exception as e:
#         print(e)
#         return False

# def RMSE_error(collection, new_collections_aekf, mask_traj):
#     """
#     Calculate the Root Mean Square Error (RMSE) between true and estimated (filtered) trajectories.

#     Args:
#         collection (list): A list of true trajectories, each containing latitude and longitude data.
#         new_collections_aekf (list): A list of estimated (filtered) trajectories, each containing latitude and longitude data.
#         mask_traj (list): A boolean mask list indicating which trajectories should be considered for RMSE calculation.

#     Returns:
#         list: A list of RMSE values for each pair of true and estimated trajectories.
#     """
    
#     # Initialize an empty list to store RMSE values for each trajectory pair
#     RMSE_aekf = list()

#     # Filter the collection of true trajectories based on the mask_traj list (only keep True values)
#     collection = [collection[idx] for idx in range(len(mask_traj)) if mask_traj[idx] == True]

#     # Iterate over pairs of true and estimated trajectories
#     for true_traj, est_traj in tqdm(zip(collection, new_collections_aekf), desc="RMSE"):
        
#         # Extract the latitude and longitude values for the true trajectory and convert to radians
#         true_coord = true_traj.df[['lat', 'lon']].values
#         true_coord = np.deg2rad(true_coord)  # Convert coordinates to radians for distance calculation
        
#         # Extract the latitude and longitude values for the estimated trajectory and convert to radians
#         pred_coord = est_traj.df[['lat', 'lon']].values
#         pred_coord = np.deg2rad(pred_coord)  # Convert coordinates to radians
        
#         # Compute the Haversine distance between the true and predicted coordinates (distance in km)
#         traj_dist = (haversine_distances(true_coord, pred_coord) * 6371) * np.eye(len(true_traj.df))
        
#         # Remove zero distances (indicating identical points)
#         traj_dist = traj_dist[traj_dist != 0]

#         # Calculate the Root Mean Square Error (RMSE) as the square root of the mean of squared distances
#         rmse = np.sqrt((traj_dist**2).mean())
        
#         # Append the computed RMSE to the list of RMSE values
#         RMSE_aekf.append(rmse)

#     return RMSE_aekf  # Return the list of RMSE values for each trajectory pair



In [None]:
# AEKF_sample_traj = AEKF_traj(sample_traj)

In [37]:
# AEKF_sample_traj

In [None]:
# AEKF_sample_traj_sequences = get_trajectory_sequences(AEKF_sample_traj)

In [40]:
# plot_folium_trajectory(sample_traj_sequences)
# plot_plotly_trajectory(sample_traj_sequences)

In [39]:
# plot_folium_trajectory(AEKF_sample_traj_sequences)
# plot_plotly_trajectory(AEKF_sample_traj_sequences)

In [42]:
# interpolated_sample_traj = restore_missing_timestamps(sample_traj, noise_level=0.0007)
# interpolated_sample_traj_sequences = get_trajectory_sequences(interpolated_sample_traj)
# plot_plotly_trajectory(interpolated_sample_traj_sequences)

In [43]:
# AEKF_interpolated_sample_traj= AEKF_traj(interpolated_sample_traj)
# AEKF_interpolated_sample_traj_sequences = get_trajectory_sequences(AEKF_interpolated_sample_traj)
# plot_plotly_trajectory(AEKF_interpolated_sample_traj_sequences)

In [45]:
# fig = plot_plotly_trajectory_groups([AEKF_interpolated_sample_traj_sequences, sample_traj_sequences], group_names=["Stable Diffusion + AEKF", "Initial trajectory"])
# fig.show()
# fig.write_html("test_1.html")