In [1]:
import pandas as pd
import glob

In [2]:
dynamic_data_files = glob.glob('../../data/fishing_boats_dynamic/Dynamic_*.csv')
all_boats_trajectories = {}
for dynamic_data_file in dynamic_data_files:
    print(dynamic_data_file)
    df_dynamic = pd.read_csv(dynamic_data_file)
    # print(df_dynamic.head())
    data_grouped = df_dynamic.groupby("MMSI")
    for mmsi, data in data_grouped:
        if mmsi not in all_boats_trajectories:
            all_boats_trajectories[mmsi] = data.copy()  # Create a copy to avoid SettingWithCopyWarning
        else:
            all_boats_trajectories[mmsi] = pd.concat([all_boats_trajectories[mmsi], data], ignore_index=True)



../../data/fishing_boats_dynamic/Dynamic_20230502_fishing_boats.csv
../../data/fishing_boats_dynamic/Dynamic_20230501_fishing_boats.csv


In [3]:
print(len(all_boats_trajectories.keys()))

663


In [4]:
df = all_boats_trajectories[list(all_boats_trajectories.keys())[2]]
df = df.sort_index()

In [5]:
df

Unnamed: 0,MMSI,Date,Latitude,Longitude,SOG,COG,Heading
0,108672075,2023-05-02 21:23:22,35.24726,122.26202,8.5,330.8,511
1,108672075,2023-05-02 21:24:16,35.248818,122.260813,6.9,327.9,511
2,108672075,2023-05-02 21:24:29,35.249188,122.260505,8.0,329.5,511
3,108672075,2023-05-01 16:55:35,34.765128,122.441865,8.3,170.3,511
4,108672075,2023-05-01 16:55:48,34.764625,122.44192,8.3,173.7,511
5,108672075,2023-05-01 17:00:28,34.758133,122.443578,5.1,359.3,511
6,108672075,2023-05-01 17:52:15,34.837367,122.443537,5.5,358.0,511
7,108672075,2023-05-01 18:10:41,34.866912,122.443363,5.8,359.0,511
8,108672075,2023-05-01 18:32:15,34.900963,122.451562,5.4,26.1,511
9,108672075,2023-05-01 18:33:35,34.902682,122.452918,5.5,32.0,511


In [6]:
df["Date"] = pd.to_datetime(df["Date"])
df = df.set_index("Date")
df = df.sort_index()

df_first = df.iloc[:1].copy()
df_first.index = [df.index.min().floor("10min")]

# Duplicate the last row with the time ceiled to the end of the last 10-minute interval
df_last = df.iloc[-1:].copy()
df_last.index = [df.index.max().ceil("10min")]

# Append the modified rows to the original dataframe
df = pd.concat([df_first, df, df_last])

# Define exact 10-minute sampling times
start_time = df.index.min().floor("h")  # Round down to the nearest hour
end_time = df.index.max().ceil("h")  # Round up to the nearest hour
sampling_times = pd.date_range(start_time, end_time, freq="10min")

# Filter only timestamps where at least one real record exists within ±10 minutes
valid_sampling_times = [t for t in sampling_times if any(abs(df.index - t) <= pd.Timedelta(minutes=10))]

# Reindex only valid sampling times
df_interpolated = df.reindex(df.index.union(valid_sampling_times)).sort_index()

# Perform linear interpolation
df_interpolated = df_interpolated.interpolate(method="time")

# Keep only the sampled timestamps and drop any remaining NaNs
df_sampled = df_interpolated.loc[valid_sampling_times].dropna().reset_index()
df_sampled.rename(columns={"index": "Sampled_Date"}, inplace=True)
df_sampled

Unnamed: 0,Sampled_Date,MMSI,Latitude,Longitude,SOG,COG,Heading
0,2023-05-01 16:50:00,108672075.0,34.765128,122.441865,8.3,170.3,511.0
1,2023-05-01 17:00:00,108672075.0,34.758782,122.443412,5.42,340.74,511.0
2,2023-05-01 17:10:00,108672075.0,34.77272,122.443571,5.17364,359.060669,511.0
3,2023-05-01 17:50:00,108672075.0,34.833924,122.443538,5.48262,358.056485,511.0
4,2023-05-01 18:00:00,108672075.0,34.849788,122.443464,5.62613,358.420434,511.0
5,2023-05-01 18:10:00,108672075.0,34.865816,122.44337,5.788879,358.962929,511.0
6,2023-05-01 18:20:00,108672075.0,34.881622,122.446905,5.627202,215.189258,511.0
7,2023-05-01 18:30:00,108672075.0,34.897411,122.450706,5.441731,60.83068,511.0
8,2023-05-01 18:40:00,108672075.0,34.904846,122.455105,4.144119,45.639043,511.0
9,2023-05-01 19:10:00,108672075.0,34.902787,122.457248,1.02402,130.773174,511.0


In [7]:
sample_T = pd.Timedelta(minutes=10)
sequences = []  # To store the sequences
current_sequence = pd.DataFrame(columns=df_sampled.columns) # DF To track the current sequence

# Iterate through the timestamps
for i in range(len(df_sampled) - 1):
    if df_sampled["Sampled_Date"][i + 1] - df_sampled["Sampled_Date"][i] == sample_T:
        # If the difference is 10 minutes, add the current timestamp to the sequence
        if len(current_sequence) == 0:
            current_sequence = df_sampled.iloc[[i]] # Add the first timestamp of the sequence
        current_sequence = pd.concat([current_sequence, df_sampled.iloc[[i+1]]], ignore_index=True)  # Add the next timestamp
    else:
        # If the difference is not 10 minutes, end the current sequence
        if len(current_sequence) != 0:
            sequences.append(current_sequence)  # Store the completed sequence
            current_sequence = pd.DataFrame(columns=df_sampled.columns)   # Reset the current sequence

# Handle the last sequence if it ends at the last timestamp
if len(current_sequence) != 0:
    sequences.append(current_sequence)

In [8]:
print(sequences)

[         Sampled_Date         MMSI   Latitude   Longitude      SOG  \
0 2023-05-01 16:50:00  108672075.0  34.765128  122.441865  8.30000   
1 2023-05-01 17:00:00  108672075.0  34.758782  122.443412  5.42000   
2 2023-05-01 17:10:00  108672075.0  34.772720  122.443571  5.17364   

          COG  Heading  
0  170.300000    511.0  
1  340.740000    511.0  
2  359.060669    511.0  ,          Sampled_Date         MMSI   Latitude   Longitude       SOG  \
0 2023-05-01 17:50:00  108672075.0  34.833924  122.443538  5.482620   
1 2023-05-01 18:00:00  108672075.0  34.849788  122.443464  5.626130   
2 2023-05-01 18:10:00  108672075.0  34.865816  122.443370  5.788879   
3 2023-05-01 18:20:00  108672075.0  34.881622  122.446905  5.627202   
4 2023-05-01 18:30:00  108672075.0  34.897411  122.450706  5.441731   
5 2023-05-01 18:40:00  108672075.0  34.904846  122.455105  4.144119   

          COG  Heading  
0  358.056485    511.0  
1  358.420434    511.0  
2  358.962929    511.0  
3  215.189258    51

In [9]:
good_sequence = sequences[1].copy()

In [10]:
good_sequence

Unnamed: 0,Sampled_Date,MMSI,Latitude,Longitude,SOG,COG,Heading
0,2023-05-01 17:50:00,108672075.0,34.833924,122.443538,5.48262,358.056485,511.0
1,2023-05-01 18:00:00,108672075.0,34.849788,122.443464,5.62613,358.420434,511.0
2,2023-05-01 18:10:00,108672075.0,34.865816,122.44337,5.788879,358.962929,511.0
3,2023-05-01 18:20:00,108672075.0,34.881622,122.446905,5.627202,215.189258,511.0
4,2023-05-01 18:30:00,108672075.0,34.897411,122.450706,5.441731,60.83068,511.0
5,2023-05-01 18:40:00,108672075.0,34.904846,122.455105,4.144119,45.639043,511.0


In [11]:
(good_sequence['Latitude'].max() + good_sequence['Latitude'].min())/2

34.8693848889765

In [20]:
from scipy.spatial.transform import Rotation as R
import numpy as np
import pyproj

In [43]:
from pyproj import CRS, Transformer

# Define the Coordinate Reference Systems
crs_geodetic = CRS.from_epsg(4326)  # WGS84
crs_ecef = CRS.from_epsg(4978)      # ECEF

# Create transformers
transformer_to_ecef = Transformer.from_crs(crs_geodetic, crs_ecef, always_xy=True)
transformer_to_geodetic = Transformer.from_crs(crs_ecef, crs_geodetic, always_xy=True)

# Convert geodetic coordinates to ECEF
def geodetic_to_ecef(lat, lon):
    x, y, z = transformer_to_ecef.transform(lon, lat, 0)  # Assuming altitude = 0
    return np.array([x, y, z])

# Convert ECEF coordinates to geodetic
def ecef_to_geodetic(x, y, z):
    lon, lat, _ = transformer_to_geodetic.transform(x, y, z)
    return lat, lon  # Ignoring altitude

def enu_matrix(lat, lon):
    """
    Compute the rotation matrix from ECEF to local ENU coordinates at a given geodetic point.
    The rows are the unit vectors for East, North, and Up, respectively.
    """
    lat_rad = np.radians(lat)
    lon_rad = np.radians(lon)
    
    # East vector
    east = np.array([-np.sin(lon_rad), np.cos(lon_rad), 0])
    # North vector
    north = np.array([-np.sin(lat_rad)*np.cos(lon_rad),
                      -np.sin(lat_rad)*np.sin(lon_rad),
                      np.cos(lat_rad)])
    # Up vector
    up = np.array([np.cos(lat_rad)*np.cos(lon_rad),
                   np.cos(lat_rad)*np.sin(lon_rad),
                   np.sin(lat_rad)])
    
    return np.vstack((east, north, up))  # 3x3 matrix

In [None]:
angle_deg = 180
pivot_lat = (good_sequence['Latitude'].max() + good_sequence['Latitude'].min())/2
pivot_lon = (good_sequence['Longitude'].max() + good_sequence['Longitude'].min())/2
"""
Rotate ship trajectory around a pivot point (given in geodetic coordinates)
by a given angle (in degrees) about the pivot's local up (vertical) direction.
"""
# Get pivot ECEF coordinates
pivot_ecef = geodetic_to_ecef(pivot_lat, pivot_lon)

# Compute ENU transformation matrix at the pivot
enu_mat = enu_matrix(pivot_lat, pivot_lon)  # Rows: [east, north, up]

# Convert ship trajectory to ECEF coordinates
ecef_coords = np.array([
    geodetic_to_ecef(lat, lon) for lat, lon in zip(good_sequence['Latitude'], good_sequence['Longitude'])
])

# Translate to pivot frame
relative_ecef = ecef_coords - pivot_ecef  # shape (n, 3)

# Transform relative coordinates to local ENU frame
# (Since enu_mat's rows are unit vectors, we multiply on the left)
enu_coords = (enu_mat @ relative_ecef.T).T  # shape (n, 3)

# Define a 2D rotation matrix for the horizontal (east, north) plane
theta = np.radians(angle_deg)
rot_2d = np.array([
    [np.cos(theta), -np.sin(theta)],
    [np.sin(theta),  np.cos(theta)]
])

# Apply rotation to east and north components; leave up unchanged
enu_rotated = enu_coords.copy()
enu_rotated[:, :2] = (rot_2d @ enu_coords[:, :2].T).T

# Convert rotated ENU coordinates back to ECEF
# Since enu_mat is orthonormal, its transpose converts from ENU back to ECEF
rotated_relative_ecef = (enu_mat.T @ enu_rotated.T).T  # shape (n, 3)

# Translate back to the original ECEF frame
new_ecef_coords = rotated_relative_ecef + pivot_ecef

# Convert back to geodetic coordinates
new_lat_lon = np.array([ecef_to_geodetic(x, y, z)[:2] for x, y, z in new_ecef_coords])
good_sequence[['Latitude ROTATED', 'Longitude ROTATED']] = new_lat_lon
good_sequence["COG ROTATED"] = (good_sequence["COG"] + angle_deg) % 360
good_sequence["Heading ROTATED"] = (good_sequence["Heading"] != 511) * (good_sequence["Heading"] + angle_deg) % 360 + (good_sequence["Heading"] == 511) * 511
good_sequence

Unnamed: 0,Sampled_Date,MMSI,Latitude,Longitude,SOG,COG,Heading,Latitude ROTATED,Longitude ROTATED,Latitude ROTATED 2,Longitude ROTATED 2,COG ROTATED,Heading ROTATED
0,2023-05-01 17:50:00,108672075.0,34.833924,122.443538,5.48262,358.056485,511.0,34.904845,122.454941,34.833923,122.443544,178.056485,331.0
1,2023-05-01 18:00:00,108672075.0,34.849788,122.443464,5.62613,358.420434,511.0,34.888981,122.455014,34.849787,122.443469,178.420434,331.0
2,2023-05-01 18:10:00,108672075.0,34.865816,122.44337,5.788879,358.962929,511.0,34.872953,122.455105,34.865815,122.443375,178.962929,331.0
3,2023-05-01 18:20:00,108672075.0,34.881622,122.446905,5.627202,215.189258,511.0,34.857148,122.451569,34.881621,122.446911,35.189258,331.0
4,2023-05-01 18:30:00,108672075.0,34.897411,122.450706,5.441731,60.83068,511.0,34.841359,122.447769,34.89741,122.450712,240.83068,331.0
5,2023-05-01 18:40:00,108672075.0,34.904846,122.455105,4.144119,45.639043,511.0,34.833923,122.443375,34.904845,122.45511,225.639043,331.0


In [45]:
angle_deg = 180
pivot_lat = (good_sequence['Latitude ROTATED'].max() + good_sequence['Latitude ROTATED'].min())/2
pivot_lon = (good_sequence['Longitude ROTATED'].max() + good_sequence['Longitude ROTATED'].min())/2
"""
Rotate ship trajectory around a pivot point (given in geodetic coordinates)
by a given angle (in degrees) about the pivot's local up (vertical) direction.
"""
# Get pivot ECEF coordinates
pivot_ecef = geodetic_to_ecef(pivot_lat, pivot_lon)

# Compute ENU transformation matrix at the pivot
enu_mat = enu_matrix(pivot_lat, pivot_lon)  # Rows: [east, north, up]

# Convert ship trajectory to ECEF coordinates
ecef_coords = np.array([
    geodetic_to_ecef(lat, lon) for lat, lon in zip(good_sequence['Latitude ROTATED'], good_sequence['Longitude ROTATED'])
])

# Translate to pivot frame
relative_ecef = ecef_coords - pivot_ecef  # shape (n, 3)

# Transform relative coordinates to local ENU frame
# (Since enu_mat's rows are unit vectors, we multiply on the left)
enu_coords = (enu_mat @ relative_ecef.T).T  # shape (n, 3)

# Define a 2D rotation matrix for the horizontal (east, north) plane
theta = np.radians(angle_deg)
rot_2d = np.array([
    [np.cos(theta), -np.sin(theta)],
    [np.sin(theta),  np.cos(theta)]
])

# Apply rotation to east and north components; leave up unchanged
enu_rotated = enu_coords.copy()
enu_rotated[:, :2] = (rot_2d @ enu_coords[:, :2].T).T

# Convert rotated ENU coordinates back to ECEF
# Since enu_mat is orthonormal, its transpose converts from ENU back to ECEF
rotated_relative_ecef = (enu_mat.T @ enu_rotated.T).T  # shape (n, 3)

# Translate back to the original ECEF frame
new_ecef_coords = rotated_relative_ecef + pivot_ecef

# Convert back to geodetic coordinates
new_lat_lon = np.array([ecef_to_geodetic(x, y, z)[:2] for x, y, z in new_ecef_coords])
good_sequence[['Latitude ROTATED 2', 'Longitude ROTATED 2']] = new_lat_lon
good_sequence

Unnamed: 0,Sampled_Date,MMSI,Latitude,Longitude,SOG,COG,Heading,Latitude ROTATED,Longitude ROTATED,Latitude ROTATED 2,Longitude ROTATED 2
0,2023-05-01 17:50:00,108672075.0,34.833924,122.443538,5.48262,358.056485,511.0,34.904845,122.454941,34.833923,122.443544
1,2023-05-01 18:00:00,108672075.0,34.849788,122.443464,5.62613,358.420434,511.0,34.888981,122.455014,34.849787,122.443469
2,2023-05-01 18:10:00,108672075.0,34.865816,122.44337,5.788879,358.962929,511.0,34.872953,122.455105,34.865815,122.443375
3,2023-05-01 18:20:00,108672075.0,34.881622,122.446905,5.627202,215.189258,511.0,34.857148,122.451569,34.881621,122.446911
4,2023-05-01 18:30:00,108672075.0,34.897411,122.450706,5.441731,60.83068,511.0,34.841359,122.447769,34.89741,122.450712
5,2023-05-01 18:40:00,108672075.0,34.904846,122.455105,4.144119,45.639043,511.0,34.833923,122.443375,34.904845,122.45511


In [12]:
good_sequence

Unnamed: 0,Sampled_Date,MMSI,Latitude,Longitude,SOG,COG,Heading
0,2023-05-01 17:50:00,108672075.0,34.833924,122.443538,5.48262,358.056485,511.0
1,2023-05-01 18:00:00,108672075.0,34.849788,122.443464,5.62613,358.420434,511.0
2,2023-05-01 18:10:00,108672075.0,34.865816,122.44337,5.788879,358.962929,511.0
3,2023-05-01 18:20:00,108672075.0,34.881622,122.446905,5.627202,215.189258,511.0
4,2023-05-01 18:30:00,108672075.0,34.897411,122.450706,5.441731,60.83068,511.0
5,2023-05-01 18:40:00,108672075.0,34.904846,122.455105,4.144119,45.639043,511.0


In [28]:
import numpy as np
# print(np.cos(np.radians(0)))
# print(np.cos(np.radians(45)))
# print(np.cos(np.radians(90)))
# print(np.cos(np.radians(180)))
# print(np.cos(np.radians(270)))
print(np.cos(np.array((0., 30., 45., 60., 90.)) * np.pi / 180. ))

[1.00000000e+00 8.66025404e-01 7.07106781e-01 5.00000000e-01
 6.12323400e-17]


In [None]:
good_sequence["Pseudo Longitude"] = good_sequence["Longitude"].min() + (good_sequence["Longitude"] - good_sequence["Longitude"].min()) * np.cos(np.radians(good_sequence["Latitude"]))
good_sequence

Unnamed: 0,Sampled_Date,MMSI,Latitude,Longitude,SOG,COG,Heading,Pseudo Longitude
0,2023-05-01 17:50:00,108672075.0,34.833924,122.443538,5.48262,358.056485,511.0,122.443508
1,2023-05-01 18:00:00,108672075.0,34.849788,122.443464,5.62613,358.420434,511.0,122.443447
2,2023-05-01 18:10:00,108672075.0,34.865816,122.44337,5.788879,358.962929,511.0,122.44337
3,2023-05-01 18:20:00,108672075.0,34.881622,122.446905,5.627202,215.189258,511.0,122.44627
4,2023-05-01 18:30:00,108672075.0,34.897411,122.450706,5.441731,60.83068,511.0,122.449387
5,2023-05-01 18:40:00,108672075.0,34.904846,122.455105,4.144119,45.639043,511.0,122.452994


In [35]:
np.sign(-100)

-1

In [47]:
((good_sequence["SOG"] / 5 < 1) * good_sequence["SOG"] / 5) + (good_sequence["SOG"] / 5 > 1)

0    1.000000
1    1.000000
2    1.000000
3    1.000000
4    1.000000
5    0.828824
Name: SOG, dtype: float64