In [18]:
import pandas as pd
import plotly.express as px

In [19]:
skipped_lines = []

# Custom function to handle bad lines
def bad_line_handler(line):
    skipped_lines.append(line)
    return None 

In [20]:
filename = '../../raw_data/Static.csv'
df = pd.read_csv(filename, 
                 encoding='euc-kr', 
                 on_bad_lines=bad_line_handler,
                 engine='python')
                 
kor_columns = list(df.columns)
eng_columns = ["MMSI", "Ship Name", "Linetype Code", "IMO", "Call Sign", "DimA", "DimB", "DimC", "DimD", "Draft", "Estimated Tons"]
df.columns = eng_columns
print(f"Korean column names: {kor_columns}")
print(f"English column names: {eng_columns}")
print(df.head())

# https://api.vtexplorer.com/docs/ref-aistypes.html

Korean column names: ['MMSI', '선박명', '선종코드', 'IMO', '호출부호', 'DimA', 'DimB', 'DimC', 'DimD', '흘수', '추정톤수']
English column names: ['MMSI', 'Ship Name', 'Linetype Code', 'IMO', 'Call Sign', 'DimA', 'DimB', 'DimC', 'DimD', 'Draft', 'Estimated Tons']
   MMSI      Ship Name  Linetype Code          IMO Call Sign  DimA  DimB  \
0     0            NaN            0.0          NaN       NaN   0.0   0.0   
1     1  HEMINGWAY3600           52.0  910417200.0      V3GU  13.0  20.0   
2    10   BAOLI-10-99%            0.0          0.0       NaN   0.0   0.0   
3   100    SUQIYU01201           30.0          0.0       600  26.0   8.0   
4  1000              0           30.0        100.0       AAA  23.0  15.0   

   DimC  DimD  Draft  Estimated Tons  
0   0.0   0.0    0.0             0.0  
1   6.0   3.0    4.0           114.0  
2   0.0   0.0    0.0             0.0  
3   5.0   2.0    0.0           124.0  
4   3.0   3.0    0.0           174.0  


In [21]:
len(df)

58679

In [None]:
fishing_boats = df[df['Linetype Code'] == 30]

In [25]:
len(fishing_boats)/len(df)

0.19628828030470866

In [26]:
filename = '../../data/fishing_boats_dynamic/Dynamic_20230501_fishing_boats.csv'
df_dynamic = pd.read_csv(filename, 
                 encoding='euc-kr', 
                 skiprows=2)
# , 
#                  on_bad_lines=bad_line_handler,
                #  engine='python')s
                 
kor_columns = list(df_dynamic.columns)
eng_columns = ["MMSI", "Date", "Latitude", "Longitude", "SOG", "COG", "Heading"]
# SOG = speed over ground
# COG = course over ground
# heading = where the ship is pointing
df_dynamic.columns = eng_columns
print(f"Korean column names: {kor_columns}")
print(f"English column names: {eng_columns}")
print(df_dynamic.head())

Korean column names: ['100044559', '2023-05-01 01:07:15', '34.7379983333333', '122.672906666667', '0.2', '177.4', '511']
English column names: ['MMSI', 'Date', 'Latitude', 'Longitude', 'SOG', 'COG', 'Heading']
        MMSI                 Date   Latitude   Longitude  SOG    COG  Heading
0  100044559  2023-05-01 01:07:15  34.738002  122.672907  0.9  232.2      511
1  100044559  2023-05-01 01:10:15  34.737837  122.672932  0.2  168.8      511
2  100044559  2023-05-01 17:01:23  34.702052  122.633342  6.4  181.2      511
3  100044559  2023-05-01 17:04:23  34.696788  122.633318  6.0  178.7      511
4  100044559  2023-05-01 17:05:23  34.695100  122.633358  5.9  177.3      511


In [27]:
len(df_dynamic)

995104

In [28]:
data_grouped = df_dynamic.groupby("MMSI")

In [30]:
for mmsi, data in data_grouped:
    if len(data) < 10:
        continue
    # print(mmsi, len(data))
    # print()
    
    data.to_csv(f"../../data/processed_fishing_boats/mmsi_{mmsi}_len_{len(data)}.csv")
    # break

In [4]:
df_dynamic['Date'] = pd.to_datetime(df_dynamic['Date'])
df_dynamic = df_dynamic.sort_values(by=['MMSI', 'Date'])

In [5]:
df_to_plot = df_dynamic[df_dynamic['MMSI']< 100000000]

In [6]:
len(df_to_plot)/len(df_dynamic)

0.002318439635773563

In [7]:
# fig = px.scatter_mapbox(
#     df_to_plot,
#     lat="Latitude",
#     lon="Longitude",
#     color="MMSI",  # Different colors for different ships
#     size="SOG",    # Size of points based on speed
#     animation_frame="Date",  # Dynamic animation over time
#     title="Dynamic Ship Trajectories",
#     hover_data=["COG", "Heading"]
# )

# # Configure the map style
# fig.update_layout(
#     mapbox_style="carto-positron",
#     mapbox_zoom=10,  # Adjust the zoom level
#     mapbox_center={"lat": df_to_plot["Latitude"].mean(), "lon": df_to_plot["Longitude"].mean()},
#     height=600,
# )

fig = px.line_mapbox(
    df_to_plot,
    lat="Latitude",
    lon="Longitude",
    color="MMSI",  # Different colors for different boats
    title="Ship Trajectories (1-5 Boats)",
    hover_data=["Date", "SOG", "COG", "Heading"]
)

# Configure the map style
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=10,  # Adjust the zoom level
    mapbox_center={"lat": df_to_plot["Latitude"].mean(), "lon": df_to_plot["Longitude"].mean()},
    height=600,
)

fig.write_html("ship_trajectories.html")


In [None]:
# fig.write_html("ship_trajectories_dynamic.html")