In [1]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Now you can access your Google Drive files.  The path '/content/drive'
# is the mount point.  Everything in your Drive will be accessible under
# this directory.

# Example: List files in your Drive's root directory
!ls /content/drive/MyDrive

Mounted at /content/drive
 20240929_124221.mp4  'Israeli_passport_malyants_all_pages (1).pdf'
'Alan Essays.gdoc'     Israeli_passport_malyants_all_pages.pdf
'AlexIrina (1).pdf'   'Israeli_passport_ukhatov_all_pages (1).pdf'
 AlexIrina.pdf	       Israeli_passport_ukhatov_all_pages.pdf
 Bioinformatics        Masters
'Colab Notebooks'      mlabs_codes
 Documents	      'Nomad visa.zip'


In [18]:
# List the Masters folder, which holds the raw AIS data (koreaais) and the
# filtered fishing-boat output (FishingKoreaAIS) used by the cells below.
!ls /content/drive/MyDrive/Masters

FishingKoreaAIS  koreaais  MS


In [11]:
# List the raw AIS inputs: the date-named Dynamic_YYYYMMDD.csv files and
# Static.csv.
!ls /content/drive/MyDrive/Masters/koreaais

Dynamic_20230501.csv  Dynamic_20230509.csv  Dynamic_20230517.csv  Dynamic_20230525.csv
Dynamic_20230502.csv  Dynamic_20230510.csv  Dynamic_20230518.csv  Dynamic_20230526.csv
Dynamic_20230503.csv  Dynamic_20230511.csv  Dynamic_20230519.csv  Dynamic_20230527.csv
Dynamic_20230504.csv  Dynamic_20230512.csv  Dynamic_20230520.csv  Dynamic_20230528.csv
Dynamic_20230505.csv  Dynamic_20230513.csv  Dynamic_20230521.csv  Dynamic_20230529.csv
Dynamic_20230506.csv  Dynamic_20230514.csv  Dynamic_20230522.csv  Dynamic_20230530.csv
Dynamic_20230507.csv  Dynamic_20230515.csv  Dynamic_20230523.csv  Dynamic_20230531.csv
Dynamic_20230508.csv  Dynamic_20230516.csv  Dynamic_20230524.csv  Static.csv


In [30]:
import glob
from pathlib import Path

import pandas as pd

In [13]:
# Every row the CSV parser rejects is collected here for later inspection.
skipped_lines = []


def bad_line_handler(line):
    """Remember a malformed row and let the parser drop it.

    Intended to be passed as ``on_bad_lines`` to ``pd.read_csv``.

    Parameters
    ----------
    line
        The rejected row as handed over by the parser; it is stored as-is
        in the module-level ``skipped_lines`` list.

    Returns
    -------
    None
        Always; per the pandas documentation a ``None`` result means the
        bad line is ignored.
    """
    skipped_lines.append(line)

# English labels for the eleven columns of Static.csv, in file order.
# "Linetype Code" is the label used here for the 선종코드 column; later cells
# compare it against AIS ship-type codes.
eng_columns = [
    "MMSI", "Ship Name", "Linetype Code", "IMO", "Call Sign",
    "DimA", "DimB", "DimC", "DimD", "Draft", "Estimated Tons",
]

filename = '/content/drive/MyDrive/Masters/koreaais/Static.csv'

# The file is EUC-KR encoded. A callable on_bad_lines is not supported by the
# default C parser, hence engine='python'; rejected rows end up in
# skipped_lines via bad_line_handler.
df = pd.read_csv(
    filename,
    encoding='euc-kr',
    engine='python',
    on_bad_lines=bad_line_handler,
)

# Keep the original (Korean) header for reference, then swap in the English one.
kor_columns = df.columns.tolist()
df.columns = eng_columns

print(f"Korean column names: {kor_columns}")
print(f"English column names: {eng_columns}")
print(df.head())

Korean column names: ['MMSI', '선박명', '선종코드', 'IMO', '호출부호', 'DimA', 'DimB', 'DimC', 'DimD', '흘수', '추정톤수']
English column names: ['MMSI', 'Ship Name', 'Linetype Code', 'IMO', 'Call Sign', 'DimA', 'DimB', 'DimC', 'DimD', 'Draft', 'Estimated Tons']
   MMSI      Ship Name  Linetype Code          IMO Call Sign  DimA  DimB  \
0     0            NaN            0.0          NaN       NaN   0.0   0.0   
1     1  HEMINGWAY3600           52.0  910417200.0      V3GU  13.0  20.0   
2    10   BAOLI-10-99%            0.0          0.0       NaN   0.0   0.0   
3   100    SUQIYU01201           30.0          0.0       600  26.0   8.0   
4  1000              0           30.0        100.0       AAA  23.0  15.0   

   DimC  DimD  Draft  Estimated Tons  
0   0.0   0.0    0.0             0.0  
1   6.0   3.0    4.0           114.0  
2   0.0   0.0    0.0             0.0  
3   5.0   2.0    0.0           124.0  
4   3.0   3.0    0.0           174.0  


In [14]:
# Keep only the vessels whose ship-type code is 30, the fishing-vessel code
# per the AIS type table: https://api.vtexplorer.com/docs/ref-aistypes.html
is_fishing = df['Linetype Code'].eq(30)
fishing_boats = df.loc[is_fishing]

In [19]:
# Persist the fishing-boat subset of the static table (row index omitted).
static_fishing_path = '/content/drive/MyDrive/Masters/FishingKoreaAIS/Static_fishing_boats.csv'
fishing_boats.to_csv(static_fishing_path, index=False)

In [27]:
# Sanity check: after filtering, the only ship-type code left should be 30.
ship_type_codes = fishing_boats['Linetype Code']
ship_type_codes.unique()

array([30.])

In [49]:
# MMSI identifiers of all fishing boats, held as a set for the membership
# test applied to the dynamic (position) data.
fishing_boats_mmsi = set(fishing_boats["MMSI"].tolist())

In [56]:
# English labels for the seven columns of each Dynamic_*.csv, in file order.
# SOG = speed over ground
# COG = course over ground
# Heading = where the ship is pointing
eng_columns = ["MMSI", "Date", "Latitude", "Longitude", "SOG", "COG", "Heading"]

# Collect every dynamic AIS file. glob returns matches in arbitrary order, so
# sort them: the processing order -- and therefore which file df_dynamic and
# filtered_df refer to once the loop has finished -- becomes deterministic.
dynamic_data_files = sorted(glob.glob('/content/drive/MyDrive/Masters/koreaais/Dynamic_*.csv'))

for dynamic_data_file in dynamic_data_files:
    # Files are EUC-KR encoded; the first two rows are skipped before the
    # header row is read (presumably a preamble -- confirm against the raw files).
    df_dynamic = pd.read_csv(dynamic_data_file,
                             encoding='euc-kr',
                             skiprows=2)

    # Keep the original (Korean) header for reference, then swap in the English one.
    kor_columns = list(df_dynamic.columns)
    df_dynamic.columns = eng_columns
    print(f"Korean column names: {kor_columns}")
    print(f"English column names: {eng_columns}")

    # Keep only the position reports whose MMSI belongs to a fishing boat.
    filtered_df = df_dynamic[df_dynamic["MMSI"].isin(fishing_boats_mmsi)]
    print(filtered_df.head())

    # Dynamic_20230501.csv -> Dynamic_20230501_fishing_boats.csv
    output_name = f'{Path(dynamic_data_file).stem}_fishing_boats.csv'
    filtered_df.to_csv(f'/content/drive/MyDrive/Masters/FishingKoreaAIS/{output_name}', index=False)

Korean column names: ['MMSI', '일시', '위도', '경도', 'SOG', 'COG', 'Heading']
English column names: ['MMSI', 'Date', 'Latitude', 'Longitude', 'SOG', 'COG', 'Heading']
         MMSI                 Date   Latitude   Longitude   SOG    COG  \
1   100000000  2023-05-01 09:04:23  36.717567  122.198050  10.1  342.6   
87  100044559  2023-05-01 01:07:15  34.737998  122.672907   0.2  177.4   
88  100044559  2023-05-01 01:07:15  34.738002  122.672907   0.9  232.2   
89  100044559  2023-05-01 01:10:15  34.737837  122.672932   0.2  168.8   
90  100044559  2023-05-01 17:01:23  34.702052  122.633342   6.4  181.2   

    Heading  
1       511  
87      511  
88      511  
89      511  
90      511  
Korean column names: ['MMSI', '일시', '위도', '경도', 'SOG', 'COG', 'Heading']
English column names: ['MMSI', 'Date', 'Latitude', 'Longitude', 'SOG', 'COG', 'Heading']
          MMSI                 Date   Latitude   Longitude  SOG    COG  \
102  100044559  2023-05-02 17:08:29  34.689125  122.622248  6.0    2.7   

In [57]:
# fishing_boats_mmsi = set(fishing_boats["MMSI"])
# filtered_df = df_dynamic[df_dynamic["MMSI"].isin(fishing_boats_mmsi)]
# filtered_df

In [47]:
# Fraction of position rows that belong to fishing boats. filtered_df and
# df_dynamic are reassigned for every file in the loop, so this describes
# only the most recently processed file.
filtered_df.shape[0] / df_dynamic.shape[0]

0.1858610482716577

In [48]:
# Row counts behind the fraction: fishing-boat rows, then all rows
# (most recently processed file only).
print(filtered_df.shape[0], df_dynamic.shape[0])

995106 5354032


In [51]:
# Number of fishing-boat rows in the most recently filtered file.
print(filtered_df.shape[0])

995106
