# Data AIS

In [1]:
# For 3.3.1  (NOTE(review): presumably a section number in an accompanying document — confirm)
# Register Sedona's functions with the Spark session.
# `spark` is not created in the cells visible here; presumably the kernel provides it — TODO confirm.
from sedona.register import SedonaRegistrator
SedonaRegistrator.registerAll(spark)

True

In [2]:
#For 3.3.2
from shapely.geometry import Point, Polygon, mapping
import h3.api.numpy_int as h3int

In [3]:
import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql.functions import col, when

In [4]:
# Storage locations: a shared root prefix and the sub-folder used by this notebook.
base_path = "s3a://ungp-ais-data-historical-backup/user_temp/"
path_unique = f"{base_path}212112409/"

In [5]:
# Load the source Parquet dataset into a Spark DataFrame.
ais_source_path = path_unique + "ais-ihs-indonesia-2024.parquet"
data_ais = spark.read.parquet(ais_source_path)

In [7]:
# Print every distinct `nav_status` label found in the raw data, without truncating long values.
distinct_nav_status = data_ais.select("nav_status").distinct()
distinct_nav_status.show(truncate=False)

+-----------------------------+
|nav_status                   |
+-----------------------------+
|Moored                       |
|Restricted Manoeuvrability   |
|Not Defined                  |
|Not Under Command            |
|Underway Sailing             |
|Unknown                      |
|At Anchor                    |
|Constrained By Her Draught   |
|Under Way Using Engine       |
|Engaged In Fishing           |
|Reserved For Future Amendment|
|Aground                      |
|Reserved For Future Use      |
|AIS-SART                     |
+-----------------------------+



In [6]:
# Row count of the raw dataset, used as the baseline for the filtering steps.
data_ais.count()

612394139

# Filter Data

In [7]:
# MMSI filter: keep rows whose `mmsi` lies in 100000000..999999999 (bounds inclusive),
# i.e. values written with exactly nine digits.
mmsi_has_nine_digits = col("mmsi").between(100_000_000, 999_999_999)
filtered_mmsi = data_ais.where(mmsi_has_nine_digits)

In [8]:
# Row count remaining after the MMSI filter.
filtered_mmsi.count()

612367690

In [9]:
# IMO filter: keep rows whose `imo` lies in 1000000..9999999 (bounds inclusive),
# i.e. values written with exactly seven digits.
imo_has_seven_digits = col("imo").between(1_000_000, 9_999_999)
filtered_imo = filtered_mmsi.where(imo_has_seven_digits)

In [10]:
# Row count remaining after the IMO filter.
# In the recorded run this equals the count after the MMSI filter (612367690),
# i.e. the IMO filter removed no rows.
filtered_imo.count()

612367690

In [11]:
# Navigation-status filter, applied in two steps.

# Step 1: keep rows whose numeric status code lies in 0..14 (bounds inclusive).
nav_status_code_in_range = col("nav_status_code").between(0, 14)
filtered_nav_status_code = filtered_imo.where(nav_status_code_in_range)

# Step 2: keep only rows whose textual status is one of the labels listed here.
nav_status_values = [
    'Under Way Using Engine',
    'At Anchor',
    'Restricted Manoeuvrability',
    'Moored',
    'Engaged In Fishing',
    'Underway Sailing',
]
nav_status_is_allowed = col("nav_status").isin(nav_status_values)
filtered_nav_status = filtered_nav_status_code.where(nav_status_is_allowed)

In [12]:
# Row count remaining after the navigation-status filter.
filtered_nav_status.count()

541565087

In [13]:
# Anomalous-movement filter: drop rows whose speed over ground (`sog`)
# contradicts the reported navigation status.

# Statuses the rule expects to be at rest (flagged when sog > 1) ...
statuses_expected_at_rest = ['At Anchor', 'Moored']
# ... and statuses the rule expects to be in motion (flagged when sog < 1).
# NOTE(review): 'Not Under Command' and 'Aground' are not in `nav_status_values`,
# so they cannot occur in `filtered_nav_status`; they are kept only to mirror the original rule.
statuses_expected_in_motion = [
    'Not Under Command',
    'Underway Sailing',
    'Under Way Using Engine',
    'Engaged In Fishing',
    'Restricted Manoeuvrability',
    'Aground',
]

nav_status_col = col("nav_status")
sog_col = col("sog")

at_rest_but_moving = nav_status_col.isin(statuses_expected_at_rest) & (sog_col > 1)
in_motion_but_still = nav_status_col.isin(statuses_expected_in_motion) & (sog_col < 1)

# Abnormal movement = either contradiction above; a row with sog == 1 is never flagged.
kriteria_pergerakan_tidak_normal = at_rest_but_moving | in_motion_but_still

# Normal movement is the logical negation of the abnormal criterion.
# NOTE(review): if `sog` can be NULL, the predicate evaluates to NULL for such rows and
# the filter drops them as well — confirm that this is intended.
kriteria_pergerakan_normal = ~kriteria_pergerakan_tidak_normal

# Keep only the rows classified as normal movement.
filtered_pergerakan_normal = filtered_nav_status.where(kriteria_pergerakan_normal)

In [14]:
# Row count remaining after removing rows flagged as abnormal movement.
filtered_pergerakan_normal.count()

410475975

## Save Data

In [15]:
# Alias the result of the last filtering step under the name used when saving.
data_ais_filtered = filtered_pergerakan_normal

In [16]:
# Save Data
# Write the cleaned dataset as Parquet, replacing any existing output at the same path.
# The previous `.option("header", True)` was removed: `header` is a CSV writer option and
# has no effect on Parquet output, so it only suggested a behaviour that does not exist.
clean_output_path = path_unique + "data-ais-clean-2024.parquet"
data_ais_filtered.write.mode("overwrite").parquet(clean_output_path)

In [14]:
# Stop the Spark session; this is the last visible cell, placed after the save step.
spark.stop()