In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import ee
import geemap

def _expand_row_cells(cells):
    """Flatten the cells of one table row into a list of strings.

    Each cell contributes its stripped text followed by ``colspan - 1`` empty
    strings, so a cell spanning several columns still occupies that many
    positions in the output row. Only ``colspan`` is expanded; ``rowspan`` is
    not taken into account.
    """
    expanded = []
    for cell in cells:
        # A missing colspan attribute is treated as a span of one column.
        colspan = int(cell.get_attribute("colspan") or "1")
        expanded.append(cell.text.strip())
        expanded.extend([""] * (colspan - 1))
    return expanded


def scrape_bps_data(
    geckodriver_path="C:/Users/User/Downloads/geckodriver.exe",
    firefox_binary_path="C:/Program Files/Mozilla Firefox/firefox.exe",
    output_path="produksi_perikanan_budidaya.csv",
    header_row_count=4,
):
    """Scrape the first ``<table>`` of the BPS statistics page into a CSV file.

    A Firefox session is started through Selenium, the page is loaded, and the
    rows of the first table found are written to ``output_path``. The first
    ``header_row_count`` rows are read as header rows (``th`` and ``td``
    cells); the remaining rows are read as data rows (``td`` cells only).
    All rows are right-padded with empty strings to a common width.

    Args:
        geckodriver_path: Path to the geckodriver executable.
        firefox_binary_path: Path to the Firefox binary.
        output_path: Destination CSV file (written as UTF-8).
        header_row_count: Number of leading ``<tr>`` rows treated as headers.

    Returns:
        The value of ``output_path``.

    Raises:
        ValueError: If the table contains no ``<tr>`` rows.
    """
    print("Memulai scraping data dari BPS...")

    options = Options()
    options.binary_location = firefox_binary_path

    service = Service(geckodriver_path)
    driver = webdriver.Firefox(service=service, options=options)

    try:
        url = "https://www.bps.go.id/id/statistics-table/2/MTUxMyMy/produksi-perikanan-budidaya-menurut-komoditas-utama.html"
        driver.get(url)

        # The wait returns the located element, so no second lookup is needed.
        table = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.TAG_NAME, "table"))
        )
        rows = table.find_elements(By.TAG_NAME, "tr")
        if not rows:
            # Previously this surfaced as "max() arg is an empty sequence".
            raise ValueError("No rows found in the BPS table; nothing to write.")

        headers = [
            _expand_row_cells(row.find_elements(By.XPATH, ".//th|.//td"))
            for row in rows[:header_row_count]
        ]
        table_data = [
            _expand_row_cells(row.find_elements(By.TAG_NAME, "td"))
            for row in rows[header_row_count:]
        ]

        # Pad every row to the widest one so the CSV is rectangular.
        all_rows = headers + table_data
        max_len = max(len(r) for r in all_rows)
        padded_rows = [r + [""] * (max_len - len(r)) for r in all_rows]

        with open(output_path, mode="w", newline="", encoding="utf-8") as f:
            csv.writer(f).writerows(padded_rows)

        print(f"Data BPS berhasil disimpan rapi ke {output_path}")
        return output_path

    finally:
        # Always release the browser, even when scraping fails.
        driver.quit()

def initialize_earth_engine():
    """Initialize the Earth Engine client, authenticating first if needed.

    ``ee.Initialize()`` is attempted first. If it raises, ``ee.Authenticate()``
    followed by ``ee.Initialize()`` is attempted once. Progress and failures
    are reported with ``print``; exceptions are not propagated.

    Returns:
        bool: True if Earth Engine was initialized (directly or after
        authentication), False if both attempts failed.
    """
    try:
        ee.Initialize()
    except Exception:
        print("Earth Engine belum diinisialisasi, mencoba autentikasi...")
    else:
        print("Earth Engine berhasil diinisialisasi.")
        return True

    try:
        ee.Authenticate()
        ee.Initialize()
    except Exception as e:
        print(f"Gagal mengautentikasi Earth Engine: {e}")
        # Report the failure to the caller instead of only printing it.
        return False

    print("Earth Engine berhasil diautentikasi dan diinisialisasi.")
    return True

def extract_gee_data(start_date='2023-04-01', end_date='2023-04-07',
                     output_csv="suhu_perbandingan.csv"):
    """Compute mean surface temperature per Indonesian province and save it.

    Images of the ``NOAA/CFSR`` collection between ``start_date`` and
    ``end_date`` are averaged, then reduced with a mean reducer over the
    ``FAO/GAUL/2015/level1`` features whose ``ADM0_NAME`` is "Indonesia".
    The result is written to ``output_csv`` and returned.

    Args:
        start_date: Start of the date filter, as a ``YYYY-MM-DD`` string. Its
            first four characters are used in the temperature column name.
        end_date: End of the date filter, as a ``YYYY-MM-DD`` string.
        output_csv: Destination CSV file (written as UTF-8, without index).

    Returns:
        A DataFrame with the columns ``Provinsi`` (upper-cased province
        name), ``Suhu <year>`` and ``Suhu rata-rata``. Values are stored as
        returned by Earth Engine; no unit conversion is done here
        (presumably Kelvin; a later cell subtracts 273.15).
    """
    print("Mengambil data dari Google Earth Engine...")
    initialize_earth_engine()

    # Derive the column name once so the rename and the later lookup cannot
    # drift apart when the dates change.
    temperature_column = f'Suhu {start_date[:4]}'

    def get_temperature_stats(start_date, end_date):
        """Return per-province mean 'Temperature_surface' for the date range."""
        dataset = ee.ImageCollection('NOAA/CFSR').filter(ee.Filter.date(start_date, end_date))
        temperature_surface = dataset.select('Temperature_surface')
        indonesia = ee.FeatureCollection("FAO/GAUL/2015/level1").filter(
            ee.Filter.eq("ADM0_NAME", "Indonesia")
        )
        temp_stats = temperature_surface.mean().reduceRegions(
            collection=indonesia,
            reducer=ee.Reducer.mean(),
            scale=1000
        )
        stats_df = geemap.ee_to_df(temp_stats)
        return stats_df[['ADM1_NAME', 'mean']].rename(
            columns={'ADM1_NAME': 'Provinsi', 'mean': temperature_column}
        )

    df = get_temperature_stats(start_date, end_date)

    # With a single period this equals the period column; it is kept because
    # downstream cells read 'Suhu rata-rata'.
    df['Suhu rata-rata'] = df[[temperature_column]].mean(axis=1)

    df['Provinsi'] = df['Provinsi'].str.upper()

    df.to_csv(output_csv, index=False, encoding='utf-8')
    print(f"Data suhu berhasil disimpan: {output_csv}")

    return df

# Entry point: run the BPS scrape first, then the Earth Engine extraction.
if __name__ == "__main__":  
    # NOTE: scrape_bps_data() returns the path of the CSV it wrote, so despite
    # its name `df_bps` holds a path string, not a DataFrame.
    df_bps = scrape_bps_data()
    # extract_gee_data() returns the temperature DataFrame it also saves to CSV.
    df_gee = extract_gee_data()
    print("Pipeline selesai dijalankan.")


Memulai scraping data dari BPS...
Data BPS berhasil disimpan rapi ke produksi_perikanan_budidaya.csv
Mengambil data dari Google Earth Engine...
Earth Engine berhasil diinisialisasi.
Data suhu berhasil disimpan: suhu_perbandingan.csv
Pipeline selesai dijalankan.


In [21]:
import pandas as pd

# Load the scraped table with no header row and discard the row labelled 0.
df = pd.read_csv('produksi_perikanan_budidaya.csv', header=None).drop(index=0)

# Use the first remaining row as temporary column labels, then drop the row
# labelled 1 from the data.
df.columns = df.iloc[0]
df = df.drop(index=1)

# Split the frame: first column holds the provinces, the rest the values.
provinces, df = df.iloc[:, 0], df.iloc[:, 1:]

fish_species = [
    'Gurame', 'Patin', 'Lele', 'Nila', 'Ikan Mas', 'Kakap',
    'Bandeng', 'Rumput Laut', 'Kerapu', 'Udang', 'Ikan lainnya',
]

# Build the "<species> (2023)" labels; columns_2020 is never filled in here.
columns_2020 = []
columns_2023 = []
new_columns = []
for i, species in enumerate(fish_species):
    columns_2023.append(f"{species} (2023)")
    new_columns.append(columns_2023[i])

# The value columns must match the species list one-to-one.
assert len(new_columns) == len(df.columns)

df.columns = new_columns

# Put the province names back as the leading column and save the result.
df.insert(0, 'Provinsi', provinces)

df.to_csv('produksi_perikanan_budidaya_try.csv', index=False)


In [23]:
import pandas as pd

# NOTE(review): this cell reads 'produksi_perikanan_budidaya_final.csv' and
# writes its result back to the same path, so re-running it reprocesses its
# own output. Confirm which file is the intended input.
df = pd.read_csv('produksi_perikanan_budidaya_final.csv', header=None).drop(index=0)

# Use the first remaining row as temporary column labels, then drop the row
# labelled 1 from the data.
df.columns = df.iloc[0]
df = df.drop(index=1)

# First column: provinces. Remaining columns: one per species.
provinces, df = df.iloc[:, 0], df.iloc[:, 1:]

fish_species = [
    'Gurame', 'Patin', 'Lele', 'Nila', 'Ikan Mas', 'Kakap',
    'Bandeng', 'Rumput Laut', 'Kerapu', 'Udang', 'Ikan lainnya',
]

# Build the "<species> (2023)" labels; columns_2020 is never filled in here.
columns_2020 = []
columns_2023 = []
new_columns = []
for i, species in enumerate(fish_species):
    columns_2023.append(f"{species} (2023)")
    new_columns.append(columns_2023[i])

# The value columns must match the species list one-to-one.
assert len(new_columns) == len(df.columns)

df.columns = new_columns

# Re-attach the province names as the leading column and save.
df.insert(0, 'Provinsi', provinces)

df.to_csv('produksi_perikanan_budidaya_final.csv', index=False)

In [25]:
import pandas as pd

# Load the temperature table written by the extraction step.
df = pd.read_csv("suhu_perbandingan.csv")

suhu_columns = ['Suhu 2023', 'Suhu rata-rata']

# Subtract the 273.15 offset and round to two decimals in a single pass.
df[suhu_columns] = df[suhu_columns].sub(273.15).round(2)

df.to_csv("suhu_perbandingan_celsius.csv", index=False, encoding='utf-8')

print("CSV has been converted to Celsius and saved as suhu_perbandingan_celsius.csv")


CSV has been converted to Celsius and saved as suhu_perbandingan_celsius.csv


In [27]:
import pandas as pd

# Re-read the relabelled production table, parsing "-" entries as missing
# values, and store the result under a new file name.
df = pd.read_csv(
    "produksi_perikanan_budidaya_final.csv",
    na_values=["-"],
)
df.to_csv(
    "produksi_ikan_bersih.csv",
    index=False,
)