# **Eksplorasi Data Analysis - Data PHL**

*Oleh Tim 1*

Anggota:
*   Aldrin Rayhan Putra
*   Caturiani Pratidina Bintari
*   Muhammad Azhar Alauddin

In [None]:
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
from datetime import datetime
import io
from google.colab import files

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')
import os
CURRENT_PATH = "/content/drive/MyDrive/datasets"
os.chdir(CURRENT_PATH)

Mounted at /content/drive


In [None]:
# import Kayu bulat CSV File from https://phl.menlhk.go.id/tabular
data_kayu_bulat = pd.read_csv("produksi_kayu_bulat.csv")
# import EKSPOR CSV File from https://phl.menlhk.go.id/tabular
data_ekspor = pd.read_csv("ekspor.csv")
data_kayu_olah = pd.read_csv("produksi_kayu_olahan.csv")
data_bahan_baku = pd.read_csv("pemenuhan_bahan_baku.csv")


def styling_table_contents(table_input):
    # Define the CSS styles for center alignment
    styles = [
        {'selector': 'th', 'props': [('text-align', 'center')]},  # For column names
        {'selector': 'td', 'props': [('text-align', 'center')]}   # For table content
    ]

    # Apply the styles to the DataFrame
    styled_table = table_input.style.set_table_styles(styles).set_properties(**{'text-align': 'center'})

    # Return the styled table
    return styled_table

# **Produksi Kayu Bulat**

- Volume produksi kayu bulat terbanyak berdasarkan provinsi 

In [None]:
kbulat_grouped_by_provinsi = data_kayu_bulat.groupby('provinsi')['volume'].sum().reset_index()
kbulat_grouped_by_provinsi_sorted = kbulat_grouped_by_provinsi.sort_values(by='volume', ascending=False).reset_index(drop=True)

In [None]:
import plotly.express as px
fig = px.bar(kbulat_grouped_by_provinsi_sorted, x='provinsi', y='volume',
             title='Bar Chart Nilai Ekspor berdasarkan Provinsi',
             labels={'provinsi': 'Provinsi', 'volume': 'Volume'})
# Mengatur judul dan label sumbu
fig.update_layout(title='Bar Chart Provinsi per Volume Kayu Bulat',
                  xaxis_title='Provinsi',
                  yaxis_title='Volume')
fig.update_xaxes(categoryorder='total descending')
fig.show()

- Volume kelompok kayu bulat berdasarkan terbanyak

In [None]:
# Group by kelompok and calculate the sum of the volume
kbulat_grouped_by_kelompok = data_kayu_bulat.groupby('kelompok')['volume'].sum().reset_index()
# Sort the values in descending order
kbulat_grouped_by_kelompok_sorted = kbulat_grouped_by_kelompok.sort_values(by='volume', ascending=False).reset_index(drop=True)
kbulat_grouped_by_kelompok_sorted

Unnamed: 0,kelompok,volume
0,Hutan Tanaman Industri,165672444
1,Rimba Campuran,81387782
2,Kelompok Meranti,26466182
3,Jenis Khusus,7493774
4,Kayu Indah,766261
5,JENIS KHUSUS,180000
6,Jenis Kayu Lainnya,34008
7,Kayu Eboni,5451
8,Jenis Dilindungi,1244


In [None]:
fig = px.bar(kbulat_grouped_by_kelompok_sorted, x='kelompok', y='volume',
             title='Bar Chart Volume Kayu Bulat',
             labels={'kelompok': 'Kelompok', 'volume': 'Volume'})

fig.update_layout(xaxis={'categoryorder': 'total descending'})

fig.show()


# **Data Ekspor**

- Provinsi dengan value terbanyak (Ekspor)

In [None]:
# Group by provinsi and calculate the sum of the value usd
ekspor_grouped_by_provinsi = data_ekspor.groupby('provinsi')['value usd'].sum().reset_index()
# Sort the values in descending order
ekspor_grouped_by_provinsi_sorted = ekspor_grouped_by_provinsi.sort_values(by='value usd', ascending=False).reset_index(drop=True)
ekspor_grouped_by_provinsi_sorted = ekspor_grouped_by_provinsi.rename(columns={'provinsi': 'Provinsi', 'value usd': 'Nilai Ekspor'})
ekspor_grouped_by_provinsi_sorted


Unnamed: 0,Provinsi,Nilai Ekspor
0,Bali,445881100.0
1,Banten,1299468000.0
2,Bengkulu,6514999.0
3,DI Yogyakarta,344114200.0
4,DKI Jakarta,25863090000.0
5,Gorontalo,6431317.0
6,Jambi,901009300.0
7,Jawa Barat,2216913000.0
8,Jawa Tengah,8986058000.0
9,Jawa Timur,10240830000.0


In [None]:
fig = px.bar(ekspor_grouped_by_provinsi_sorted, x='Provinsi', y='Nilai Ekspor',
             title='Bar Chart Nilai Ekspor berdasarkan Provinsi',
             labels={'Provinsi': 'Provinsi', 'Nilai Ekspor': 'Nilai (USD)'})

fig.update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

- Jenis dengan urutan value (ekspor)

In [None]:
# Group by value and calculate the sum of the value usd
ekspor_grouped_by_produk = data_ekspor.groupby('produk')['value usd'].sum().reset_index()
# Sort the values in descending order
ekspor_grouped_by_produk_sorted = ekspor_grouped_by_produk.sort_values(by='value usd', ascending=False).reset_index(drop=True)
ekspor_grouped_by_produk_sorted = ekspor_grouped_by_produk.rename(columns={'produk': 'Produk', 'value usd': 'Nilai Ekspor'})
ekspor_grouped_by_produk_sorted

Unnamed: 0,Produk,Nilai Ekspor
0,Bangunan Prefabrikasi,18082460.0
1,Chipwood,406569900.0
2,Furnitur Kayu,9188575000.0
3,Kerajinan,539418900.0
4,Panel,13455910000.0
5,Paper,20884230000.0
6,Pulp,16084950000.0
7,Veneer,553725400.0
8,Woodworking,5917787000.0


In [None]:
# Buat bar chart menggunakan Plotly Express
fig = px.bar(ekspor_grouped_by_produk_sorted, x='Produk', y='Nilai Ekspor',
             title='Bar Chart Nilai Ekspor Berdasarkan Produk',
             labels={'Nilai Ekspor': 'Nilai (USD)'})

# Mengatur urutan sumbu x berdasarkan nilai terbesar ke terkecil
fig.update_layout(xaxis={'categoryorder': 'total descending'})

# Tampilkan bar chart
fig.show()

# **Exploratory Data Analysis**
**Keterkaitan Produksi Kayu Bulat dan Ekspor**

1. Merge data volume dan value based on provinsi, sort by volume

In [None]:
np.where(pd.isnull(data_ekspor))
np.where(pd.isnull(data_kayu_bulat))
data_ekspor[data_ekspor.duplicated(keep=False)]
data_kayu_bulat[data_kayu_bulat.duplicated(keep=False)]

Unnamed: 0,tahun,bulan,provinsi,kelompok,volume,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9


In [None]:
import pandas as pd

merge_data = pd.merge(kbulat_grouped_by_provinsi, ekspor_grouped_by_provinsi, on='provinsi', how='outer')

merge_data['volume'] = merge_data['volume'].fillna(0)
merge_data['value usd'] = merge_data['value usd'].fillna(0)

merge_data_sorted = merge_data.sort_values(by='volume', ascending=False).reset_index(drop=True)
merge_data_sorted = merge_data_sorted.rename(columns={'provinsi': 'Provinsi', 'volume': 'Volume(m3)', 'value usd': 'Nilai Ekspor(USD)'})

styled_data = styling_table_contents(merge_data_sorted)
table1 = styled_data.format({'Volume (m3)': '{:,.2f}', 'Nilai Ekspor': '{:,.2f}'})

table1

Unnamed: 0,Provinsi,Volume(m3),Nilai Ekspor(USD)
0,Riau,120530495.0,2943398296.41
1,Sumatera Selatan,49596849.0,7413962913.42
2,Jambi,29238930.0,901009345.32
3,Kalimantan Timur,26143394.0,974667079.98
4,Kalimantan Tengah,17729320.0,315947342.11
5,Sumatera Utara,8489791.0,1533624128.49
6,Kalimantan Utara,7564534.0,329794777.88
7,Kalimantan Barat,5989828.0,662064186.74
8,Papua Barat,3956200.0,55555358.96
9,Papua,3011898.0,693779377.52


In [None]:
fig = px.bar(merge_data_sorted, x='Provinsi', y='Volume(m3)', color='Nilai Ekspor(USD)', 
             title='Volume produksi kayu bulat dan nilai ekspor berdasarkan Provinsi')
fig.update_layout(xaxis_title='Provinsi', yaxis_title='Volume(m3)', coloraxis_colorbar_title='Nilai Ekspor(USD)')
fig.show()

In [None]:
import plotly.graph_objects as go

fig = go.Figure()

# Tambahkan trace bar untuk Luas Tanah (Ha)
fig.add_trace(go.Bar(
    x=merge_data_sorted['Provinsi'],
    y=merge_data_sorted['Volume(m3)'],
    name='Volume(m3)',
    yaxis='y',
    offset=0,
    width=0.4,
    marker_color='blue'
))

# Tambahkan trace bar untuk Bahan Baku
fig.add_trace(go.Bar(
    x=merge_data_sorted['Provinsi'],
    y=merge_data_sorted['Nilai Ekspor(USD)'],
    name='Nilai Ekspor(USD)',
    yaxis='y2',
    offset=0.4,
    width=0.4,
    marker_color='red'
))

# Atur layout
fig.update_layout(
    title='Volume produksi kayu bulat dan nilai ekspor berdasarkan Provinsi',
    xaxis=dict(title='Provinsi'),
    yaxis=dict(title='Volume(m3)', side='left', showgrid=False, tickformat=',.2f'),
    yaxis2=dict(title='Nilai Ekspor(USD)', side='right', overlaying='y', showgrid=False, tickformat=',.2f'),
    barmode='group',
    legend=dict(x=0.8, y=1),
)

fig.show()

2. Volume kayu dan Jenis Kayu Bulat beradasakan provinsi

In [None]:
kbulat_grouped_by_provinsi_sorted = data_kayu_bulat.groupby(['provinsi', 'kelompok'])['volume'].sum().reset_index()
fig = px.bar(kbulat_grouped_by_provinsi_sorted, x='provinsi', y='volume', color='kelompok', 
             title='Volume dan Jenis Kayu Bulat berdasarkan Provinsi')

fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

# **Bahan Sankey**

In [33]:
import pandas as pd
import plotly.graph_objects as go

df = pd.read_csv('ekspor.csv')
df1 = df.groupby(['tahun', 'produk'])['value usd'].count().reset_index()
df1.columns = ['source', 'target', 'value']
unique_source_target = df1['source'].unique().tolist() + df1['target'].unique().tolist()

source = df1['source'].map({'2023':'2023', '2022':'2022', '2020':'2020', '2019':'2019', '2018':'2018'})
target = df1['target'].map({'Bangunan Prefarikasi': 'Bangunan Prefarikasi', 'Chipwood': 'Chipwood', 'Furnitur Kayu': 'Furnitur Kayu', 'Kerajinan': 'Kerajinan', 'Panel': 'Panel', 'Pulp': 'Pulp', 'Paper': 'Paper', 'Veneer': 'Veneer', 'Woodworking': 'Woodworking'})

fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=unique_source_target,
        color="blue"
    ),
    link=dict(
        source=df1['tahun'],
        target=df1['produk'],
        value=df1['value']
    ))])

fig.update_layout(title_text="Alur Proses Pemetaan Supply and Demand", font_size=10)
fig.show()


KeyError: ignored