In [3]:
import pandas as pd
import numpy as np
import folium
from fcmeans import FCM
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [4]:
# Load dataset
data = pd.read_csv('C:/Users/ASUS/Downloads/katalog_gempa.csv')
data

Unnamed: 0,tgl,ot,lat,lon,depth,mag,remark,strike1,dip1,rake1,strike2,dip2,rake2
0,5/6/2013,26:25.4,-7.33,107.71,19,3.1,Java - Indonesia,,,,,,
1,5/7/2013,12:39.7,-7.36,107.59,10,3.5,Java - Indonesia,,,,,,
2,5/7/2013,35:40.2,1.37,98.35,54,4.6,Northern Sumatra - Indonesia,,,,,,
3,5/7/2013,27:06.9,-7.33,107.69,17,3.0,Java - Indonesia,,,,,,
4,5/7/2013,22:00.5,0.66,123.91,257,3.5,Minahassa Peninsula - Sulawesi,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77726,1/26/2023,25:09.3,3.24,127.18,10,4.0,Talaud Islands - Indonesia,,,,,,
77727,1/26/2023,15:03.9,2.70,127.10,10,3.9,Northern Molucca Sea,,,,,,
77728,1/26/2023,57:08.9,-7.83,121.07,10,3.8,Flores Sea,,,,,,
77729,1/26/2023,46:21.0,3.00,127.16,10,4.1,Northern Molucca Sea,,,,,,


In [8]:
# Preprocessing data
data['tgl'] = pd.to_datetime(data['tgl'])
df_filter = data[(data['tgl'].dt.year >= 2013) & (data['tgl'].dt.year <= 2022)]
df_filter['year'] = df_filter['tgl'].dt.year

# Yearly number of recorded earthquakes from 2013 to 2022
df_year = df_filter.groupby(['year'])['tgl'].count().reset_index().rename(columns={'tgl': 'count'})
df_year.sort_values(['year'])
print(df_year)


   year  count
0  2013   1263
1  2014   2430
2  2015   5378
3  2016   5134
4  2017   7597
5  2018  12345
6  2019  12155
7  2020   8690
8  2021  10714
9  2022  10849




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [6]:
# 1. How many earthquakes have been recorded in Indonesia from 2013 to 2022?
total_earthquakes = df_filter.shape[0]
print(f"From 2013 to 2022, {total_earthquakes} earthquakes have been recorded in Indonesia.")

# Yearly number of recorded earthquakes from 2013 to 2022
df_year = df_filter.groupby(['year'])['tgl'].count().reset_index().rename(columns={'tgl': 'count'})  # Mengganti 'datetime' menjadi 'tgl'
df_year.sort_values(['year'])
print(df_year)

portion_2021_2022 = df_year[df_year['year'] >= 2021]['count'].sum() / df_year['count'].sum()
print(f"The portion of the number of recorded earthquakes in 2021 and 2022 is {portion_2021_2022}.")

fig = px.bar(
    df_year, y='count', x='year',
    text_auto='.2s',
    category_orders={'year': ['2022', '2021', '2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013']},
    color=['#bfbfbf', '#bfbfbf', '#bfbfbf', '#bfbfbf', '#bfbfbf', '#bfbfbf', '#437dce', '#bfbfbf', '#437dce', '#437dce'],
    color_discrete_map="identity",
    width=780
)

fig.update_traces(hovertemplate=('<b>%{x}</b><br>%{y:.0f}'))

fig.update_layout(
    title="<b>The Number of Recorded Earthquakes in Indonesia</b><br><sub>Year</sub>",
    xaxis_title="<sub>Between 2013 and 2022, Indonesia experienced 77,261 earthquakes, a quarter of which occurred in 2021 and 2022.<br>However, the year with the highest number of recorded earthquakes was 2019.</sub>",
    yaxis_title="",
    plot_bgcolor='#fbfbfa'
)

fig.show()

# 3. What are the top 10 most affected regions by earthquakes in Indonesia?
df_region = df_filter.groupby(['remark'])['tgl'].count().reset_index().rename(columns={'tgl': 'count'})  # Mengganti 'region' menjadi 'remark'
df_region = df_region.sort_values('count', ascending=False).head(10)
print(df_region)

fig = px.bar(
    df_region, y='count', x='remark',  # Mengganti 'region' menjadi 'remark'
    text='count',
    color='count',
    color_continuous_scale='Blues',
    width=780
)

fig.update_traces(hovertemplate=("<b>%{x}</b><br>Number of Earthquakes: %{y}"))

fig.update_layout(
    title="<b>Top 10 Most Affected Regions by Earthquakes in Indonesia</b>",
    xaxis_title="<sub>Region</sub>",
    yaxis_title="<sub>Number of Earthquakes</sub>",
    plot_bgcolor='#fbfbfa'
)

fig.show()
data

From 2013 to 2022, 76555 earthquakes have been recorded in Indonesia.
   year  count
0  2013   1263
1  2014   2430
2  2015   5378
3  2016   5134
4  2017   7597
5  2018  12345
6  2019  12155
7  2020   8690
8  2021  10714
9  2022  10849
The portion of the number of recorded earthquakes in 2021 and 2022 is 0.28166677552086733.


                            remark  count
18  Minahassa Peninsula - Sulawesi   7818
40            Sulawesi - Indonesia   6876
43      Sumbawa Region - Indonesia   6659
15                Java - Indonesia   5305
27    Northern Sumatra - Indonesia   4705
26            Northern Molucca Sea   4173
4                        Banda Sea   4068
38    Southern Sumatra - Indonesia   4027
31               Seram - Indonesia   3764
12           Halmahera - Indonesia   3106


Unnamed: 0,tgl,ot,lat,lon,depth,mag,remark,strike1,dip1,rake1,strike2,dip2,rake2
0,2013-05-06,26:25.4,-7.33,107.71,19,3.1,Java - Indonesia,,,,,,
1,2013-05-07,12:39.7,-7.36,107.59,10,3.5,Java - Indonesia,,,,,,
2,2013-05-07,35:40.2,1.37,98.35,54,4.6,Northern Sumatra - Indonesia,,,,,,
3,2013-05-07,27:06.9,-7.33,107.69,17,3.0,Java - Indonesia,,,,,,
4,2013-05-07,22:00.5,0.66,123.91,257,3.5,Minahassa Peninsula - Sulawesi,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77726,2023-01-26,25:09.3,3.24,127.18,10,4.0,Talaud Islands - Indonesia,,,,,,
77727,2023-01-26,15:03.9,2.70,127.10,10,3.9,Northern Molucca Sea,,,,,,
77728,2023-01-26,57:08.9,-7.83,121.07,10,3.8,Flores Sea,,,,,,
77729,2023-01-26,46:21.0,3.00,127.16,10,4.1,Northern Molucca Sea,,,,,,


In [12]:
# Preprocessing data
data_preprocessed = data[['lat', 'lon', 'depth', 'mag', 'remark']].copy()
data_preprocessed

# Scaling data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data_preprocessed[['lat', 'lon', 'depth', 'mag']])

# FPCM clustering
fcm = FCM(n_clusters=5, m=2, max_iter=100)
fcm.fit(data_scaled)
cluster_labels = fcm.predict(data_scaled)

# Add cluster labels to the preprocessed data
data_preprocessed['cluster'] = cluster_labels

# Group data by location and calculate median latitude and longitude for each cluster
df_location = data_preprocessed.groupby(['cluster', 'remark'])[['lat', 'lon']].median().reset_index().rename(columns={'lat':'median_latitude', 'lon':'median_longitude'})

# Group data by location and count the number of earthquakes for each cluster
df_location_count = data_preprocessed.groupby(['cluster', 'remark']).size().reset_index(name='count')

# Merge location data and earthquake count data
df_location_count = pd.merge(df_location, df_location_count, how='inner', on=['cluster', 'remark'])

# Remove label of locations that have a count of earthquakes less than the 80th percentile for simpler visualization
df_location_count.loc[df_location_count['count'] < df_location_count['count'].quantile(0.8), 'remark'] = ''

# Create a map centered on Indonesia
map_center = [-2.5, 118]
m = folium.Map(location=map_center, zoom_start=5)

# Define colors for each cluster
cluster_colors = ['blue', 'red', 'green', 'orange', 'purple']

# Add markers to the map for each cluster
for index, row in df_location_count.iterrows():
    lat, lon, count, remark, cluster = row['median_latitude'], row['median_longitude'], row['count'], row['remark'], row['cluster']
    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color=cluster_colors[cluster],
        fill=True,
        fill_color=cluster_colors[cluster],
        fill_opacity=0.6,
        popup=f"Cluster: {cluster}<br>Location: {remark}<br>Earthquake Count: {count}"
    ).add_to(m)

# Display the map
m