# Importing Libraries

In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from imblearn.combine import SMOTEENN

# Loading the Data

In [33]:
df = pd.read_csv('earthquake-data.csv')
df

Unnamed: 0,tgl,ot,lat,lon,depth,mag,remark,strike1,dip1,rake1,strike2,dip2,rake2
0,2008/11/01,21:02:43.058,-9.18,119.06,10,4.9,Sumba Region - Indonesia,,,,,,
1,2008/11/01,20:58:50.248,-6.55,129.64,10,4.6,Banda Sea,,,,,,
2,2008/11/01,17:43:12.941,-7.01,106.63,121,3.7,Java - Indonesia,,,,,,
3,2008/11/01,16:24:14.755,-3.30,127.85,10,3.2,Seram - Indonesia,,,,,,
4,2008/11/01,16:20:37.327,-6.41,129.54,70,4.3,Banda Sea,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
92882,2023/01/26,02:25:09.288,3.24,127.18,10,4.0,Talaud Islands - Indonesia,,,,,,
92883,2023/01/26,02:15:03.893,2.70,127.10,10,3.9,Northern Molucca Sea,,,,,,
92884,2023/01/26,01:57:08.885,-7.83,121.07,10,3.8,Flores Sea,,,,,,
92885,2023/01/26,01:46:21.009,3.00,127.16,10,4.1,Northern Molucca Sea,,,,,,


# Cleaning the Data

In [34]:
# Translating Headings
translations = {
    'tgl': 'Date',
    'ot' : 'Origin Time',
    'lat' : 'Latitude',
    'lon' : 'Longitude',
    'depth' : 'Depth',
    'mag' : 'Magnitude',
    'remark' : 'Remark',
}
df.rename(columns = translations, inplace = True)

# Dropping columns we don't need
temp = df.drop(columns = ['strike1', 'dip1', 'rake1', 'strike2', 'dip2', 'rake2'])
df = temp

# Droping Nan Rows
df.dropna(inplace=True)
df

Unnamed: 0,Date,Origin Time,Latitude,Longitude,Depth,Magnitude,Remark
0,2008/11/01,21:02:43.058,-9.18,119.06,10,4.9,Sumba Region - Indonesia
1,2008/11/01,20:58:50.248,-6.55,129.64,10,4.6,Banda Sea
2,2008/11/01,17:43:12.941,-7.01,106.63,121,3.7,Java - Indonesia
3,2008/11/01,16:24:14.755,-3.30,127.85,10,3.2,Seram - Indonesia
4,2008/11/01,16:20:37.327,-6.41,129.54,70,4.3,Banda Sea
...,...,...,...,...,...,...,...
92882,2023/01/26,02:25:09.288,3.24,127.18,10,4.0,Talaud Islands - Indonesia
92883,2023/01/26,02:15:03.893,2.70,127.10,10,3.9,Northern Molucca Sea
92884,2023/01/26,01:57:08.885,-7.83,121.07,10,3.8,Flores Sea
92885,2023/01/26,01:46:21.009,3.00,127.16,10,4.1,Northern Molucca Sea


# Exploring the Data

In [35]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92887 entries, 0 to 92886
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Date         92887 non-null  object 
 1   Origin Time  92887 non-null  object 
 2   Latitude     92887 non-null  float64
 3   Longitude    92887 non-null  float64
 4   Depth        92887 non-null  int64  
 5   Magnitude    92887 non-null  float64
 6   Remark       92887 non-null  object 
dtypes: float64(3), int64(1), object(3)
memory usage: 5.0+ MB


In [36]:
df.describe()

Unnamed: 0,Latitude,Longitude,Depth,Magnitude
count,92887.0,92887.0,92887.0,92887.0
mean,-3.404577,119.159707,49.009399,3.592788
std,4.354584,10.833202,76.76107,0.834042
min,-11.0,94.02,2.0,1.0
25%,-7.885,113.17,10.0,3.0
50%,-2.91,121.16,16.0,3.5
75%,0.14,126.9,54.0,4.2
max,6.0,142.0,750.0,7.9


- The DataFrame has 92,887 entries.
- There are 7 columns:
    1. Date: 
    2. Origin Time
    3. Latitude: Latitude of the location of the earthquake
    4. Longitude: Longitude of the location of the earthquake
    5. Depth: Depth of the source of the earthquake
    6. Magntidue: Magntidue of earthquake
    7. Remark
- All columns have non-null values for all entries.
- Data types include object, float64, and int64 (integers).
- The DataFrame occupies over 5 MB of memory

In [37]:
df.Date = pd.to_datetime(df.Date)
df.set_index('Date', inplace=True)
numerical_columns = df.select_dtypes(include=['number']).columns
df_resampled = df[numerical_columns].resample('D').mean()
df_resampled['PM 10'].plot(kind='line', color='blue')

KeyError: 'PM 10'