# LANL-Earthquake-Prediction based on FFT features

* [1. Introduction](#section1)
* [2. Initial Setup](#section2)
* [3. Features Set EDA](#section3)

<a id='section1'></a>
## 1. Introduction

<a id='section2'></a>
## 2. Initial Setup

In [None]:
import numpy as np 
import pandas as pd
import os

import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import style
style.use('ggplot')

import seaborn as sns
sns.set()

from IPython.display import HTML
display(HTML("<style>.container { width:98% !important; }</style>"))

import timeit
from tqdm import tqdm

from ipywidgets import interact
import ipywidgets as widgets

from scipy import fftpack

from os import listdir
print(listdir("../features"))

<a id='section3'></a>
## 3. Features Set EDA

In [None]:
from glob import glob

In [None]:
# Suffix that selects which feature files are loaded: files are matched against
# "../features/*<filter_type>.csv". Later cells also use this value as the label
# of the column that is split off as the regression target.
filter_type = '2048'

In [None]:
# Collect every feature CSV whose name ends with the selected filter suffix.
# glob() yields paths (directory prefix included) in arbitrary, OS-dependent
# order, so the list is sorted once here: the preview below and any positional
# index then agree with the cells that index into sorted(features_files).
features_files = sorted(glob("../features/*{}.csv".format(filter_type)))
features_files[:5]

In [None]:
def show_features_data_by_index(idx):
    """Print the name of, and preview, one feature file.

    Args:
        idx: Position of the file within the alphabetically sorted
            ``features_files`` list.

    The file is read as CSV and its 'Unnamed: 0' column is dropped. For files
    whose name does not contain 'test', the last column is relabelled 'ttf'.
    The first rows are rendered with ``display``; nothing is returned.
    """
    ordered_files = sorted(features_files)
    selected = ordered_files[idx]
    print(selected)

    csv_path = os.path.join("../features", selected)
    frame = pd.read_csv(csv_path)
    frame = frame.drop(labels=['Unnamed: 0'], axis=1)

    is_test_file = 'test' in selected
    if not is_test_file:
        # Keep every column label except the last, which becomes 'ttf'.
        frame.columns = [*frame.columns[:-1], 'ttf']

    display(frame.head())

In [None]:
# Interactive preview: an integer slider spanning every valid position in
# features_files drives show_features_data_by_index.
# NOTE(review): the trailing ';' is presumably there to keep the notebook from
# echoing the call's return value.
interact(show_features_data_by_index, idx=widgets.IntSlider(min=0,max=len(features_files)-1,step=1,value=0));

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
#from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
#from sklearn.preprocessing import Imputer
from xgboost import XGBRegressor

In [None]:
# Gradient-boosted regressor created with the library's default
# hyperparameters (no arguments are overridden here).
my_XGB_model = XGBRegressor()

In [None]:
# Load the file at position 1 of the sorted file list into `df`, without its
# 'Unnamed: 0' column.
idx = 1
the_file_name = sorted(features_files)[idx]
df = pd.read_csv(
    os.path.join("../features", the_file_name)
).drop(columns=['Unnamed: 0'])

In [None]:
# Start with two independent, empty frames: one accumulates the training rows,
# the other the validation rows.
train_concat_df, valid_concat_df = pd.DataFrame(), pd.DataFrame()

In [None]:
# Split the non-test feature files into a training set and a validation set.
# Files are visited in sorted order; a non-test file whose position in that
# order is below n_train_positions goes to training, every other non-test file
# goes to validation.
n_train_positions = 13
train_frames = []
valid_frames = []
for idx, the_file_name in enumerate(sorted(features_files)):
    if 'test' in the_file_name:
        continue  # test files belong to neither split
    df = pd.read_csv(os.path.join("../features", the_file_name)).drop(labels=['Unnamed: 0'], axis=1)
    if idx < n_train_positions:
        train_frames.append(df)
    else:
        # Validation frames are collected separately from the training frames,
        # so the validation score is never computed on rows the model saw.
        valid_frames.append(df)

# One concat per split (DataFrame.append no longer exists in pandas >= 2.0).
# pd.concat raises on an empty list, hence the fallback to an empty frame.
# Rebuilding both frames from scratch keeps the cell idempotent when re-run.
train_concat_df = pd.concat(train_frames) if train_frames else pd.DataFrame()
valid_concat_df = pd.concat(valid_frames) if valid_frames else pd.DataFrame()

In [None]:
# Training inputs are every column except the one labelled by filter_type;
# that column is the training target.
train_X = train_concat_df.drop(columns=[filter_type])
train_y = train_concat_df[filter_type]

In [None]:
# Fit the model on the training split and print how long the fit took.
# timeit.default_timer() is read before and after the call; the difference is
# formatted with two decimals.
start_time = timeit.default_timer()
my_XGB_model.fit(train_X, train_y, verbose=True)
print('elapsed time: {:.2f} sec'.format(timeit.default_timer()-start_time))    

In [None]:
# Validation inputs are every column except the one labelled by filter_type;
# that column is the validation target.
valid_X = valid_concat_df.drop(columns=[filter_type])
valid_y = valid_concat_df[filter_type]

In [None]:
# Predict the target for every row of the validation inputs.
XGB_predictions = my_XGB_model.predict(valid_X)
# Disabled exponential back-transform of the predictions.
# NOTE(review): presumably left over from an experiment with a log-transformed
# target — confirm before re-enabling.
#XGB_predictions = np.exp(XGB_predictions)

In [None]:
# Mean absolute error of the validation predictions.
# scikit-learn's signature is mean_absolute_error(y_true, y_pred), so the
# ground truth is passed first and the predictions second.
XGB_mae = mean_absolute_error(valid_y, XGB_predictions)
print("Validation MAE for XGBoost Model : " + str(XGB_mae))

In [None]:
# Hand-recorded results, one entry per integer key.
# NOTE(review): the keys presumably correspond to the filter_type values that
# were tried, 'MAE' to the validation MAE printed by the notebook, and
# 'elapsed time' to the fit time in seconds — confirm, and re-measure if the
# train/validation split changes.
{
32:{'MAE': 2.851, 'elapsed time': 24.7}, 
128:{'MAE': 2.61466, 'elapsed time': 95.85},
1024: {'MAE': 2.527, 'elapsed time': 758},
2048:{'MAE': 2.527, 'elapsed time': 1515}
}