In [0]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load S&P 500 df
df_quake_gold = pd.read_csv("https://raw.githubusercontent.com/labs13-quake-viewer/ds-data/master/" +
                             "Gold%20Price%20Change%20by%20Earthquake.csv", index_col=0)
df_quake_gold.shape

(1445, 9)

In [3]:
df_quake_gold.head()

Unnamed: 0,Date,Mag,Price_Day_0,Price_Day_7,Price_Day_14,Price_Day_30,Appr_Day_7,Appr_Day_14,Appr_Day_30
0,1968-04-01,7.5,37.7,37.05,38.0,39.2,-1.724138,0.795756,3.97878
1,1968-04-01,6.8,37.7,37.05,38.0,39.2,-1.724138,0.795756,3.97878
2,1968-05-14,6.7,39.8,42.4,41.5,41.3,6.532663,4.271357,3.768844
3,1968-05-16,8.2,42.25,41.4,41.95,40.7,-2.011834,-0.710059,-3.668639
4,1968-05-16,7.9,42.25,41.4,41.95,40.7,-2.011834,-0.710059,-3.668639


In [0]:
dates = []
for i in df_quake_gold.Date:
  dates.append(int(''.join(c for c in i if c.isdigit())))

In [0]:
df_quake_gold["magg"] = (df_quake_gold["Mag"] * 10).astype(int)

In [0]:
df_quake_gold["dates"] = dates

In [7]:
df_quake_gold.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1445 entries, 0 to 1444
Data columns (total 11 columns):
Date            1445 non-null object
Mag             1445 non-null float64
Price_Day_0     1445 non-null float64
Price_Day_7     1445 non-null float64
Price_Day_14    1445 non-null float64
Price_Day_30    1445 non-null float64
Appr_Day_7      1445 non-null float64
Appr_Day_14     1445 non-null float64
Appr_Day_30     1445 non-null float64
magg            1445 non-null int64
dates           1445 non-null int64
dtypes: float64(8), int64(2), object(1)
memory usage: 135.5+ KB


In [8]:
y = df_quake_gold['Appr_Day_30']
X = df_quake_gold[['dates', 'Mag']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)
print("Original shape:", X.shape, "\n")

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

Original shape: (1445, 2) 

X_train shape: (1083, 2)
X_test shape: (362, 2)
y_train shape: (1083,)
y_test shape: (362,)


In [9]:
X_train.sample()

Unnamed: 0,dates,Mag
606,19911113,6.8


In [0]:
# Instantiate model with 100 decision trees
rf = RandomForestRegressor(n_estimators = 100, random_state = 42)

In [20]:
# Train model on training data
rf.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=100,
                      n_jobs=None, oob_score=False, random_state=42, verbose=0,
                      warm_start=False)

In [0]:
# Use forest's predict method on test data
predictions = rf.predict(X_test)

In [0]:
# Calculate absolute errors
errors = abs(predictions - y_test)

In [23]:
# Print out mean absolute error
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 3.11 degrees.


In [24]:
# Calculate and display accuracy
accuracy = errors.sum() / y_test.sum()
print("For Gold, Incident Mag >= 6.7 ({} incidents)".format(df_quake_gold.shape[0]))
print("Random Forest Regressor Model score:", rf.score(X_train, y_train))
print('Predictive Accuracy:', round(accuracy, 2), '%.')

For Gold, Incident Mag >= 6.7 (1445 incidents)
Random Forest Regressor Model score: 0.911516941707296
Predictive Accuracy: 6.14 %.
