In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

In [None]:
# Read the processed hazard table from disk and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="processed_hazard.csv")
print(data.head(n=5))

In [None]:
# Read the original time-series table from disk and preview its first five rows.
data_original = pd.read_csv(filepath_or_buffer="data_time_series2.csv")
print(data_original.head(n=5))

In [None]:
# Show the column labels of the original time-series frame so the field used
# as the regression target (taken from this frame in a later cell) can be located.
print(data_original.columns)

In [None]:
# Add the gross charge-off amount to the hazard frame as the "Total Loss" target.
# pandas aligns the new column on the row index, so this presumably relies on
# both CSVs sharing the same row order/index -- TODO confirm.
data = data.assign(**{"Total Loss": data_original["GrossChargeOffAmount"]})

In [None]:
# Preview the first rows again to check that the "Total Loss" column was added.
print(data.head())

In [None]:
# List every column currently in the working frame (features, flag and target).
print(data.columns)

In [None]:
# Retain only the rows whose "value" column equals 1, then preview the subset.
data = data.loc[data["value"].eq(1)]
print(data.head(n=5))

In [None]:
# Remove the "Unnamed: 0" column (presumably an index saved into the CSV) and
# the "value" flag, which is constant after the filter applied above.
# `columns=` replaces the positional `axis=1` argument: passing the axis
# positionally was deprecated in pandas 1.3 and raises TypeError in pandas 2.0.
data = data.drop(columns=["Unnamed: 0", "value"])
print(data.head())
print(data.columns)

In [None]:
# Separate the regression target (Y) from the feature matrix (X).
# `columns=` is used because pandas 2.0 no longer accepts the axis as a
# positional argument to DataFrame.drop.
Y = data["Total Loss"]
X = data.drop(columns="Total Loss")

In [None]:
# Stage 1: hold out 30% of the rows; the remaining 70% become the training set.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=0,
)
# Stage 2: cut the 30% hold-out in half, giving validation and test sets.
# x_test / y_test are rebound here to the final test half.
x_val, x_test, y_val, y_test = train_test_split(
    x_test,
    y_test,
    test_size=0.5,
    random_state=0,
)

In [None]:
# Fit a random forest of 10000 depth-limited trees on the training split,
# using every available core (n_jobs=-1).
# random_state is fixed so the bootstrap samples and feature draws, and
# therefore the reported metrics and importances, are reproducible across runs.
model = RandomForestRegressor(
    n_estimators=10000,
    max_depth=10,
    n_jobs=-1,
    random_state=0,
)
model.fit(x_train, y_train)

In [None]:
# Predict with the fitted forest on the training and test feature sets.
y_train_preds, y_test_preds = [
    model.predict(features) for features in (x_train, x_test)
]

In [None]:
# Normalised MSE: the mean squared error divided by the product of the mean
# prediction and the mean observed target. MSE is symmetric in its two
# arguments, so the conventional (y_true, y_pred) order gives the same value.
mse_train = mean_squared_error(y_train, y_train_preds) / (
    np.mean(y_train_preds) * np.mean(y_train)
)
mse_test = mean_squared_error(y_test, y_test_preds) / (
    np.mean(y_test_preds) * np.mean(y_test)
)
print("Training NMSE is ", mse_train)
print("Testing NMSE is ", mse_test)

In [None]:
# Coefficient of determination for the current predictions.
# r2_score expects (y_true, y_pred) and is NOT symmetric: the denominator is
# the variance of the first argument, so the observed targets must come first.
r2_train = r2_score(y_train, y_train_preds)
r2_test = r2_score(y_test, y_test_preds)
print("Training R2 Score is ", r2_train)
print("Testing R2 Score is ", r2_test)

In [None]:
# Rank the forest's features from most to least important and print each one.
feat_import = model.feature_importances_
# argsort is ascending; flipping it yields descending importance.
order = np.flip(np.argsort(feat_import))
feat_import, col_names = feat_import[order], x_train.columns[order]

for feature_name, importance in zip(col_names, feat_import):
    print("Feature ", feature_name, "has an importance of: ", importance)

In [None]:
# Ordinary least-squares model fitted on the same training split.
lin_model = LinearRegression()
lin_model.fit(X=x_train, y=y_train)

In [None]:
# Predict with the linear model on the training and test feature sets.
# NOTE: this rebinds y_train_preds / y_test_preds, replacing whatever
# predictions those names held before this cell ran.
y_train_preds, y_test_preds = [
    lin_model.predict(features) for features in (x_train, x_test)
]

In [None]:
# Normalised MSE for the current predictions: the mean squared error divided
# by the product of the mean prediction and the mean observed target. MSE is
# symmetric, so the conventional (y_true, y_pred) order gives the same value.
mse_train = mean_squared_error(y_train, y_train_preds) / (
    np.mean(y_train_preds) * np.mean(y_train)
)
mse_test = mean_squared_error(y_test, y_test_preds) / (
    np.mean(y_test_preds) * np.mean(y_test)
)
print("Training NMSE is ", mse_train)
print("Testing NMSE is ", mse_test)

In [None]:
# Coefficient of determination for the current predictions.
# r2_score expects (y_true, y_pred) and is NOT symmetric: the denominator is
# the variance of the first argument, so the observed targets must come first.
r2_train = r2_score(y_train, y_train_preds)
r2_test = r2_score(y_test, y_test_preds)
print("Training R2 Score is ", r2_train)
print("Testing R2 Score is ", r2_test)