In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import shap
import lime
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, explained_variance_score
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.pipeline import make_pipeline

In [2]:
# Load your dataset
df = pd.read_csv('flood.csv')  # Use your dataset file path here

In [3]:
# Prepare the data
# Assuming 'FloodProbability' is the target variable
X = df.drop('FloodProbability', axis=1)
y = df['FloodProbability']

In [4]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize dictionaries to store metrics
performance_metrics = {}

In [5]:
def print_and_store_metrics(model_name, y_test, y_pred, tolerance=0.1):
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Print metrics formatted to 5 decimal places
    print(f'{model_name}:')
    print(f'MSE: {mse:}')
    print(f'RMSE: {rmse:}')
    print(f'MAE: {mae:}')
    print(f'R2: {r2:}')
    
    # Store metrics without rounding (full values)
    performance_metrics[model_name] = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
    }


the concept of "accuracy" isn't typically used because the predictions are continuous rather than categorical. However, if you're interested in evaluating the performance of regression models, you might consider using alternative metrics like the coefficient of determination (R^2), Mean Absolute Error (MAE), Mean Squared Error (MSE), or Root Mean Squared Error (RMSE).

In [6]:
# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
linear_pred = linear_model.predict(X_test)
performance_metrics['Linear Regression'] = {
    'MSE': mean_squared_error(y_test, linear_pred),
    'R2': r2_score(y_test, linear_pred),
    'MAE': mean_absolute_error(y_test, linear_pred),
    'Explained Variance': explained_variance_score(y_test, linear_pred)
    
}


In [7]:
# Linear Regression
print_and_store_metrics('Linear Regression', y_test, linear_pred)

Linear Regression:
MSE: 6.802384563575717e-33
RMSE: 8.247656978545917e-17
MAE: 6.000200336586658e-17
R2: 1.0


In [8]:
# Ridge Regression
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)
ridge_pred = ridge_model.predict(X_test)
performance_metrics['Ridge Regression'] = {
    'MSE': mean_squared_error(y_test, ridge_pred),
    'R2': r2_score(y_test, ridge_pred),
    'MAE': mean_absolute_error(y_test, ridge_pred),
    'Explained Variance': explained_variance_score(y_test, ridge_pred)
}


In [9]:
print_and_store_metrics('Ridge Regression', y_test, ridge_pred)

Ridge Regression:
MSE: 6.210758797573216e-14
RMSE: 2.492139401713559e-07
MAE: 1.9891473955384598e-07
R2: 0.9999999999750631


In [10]:
# Lasso Regression
lasso_model = Lasso(alpha=1.0)
lasso_model.fit(X_train, y_train)
lasso_pred = lasso_model.predict(X_test)
performance_metrics['Lasso Regression'] = {
    'MSE': mean_squared_error(y_test, lasso_pred),
    'R2': r2_score(y_test, lasso_pred),
    'MAE': mean_absolute_error(y_test, lasso_pred),
    'Explained Variance': explained_variance_score(y_test, lasso_pred)
}

In [11]:
print_and_store_metrics('Lasso Regression', y_test, lasso_pred)

Lasso Regression:
MSE: 0.002491088247515625
RMSE: 0.04991080291395466
MAE: 0.039821733225
R2: -0.00020176127920357168


In [12]:
# Polynomial Regression (degree=2)
poly_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
poly_model.fit(X_train, y_train)
poly_pred = poly_model.predict(X_test)
performance_metrics['Polynomial Regression'] = {
    'MSE': mean_squared_error(y_test, poly_pred),
    'R2': r2_score(y_test, poly_pred),
    'MAE': mean_absolute_error(y_test, poly_pred),
    'Explained Variance': explained_variance_score(y_test, poly_pred)
}

In [13]:
print_and_store_metrics('Polynomial Regression', y_test, poly_pred)

Polynomial Regression:
MSE: 8.019787992582222e-31
RMSE: 8.955326902230996e-16
MAE: 7.053024830838695e-16
R2: 1.0


In [14]:
# Support Vector Regression (SVR)
svr_model = SVR(kernel='linear')
svr_model.fit(X_train, y_train)
svr_pred = svr_model.predict(X_test)
performance_metrics['SVR'] = {
    'MSE': mean_squared_error(y_test, svr_pred),
    'R2': r2_score(y_test, svr_pred),
    'MAE': mean_absolute_error(y_test, svr_pred),
    'Explained Variance': explained_variance_score(y_test, svr_pred)
}

In [15]:
print_and_store_metrics('SVR', y_test, svr_pred)

SVR:
MSE: 0.0006589851359625163
RMSE: 0.025670705794008007
MAE: 0.02051742115334172
R2: 0.7354095767968614


In [31]:
# Decision Tree Regression
dt_model = DecisionTreeRegressor()
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
performance_metrics['Decision Tree Regression'] = {
    'MSE': mean_squared_error(y_test, dt_pred),
    'R2': r2_score(y_test, dt_pred),
    'MAE': mean_absolute_error(y_test, dt_pred),
    'Explained Variance': explained_variance_score(y_test, dt_pred)
}

In [32]:
print_and_store_metrics('Decision Tree Regression', y_test, dt_pred)

Decision Tree Regression:
MSE: 0.0022423575
RMSE: 0.04735353735466866
MAE: 0.037560500000000004
R2: 0.0996666123111466


In [18]:
# Random Forest Regression
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
performance_metrics['Random Forest Regression'] = {
    'MSE': mean_squared_error(y_test, rf_pred),
    'R2': r2_score(y_test, rf_pred),
    'MAE': mean_absolute_error(y_test, rf_pred),
    'Explained Variance': explained_variance_score(y_test, rf_pred)
}



In [19]:
# Train a RandomForestRegressor model
rfc = RandomForestRegressor(n_estimators=100, random_state=42)
rfc.fit(X_train, y_train)



In [20]:
# Define custom colors
colors = ["#9bb7d4", "#0f4c81"]           
cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", colors)


In [21]:
print_and_store_metrics('Random Forest Regression', y_test, rf_pred)

Random Forest Regression:
MSE: 0.0006737141275000004
RMSE: 0.025956003688934866
MAE: 0.02049824000000001
R2: 0.7294957103200914


In [22]:
# Gradient Boosting Regression
gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_model.fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)
performance_metrics['Gradient Boosting Regression'] = {
    'MSE': mean_squared_error(y_test, gb_pred),
    'R2': r2_score(y_test, gb_pred),
    'MAE': mean_absolute_error(y_test, gb_pred),
    'Explained Variance': explained_variance_score(y_test, gb_pred)
}


In [23]:
print_and_store_metrics('Gradient Boosting Regression', y_test, gb_pred)

Gradient Boosting Regression:
MSE: 0.0005505254200461408
RMSE: 0.023463278118075078
MAE: 0.018480706386966018
R2: 0.7789574515040661


In [24]:
# Bayesian Ridge Regression
bayesian_model = BayesianRidge()
bayesian_model.fit(X_train, y_train)
bayesian_pred = bayesian_model.predict(X_test)
performance_metrics['Bayesian Ridge Regression'] = {
    'MSE': mean_squared_error(y_test, bayesian_pred),
    'R2': r2_score(y_test, bayesian_pred),
    'MAE': mean_absolute_error(y_test, bayesian_pred),
    'Explained Variance': explained_variance_score(y_test, bayesian_pred)
}


In [25]:
print_and_store_metrics('Bayesian Ridge Regression', y_test, bayesian_pred)

Bayesian Ridge Regression:
MSE: 2.4670999811699375e-25
RMSE: 4.96699102190646e-13
MAE: 3.9645329241722036e-13
R2: 1.0


In [26]:
# Display performance metrics for all models
performance_df = pd.DataFrame(performance_metrics).T
print(performance_df)

                                       MSE          RMSE           MAE  \
Linear Regression             6.802385e-33  8.247657e-17  6.000200e-17   
Ridge Regression              6.210759e-14  2.492139e-07  1.989147e-07   
Lasso Regression              2.491088e-03  4.991080e-02  3.982173e-02   
Polynomial Regression         8.019788e-31  8.955327e-16  7.053025e-16   
SVR                           6.589851e-04  2.567071e-02  2.051742e-02   
Decision Tree Regression      2.214170e-03  4.705497e-02  3.728400e-02   
Random Forest Regression      6.737141e-04  2.595600e-02  2.049824e-02   
Gradient Boosting Regression  5.505254e-04  2.346328e-02  1.848071e-02   
Bayesian Ridge Regression     2.467100e-25  4.966991e-13  3.964533e-13   

                                    R2  
Linear Regression             1.000000  
Ridge Regression              1.000000  
Lasso Regression             -0.000202  
Polynomial Regression         1.000000  
SVR                           0.735410  
Decision Tree