In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from Helper import Test

In [11]:
boston = load_boston()

In [14]:
y = boston.target
X = boston.data

In [15]:
X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=42)

In [None]:
a = 'regression'
b = 'classification'
c = 'both regression and classification'

models = {
    'decision trees':c ,
    'random forest': c # Letter here,
    'adaptive boosting': c # Letter here,
    'logistic regression': b # Letter here,
    'linear regression': a # Letter here
}


In [19]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor

In [21]:
linear_regress  = LinearRegression()
DTR = DecisionTreeRegressor()
RFR = RandomForestRegressor()
AdaBoost = AdaBoostRegressor()

In [23]:
linear_regress.fit(X_train,y_train)
DTR.fit(X_train,y_train)
RFR.fit(X_train, y_train)
AdaBoost.fit(X_train,y_train)

AdaBoostRegressor()

In [28]:
pred_lin = linear_regress.predict(X_test)
pred_DTR = DTR.predict(X_test)
pred_RFR = RFR.predict(X_test)
pred_Ada = AdaBoost.predict(X_test);

In [30]:
# potential model options
a = 'regression'
b = 'classification'
c = 'both regression and classification'

#
metrics = {
    'precision': b,
    'recall': b,
    'accuracy': b,
    'r2_score': a,
    'mean_squared_error': a ,
    'area_under_curve': a, 
    'mean_absolute_area': a 
}

In [31]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [32]:
def r2(actual, preds):
    '''
    INPUT:
    actual - numpy array or pd series of actual y values
    preds - numpy array or pd series of predicted y values
    OUTPUT:
    returns the r-squared score as a float
    '''
    sse = np.sum((actual-preds)**2)
    sst = np.sum((actual-np.mean(actual))**2)
    return 1 - sse/sst

# Check solution matches sklearn
print(r2(y_test, pred_lin))
print(r2_score(y_test, pred_lin))
print("Since the above match, we can see that we have correctly calculated the r2 value.")

0.7261570836552478
0.7261570836552478
Since the above match, we can see that we have correctly calculated the r2 value.


In [33]:
# mean squared error
def mse(actual, preds):
    '''
    INPUT:
    actual - numpy array or pd series of actual y values
    preds - numpy array or pd series of predicted y values
    OUTPUT:
    returns the mean squared error as a float
    '''
    MSE = np.sum((actual - preds)**2) / len(actual)
    
    
    return MSE


# Check your solution matches sklearn
print(mse(y_test, pred_lin))
print(mean_squared_error(y_test, pred_lin))
print("If the above match, you are all set!")

20.72402343733974
20.72402343733974
If the above match, you are all set!


In [40]:
def mae(actual, preds):
    '''
    INPUT:
    actual - numpy array or pd series of actual y values
    preds - numpy array or pd series of predicted y values
    OUTPUT:
    returns the mean absolute error as a float
    '''
    
    return np.sum(np.abs(actual - preds)) / len(actual)

# Check your solution matches sklearn
print(mae(y_test, pred_lin))
print(mean_absolute_error(y_test, pred_lin))
print("If the above match, you are all set!")

3.1482557548168217
3.1482557548168217
If the above match, you are all set!


In [44]:
# Model Comparisons

print("Prediction Score for Decision Tree", r2_score(y_test,pred_DTR))
print("Prediction Score for Random Forest", r2_score(y_test,pred_RFR))
print("Prediction Score for Adaptive Boosting", r2_score(y_test,pred_Ada))


Prediction Score for Decision Tree 0.763749283899607
Prediction Score for Random Forest 0.8658529611547235
Prediction Score for Adaptive Boosting 0.7869752998765327


In [None]:
def print_metrics(y_true, preds, model_name=None):
    '''
    INPUT:
    y_true - the y values that are actually true in the dataset (numpy array or pandas series)
    preds - the predictions for those values from some model (numpy array or pandas series)
    model_name - (str - optional) a name associated with the model if you would like to add it to the print statements 
    
    OUTPUT:
    None - prints the mse, mae, r2
    '''
    if model_name == None:
        print('Mean Squared Error: ', format(mean_squared_error(y_true, preds)))
        print('Mean Absolute Error: ', format(mean_absolute_error(y_true, preds)))
        print('R2 Score: ', format(r2_score(y_true, preds)))
        print('\n\n')
    
    else:
        print('Mean Squared Error ' + model_name + ' :' , format(mean_squared_error(y_true, preds)))
        print('Mean Absolute Error ' + model_name + ' :', format(mean_absolute_error(y_true, preds)))
        print('R2 Score ' + model_name + ' :', format(r2_score(y_true, preds)))
        print('\n\n')