# Final Project Functions List

# In [1]:
import pandas as pd
import numpy as np


# In [2]:
def clf_threshold(y_predict, y_prob, y_test):
    """
    Plots precision and recall against the decision threshold

        Parameters:
            y_predict (array):  y_predict from fitted model. Not used by the
                                plot; kept so existing callers keep working.
            y_prob (array):     y_probability from fitted model (predict_proba).
                                Indexed as y_prob[:, 1], i.e. column 1 is
                                taken as the positive-class score.
            y_test (array):     target values from test set

        Output:
            None. The two curves are drawn through `plt`.

        Note:
            Relies on `precision_recall_curve` and `plt` (presumably
            sklearn.metrics.precision_recall_curve and matplotlib.pyplot)
            already being available in the surrounding module / notebook.

    """

    # Fixed: this line used to read the undefined name `probs_y` rather than
    # the `y_prob` argument, which raised NameError on every call.
    precision, recall, thresholds = precision_recall_curve(y_test, y_prob[:, 1])

    plt.title("Precision-Recall vs Threshold Chart")
    # precision / recall carry one more entry than thresholds, so the last
    # point is dropped to keep x and y the same length.
    plt.plot(thresholds, precision[:-1], "b--", label="Precision")
    plt.plot(thresholds, recall[:-1], "r--", label="Recall")
    plt.ylabel("Precision, Recall")
    plt.xlabel("Threshold")
    plt.legend(loc="lower left")
    plt.ylim([0, 1])


    
def top_categories(X, perc):
    """
    Returns dominant groups in category by %

        Parameters:
            X (dict):      mapping of group -> count. Groups are taken in the
                           mapping's own iteration order (nothing is sorted
                           here), so pass it ordered from largest to smallest
                           to get the dominant groups.
            perc (float):  cumulative share of the total (e.g. 0.8 for 80%).
                           Groups are added while the share collected so far
                           is still below this value, so the group that
                           reaches or crosses the cutoff is included.

        Returns:
            Tuple (val_list, key_list): the counts of the selected groups and
            the matching group keys, in the same order. Both lists are empty
            when X is empty or its counts sum to zero.

    """

    val_list = []
    key_list = []
    denom = sum(X.values())

    # Nothing to rank against; also avoids dividing by zero below.
    if denom == 0:
        return val_list, key_list

    collected = 0
    for key, value in X.items():
        if collected / denom < perc:
            key_list.append(key)
            val_list.append(value)
            collected += value
        else:
            # Once a group is rejected the collected share can no longer
            # change, so every later group would be rejected as well.
            break

    return val_list, key_list


def clf_metrics(model, X_test, y_test):
    """
    Prints a classification report and plots the roc curve and confusion matrix

        Parameters:
            model ():         fitted ML model to be evaluated; must provide
                              predict()
            X_test (df):      scaled test data used to evaluate the model
            y_test (series):  target data used for evaluating predictions

        Returns:
            None. The classification report is printed, and the roc curve and
            confusion matrix are drawn by the plotting helpers.

        Note:
            Relies on `classification_report`, `plot_roc_curve` and
            `plot_confusion_matrix` (presumably from sklearn.metrics) already
            being available in the surrounding module / notebook.

    """
    # Fixed: the report used to be built and then discarded, so callers never
    # saw it despite the docstring promising one.
    print(classification_report(y_test, model.predict(X_test)))

    # NOTE(review): if these come from sklearn.metrics, both helpers were
    # removed in scikit-learn 1.2; the replacements there are
    # RocCurveDisplay.from_estimator and ConfusionMatrixDisplay.from_estimator.
    # Confirm against the installed version.
    plot_roc_curve(model, X_test, y_test)
    plot_confusion_matrix(model, X_test, y_test)
    
    
def plot_history(history):
    """
    Draws two side-by-side graphs:
        Training and validation accuracy
        Training and validation loss

    Parameters:
        history: object returned by a keras model.fit call; its `.history`
                 mapping must hold the 'accuracy', 'val_accuracy', 'loss'
                 and 'val_loss' series.
    """

    train_acc = history.history['accuracy']
    valid_acc = history.history['val_accuracy']
    train_loss = history.history['loss']
    valid_loss = history.history['val_loss']

    # Epochs are numbered from 1 for the x axis.
    epochs = range(1, len(train_acc) + 1)

    # One entry per panel: (subplot slot, training series, validation series,
    # training label, validation label, panel title).
    panels = (
        (1, train_acc, valid_acc,
         'Training acc', 'Validation acc',
         'Training and validation accuracy'),
        (2, train_loss, valid_loss,
         'Training loss', 'Validation loss',
         'Training and validation loss'),
    )

    plt.figure(figsize=(12, 5))
    for slot, train_series, valid_series, train_label, valid_label, heading in panels:
        plt.subplot(1, 2, slot)
        plt.plot(epochs, train_series, 'b', label=train_label)
        plt.plot(epochs, valid_series, 'r', label=valid_label)
        plt.title(heading)
        plt.legend()
