# Various Performance Metrics

Here's what we will try out:
## Classification
   * Accuracy,
   * Error rate,
   * True Positive Rate,
   * False Negative Rate,
   * Specificity,
   * Precision,
   * Recall,
   * F1-Measure,
   * ROC Curve,
   * AUC,
   * Log Loss

## Regression
   * Mean Squared Error,
   * Root Mean Squared Error,
   * Relative Squared Error,
   * Mean Absolute Error

## Clustering
   * Silhouette Score,
   * Rand Index,
   * Mutual Information

# Metrics for classification

We'll use the ["monks" data](https://api.openml.org/d/334) - this is a totally made up toy dataset, but there are no missing values, so we can go straight to the prediction of the class.

In [None]:
# Load the "monks" toy dataset from OpenML into a DataFrame.
import pandas as pd

MONKS_CSV_URL = "https://www.openml.org/data/get_csv/52237/php4fATLZ.csv"
monks_data = pd.read_csv(MONKS_CSV_URL)

# Display the last few rows as a quick sanity check of the download.
monks_data.tail()

We split the data into "train" and "test" subsets. The models are trained on the "train" subset and then applied to the "test" subset.

In [None]:
from sklearn.model_selection import train_test_split

# Separate the features from the 'class' target column.
X = monks_data.drop(columns=['class'])
y = monks_data['class']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
(X_monks_train, X_monks_test,
 y_monks_train, y_monks_test) = train_test_split(
    X, y, test_size=0.3, random_state=42)

In [None]:
# Display the last rows of the training features to check the split.
X_monks_train.tail()

In [None]:
# Display the last rows of the training labels; the index should line up
# with the training features.
y_monks_train.tail()

In [None]:
# Quick end-to-end check: fit a random forest on the training split and
# report its F1 score on the held-out test split.
from sklearn.metrics import f1_score    # We use f1-measure because the classes are not balanced

from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(random_state=0)

# training data
clf.fit(X_monks_train, y_monks_train)

# testing data
# sklearn metrics take (y_true, y_pred) in that order. Keeping the order
# consistent matters for asymmetric metrics such as precision and recall.
f1_score(y_monks_test, clf.predict(X_monks_test))

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

# Choose one classifier; uncomment an alternative to compare its metrics.
clf = RandomForestClassifier(random_state=0)
#clf = MLPClassifier(random_state=1, max_iter=300)
#clf = GaussianNB()

# Train on the training split, then predict hard labels for the test split.
clf.fit(X_monks_train, y_monks_train)
y_pred = clf.predict(X_monks_test)

# Print the first predictions next to the first true labels for comparison.
n_preview = 15
print(y_pred[:n_preview])
print(np.array(y_monks_test[:n_preview]))

In [None]:
# Per-class probability estimates for the test split: one row per sample,
# one column per class.
y_pred_prob = clf.predict_proba(X_monks_test)
print(y_pred_prob[:5])

### Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Tabulate the true test labels against the predicted labels.
cm = confusion_matrix(y_true=y_monks_test, y_pred=y_pred)
print(cm)

### TP, FN, TN, FP

In [None]:
# Flatten the 2x2 confusion matrix into its four cells. For a binary problem
# the flattened order is TN, FP, FN, TP.
tn, fp, fn, tp = cm.ravel()

# Print the cells in the order the section heading lists them.
for template, count in (("TP = {}", tp),
                        ("FN = {}", fn),
                        ("TN = {}", tn),
                        ("FP = {}", fp)):
    print(template.format(count))

### We can calculate Accuracy, TPR, ... based on the confusion matrix first

In [None]:
# Derive the headline metrics by hand from the four confusion-matrix cells.
n_samples = tn + fp + fn + tp
cm_accuracy = (tp + tn) / n_samples        # fraction of all predictions that are correct
cm_recall = tp / (tp + fn)                 # share of actual positives that were found
cm_specificity = tn / (tn + fp)            # share of actual negatives that were found
cm_precision = tp / (tp + fp)              # share of predicted positives that are right

print("accuracy   = {}".format(cm_accuracy))
print("TPR/Recall = {}".format(cm_recall))
print("specificity= {}".format(cm_specificity))
print("Precision  = {}".format(cm_precision))

### Use sklearn.metrics  

### Accuracy

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label equals the true label.
accuracy_score(y_true=y_monks_test, y_pred=y_pred)

### Recall

In [None]:
from sklearn.metrics import recall_score

# Recall (true positive rate) on the positive class; kept in `r` so the
# F1 cell can reuse it.
r = recall_score(y_true=y_monks_test, y_pred=y_pred)
print(r)

### Recall on the negative class == specificity

In [None]:
from sklearn.metrics import recall_score

# Treating label 0 as the "positive" class turns recall into specificity.
recall_score(y_true=y_monks_test, y_pred=y_pred, pos_label=0)

### Precision

In [None]:
from sklearn.metrics import precision_score

# Precision on the positive class; kept in `p` so the F1 cell can reuse it.
p = precision_score(y_true=y_monks_test, y_pred=y_pred)
print(p)

### F1-Measure

In [None]:
from sklearn.metrics import f1_score

# F1 by hand: the harmonic mean of the precision (p) and recall (r)
# computed in the cells above.
f1_manual = 2 * p * r / (p + r)
print("directly: f1-score={}".format(f1_manual))

# The same quantity from sklearn, for comparison.
f1 = f1_score(y_monks_test, y_pred)
print("sklearn : f1-score={}".format(f1))

### ROC Curve and AUC

In [None]:
from sklearn.metrics import auc, roc_curve

# The ROC curve needs scores rather than hard labels: use the predicted
# probability of class 1 (second column of predict_proba).
positive_scores = y_pred_prob[:, 1]
fpr, tpr, thresholds = roc_curve(y_monks_test, positive_scores, pos_label=1)

# Area under the (FPR, TPR) curve.
roc_auc = auc(fpr, tpr)
#print(fpr)
#print(tpr)
#print(thresholds)
print("The ROC AUC score = {}".format(roc_auc))

In [None]:
import matplotlib.pyplot as plt

lw = 2  # shared line width for both curves
roc_label = 'ROC curve (area = %0.2f)' % roc_auc

plt.figure()
# The classifier's ROC curve.
plt.plot(fpr, tpr, color='darkorange', lw=lw, label=roc_label)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot((0, 1), (0, 1), color='navy', lw=lw, linestyle='--')

# Both axes are rates, so clamp them to [0, 1].
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

### Log Loss

In [None]:
from sklearn.metrics import log_loss

# Log loss scores the predicted class probabilities, not the hard labels.
log_loss(y_true=y_monks_test, y_pred=y_pred_prob)

In [None]:
import numpy as np
from sklearn.metrics import log_loss

# exp(-log loss) is the geometric mean of the probability the model assigned
# to the true class. Compute it from the actual log loss rather than a value
# copied by hand from an earlier run, so it stays correct when the model or
# data changes.
np.exp(-log_loss(y_monks_test, y_pred_prob))


# Metrics for Regression

For this, we originally used the Boston Housing data set, but check out the warning that sklearn throws up for this. Interesting! The code below therefore uses the California housing data set instead.

 * the [docs](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_boston.html)
 * someone [digging](https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8)

In [None]:
# Kept for reference only: the original Boston Housing loader, disabled
# because of the warning discussed above. The next cell loads the
# California housing data instead.
#from sklearn.datasets import load_boston
#from sklearn.model_selection import train_test_split
#X, y = load_boston(return_X_y=True);
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [None]:
# get house information
from sklearn.datasets import fetch_california_housing
houses = fetch_california_housing()

X = houses.data
y = houses.target

df_data = pd.DataFrame(houses.data, columns=houses.feature_names)
df_data.head()

In [None]:
# Hold out 30% of the houses for testing; the fixed seed keeps the split repeatable.
(X_houses_train, X_houses_test,
 y_houses_train, y_houses_test) = train_test_split(
    X, y, test_size=0.3, random_state=42)

In [None]:
# Display the training targets; note that they are continuous values, not classes.
y_houses_train

In [None]:
from sklearn import ensemble
#from sklearn.linear_model import LinearRegression

# Hyper-parameters for the gradient-boosted regressor.
params = dict(
    n_estimators=500,
    max_depth=4,
    min_samples_split=5,
    learning_rate=0.01,
    loss='squared_error',
)

# Fit on the training split; swap in the linear model below to compare.
reg = ensemble.GradientBoostingRegressor(**params)
reg.fit(X_houses_train, y_houses_train)
#reg = LinearRegression().fit(X_houses_train, y_houses_train)

# Predictions on the held-out houses, reused by the metric cells that follow.
y_test_pred = reg.predict(X_houses_test)
print(y_test_pred)

### MSE

In [None]:
from sklearn.metrics import mean_squared_error

# Average squared difference between true and predicted targets on the test split.
mean_squared_error(y_true=y_houses_test, y_pred=y_test_pred)

### R-Squared

In [None]:
from sklearn.metrics import r2_score

# R^2 computed two ways: from the stored predictions, and via the regressor's
# own score() on the test split. The two printed values should agree.
r2_from_predictions = r2_score(y_houses_test, y_test_pred)
print(r2_from_predictions)
r2_from_estimator = reg.score(X_houses_test, y_houses_test)
print(r2_from_estimator)

### MAE

In [None]:
from sklearn.metrics import mean_absolute_error

# Average absolute difference between true and predicted targets on the test split.
mean_absolute_error(y_true=y_houses_test, y_pred=y_test_pred)

#  Performance Metrics - Clustering

NOTE: it would be much better to compare k-means here with a second clustering algorithm! The following just indicates how to call silhouette (and others).
I do compare different k at the end, but a second clusterer would be good.

Examples here:
 * silhouette
 * Rand (named after Rand, not "random"!)
 * mutual information

In [None]:
from sklearn.cluster import KMeans

# Cluster the training houses into two groups (features only, no target).
kmeans = KMeans(n_clusters=2, random_state=0, n_init='auto')
kmeans.fit(X_houses_train)

# Cluster ids for the training rows, and nearest-centroid ids for the test rows.
kmeans_train_labels = kmeans.labels_
kmeans_test_labels = kmeans.predict(X_houses_test)

print(kmeans_train_labels[:10])
# Display the element type of the label array.
type(kmeans_train_labels[2])

In [None]:
from sklearn.metrics import silhouette_score

# Silhouette needs only the features and the assigned cluster ids, so it can
# be computed on both the train and the test split.
score_train, score_test = (
    silhouette_score(features, labels, metric='euclidean')
    for features, labels in (
        (X_houses_train, kmeans_train_labels),
        (X_houses_test, kmeans_test_labels),
    )
)
print("Silhouette on train: {0:6.2f} \t test: {1:6.2f} ".format(score_train, score_test))

Here we look at the silhouette score for k-means as we increase k

In [None]:
# Refit k-means for a range of k and report the silhouette on both splits.
# After the loop, kmeans and the label arrays hold the results for the last k.
for k in (2, 3, 4, 6, 10, 20, 30):
    kmeans = KMeans(n_clusters=k, random_state=0, n_init='auto')
    # fit_predict fits the model and returns the training labels in one step.
    kmeans_train_labels = kmeans.fit_predict(X_houses_train)
    kmeans_test_labels = kmeans.predict(X_houses_test)

    sil_train = silhouette_score(X_houses_train, kmeans_train_labels, metric='euclidean')
    sil_test = silhouette_score(X_houses_test, kmeans_test_labels, metric='euclidean')
    print("k={0:3d} \t Silhouette on train: {1:6.2f} \t test: {2:6.2f} ".format(k, sil_train, sil_test))

Trying a couple of alternative metrics for clustering

In [None]:
import numpy as np
from sklearn.metrics import rand_score

# The Rand index compares two *discrete* labelings, but the house-value target
# is continuous. Bin the target into quintiles first so there is a meaningful
# reference partition. The bin edges come from the training split only and are
# reused for the test split. Five bins is an arbitrary choice for illustration.
bin_edges = np.quantile(y_houses_train, [0.2, 0.4, 0.6, 0.8])
y_houses_train_binned = np.digitize(y_houses_train, bin_edges)
y_houses_test_binned = np.digitize(y_houses_test, bin_edges)

# NOTE: kmeans_train_labels / kmeans_test_labels come from the most recent
# KMeans fit. When the notebook is run top to bottom, that is the last k of
# the silhouette loop above.
# do it for train and test sets
score_train = rand_score(y_houses_train_binned, kmeans_train_labels)
score_test  = rand_score(y_houses_test_binned,  kmeans_test_labels)
print("Rand on train: {0:6.2f} \t test: {1:6.2f} ".format(score_train, score_test))

In [None]:
import numpy as np
from sklearn.metrics import mutual_info_score

# Mutual information between two labelings is defined for *discrete* labels,
# but the house-value target is continuous. Bin the target into quintiles
# first. The bin edges come from the training split only and are reused for
# the test split. Five bins is an arbitrary choice for illustration.
mi_bin_edges = np.quantile(y_houses_train, [0.2, 0.4, 0.6, 0.8])
mi_train_reference = np.digitize(y_houses_train, mi_bin_edges)
mi_test_reference = np.digitize(y_houses_test, mi_bin_edges)

# NOTE: kmeans_train_labels / kmeans_test_labels come from the most recent
# KMeans fit. When the notebook is run top to bottom, that is the last k of
# the silhouette loop above.
score_train = mutual_info_score(mi_train_reference, kmeans_train_labels)
score_test  = mutual_info_score(mi_test_reference,  kmeans_test_labels)
print("Mutual Information on train: {0:6.2f} \t test: {1:6.2f} ".format(score_train, score_test))