In [34]:
import numpy as np

from matplotlib import pyplot as plt

from sklearn import metrics
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve

# Read the labelled test set and the classifier's probability output
# (described in this notebook as coming from an SVM).
test_data = np.loadtxt("../data/labels/testing_labelled_27Aug.txt")
probs = np.loadtxt("../data/classification/probs_27Aug.txt")

# Only one column of each file is used below: column 3 of the test file as
# the labels and column 1 of the probability file as the score
# (presumably the positive-class probability -- TODO confirm column order).
# Indexing with a one-element list yields an independent (n, 1) column array.
test_labels = test_data[:, [3]]
probs = probs[:, [1]]

In [35]:
# ROC curve: false/true positive rates over the score thresholds, treating
# label 1 as the positive class. `threshold` is kept alongside the rates;
# no cut-off point is actually selected in this cell.
fpr, tpr, threshold = metrics.roc_curve(
    y_true=test_labels,
    y_score=probs,
    pos_label=1,
)

# Area under the ROC curve computed from the (fpr, tpr) points.
roc_auc = metrics.auc(x=fpr, y=tpr)

In [36]:
# Draw the ROC curve on the current axes, with the diagonal as a reference.
roc_ax = plt.gca()

# Classifier curve in blue; its legend entry carries the AUC value.
roc_ax.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
roc_ax.legend(loc='lower right')

# Dashed red diagonal from (0, 0) to (1, 1); unlabeled, so not in the legend.
roc_ax.plot([0, 1], [0, 1], 'r--')

# Both axes are rates, so clamp the view to the unit square.
roc_ax.set_xlim([0, 1])
roc_ax.set_ylim([0, 1])

roc_ax.set_title('Receiver Operating Characteristic')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
plt.show()

In [37]:
# Average precision (AP) of the scores against the test labels; the value is
# reused by the precision-recall plot further down.
average_precision = average_precision_score(y_true=test_labels, y_score=probs)

# Report the score rounded to two decimals.
ap_message = 'Average precision-recall score: {0:0.2f}'.format(average_precision)
print(ap_message)

Average precision-recall score: 0.86


In [39]:
# Plot Precision-Recall Curve

# Precision/recall pairs over the score thresholds, with label 1 as the
# positive class; the thresholds themselves are not needed for the plot.
precision, recall, _ = precision_recall_curve(test_labels, probs, pos_label=1)

# Step plot of precision against recall.
plt.step(recall, precision, color='b', where='post')

plt.xlabel('Recall')
plt.ylabel('Precision')
# Small headroom above 1.0 so a curve sitting at precision == 1 stays visible.
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
# The number shown is the average precision computed by
# average_precision_score in the earlier cell, so it is labelled "AP".
# Calling it "AUC" would misname the metric and invite confusion with the
# ROC AUC reported on the previous figure.
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(
          average_precision))
plt.show()