This repository was archived by the owner on May 10, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathaccuracy.py
73 lines (57 loc) · 2.5 KB
/
accuracy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 9 14:51:24 2018
@author: Mohammad Doosti Lakhani
"""
"""
In this file, I implemented functions for evaluating model accuracy, such as accuracy from a confusion matrix and the CAP curve.
You can use these functions with any of the implemented models.
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import integrate
def capcurve(y_values, y_preds_proba, title_of_chart, percent=0.5):
    """Plot the Cumulative Accuracy Profile (CAP) curve of a binary classifier.

    Observations are ranked by decreasing score of the positive class; the
    cumulative share of positive observations captured is then plotted
    against the share of the data inspected, next to the perfect and the
    random model. The accuracy ratio (AR) is shown in the chart title:

        AR = (area(model) - area(random)) / (area(perfect) - area(random))

    Parameters
    ----------
    y_values : array-like of shape (n_samples,)
        True labels. They are summed to count the positive observations, so
        they are expected to be 0/1 (or boolean).
    y_preds_proba : array-like of shape (n_samples, 2)
        Per-class scores; column 1 is used as the score of the positive
        class (presumably the output of a ``predict_proba`` call - confirm
        against the caller).
    title_of_chart : str
        Text inserted into the chart title.
    percent : float, default 0.5
        Fraction of the data (between 0 and 1) at which the captured share of
        positive observations is read off the curve and marked in green.

    Returns
    -------
    None
        The chart is drawn on a new matplotlib figure; nothing is returned.

    Raises
    ------
    ValueError
        If fewer than two observations are given, if ``y_values`` does not
        contain both positive and negative observations (the accuracy ratio
        is undefined in that case), or if ``percent`` is outside [0, 1].
    """
    # ref = https://github.com/APavlides/cap_curve/blob/master/cap_curve.py
    labels = np.asarray(y_values).ravel()
    scores = np.asarray(y_preds_proba)[:, 1]

    num_count = len(labels)
    # int() so the count is usable in messages/arithmetic even for float labels.
    num_pos_obs = int(np.sum(labels))

    if num_count < 2:
        raise ValueError("capcurve needs at least two observations")
    if not 0 < num_pos_obs < num_count:
        raise ValueError(
            "capcurve needs both positive and negative observations in y_values"
        )
    if not 0.0 <= percent <= 1.0:
        raise ValueError("percent must be between 0 and 1")

    rate_pos_obs = float(num_pos_obs) / float(num_count)
    ideal = pd.DataFrame({'x': [0, rate_pos_obs, 1], 'y': [0, 1, 1]})
    xx = np.arange(num_count) / float(num_count - 1)

    # Rank the observations by decreasing positive-class score.
    y_cap_df_s = pd.DataFrame(data=np.c_[labels, scores])
    y_cap_df_s = y_cap_df_s.sort_values([1], ascending=False).reset_index(drop=True)
    print(y_cap_df_s.head(20))

    yy = np.cumsum(y_cap_df_s[0].values) / float(num_pos_obs)
    # add the first curve point (0,0) : for xx=0 we have yy=0
    yy = np.append([0], yy[0:num_count - 1])

    # Share of positives captured at `percent` of the data, linearly
    # interpolated on the curve (np.interp picks the bracketing points itself,
    # so there is no manual row index that could run out of bounds).
    val = float(np.interp(percent, xx, yy))

    # Exact area under the plotted perfect-model polyline
    # (0,0) -> (rate,1) -> (1,1): triangle rate/2 plus rectangle (1 - rate).
    sigma_ideal = 1.0 - rate_pos_obs / 2.0

    # `simps` was removed in SciPy 1.14; prefer `simpson` and pass x by
    # keyword, which works for both the old and the new function.
    simpson = getattr(integrate, 'simpson', None) or getattr(integrate, 'simps')
    sigma_model = simpson(yy, x=xx)
    sigma_random = simpson(xx, x=xx)

    ar_value = (sigma_model - sigma_random) / (sigma_ideal - sigma_random)

    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.plot(ideal['x'], ideal['y'], color='grey', label='Perfect Model')
    ax.plot(xx, yy, color='red', label='User Model')
    ax.plot(xx, xx, color='blue', label='Random Model')
    ax.plot([percent, percent], [0.0, val], color='green', linestyle='--', linewidth=1)
    ax.plot([0, percent], [val, val], color='green', linestyle='--', linewidth=1,
            label=str(val*100)+'% of positive obs at '+str(percent*100)+'%')

    ax.set_xlim(0, 1.02)
    ax.set_ylim(0, 1.25)
    ax.set_title("CAP Curve ("+title_of_chart+") - a_r value ="+str(ar_value))
    ax.set_xlabel('% of the data')
    ax.set_ylabel('% of positive obs')
    ax.legend()
def accuracy_on_cm(confusion_matrix):
    """Compute accuracy figures from a confusion matrix.

    The diagonal of the matrix is taken as the correctly classified
    observations and everything off the diagonal as the misclassified ones.

    Parameters
    ----------
    confusion_matrix : array-like
        Square matrix of prediction counts.

    Returns
    -------
    tuple
        ``(correct, incorrect, accuracy)`` where ``correct`` is the sum of
        the diagonal, ``incorrect`` is the sum of all remaining cells and
        ``accuracy`` is ``correct / (correct + incorrect)``.
    """
    correct = np.trace(confusion_matrix)
    incorrect = np.sum(confusion_matrix) - correct
    total = correct + incorrect
    accuracy = correct / total
    return correct, incorrect, accuracy