/
perf.c
1873 lines (1657 loc) · 57.9 KB
/
perf.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define MAX_ITEMS 5000000
#define MAX_TIES 500000
#define MAX_BINS 10000
#define debug 0
#define eps 1.0e-10
#define nnn -999999
/* Version 5.12 KDDCUP-2004 July 12, 2004 */
/* COPYRIGHT INFO
perf is available at http://www.cs.cornell.edu/~caruana/
Author: Rich Caruana
caruana@cs.cornell.edu
Cornell University
Department of Computer Science
4157 Upson Hall
Ithaca, NY 14853
USA
LICENSING
This program is granted free of charge for research and education
purposes. You must obtain a license from the author to use it for
commercial purposes.
Please acknowledge perf in any scientific results and reports where
perf was used. Many of the performance metrics perf calculates are
not always defined the same way by different communities. By citing
perf you make it clear exactly what metrics you are reporting. We do
not yet have a report describing perf, so the best way to acknowledge
using the code is:
R. Caruana, The PERF Performance Evaluation Code,
http://www.cs.cornell.edu/~caruana/perf
The software must not be modified and distributed without prior
permission of the author.
By using perf you agree to the licensing terms.
NO WARRANTY
BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT
WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER
PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND,
EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF
THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO
LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED
OF THE POSSIBILITY OF SUCH DAMAGES.
*/
/*
WARNING!: This code was recently rewritten and has not yet been
thoroughly tested. If you find errors please send
email to caruana@cs.cornell.edu
ACKNOWLEDGEMENTS:
Over the years, many people have helped with this code.
We'd like to thank:
- Alex Niculescu
- Filip Radlinski
- Thorsten Joachims
- Constantin Aliferis
- Stefano Monti
- Greg Cooper
*** WARNING ***
The documentation below is somewhat out of date.
Run "perf -h" to see current options.
Input to program is a sequence of lines, one line per case
with format: "TRUE_VALUE whitespace PRED_VALUE"
The file test.data contains sample test data. The file
test.results contains the results of running:
perf -options < test.data > test.results
or
perf -options -file test.data
TRUE_VALUE and PRED_VALUE can be any numbers, positive
or negative. The program computes the mean TRUE_VALUE and
mean PRED_VALUE.
If TRUE_VALUE <= MEAN_TRUE_VALUE, the truth for the case is
considered to be a 0, otherwise it is considered a 1. This
lets you use any coding for the TRUE_VALUE's as long as the
value used to represent 0's is less than the value used to
represent 1's. So you can code things as 0/1, -1/+1, etc.
Note that some metrics such as squared error are calculated
using the TRUE_VALUEs input to the code, not values transformed
to 0/1.
The program computes the ROC curve, ROC area, accuracy (ACC),
Positive Predictive Value (PPV), Negative Predictive Value (NPC),
Sensitivity (SEN), Specificity(SPC), Precision/Recall curve,
F, Precision/Recall Break Even point, Lift, and a number of
other performance metrics.
Computing things like Accuracy requires a prediction threshold
for the PRED_VALUE:
(PRED_VALUE < thresh) is interpreted as a prediction of 0.
(PRED_VALUE >= thresh) is interpreted as a prediction of 1.
The program takes an optional argument that lets you specify the
prediction threshold. If you don't specify a value, the program
assumes 0.5, which is the right thing to do if the PRED_VALUES
are properly calibrated probabilities.
The program also computes two different prediction threshold
values itself. The first is done by finding the threshold that
would make the number of predicted 1's match the number of true
1's in the data set. If 15 of 100 points given to the program
are true 1's, it sorts the points by PRED_VALUE (least first) and
computes a threshold from the average PRED_VALUE of points 84 and
85 in the sort. This prediction threshold makes 15 of the 100
points be predicted 1. (If two or more points have the same
PRED_VALUE and these to sort to the place where the threshold
finder picks values, the number of points predicted to be 1 can't
match the number of true 1's.)
The second computed threshold is the prediction threshold that
maximizes the Accuracy. This threshold is found by trying all
thresholds that fall half way between adjacent prediction values
in the data set, and reporting the one that yields the highest
accuracy. It is not uncommon for more than one threshold to
yield the same maximum accuracy. In this case, the program
reports the lowest threshold that achieved maximum accuracy, and
prints a caution that more than one prediction threshold achieves
this same accuracy. When this happens, you can rerun the program
with the -accplot option to see the accuracy for each threshold.
(In Unix "perf -accplot -noroc < infile | sort -n +1 | tail"
will find just those thresholds that yielded highest accuracy.)
The program computes the error measures listed above for the 0.5
threshold (or user supplied threshold) and for the two automatically
computed thresholds. (Neither affects the ROC curve or its area.)
Be careful using computed thresholds and their associated stats.
It is probably improper to use the threshold computed on a test
set to estimate statistics for that test set. The threshold
probably should be estimated with the training set, and then
given to the program to calculate test set stats. That's why the
threshold can be specified as an optional parameter.
The ROC curve is a plot of SENSITIVITY vs. 1-SPECIFICITY as the
prediction threshold sweeps from 0 to 1. A typical ROC curve for
reasonably accurate predictions looks like this:
| *
S | *
E | *
N | *
S |
I | *
T |
I | *
V |
I | *
T |
Y |*
- - - - - - - - - - - - - - - - - - -
1 - SPECIFICITY
If there is no relationship between the prediction and truth, the
ROC curve is a diagonal line with area 0.5. If the prediction
strongly predicts truth, the curve rises quickly and has area near
1.0. If the prediction strongly predicts anti-truth, the ROC area
is less than 0.5.
Here's a definition of SPECIFICITY, SENSITIVITY, and the other
error measures this program computes such as PRECISION, RECALL,
LIFT, ... (Some of this is from Constantin's email.)
Here's a confusion matrix for a two class problem:
MODEL PREDICTION
| 1 0 |
- - - + - - - - - - - - - - - + - - - - -
TRUE 1 | A B | A+B
OUTCOME | |
0 | C D | C+D
- - - + - - - - - - - - - - - + - - - - -
| A+C B+D | A+B+C+D
1 = POSITIVE
0 = NEGATIVE
ACC = (A+D) /(A+B+C+D)
PPV = A / (A+C)
NPV = D / (B+D)
SEN = A / (A+B)
SPE = D / (C+D)
PRE = A / (A+C) (PRECISION = PPV)
REC = A / (A+B) (RECALL = SENSITIVITY)
A Precision/Recall Curve is a plot of Precision on the vertical
axis vs. Recall on the horizontal axis as the threshold sweeps
through the data. A Precision/Recall plot attempts to measure
similar behavior as an ROC plot. But ROC and PR plots are not
the same. The vertical axis on an ROC plot is the same as the
horizontal axis on a PR plot, but the other axes differ. PR plots
tend to be used by the information retrieval communities, whereas
ROC curves tend to be used by the medical, signal processing,
statistics, and machine learning communities. ROC plots have
some properties that PR plots do not have: ROCs are monotonic,
always touch both axes, and have well characterized statistical
properties. PR plots need not be monotonic (but often are), may
not touch the Precision axes (when the cases with the highest
predicted values are class negative), and has less statistical
semantics. In practice, however, both provide qualitatively
similar views of performance.
Here is a sample Precision/Recall Curve:
|
P | *
R | * *
E | *
C | *
C | *
I | *
S | *
I | *
O | *
N | *
| *
- - - - - - - - - - - - - - - - - - -
RECALL
LIFT and LIFT CURVES measure similar quantities as ROC and
Precision/Recall. Lift is a measure of the percent of
the positive class that is in the returned objects. For
example, if a dataset contains 1000 objects, 200 of which
are the target class (positives), and the threshold is set
so that 30% of the dataset is returned, and that 30% contains
90% of the positives, then LIFT = 0.90/0.30 = 3. LIFT greater
than 1 means the retreived objects (the objects greater than
threshold) have more targets in them than a random sample
would have. LIFT less than 1 is bad. LIFT = 1 is what is
expected from random (uninformed) sampling. Obviously we
want LIFT as large as possible. The largest LIFT occurs
if 100% of the target class is returned without any of the
negatives being included, perfect prediction. If the target
class is p% of the dataset, and all targets are returned in
a sample also of size p%, LIFT = 100%/p%. For the example
above where 20% of the data is targets, the maximum lift is
100%/20% = 5.
LIFT sometimes is plotted as a function of the percent of
objects returned. This is the LIFT plot. LIFT often is
presented in a table of LIFTS measured when some small but
interesting percent of the objects are returned (e.g., 5%,
10%, 15%, 20%, 25%, ...). LIFT is a good measure to use in
addition to accuracy. Suppose the accuracy of a model is
90%. Is this good performance? Maybe. Maybe not. If it
is possible to achieve 90% accuracy by just predicting the
most common class, then a model with 90% accuracy is nothing
to write home about. LIFT is less sensitive to the base
prediction rate than accuracy. For example, if 90% of the
data is 0's, 10% 1's, the base accuracy is 90% if the model
predicts 0 for everything, so 90% accuracy is not really
very good. If a threshold is used that causes 10% of the
cases to be predicted 1, and if 80% of the cases predicted
to be 1 actually are 1, the accuracy is 0.80*0.1 +
Lift tends to be used in marketing and data mining more
than in machine learning and information retreival because
it is easy to understand, and because it directly measures
how effective the model is at detecting targets with high
reliability. For example, if you are going to send marketing
flyers to only 10% of your customers, you want that 10% to
include as large a fraction of targets (responders) as
possible.
*/
float btrue[MAX_ITEMS];
float bpred[MAX_ITEMS];
double mean_apr, mean_rkl, mean_rms, mean_top1;
int total_items, cur_block, no_blocks, bitem, more_blocks;
int block[MAX_ITEMS];
float true[MAX_ITEMS];
float pred[MAX_ITEMS];
float fraction[MAX_ITEMS]; //for the fractional cases to handle ties
double mean_true, mean_pred;
double min_true, max_true, assumed_true;
int not_min_warned, not_max_warned, targs_all_same;
double pred_thresh;
double sse, rmse, cxe, loge, log_2, norm, norm_err;
double a, b, c, d;
double freq_thresh, threshold;
double max_acc, max_acc_thresh, last_acc_thresh, acc;
double min_cost, min_cost_thresh, last_cost_thresh, cost;
double cost_a, cost_b, cost_c, cost_d;
int costs, min_cost_count;
double freq_a, freq_b, freq_c, freq_d;
double max_acc_a, max_acc_b, max_acc_c, max_acc_d;
int max_acc_count;
int count, no_poz, begin;
int rank_last, top_sum, top, t01, t10;
double topn;
double factCache[MAX_TIES];
int arg, taken, no_area, thresh, confusion, prcdata, plot;
int ACC_flag, APR_flag, CAL_flag, CST_flag, CXE_flag, TOP_flag, RKL_flag;
int LFT_flag, NPV_flag, NRM_flag, PPV_flag, PRB_flag, PRF_flag, PRE_flag;
int REC_flag, RMS_flag, ROC_flag, R50_flag, SEN_flag, SLQ_flag, SPC_flag;
int SAR_flag, T01_flag, T10_flag, APC_flag;
int BLOCKS_flag;
int acc_plot, cost_plot, roc_plot, pr_plot, lift_plot;
int SET_stats, SET_easy, SET_all;
double slacq_window, slacq_bin_width, slac_w, slac_q, last_p, n_cor, n_tot;
int slacq_tb[MAX_BINS], slacq_tbb[MAX_BINS], bin, no_bins, set, slac_qn;
int no_item, item;
double tt, tf, ft, ff;
int total_true_0, total_true_1;
double sens, spec, tpf, fpf, tpf_prev, fpf_prev, roc_area, roc50_area;
double precision, recall, precision_prev, recall_prev, pr_break_even;
double lift, frac_above_threshold;
double prcones, apr;
double lasttt, lasttf, lastft, lastff;
int cnt, onecnt, i;
int data_split, no_remaining;
double data_prc;
double allwacc, allwroc, allwrms;
int loss;
double max_lift, min_lift;
int cal_bin_size, cal_f, cal_t, cal_sum_n;
double sum_p, cal_sum, cal_sumsquares, obs_p, cal_err, cal1_mean_err, cal2_mean_err, plow, phigh;
int tmp_int;
double tmp_double;
/* Print the usage */
void print_usage(char *s) {
printf("Version 5.12 [KDDCUP-2004 July 12, 2004]\n\n");
printf("Usage:\n\t %s [options] [-blocks] < input \n", s);
printf("\tOR %s [options] [-blocks] -file <input file> \n", s);
printf("\tOR %s [options] [-blocks] -files <targets file> <predictions file> \n\n", s);
printf("Allowed options:\n");
printf("\n PERFORMANCE MEASURES\n");
printf(" -ACC Accuracy\n");
printf(" -RMS Root Mean Squared Error\n");
printf(" -CXE Mean Cross-Entropy\n");
printf(" -ROC ROC area\n");
printf(" -R50 ROC area up to 50 negative examples\n");
printf(" -SEN Sensitivity\n");
printf(" -SPC Specificity\n");
printf(" -NPV Negative Predictive Value\n");
printf(" -PPV Positive Predictive Value\n");
printf(" -PRE Precision\n");
printf(" -REC Recall\n");
printf(" -PRF F1 score\n");
printf(" -PRB Precision/Recall Break Even Point\n");
printf(" -APR Mean Average Precision\n");
printf(" -AUPRC Area under Precision/Recall Curve\n");
printf(" -LFT Lift (at threshold)\n");
printf(" -TOP1 Top 1: is the top ranked case positive\n");
printf(" -TOP10 Top 10: is there a positive in the top 10 ranked cases\n");
printf(" -NTOP <N> How many positives in the top N ranked cases\n");
printf(" -RKL Rank of *last* (poorest ranked) positive case\n");
printf(" -NRM <arg> Norm error using L<arg> metric\n");
printf(" -CST <tp> <fn> <fp> <tn>\n");
printf(" Total cost using these cost values, plus min-cost results\n");
printf(" -SAR <wACC> <wROC> <wRMS>\n");
printf(" SAR = (wACC*ACC + wROC*ROC + wRMS(1-RMS))/(wACC+wROC+wRMS)\n");
printf(" typically wACC = wROC = wRMS = 1.0\n");
printf(" -CAL <bin_size> CA1/CA2 scores\n");
printf(" -SLQ <bin_width> Slac Q-score\n");
printf("\n METRIC GROUPINGS\n");
printf(" -all Display most metrics (the default if no options are specified)\n");
printf(" -easy ROC, ACC and RMS\n");
printf(" -stats Accuracy, confusion table metrics and lift\n");
printf(" -confusion Confusion table plus all metrics in stats\n");
printf("\n PLOTS (Only one plot can be output at a time)\n");
printf(" -plot roc Output ROC plot\n");
printf(" -plor pr Output Precision/Recall plot\n");
printf(" -plot lift Output Lift versus threshold plot\n");
printf(" -plot cost Output Cost versus threshold plot\n");
printf(" -plot acc Output Accuracy versus threshold plot\n");
printf("\n PARAMETERS\n");
printf(" -t <arg> Set threshold [default threshold is 0.5 if not set]\n");
printf(" -percent <arg> Set threshold so <arg> percent of data falls above threshold\n");
printf(" (is predicted positive)\n");
printf("\n INPUT\n");
printf(" -blocks Input has BLOCK ID numbers in first column. Calculate performance\n");
printf(" for each block and report the mean performance across the blocks.\n");
printf(" Only works with APR, TOP1, RKL, and RMS.\n");
printf(" If using separate files for target and predictions input (-files\n");
printf(" option), the BLOCK ID numbers must be ithe first column of the\n");
printf(" target file, with no block numbers in the predictions file.\n\n");
printf(" -file <file> Read input from one file (1st col targets, 2nd col predictions)\n");
printf(" -files <targets file> <predictions file> Read input from two separate files\n");
printf("\n");
}
void print_confusion_table (a,b,c,d,thresh_text,thresh)
double a,b,c,d;
char* thresh_text;
double thresh;
{
printf ("\n");
printf (" Predicted_1 Predicted_0 %s %9.6lf\n", thresh_text, thresh);
printf ("True_1 %6.4lf %6.4lf\n", a, b);
printf ("True_0 %6.4lf %6.4lf\n", c, d);
}
/* partition is used by quicksort */
int partition (p,r)
int p,r;
{
int i, j;
float x, tempf;
x = pred[p];
i = p - 1;
j = r + 1;
while (1)
{
do j--; while (!(pred[j] <= x));
do i++; while (!(pred[i] >= x));
if (i < j)
{
tempf = pred[i];
pred[i] = pred[j];
pred[j] = tempf;
tempf = true[i];
true[i] = true[j];
true[j] = tempf;
}
else
return(j);
}
}
/* vanilla quicksort */
void quicksort (p,r)
int p,r;
{
int q;
if (p < r)
{
q = partition (p,r);
quicksort (p,q);
quicksort (q+1,r);
}
}
/* If the option is equal to the wanted string (case ignored), set the flag */
int accept_flag(char *option, char *wanted, int *flag) {
/* Use strncasecmp to avoid buffer overflows. */
if (strncasecmp(option, wanted, 20)==0) {
*flag = 1;
return 1;
}
return 0;
}
/* If the option is equal to the wanted string (case ignored) plus
anything at the end, set the flag */
int accept_flag_ext(char *option, char *wanted, int *flag) {
if (strncasecmp(option, wanted, strlen(wanted))==0) {
*flag = 1;
return 1;
}
return 0;
}
/* If the option is equal to the wanted string, the next argument is
the floating point value */
int accept_parameter(char *argv[], int *option_n, char *wanted, int *flag, double *value) {
char *option;
char *next_option;
option = argv[*option_n];
next_option = argv[*option_n+1];
if (strncasecmp(option, wanted, 20)==0) {
*flag = 1;
if (value == NULL) {
fprintf(stderr, "Error: parameter %s requires an option.\n", option);
exit(1);
}
*value = atof(next_option);
/* Skip the next option in our processing code */
(*option_n) ++;
return 1;
}
return 0;
}
void print_output(int flag, char *name, double value) {
if (flag == 1)
printf ("%3s %8.5lf\n", name, value);
}
void print_thresh_output(int flag, char *name, double value, char *t_name, double t_val) {
if (flag == 1)
printf("%3s %8.5lf %s %9.6lf\n", name, value, t_name, t_val);
}
/* Print standard stats */
void print_stats(double stat_a, double stat_b, double stat_c, double stat_d, char *t_name, double t_val){
precision = stat_a / (stat_a + stat_c + eps);
recall = stat_a / (stat_a + stat_b + eps);
if (loss == 0) {
print_thresh_output (ACC_flag, "ACC", (stat_a+stat_d) / ((stat_a+stat_b+stat_c+stat_d)+eps), t_name, t_val);
print_thresh_output (PPV_flag, "PPV", stat_a / ((stat_a+stat_c)+eps), t_name, t_val);
print_thresh_output (NPV_flag, "NPV", stat_d / ((stat_b+stat_d)+eps), t_name, t_val);
print_thresh_output (SEN_flag, "SEN", stat_a / ((stat_a+stat_b)+eps), t_name, t_val);
print_thresh_output (SPC_flag, "SPC", stat_d / ((stat_c+stat_d)+eps), t_name, t_val);
print_thresh_output (PRE_flag, "PRE", precision, t_name, t_val);
print_thresh_output (REC_flag, "REC", recall, t_name, t_val);
print_thresh_output (PRF_flag, "PRF", (2.0*precision*recall)/(precision+recall+eps), t_name, t_val);
print_thresh_output (LFT_flag, "LFT", (stat_a / ((double) total_true_1)) / ((stat_a + stat_c) / ((double) (total_true_1 + total_true_0)) + eps), t_name, t_val);
if ( SAR_flag )
printf ("SAR %8.5lf %s %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", ((allwacc*(stat_a+stat_d) / ((stat_a+stat_b+stat_c+stat_d)+eps) + allwroc*roc_area + allwrms*(1.0-rmse))) / (allwacc+allwroc+allwrms+eps), t_name, t_val, allwacc, allwroc, allwrms);
} else if (loss == 1) {
print_thresh_output (ACC_flag, "ACC", 1.0 - ((stat_a+stat_d) / ((stat_a+stat_b+stat_c+stat_d)+eps)), t_name, t_val);
print_thresh_output (PPV_flag, "PPV", 1.0 - (stat_a / ((stat_a+stat_c)+eps)), t_name, t_val);
print_thresh_output (NPV_flag, "NPV", 1.0 - (stat_d / ((stat_b+stat_d)+eps)), t_name, t_val);
print_thresh_output (SEN_flag, "SEN", 1.0 - (stat_a / ((stat_a+stat_b)+eps)), t_name, t_val);
print_thresh_output (SPC_flag, "SPC", 1.0 - (stat_d / ((stat_c+stat_d)+eps)), t_name, t_val);
print_thresh_output (PRE_flag, "PRE", 1.0 - precision, t_name, t_val);
print_thresh_output (REC_flag, "REC", 1.0 - recall, t_name, t_val);
print_thresh_output (PRF_flag, "PRF", 1.0 - (2.0*precision*recall)/(precision+recall+eps), t_name, t_val);
print_thresh_output (LFT_flag, "LFT", (max_lift - (stat_a / ((double)total_true_1)) / ((stat_a + stat_c) / ((double) (total_true_1 + total_true_0)) + eps))/(max_lift-min_lift+eps), t_name, t_val);
if ( SAR_flag )
printf ("SAR %8.5lf %s %9.6lf wacc %9.6lf wroc %9.6lf wrms %9.6lf \n", 1.0 - (((allwacc*(stat_a+stat_d) / ((stat_a+stat_b+stat_c+stat_d)+eps) + allwroc*roc_area + allwrms*(1.0-rmse))) / (allwacc+allwroc+allwrms+eps)), t_name, t_val, allwacc, allwroc, allwrms);
} /* End of loop if loss is set or not */
print_thresh_output(CST_flag, "CST", cost_a*stat_a + cost_b*stat_b + cost_c*stat_c + cost_d*stat_d, t_name, t_val);
} /* End of print_stats */
double calc_rmse()
{
int item;
double sse, rmse;
sse = 0.0;
for (item=0; item<no_item; item++)
{
sse+= (true[item]-pred[item])*(true[item]-pred[item]);
if ( (pred[item] > 1.0) && (not_max_warned == 1) ) {
fprintf(stderr, "Warning: Prediction (%10.6lf) larger than 1.0 when calculating RMS or CXE\n", pred[item]);
not_max_warned = 0;
}
if ( (pred[item] < 0.0) && (not_min_warned == 1) ) {
fprintf(stderr, "Warning: Prediction (%10.6lf) less than 0.0 when calculating RMS or CXE\n", pred[item]);
not_min_warned = 0;
}
}
rmse = sqrt (sse / ((double) no_item));
return(rmse);
}
double calc_mcxe()
{
int item;
double cxe, log_2;
cxe = 0.0;
log_2 = log(2.0);
/* loge = log(2.718281828); */
for (item=0; item<no_item; item++)
{
if ( true[item] < mean_true )
{
true[item] = 0;
total_true_0++;
if ((1-pred[item]) <= 0.0)
cxe+= 9e99;
else
cxe+= -log(1-pred[item])/log_2;
}
else
{
true[item] = 1;
total_true_1++;
if ( pred[item] <= 0.0 )
cxe+= 9e99;
else
cxe+= -log(pred[item])/log_2;
}
if ( (pred[item] > 1.0) && (not_max_warned == 1) ) {
fprintf(stderr, "Warning: Prediction (%10.6lf) larger than 1.0 when calculating RMS or CXE\n", pred[item]);
not_max_warned = 0;
}
if ( (pred[item] < 0.0) && (not_min_warned == 1) ) {
fprintf(stderr, "Warning: Prediction (%10.6lf) less than 0.0 when calculating RMS or CXE\n", pred[item]);
not_min_warned = 0;
}
}
cxe = cxe / ((double) no_item);
return(cxe);
}
double calc_norm()
{
int item;
double norm_err;
norm_err = 0.0;
for (item=0; item<no_item; item++)
norm_err+= pow(fabs(true[item]-pred[item]),norm);
norm_err = pow(norm_err/ ((double) no_item),1/norm);
return(norm_err);
}
double calc_roca(int do_roc_plot)
{
double tt, tf, ft, ff;
double sens, spec, tpf, fpf, tpf_prev, fpf_prev;
double roc_area;
tt = 0;
tf = total_true_1;
ft = 0;
ff = total_true_0;
sens = ((double) tt) / ((double) (tt+tf));
spec = ((double) ff) / ((double) (ft+ff));
tpf = sens;
fpf = 1.0 - spec;
if ( do_roc_plot )
printf ("%6.4lf %6.4lf\n", fpf, tpf);
roc_area = 0.0;
tpf_prev = tpf;
fpf_prev = fpf;
for (item=no_item-1; item>-1; item--)
{
tt+= fraction[item];
tf-= fraction[item];
ft+= 1 - fraction[item];
ff-= 1 - fraction[item];
sens = ((double) tt) / ((double) (tt+tf));
spec = ((double) ff) / ((double) (ft+ff));
tpf = sens;
fpf = 1.0 - spec;
if ( do_roc_plot )
printf ("%6.4lf %6.4lf\n", fpf, tpf);
roc_area+= 0.5*(tpf+tpf_prev)*(fpf-fpf_prev);
tpf_prev = tpf;
fpf_prev = fpf;
}
return(roc_area);
}
double calc_rocn(int no_negs)
{
double tt, tf, ft, ff;
double sens, spec, tpf, fpf, tpf_prev, fpf_prev;
double roc_area;
tt = 0;
tf = total_true_1;
ft = 0;
ff = total_true_0;
sens = ((double) tt) / ((double) (tt+tf));
spec = ((double) ff) / ((double) (ft+ff));
tpf = sens;
fpf = 1.0 - spec;
roc_area = 0.0;
tpf_prev = tpf;
fpf_prev = fpf;
for (item=no_item-1; item>-1; item--)
{
tt+= fraction[item];
tf-= fraction[item];
ft+= 1 - fraction[item];
ff-= 1 - fraction[item];
sens = ((double) tt) / ((double) (tt+tf));
spec = ((double) ff) / ((double) (ft+ff));
tpf = sens;
fpf = 1.0 - spec;
if (ft <= no_negs)
roc_area += 0.5*(tpf+tpf_prev)*(fpf-fpf_prev)*(total_true_0/no_negs);
tpf_prev = tpf;
fpf_prev = fpf;
}
return(roc_area);
}
double calc_top1()
{
if ( fraction[no_item-1] >= (1.0-eps) )
return(1.0);
else
return(0.0);
}
double calc_topn(int n)
{
int item;
for (item=no_item-1; item>-1; item--)
if ( ((no_item - item) <= n) && (fraction[item] >= (1.0-eps)) )
return(1.0);
return(0.0);
}
double calc_rank_last()
{
int rank_last, item;
rank_last = no_item - 1;
if ( targs_all_same == 1 )
return((double) no_item);
for (item=no_item-1; item>-1; item--)
if ( fraction[item] > 0 )
rank_last = no_item - item;
return((double) rank_last);
}
double calc_top_sum(int topn)
{
int item;
double top, top_sum;
top_sum = 0.0;
top = (int) (topn+0.01);
for (item=no_item-1; item>-1; item--)
if ( (no_item - item) <= top )
top_sum+= fraction[item];
return(top_sum);
}
double calc_break_even_point()
/* compute the Break-Even Point. We compute it as the Recall
when you predict total_true_1 cases as being positive cases
ties are split by fractional cases */
{
int item;
double tt;
tt = 0.0;
for ( item = no_item - 1; item >= total_true_0; --item )
tt += fraction[item];
return(tt/((double)total_true_1));
}
double calc_slq_score()
/* calculate improved SLAC Q-score using two test set samples and evenly spaced bins */
{
int bin, item;
int slacq_tb[MAX_BINS], slacq_tbb[MAX_BINS], slac_qn;
double slac_q, slac_w;
for (bin=0; bin<no_bins; bin++)
{
slacq_tb[bin] = 0;
slacq_tbb[bin] = 0;
}
for (item=0; item<no_item; item++)
{
bin = floor(pred[item] / (slacq_bin_width+eps));
if ( true[item] > 0.5 )
slacq_tb[bin]++;
else
slacq_tbb[bin]++;
}
slac_q = 0.0;
slac_qn = 0;
for (bin=0; bin<no_bins; bin++)
{
if ( slacq_tb[bin] > slacq_tbb[bin] )
slac_w = 1.0 - ( ((double)slacq_tb[bin]) / ( ((double)(slacq_tb[bin]+slacq_tbb[bin])) + eps) );
else
slac_w = 1.0 - ( ((double)slacq_tbb[bin]) / ( ((double)(slacq_tb[bin]+slacq_tbb[bin])) + eps) );
slac_q += ((double)(slacq_tb[bin]+slacq_tbb[bin])) * (1.0 - 2.0*slac_w) * (1.0 - 2.0*slac_w);
slac_qn += slacq_tb[bin]+slacq_tbb[bin];
}
slac_q = slac_q / ((double)slac_qn);
return(slac_q);
}
double chooseLog(int n, int m){
if(m < 0 || m > n)return -1.0E300;
double ret = factCache[n] - factCache[m] - factCache[n-m];
return ret;
}
double apr_prob(int j, int k, int n, int m){
if(j-1 < j-k-1)return 0;
if(n-j < m-k-1)return 0;
double ret = 0;
ret += chooseLog(j-1,k);
ret += chooseLog(n-j,m-k-1);
ret -= chooseLog(n,m);
if(ret > -50)
return exp(ret);
else
return 0;
}
/*
* a is the number of cases before the tie
* b is the number of positives before the tie
* n is the number of cases in the tie
* m is the number of positives in the tie
*/
double apr_ties(int a, int b, int n, int m){
int j,k;
double ret = 0;
if(m==0)return 0;
//code works for this case, but this way is faster
if(n==1)return (b + 1.0) / (a + 1.0);
if(n >= MAX_TIES){
fprintf(stderr, "Warning: There are more than %d ties in your prediction, using pessimistic ordering in Average Precision.\n",MAX_TIES);
for(j = 1; j<=m; j++){
ret += ((double)b + j) / (a + n - m + j);
}
return ret;
}
for(j = 1; j<=n; j++){
for(k = 0; k<j && k<=m; k++){
double p = apr_prob(j,k,n,m);
double contrib = ((double)b + k + 1) / (a + j);
ret += p * contrib;
}
}
return ret;
}
double calc_apr(int pr_plot)
{
/* now let's do the PRECISION/RECALL curve and Mean Average Precision*/
/* note: the approach followed here may not be standard when it comes to ties */
/* note: the approach followed here computes the area under the precision/recall
curve, not just the average precision at recall = 0,.1,.2,..,1.0 */
int item,i;
double tt, tf, ft, ff, apr, precision, recall;
double t_in_tie, f_in_tie, m, n;
factCache[0] = 0;
for(i=1; i<MAX_TIES; i++)
factCache[i] = factCache[i-1] + log((double)i);
/* if all items negative, precision and recall are not well defined
so just return 0.0 and don't do PR plot */
if ( targs_all_same == 1 && assumed_true == 0.0 ) {
fprintf(stderr, "Warning: Precision and Recall not well defined if all cases are class 0.\n");
return(0.0);
}
tt = 0;
tf = total_true_1;
ft = 0;
ff = total_true_0;
apr = 0.0;
t_in_tie = 0.0;
f_in_tie = 0.0;
for (item=no_item-1; item>-1; item--) {
tt+= fraction[item];
tf-= fraction[item];
ft+= 1 - fraction[item];
ff-= 1 - fraction[item];
t_in_tie += fraction[item];
f_in_tie += 1-fraction[item];
if(item==0 || pred[item] != pred[item-1]){
n = tt + ft - f_in_tie - t_in_tie;
m = tt - t_in_tie;
apr += apr_ties(n+0.5,m+0.5,t_in_tie+f_in_tie+0.5,t_in_tie+0.5);
t_in_tie = 0;
f_in_tie = 0;
}
precision = tt / (tt+ft);
recall = tt / total_true_1;
if ( pr_plot )
printf ("%6.4lf %6.4lf\n", recall, precision);
}
return(apr/((double) total_true_1));
}
double calc_area_under_pr(int pr_plot)
{
/* now let's do the PRECISION/RECALL curve and Mean Average Precision*/
/* note: the approach followed here may not be standard when it comes to ties */
/* note: the approach followed here computes the area under the precision/recall
curve, not just the average precision at recall = 0,.1,.2,..,1.0 */
int item;
double tt, tf, ft, ff, apr, precision, recall, precision_prev, recall_prev;
/* if all items positive, precision and recall are not well defined
so just return 0.0 and don't do PR plot */
if ( targs_all_same == 1 && assumed_true == 0.0 ) {
fprintf(stderr, "Warning: Precision and Recall not well defined if all cases are class 0.\n");
return(0.0);
}
tt = 0;
tf = total_true_1;
ft = 0;
ff = total_true_0;
apr = 0.0;
precision_prev = 0.0;
recall_prev = 0.0;
for (item=no_item-1; item>-1; item--) {
tt+= fraction[item];
tf-= fraction[item];
ft+= 1 - fraction[item];
ff-= 1 - fraction[item];
if ((tt+ft) > 0.0)
precision = tt / (tt+ft);
else
precision = 0;
recall = tt / total_true_1;
apr += 0.5*(precision+precision_prev)*(recall-recall_prev);
if ( pr_plot )
printf ("%6.4lf %6.4lf\n", recall, precision);
precision_prev = precision;
recall_prev = recall;
}
return(apr);
}
void calc_lift(int lift_plot)
{
/* let's do the lift curve */
int item;
double tt, tf, ft, ff, frac_above_threshold, lift;
tt = 0;
tf = total_true_1;
ft = 0;
ff = total_true_0;
for (item=no_item-1; item>-1; item--) {
tt+= fraction[item];
tf-= fraction[item];
ft+= 1 - fraction[item];
ff-= 1 - fraction[item];
frac_above_threshold = (tt + ft) / no_item;
lift = ( tt / (total_true_1 + eps)) / (frac_above_threshold + eps);
if ( lift_plot )
printf ("%6.4lf %6.4lf\n", frac_above_threshold, lift);
}
}
double calc_cal1()
{
/* calculate mean absolute calibration error */
int item, cal_sum_n, cal_f, cal_t;
double cal_sum, plow, phigh, sum_p, obs_p, cal_err;
cal_sum = 0.0;
cal_sum_n = 0;
for (i=0; i<19; i++) {
plow = i * 0.05;