-
Notifications
You must be signed in to change notification settings - Fork 0
/
datasets_config.json
234 lines (234 loc) · 13 KB
/
datasets_config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
{ "SERVER_URL" : "https://raw.githubusercontent.com/uwgraphics/BoxerData/master/",
"datasets": {
"Iris": {
"path": "datasets/iris/",
"description": "",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/publication/demo/",
"views_recommendation":[
{"name":"Performance_Overall"},
{"name":"Histogram","config":["SepalLengthCm"]}
]
},
"Imputation": {
"path": "datasets/imputation/",
"description": "This use case uses the Mushroom dataset and considers a testing set of 2,000 (of 8,124) randomly selected instances.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/model-selection/",
"views_recommendation":[
{"name":"MetricsParallel"},
{"name":"Performance_Overall"},
{"name":"Histogram","config":["sColour"]}
]
},
"IMDB Confidence": {
"path": "datasets/confidence/",
"description": "The data consists of 5044 movies with 27 features; however, 25% is sequestered for final assessment. A stratified sample of 200 movies per class is held out from the 3756 for testing.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/model-selection-tuning/",
"views_recommendation":[
{"name":"Performance_Overall"},
{"name":"CumulativeAccuracy"},
{"name":"Histogram","config":["ClassDistribution"]}
]
},
"recid": {
"path": "datasets/recid/",
"description": "We consider a standard test case for fair learning: the Broward County recidivism dataset, popularized by ProPublica. The data set contains 6,172 instances and 14 numeric features (created by one-hot encoding the categorical features in the initial seven feature data set). 20% are held for testing.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/fairness-assessment/",
"views_recommendation":[
{"name":"Histogram","config":["race"]},
{"name":"Performance_Selection"},
{"name":"ConfusionMatrixGrid"}
]
},
"date-12000-strat": {
"path": "datasets/date-12000-strat/",
"description": "The dataset for this use case is the TCP collection of historical documents. This use case took a random sample of 12,000 documents, and held out 30% using stratified sampling. While the testing set is balanced (1,800 per class), the training set is highly skewed (only 15% before 1642).",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/bias-discovery/",
"views_recommendation":[
{"name":"Performance_Overall"},
{"name":"MetricsParallel"},
{"name":"ConfusionMatrixGrid"},
{"name":"Histogram","config":["date-bucket"]}
]
},
"fuzz-mod-5-02": {
"path": "datasets/fuzz-mod-5-02/",
"description": "The data set is a collection of 554 plays written in the Early Modern Period (1470-1660). Five linguistic features are used. We classify plays with one of four genres (Comedy, History, Tragedy and Tragicomedy).",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/feature-sensitivity/",
"views_recommendation":[
{"name":"Performance_Overall"},
{"name":"MetricsTable"},
{"name":"Performance_Selection"}
]
},
"tcp-tree-select-9-10": {
"path": "datasets/tcp-tree-select-9-10/",
"description": "This use case considers a corpus of 59,989 documents from a historical literary collection and the data counts the 500 most common English words in each document.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/model-selection-discovery/",
"views_recommendation":[
{"name":"MetricsParallel"},
{"name":"Performance_Overall"},
{"name":"CumulativeAccuracy"},
{"name":"Histogram","config":["ClassDistribution"]}
]
},
"(continuous) wine quality":{
"path": "datasets/wine_quality_one_classifier/",
"description": "This use case illustrates how detailed comparisons can improve threshold selection. The scenario uses the wine quality benchmark, which requires classifying the quality of a wine from its properties.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/continuous-tuning/",
"views_recommendation":[
{"name":"Performance_Overall"},
{"name":"Histogram","config":["ClassDistribution"]},
{"name":"MetricsTable"},
{"name":"Trinary_Instance_Distribuion","config":{
"selection":"overall",
"distribution":"score"}},
{"name":"Thresholds","config":{
"rf":50
}},
{"name":"Bandwidths","config":{
"rf":"0.45-0.55"
}}
]
},
"(continuous) heart disease":{
"path": "datasets/heart_disease/",
"description": "The Heart Disease prediction dataset is a standard data set used in machine learning education. Classifiers are trained to predict if a patient is likely to develop a disease (binary decision). The scenario is an example where a false negative error (not warning a patient of a potential problem) is more costly than a false positive (which may cause extra caution, or fear). It is also a scenario where a 'no-prediction' option is viable: recommending that a patient be tested further has a lower cost than an error.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/continuous-heart-example/",
"views_recommendation":[
{"name":"Reliability_Curve","config":{
"classifiers":["lr","rf"],
"selection":"overall"}},
{"name":"Trinary_Performance_Confidence","config":{
"selection":"overall",
"distribution":"trinary"}},
{"name":"UncertaintyHeatMap","config":{
"classifiers":["rf"],
"correlationMode":"precision"}},
{"name":"Trinary_Bandwidth_Assessment","config":{
"classifiers":["knn"],
"evaluation":"accuracy"}},
{"name":"Trinary_Instance_Distribuion","config":{
"selection":"overall",
"distribution":"score"}},
{"name":"Histogram", "config":["sex"]},
{"name":"Thresholds","config":{
"lr":50,
"knn":50,
"rf":50
}},
{"name":"Bandwidths","config":{
"lr":"0.2-0.55",
"knn":"0.45-0.55",
"rf":"0.45-0.55"
}}
]
},
"(continuous) income":{
"path":"datasets/income_original",
"description": "This use case demonstrates how calibration benefits from detailed assessment. The scenario uses a downsampled version of the income classification benchmark dataset. Classifiers determine whether an individual’s income is above a certain level.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/continous-model-selection/",
"views_recommendation":[
{"name":"Performance_Overall"},
{"name":"Reliability_Curve","config":{
"classifiers":["lr","nb"],
"selection":"overall"}},
{"name":"Trinary_Performance_Confidence", "config":{
"selection":"overall",
"distribution":"trinary"}},
{"name":"Trinary_Instance_Distribuion","config":{
"selection":"overall",
"distribution":"score"}},
{"name":"Thresholds","config":{
"lr":50,
"nb":50,
"mlp":50
}},
{"name":"Bandwidths","config":{
"lr":"0.40-0.60",
"nb":"0.40-0.60",
"mlp":"0.40-0.60"
}}
]
},
"(continuous) income2":{
"path":"datasets/income2",
"description": "This is the income use case adapted to make a smaller figure in the paper.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/continous-model-selection/",
"views_recommendation":[
{"name":"Performance_Overall"},
{"name":"Reliability_Curve","config":{
"classifiers":["lr","nb"],
"selection":"overall"}},
{"name":"Trinary_Performance_Confidence", "config":{
"selection":"overall",
"distribution":"trinary"}},
{"name":"Trinary_Instance_Distribuion","config":{
"selection":"overall",
"distribution":"score"}},
{"name":"Thresholds","config":{
"lr":50,
"nb":50
}},
{"name":"Bandwidths","config":{
"lr":"0.40-0.60",
"nb":"0.40-0.60"
}}
]
},
"(continuous) cifar-sampled-scaling":{
"path": "datasets/coarse-flowers-1in10-vgg16-scaling-0.0510",
"description": "This use case shows how our approach can help with model selection and threshold tuning. A classifier was built for the CIFAR 100 computer vision benchmark using Tensorflow. The data set has 100 classes, and the trained classifier produces a distribution over these classes as its decision. Our goal is to create a binary classifier for a “meta-class” which combines 5 of the main classes. In particular, we want to classify flowers, which can be any one of 5 of the original classes. Because the test set contains all 100 classes, it is quite imbalanced: flowers are only 5% of the total instances.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/continous-detail-examination/",
"views_recommendation":[
{"name":"Reliability_Curve","config":{
"classifiers":["largest","avgtop5", "sum"],
"selection":"overall"}},
{"name":"Trinary_Bandwidth_Assessment", "config":["largest"]},
{"name":"Rejected_Curve", "config":{
"classifiers":["largest","avgtop5", "sum"],
"selection":"overall",
"evaluation":"c_accuracy"}},
{"name":"Trinary_Performance_Confidence","config":{
"selection":"overall",
"distribution":"trinary"}},
{"name":"Trinary_Instance_Distribuion","config":{
"selection":"overall",
"distribution":"score"}},
{"name":"FocusItem"},
{"name":"Thresholds","config":{
"largest":30,
"avgtop5":10,
"sum":50
}},
{"name":"Bandwidths","config":{
"largest":"0.12-0.48",
"avgtop5":"0.06-0.16",
"sum":"0.46-0.54"
}}
]
},
"(continuous) cdate-2500": {
"path": "datasets/cdate-2500",
"description": "This use case considers a corpus of 59,989 documents from a historical literary collection: Text Creation Partnership (TCP) transcriptions of the Early English Books Online (EEBO). The data counts the 500 most common English words in each document. For the experiment, we took a random sample of 2500 documents, and held out 30% using stratified sampling.",
"related_use_case":"https://uwgraphics.github.io/BoxerDocs/docs/use_cases/continuous-exmanination/",
"views_recommendation":[
{"name":"Histogram","config":["date-bucket"]},
{"name":"Trinary_Performance_Confidence","config":{
"selection":"overall",
"distribution":"trinary"}},
{"name":"Thresholds","config":{
"rf":70,
"rf-bal":70,
"lr":70,
"lr-bal":70
}},
{"name":"Bandwidths","config":{
"rf":"0.65-0.75",
"rf-bal":"0.65-0.75",
"lr":"0.65-0.75",
"lr-bal":"0.65-0.75"
}}
]
}
}
}