wikimedia · halfak · Dec 17, 2018 · Dec 16, 2018
diff --git a/Makefile b/Makefile
@@ -817,55 +817,101 @@ datasets/dewiki.autolabeled_revisions.20k_2015.review.json: \
 		datasets/dewiki.autolabeled_revisions.20k_2015.json
 	cat $< | grep -E '"needs_review": (true|"True")' > $@
 
-datasets/dewiki.autolabeled_revisions.w_cache.20k_2015.json: \
-		datasets/dewiki.autolabeled_revisions.20k_2015.review.json \
+
+datasets/dewiki.human_labeled_revisions.5k_2015.json:
+	./utility fetch_labels \
+		https://labels.wmflabs.org/campaigns/dewiki/16/ > $@
+
+datasets/dewiki.labeled_revisions.20k_2015.json: \
+		datasets/dewiki.human_labeled_revisions.5k_2015.json \
 		datasets/dewiki.autolabeled_revisions.20k_2015.no_review.json
-	cat $^ | \
+	./utility merge_labels $^ > $@
+
+datasets/dewiki.labeled_revisions.w_cache.20k_2015.json: \
+		datasets/dewiki.labeled_revisions.20k_2015.json
+	cat $< | \
 	revscoring extract \
-		editquality.feature_lists.dewiki.reverted \
+		editquality.feature_lists.dewiki.damaging \
+		editquality.feature_lists.dewiki.goodfaith \
 		--host https://de.wikipedia.org \
 		--extractor $(max_extractors) \
 		--verbose > $@
 
-tuning_reports/dewiki.reverted.md: \
-		datasets/dewiki.autolabeled_revisions.w_cache.20k_2015.json
+tuning_reports/dewiki.damaging.md: \
+		datasets/dewiki.labeled_revisions.w_cache.20k_2015.json
 	cat $< | \
 	revscoring tune \
 		config/classifiers.params.yaml \
-		editquality.feature_lists.dewiki.reverted \
-		reverted_for_damage \
+		editquality.feature_lists.dewiki.damaging \
+		damaging \
 		roc_auc.labels.true \
-		--label-weight "true=$(reverted_weight)" \
-		--pop-rate "true=0.049775581219426095" \
-		--pop-rate "false=0.950224418780574" \
+		--label-weight "true=$(damaging_weight)" \
+		--pop-rate "true=0.029975955116216937" \
+		--pop-rate "false=0.970024044883783" \
 		--center --scale \
 		--cv-timeout 60 \
 		--debug > $@
 
-models/dewiki.reverted.gradient_boosting.model: \
-		datasets/dewiki.autolabeled_revisions.w_cache.20k_2015.json
+models/dewiki.damaging.gradient_boosting.model: \
+		datasets/dewiki.labeled_revisions.w_cache.20k_2015.json
 	cat $^ | \
 	revscoring cv_train \
 		revscoring.scoring.models.GradientBoosting \
-		editquality.feature_lists.dewiki.reverted \
-		reverted_for_damage \
-		--version=$(reverted_major_minor).0 \
+		editquality.feature_lists.dewiki.damaging \
+		damaging \
+		--version=$(damaging_major_minor).0 \
 		-p 'learning_rate=0.1' \
 		-p 'max_depth=3' \
 		-p 'max_features="log2"' \
-		-p 'n_estimators=300' \
-		--label-weight "true=$(reverted_weight)" \
-		--pop-rate "true=0.049775581219426095" \
-		--pop-rate "false=0.950224418780574" \
+		-p 'n_estimators=100' \
+		--label-weight "true=$(damaging_weight)" \
+		--pop-rate "true=0.029975955116216937" \
+		--pop-rate "false=0.970024044883783" \
+		--center --scale > $@
+
+	revscoring model_info $@ > model_info/dewiki.damaging.md
+
+tuning_reports/dewiki.goodfaith.md: \
+		datasets/dewiki.labeled_revisions.w_cache.20k_2015.json
+	cat $< | \
+	revscoring tune \
+		config/classifiers.params.yaml \
+		editquality.feature_lists.dewiki.goodfaith \
+		goodfaith \
+		roc_auc.labels.true \
+		--label-weight "false=$(goodfaith_weight)" \
+		--pop-rate "true=0.9806572268234037" \
+		--pop-rate "false=0.019342773176596273" \
+		--center --scale \
+		--cv-timeout 60 \
+		--debug > $@
+
+models/dewiki.goodfaith.gradient_boosting.model: \
+		datasets/dewiki.labeled_revisions.w_cache.20k_2015.json
+	cat $^ | \
+	revscoring cv_train \
+		revscoring.scoring.models.GradientBoosting \
+		editquality.feature_lists.dewiki.goodfaith \
+		goodfaith \
+		--version=$(goodfaith_major_minor).0 \
+		-p 'learning_rate=0.5' \
+		-p 'max_depth=5' \
+		-p 'max_features="log2"' \
+		-p 'n_estimators=500' \
+		--label-weight "false=$(goodfaith_weight)" \
+		--pop-rate "true=0.9806572268234037" \
+		--pop-rate "false=0.019342773176596273" \
 		--center --scale > $@
 
-	revscoring model_info $@ > model_info/dewiki.reverted.md
+	revscoring model_info $@ > model_info/dewiki.goodfaith.md
 
 dewiki_models: \
-	models/dewiki.reverted.gradient_boosting.model
+	models/dewiki.damaging.gradient_boosting.model \
+	models/dewiki.goodfaith.gradient_boosting.model
 
 dewiki_tuning_reports: \
-	tuning_reports/dewiki.reverted.md
+	tuning_reports/dewiki.damaging.md \
+	tuning_reports/dewiki.goodfaith.md
 
 ############################# Greek Wikipedia ################################
 

diff --git a/config/wikis/dewiki.yaml b/config/wikis/dewiki.yaml
@@ -5,16 +5,32 @@ host: de.wikipedia.org
 samples:
     20k_2015:
         quarry_url: "http://quarry.wmflabs.org/run/42223/output/0/json-lines?download=true"
+    5k_2015:
+        labeling_campaign: "https://labels.wmflabs.org/campaigns/dewiki/16/"
 
 default_sample: 20k_2015
+review_sample: 5k_2015
+
+merged_samples:  # merged sample name -> the autolabeled and human-labeled samples (keys of `samples`) it combines
+    20k_2015:
+        autolabeled_revisions: "20k_2015"
+        human_labeled_revisions: "5k_2015"
 
 models:
-    reverted:
+    damaging:
         tuning_params:
-            n_estimators: 300
             learning_rate: 0.1
             max_depth: 3
-        pop_rate_true: 0.049775581219426095
+            n_estimators: 100
+            max_features: "log2"
+        pop_rate_true: 0.029975955116216937
+    goodfaith:
+        tuning_params:
+            learning_rate: 0.5
+            n_estimators: 500
+            max_features: "log2"
+            max_depth: 5
+        pop_rate_true: 0.9806572268234037
 
 trusted_groups:
     - sysop

diff --git a/model_info/dewiki.damaging.md b/model_info/dewiki.damaging.md
@@ -0,0 +1,81 @@
+Model Information:
+	 - type: GradientBoosting
+	 - version: 0.4.0
+	 - params: {'random_state': None, 'min_weight_fraction_leaf': 0.0, 'init': None, 'min_samples_split': 2, 'min_impurity_split': None, 'max_features': 'log2', 'population_rates': None, 'min_samples_leaf': 1, 'presort': 'auto', 'learning_rate': 0.1, 'min_impurity_decrease': 0.0, 'scale': True, 'n_estimators': 100, 'max_leaf_nodes': None, 'verbose': 0, 'criterion': 'friedman_mse', 'label_weights': OrderedDict([(True, 10)]), 'loss': 'deviance', 'warm_start': False, 'subsample': 1.0, 'multilabel': False, 'center': True, 'max_depth': 3, 'labels': [True, False]}
+	Environment:
+	 - revscoring_version: '2.2.4'
+	 - platform: 'Linux-4.9.0-8-amd64-x86_64-with-debian-9.6'
+	 - machine: 'x86_64'
+	 - version: '#1 SMP Debian 4.9.110-3+deb9u4 (2018-08-21)'
+	 - system: 'Linux'
+	 - processor: ''
+	 - python_build: ('default', 'Jan 19 2017 14:11:04')
+	 - python_compiler: 'GCC 6.3.0 20170118'
+	 - python_branch: ''
+	 - python_implementation: 'CPython'
+	 - python_revision: ''
+	 - python_version: '3.5.3'
+	 - release: '4.9.0-8-amd64'
+
+	Statistics:
+	counts (n=18692):
+		label        n         ~True    ~False
+		-------  -----  ---  -------  --------
+		True       561  -->      415       146
+		False    18131  -->     1206     16925
+	rates:
+		              True    False
+		----------  ------  -------
+		sample        0.03     0.97
+		population    0.03     0.97
+	match_rate (micro=0.889, macro=0.5):
+		  False    True
+		-------  ------
+		  0.913   0.087
+	filter_rate (micro=0.111, macro=0.5):
+		  False    True
+		-------  ------
+		  0.087   0.913
+	recall (micro=0.928, macro=0.837):
+		  False    True
+		-------  ------
+		  0.933    0.74
+	!recall (micro=0.746, macro=0.837):
+		  False    True
+		-------  ------
+		   0.74   0.933
+	precision (micro=0.969, macro=0.624):
+		  False    True
+		-------  ------
+		  0.991   0.256
+	!precision (micro=0.278, macro=0.624):
+		  False    True
+		-------  ------
+		  0.256   0.991
+	f1 (micro=0.944, macro=0.671):
+		  False    True
+		-------  ------
+		  0.962    0.38
+	!f1 (micro=0.398, macro=0.671):
+		  False    True
+		-------  ------
+		   0.38   0.962
+	accuracy (micro=0.928, macro=0.928):
+		  False    True
+		-------  ------
+		  0.928   0.928
+	fpr (micro=0.254, macro=0.163):
+		  False    True
+		-------  ------
+		   0.26   0.067
+	roc_auc (micro=0.937, macro=0.936):
+		  False    True
+		-------  ------
+		  0.937   0.934
+	pr_auc (micro=0.983, macro=0.752):
+		  False    True
+		-------  ------
+		  0.998   0.507
+
+	 - score_schema: {'type': 'object', 'properties': {'prediction': {'type': 'bool', 'description': 'The most likely label predicted by the estimator'}, 'probability': {'type': 'object', 'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'false': 'number', 'true': 'number'}}}, 'title': 'Scikit learn-based classifier score with probability'}
+
diff --git a/model_info/dewiki.goodfaith.md b/model_info/dewiki.goodfaith.md
@@ -0,0 +1,81 @@
+Model Information:
+	 - type: GradientBoosting
+	 - version: 0.4.0
+	 - params: {'loss': 'deviance', 'center': True, 'min_weight_fraction_leaf': 0.0, 'min_samples_leaf': 1, 'min_impurity_split': None, 'min_impurity_decrease': 0.0, 'subsample': 1.0, 'max_features': 'log2', 'population_rates': None, 'warm_start': False, 'learning_rate': 0.5, 'labels': [True, False], 'presort': 'auto', 'label_weights': OrderedDict([(False, 10)]), 'max_depth': 5, 'scale': True, 'init': None, 'multilabel': False, 'criterion': 'friedman_mse', 'min_samples_split': 2, 'verbose': 0, 'random_state': None, 'max_leaf_nodes': None, 'n_estimators': 500}
+	Environment:
+	 - revscoring_version: '2.2.4'
+	 - platform: 'Linux-4.9.0-8-amd64-x86_64-with-debian-9.6'
+	 - machine: 'x86_64'
+	 - version: '#1 SMP Debian 4.9.110-3+deb9u4 (2018-08-21)'
+	 - system: 'Linux'
+	 - processor: ''
+	 - python_build: ('default', 'Jan 19 2017 14:11:04')
+	 - python_compiler: 'GCC 6.3.0 20170118'
+	 - python_branch: ''
+	 - python_implementation: 'CPython'
+	 - python_revision: ''
+	 - python_version: '3.5.3'
+	 - release: '4.9.0-8-amd64'
+
+	Statistics:
+	counts (n=18692):
+		label        n         ~True    ~False
+		-------  -----  ---  -------  --------
+		True     18330  -->    18280        50
+		False      362  -->      219       143
+	rates:
+		              True    False
+		----------  ------  -------
+		sample       0.981    0.019
+		population   0.981    0.019
+	match_rate (micro=0.971, macro=0.5):
+		  False    True
+		-------  ------
+		   0.01    0.99
+	filter_rate (micro=0.029, macro=0.5):
+		  False    True
+		-------  ------
+		   0.99    0.01
+	recall (micro=0.986, macro=0.696):
+		  False    True
+		-------  ------
+		  0.395   0.997
+	!recall (micro=0.407, macro=0.696):
+		  False    True
+		-------  ------
+		  0.997   0.395
+	precision (micro=0.983, macro=0.864):
+		  False    True
+		-------  ------
+		  0.741   0.988
+	!precision (micro=0.745, macro=0.864):
+		  False    True
+		-------  ------
+		  0.988   0.741
+	f1 (micro=0.983, macro=0.754):
+		  False    True
+		-------  ------
+		  0.515   0.993
+	!f1 (micro=0.524, macro=0.754):
+		  False    True
+		-------  ------
+		  0.993   0.515
+	accuracy (micro=0.986, macro=0.986):
+		  False    True
+		-------  ------
+		  0.986   0.986
+	fpr (micro=0.593, macro=0.304):
+		  False    True
+		-------  ------
+		  0.003   0.605
+	roc_auc (micro=0.977, macro=0.91):
+		  False    True
+		-------  ------
+		   0.84    0.98
+	pr_auc (micro=0.985, macro=0.768):
+		  False    True
+		-------  ------
+		  0.542   0.994
+
+	 - score_schema: {'properties': {'probability': {'properties': {'false': 'number', 'true': 'number'}, 'description': 'A mapping of probabilities onto each of the potential output labels', 'type': 'object'}, 'prediction': {'description': 'The most likely label predicted by the estimator', 'type': 'bool'}}, 'title': 'Scikit learn-based classifier score with probability', 'type': 'object'}
+
diff --git a/models/dewiki.damaging.gradient_boosting.model b/models/dewiki.damaging.gradient_boosting.model
diff --git a/models/dewiki.goodfaith.gradient_boosting.model b/models/dewiki.goodfaith.gradient_boosting.model
diff --git a/models/dewiki.reverted.gradient_boosting.model b/models/dewiki.reverted.gradient_boosting.model