Merge pull request #40 from wikimedia/revscoring-2.8.2

Rebuilds models with revscoring-2.8.2
wikimedia · Jun 9, 2020 · cc0593a
2 parents 10cb7d5 + e29321c
commit cc0593a
Show file tree

Hide file tree

Showing 10 changed files with 96 additions and 87 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -10,14 +10,13 @@ addons:
 # command to install dependencies
 install:
   - pip install -r requirements.txt
+  - pip install -r test-requirements.txt
   - pip install -r docs/requirements.txt
   - python setup.py install
-  - pip install twine
-  - pip install flake8
   - python -m nltk.downloader stopwords
 
 script:
-  - flake8 . --ignore=E722
+  - flake8 .
   - sphinx-build -anW -b html docs dist/docs
   - sphinx-build -b linkcheck docs dist/docs
 

diff --git a/Makefile b/Makefile
@@ -34,7 +34,7 @@ datasets/enwiki.draft_quality.balanced_200k.json.bz2: \
 datasets/enwiki.draft_quality.balanced_200k.with_text.json.bz2: \
 		datasets/enwiki.draft_quality.balanced_200k.json.bz2
 	bzcat $< | \
-	revscoring fetch_text --host https://en.wikipedia.org --threads 4 \
+	revscoring fetch_text --host https://en.wikipedia.org --threads 4 --login \
 	  --verbose | bzip2 -c > $@
 
 datasets/enwiki.draft_quality.balanced_200k.with_cache.json.bz2: \
@@ -276,7 +276,7 @@ datasets/ptwiki.draft_quality.balanced_3k.json.bz2: \
 datasets/ptwiki.draft_quality.balanced_3k.with_text.json.bz2: \
 		datasets/ptwiki.draft_quality.balanced_3k.json.bz2
 	bzcat $< | \
-	revscoring fetch_text --host https://pt.wikipedia.org --threads 4 \
+	revscoring fetch_text --host https://pt.wikipedia.org --threads 4 --login \
 	 --verbose | bzip2 -c > $@
 
 datasets/ptwiki.draft_quality.balanced_3k.with_cache.json.bz2: \

diff --git a/examples/scoring.py b/examples/scoring.py
@@ -6,7 +6,8 @@
 model = Model.load(
     bz2.open("models/ptwiki.draft_quality.gradient_boosting.model.bz2", "rb"))
 
-extractor = api.Extractor(mwapi.Session(host="https://pt.wikipedia.org",
-    user_agent="draftquality test"))
+extractor = api.Extractor(
+    mwapi.Session(host="https://pt.wikipedia.org",
+                  user_agent="draftquality test"))
 values = extractor.extract(58071111, model.features)
 print(model.score(values))
diff --git a/model_info/enwiki.draft_quality.md b/model_info/enwiki.draft_quality.md
@@ -1,10 +1,10 @@
 Model Information:
 	 - type: GradientBoosting
 	 - version: 0.2.1
-	 - params: {'min_samples_leaf': 1, 'loss': 'deviance', 'max_features': 'log2', 'min_weight_fraction_leaf': 0.0, 'min_impurity_decrease': 0.0, 'presort': 'auto', 'warm_start': False, 'population_rates': None, 'min_impurity_split': None, 'max_depth': 5, 'verbose': 0, 'labels': ['OK', 'spam', 'vandalism', 'attack'], 'tol': 0.0001, 'learning_rate': 0.1, 'validation_fraction': 0.1, 'init': None, 'multilabel': False, 'label_weights': None, 'subsample': 1.0, 'max_leaf_nodes': None, 'random_state': None, 'n_iter_no_change': None, 'center': False, 'min_samples_split': 2, 'criterion': 'friedman_mse', 'scale': False, 'n_estimators': 300}
+	 - params: {'max_depth': 5, 'criterion': 'friedman_mse', 'presort': 'deprecated', 'loss': 'deviance', 'max_features': 'log2', 'learning_rate': 0.1, 'init': None, 'min_weight_fraction_leaf': 0.0, 'warm_start': False, 'population_rates': None, 'scale': False, 'min_impurity_decrease': 0.0, 'center': False, 'n_iter_no_change': None, 'labels': ['OK', 'spam', 'vandalism', 'attack'], 'min_impurity_split': None, 'n_estimators': 300, 'random_state': None, 'ccp_alpha': 0.0, 'min_samples_leaf': 1, 'verbose': 0, 'tol': 0.0001, 'max_leaf_nodes': None, 'validation_fraction': 0.1, 'label_weights': None, 'subsample': 1.0, 'min_samples_split': 2, 'multilabel': False}
 	Environment:
-	 - revscoring_version: '2.6.2'
-	 - platform: 'Linux-4.9.0-11-amd64-x86_64-with-debian-9.11'
+	 - revscoring_version: '2.8.2'
+	 - platform: 'Linux-4.9.0-11-amd64-x86_64-with-debian-9.12'
 	 - machine: 'x86_64'
 	 - version: '#1 SMP Debian 4.9.189-3+deb9u1 (2019-09-20)'
 	 - system: 'Linux'
@@ -21,63 +21,63 @@ Model Information:
 	counts (n=201261):
 		label             n          ~OK    ~spam    ~vandalism    ~attack
 		-----------  ------  ---  ------  -------  ------------  ---------
-		'OK'         175000  -->  171487     2634           816         63
-		'spam'        17699  -->    2770    14065           820         44
-		'vandalism'    6503  -->    1627     1341          3188        347
-		'attack'       2059  -->     263      357          1084        355
+		'OK'         175000  -->  171398     2658           856         88
+		'spam'        17699  -->    2747    14038           840         74
+		'vandalism'    6503  -->    1602     1366          3109        426
+		'attack'       2059  -->     261      343          1041        414
 	rates:
 		              'OK'    'spam'    'vandalism'    'attack'
 		----------  ------  --------  -------------  ----------
 		sample       0.87      0.088          0.032       0.01
 		population   0.971     0.02           0.007       0.002
-	match_rate (micro=0.93, macro=0.254):
+	match_rate (micro=0.929, macro=0.254):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.957   0.039        0.017     0.003
-	filter_rate (micro=0.07, macro=0.746):
+		0.956   0.039        0.017     0.003
+	filter_rate (micro=0.071, macro=0.746):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.043   0.961        0.983     0.997
-	recall (micro=0.971, macro=0.609):
-		  OK    spam    vandalism    attack
-		----  ------  -----------  --------
-		0.98   0.795         0.49     0.172
-	!recall (micro=0.827, macro=0.946):
+		0.044   0.961        0.983     0.997
+	recall (micro=0.97, macro=0.613):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.823   0.976        0.986     0.998
-	precision (micro=0.975, macro=0.436):
+		0.979   0.793        0.478     0.201
+	!recall (micro=0.829, macro=0.946):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.995   0.401        0.202     0.147
-	!precision (micro=0.563, macro=0.885):
-		  OK    spam    vandalism    attack
-		----  ------  -----------  --------
-		0.55   0.996        0.996     0.998
-	f1 (micro=0.971, macro=0.491):
+		0.824   0.976        0.986     0.997
+	precision (micro=0.975, macro=0.431):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.987   0.533        0.286     0.159
-	!f1 (micro=0.669, macro=0.909):
+		0.995   0.399        0.197     0.134
+	!precision (micro=0.557, macro=0.884):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.659   0.986        0.991     0.998
-	accuracy (micro=0.975, macro=0.982):
+		0.544   0.996        0.996     0.998
+	f1 (micro=0.971, macro=0.489):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.975   0.973        0.982     0.996
-	fpr (micro=0.173, macro=0.054):
+		0.987   0.531        0.279     0.161
+	!f1 (micro=0.665, macro=0.908):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.177   0.024        0.014     0.002
-	roc_auc (micro=0.979, macro=0.971):
+		0.656   0.986        0.991     0.998
+	accuracy (micro=0.975, macro=0.981):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.979   0.979        0.956     0.968
-	pr_auc (micro=0.984, macro=0.484):
+		0.975   0.973        0.982     0.995
+	fpr (micro=0.171, macro=0.054):
 		   OK    spam    vandalism    attack
 		-----  ------  -----------  --------
-		0.999   0.616        0.217     0.101
+		0.176   0.024        0.014     0.003
+	roc_auc (micro=0.979, macro=0.97):
+		   OK    spam    vandalism    attack
+		-----  ------  -----------  --------
+		0.979   0.979        0.955     0.968
+	pr_auc (micro=0.979, macro=0.477):
+		   OK    spam    vandalism    attack
+		-----  ------  -----------  --------
+		0.994   0.616        0.209     0.091
 
-	 - score_schema: {'title': 'Scikit learn-based classifier score with probability', 'properties': {'probability': {'properties': {'vandalism': {'type': 'number'}, 'spam': {'type': 'number'}, 'attack': {'type': 'number'}, 'OK': {'type': 'number'}}, 'type': 'object', 'description': 'A mapping of probabilities onto each of the potential output labels'}, 'prediction': {'type': 'string', 'description': 'The most likely label predicted by the estimator'}}, 'type': 'object'}
+	 - score_schema: {'title': 'Scikit learn-based classifier score with probability', 'properties': {'probability': {'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'attack': {'type': 'number'}, 'spam': {'type': 'number'}, 'OK': {'type': 'number'}, 'vandalism': {'type': 'number'}}, 'type': 'object'}, 'prediction': {'description': 'The most likely label predicted by the estimator', 'type': 'string'}}, 'type': 'object'}
 
diff --git a/model_info/ptwiki.draft_quality.md b/model_info/ptwiki.draft_quality.md
@@ -1,12 +1,12 @@
 Model Information:
 	 - type: GradientBoosting
 	 - version: 0.2.1
-	 - params: {'n_iter_no_change': None, 'loss': 'deviance', 'criterion': 'friedman_mse', 'labels': ['OK', 'spam', 'unsuitable'], 'scale': False, 'multilabel': False, 'min_weight_fraction_leaf': 0.0, 'warm_start': False, 'min_samples_split': 2, 'init': None, 'subsample': 1.0, 'n_estimators': 500, 'max_features': 'log2', 'learning_rate': 0.1, 'validation_fraction': 0.1, 'max_leaf_nodes': None, 'center': False, 'random_state': None, 'presort': 'auto', 'min_impurity_decrease': 0.0, 'label_weights': None, 'tol': 0.0001, 'verbose': 0, 'max_depth': 7, 'population_rates': None, 'min_impurity_split': None, 'min_samples_leaf': 1}
+	 - params: {'ccp_alpha': 0.0, 'init': None, 'learning_rate': 0.1, 'warm_start': False, 'min_samples_leaf': 1, 'label_weights': None, 'loss': 'deviance', 'subsample': 1.0, 'min_impurity_decrease': 0.0, 'n_estimators': 500, 'max_leaf_nodes': None, 'multilabel': False, 'random_state': None, 'min_samples_split': 2, 'criterion': 'friedman_mse', 'n_iter_no_change': None, 'center': False, 'validation_fraction': 0.1, 'max_depth': 7, 'verbose': 0, 'tol': 0.0001, 'population_rates': None, 'presort': 'deprecated', 'labels': ['OK', 'spam', 'unsuitable'], 'scale': False, 'min_impurity_split': None, 'max_features': 'log2', 'min_weight_fraction_leaf': 0.0}
 	Environment:
-	 - revscoring_version: '2.6.9'
-	 - platform: 'Linux-4.9.0-8-amd64-x86_64-with-debian-9.4'
+	 - revscoring_version: '2.8.2'
+	 - platform: 'Linux-4.9.0-11-amd64-x86_64-with-debian-9.12'
 	 - machine: 'x86_64'
-	 - version: '#1 SMP Debian 4.9.144-3.1 (2019-02-19)'
+	 - version: '#1 SMP Debian 4.9.189-3+deb9u1 (2019-09-20)'
 	 - system: 'Linux'
 	 - processor: ''
 	 - python_build: ('default', 'Sep 27 2018 17:25:39')
@@ -15,68 +15,68 @@ Model Information:
 	 - python_implementation: 'CPython'
 	 - python_revision: ''
 	 - python_version: '3.5.3'
-	 - release: '4.9.0-8-amd64'
+	 - release: '4.9.0-11-amd64'
 
 	Statistics:
 	counts (n=4672):
 		label            n         ~OK    ~spam    ~unsuitable
 		------------  ----  ---  -----  -------  -------------
-		'OK'          1600  -->   1265      135            200
-		'spam'        1481  -->     86     1105            290
-		'unsuitable'  1591  -->    172      304           1115
+		'OK'          1600  -->   1286      129            185
+		'spam'        1481  -->     76     1124            281
+		'unsuitable'  1591  -->    169      292           1130
 	rates:
 		              'OK'    'spam'    'unsuitable'
 		----------  ------  --------  --------------
 		sample       0.342     0.317           0.341
 		population   0.962     0.018           0.02
-	match_rate (micro=0.74, macro=0.361):
+	match_rate (micro=0.752, macro=0.361):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.764   0.149          0.17
-	filter_rate (micro=0.26, macro=0.639):
+		0.776   0.143         0.162
+	filter_rate (micro=0.248, macro=0.639):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.236   0.851          0.83
-	recall (micro=0.788, macro=0.746):
+		0.224   0.857         0.838
+	recall (micro=0.801, macro=0.758):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.791   0.746         0.701
-	!recall (micro=0.914, macro=0.873):
+		0.804   0.759          0.71
+	!recall (micro=0.918, macro=0.879):
+		  OK    spam    unsuitable
+		----  ------  ------------
+		0.92   0.868         0.849
+	precision (micro=0.961, macro=0.393):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.916   0.862         0.841
-	precision (micro=0.961, macro=0.39):
+		0.996   0.098         0.087
+	!precision (micro=0.189, macro=0.715):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.996   0.092         0.082
-	!precision (micro=0.181, macro=0.712):
+		0.157   0.995         0.993
+	f1 (micro=0.862, macro=0.406):
+		  OK    spam    unsuitable
+		----  ------  ------------
+		0.89   0.173         0.155
+	!f1 (micro=0.293, macro=0.704):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.148   0.994         0.993
-	f1 (micro=0.854, macro=0.397):
+		0.269   0.927         0.915
+	accuracy (micro=0.81, macro=0.84):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.881   0.165         0.146
-	!f1 (micro=0.28, macro=0.697):
+		0.808   0.866         0.846
+	fpr (micro=0.082, macro=0.121):
+		  OK    spam    unsuitable
+		----  ------  ------------
+		0.08   0.132         0.151
+	roc_auc (micro=0.92, macro=0.897):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.255   0.924         0.911
-	accuracy (micro=0.797, macro=0.831):
+		0.921   0.901         0.867
+	pr_auc (micro=0.965, macro=0.461):
 		   OK    spam    unsuitable
 		-----  ------  ------------
-		0.795    0.86         0.838
-	fpr (micro=0.086, macro=0.127):
-		   OK    spam    unsuitable
-		-----  ------  ------------
-		0.084   0.138         0.159
-	roc_auc (micro=0.903, macro=0.887):
-		   OK    spam    unsuitable
-		-----  ------  ------------
-		0.904   0.896         0.859
-	pr_auc (micro=0.964, macro=0.45):
-		   OK    spam    unsuitable
-		-----  ------  ------------
-		0.995   0.202         0.152
+		0.996   0.213         0.174
 
-	 - score_schema: {'type': 'object', 'title': 'Scikit learn-based classifier score with probability', 'properties': {'prediction': {'type': 'string', 'description': 'The most likely label predicted by the estimator'}, 'probability': {'type': 'object', 'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'OK': {'type': 'number'}, 'spam': {'type': 'number'}, 'unsuitable': {'type': 'number'}}}}}
+	 - score_schema: {'title': 'Scikit learn-based classifier score with probability', 'properties': {'probability': {'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'spam': {'type': 'number'}, 'unsuitable': {'type': 'number'}, 'OK': {'type': 'number'}}, 'type': 'object'}, 'prediction': {'description': 'The most likely label predicted by the estimator', 'type': 'string'}}, 'type': 'object'}
 
diff --git a/models/enwiki.draft_quality.gradient_boosting.model.bz2 b/models/enwiki.draft_quality.gradient_boosting.model.bz2
diff --git a/models/ptwiki.draft_quality.gradient_boosting.model.bz2 b/models/ptwiki.draft_quality.gradient_boosting.model.bz2
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 docopt==0.6.2
-revscoring >= 2.6.2, < 2.6.999
+revscoring >= 2.8.2, < 2.8.999
 mysqltsv >= 0.0.7, < 0.0.999
 yamlconf >= 0.2.2, < 0.2.999
 json2tsv >= 0.1.2

diff --git a/test-requirements.txt b/test-requirements.txt
@@ -0,0 +1,4 @@
+pytest
+codecov
+pytest-cov
+flake8 >= 3.8.1, < 3.8.999
diff --git a/tox.ini b/tox.ini
@@ -0,0 +1,5 @@
+[flake8]
+exclude =
+    # This file is code-generated by Sphinx, so we don't care.
+    docs/conf.py
+ignore = E126,E127,E741,W504,E722