diff --git a/revscoring/extractors/api/extractor.py b/revscoring/extractors/api/extractor.py index ed15c4ee..e7cc1d6e 100644 --- a/revscoring/extractors/api/extractor.py +++ b/revscoring/extractors/api/extractor.py @@ -237,7 +237,8 @@ def query_revisions_by_revids(self, revids, batch=50, **params): break else: doc = self.session.get(action='query', prop='revisions', - revids=batch_ids, **params) + revids=batch_ids, rvslots='main', + **params) for page_doc in doc['query'].get('pages', {}).values(): yield from _normalize_revisions(page_doc) diff --git a/revscoring/extractors/api/revision_oriented.py b/revscoring/extractors/api/revision_oriented.py index f12feed4..3e4a8791 100644 --- a/revscoring/extractors/api/revision_oriented.py +++ b/revscoring/extractors/api/revision_oriented.py @@ -27,11 +27,11 @@ def __init__(self, revision, extractor, rev_doc, id_datasource=None): name=revision.byte_len.name) self.minor = key_exists('minor', rev_doc, name=revision.minor.name) - self.content_model = key('contentmodel', rev_doc, + self.content_model = key(['slots', 'main', 'contentmodel'], rev_doc, revision.content_model.name) if hasattr(revision, 'text'): - self.text = key('*', rev_doc, name=revision.text.name, + self.text = key(['slots', 'main', '*'], rev_doc, name=revision.text.name, if_missing=(TextDeleted, revision.text)) if hasattr(revision, 'parent'):