Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions articlequality/feature_lists/wikidatawiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from revscoring.features.meta import aggregators
from revscoring.features.modifiers import not_

from . import wikibase, property_datatypes
from . import wikibase
from .wikidatawiki_data import property_datatypes, items_lists

name = "wikidatawiki"

Expand Down Expand Up @@ -139,6 +140,14 @@ def _process_important_description_translations(item_descriptions):
"`int` : A count of unique sources in the revision"


def _process_is_astronomical_object(entity):
    """
    Report whether any INSTANCE_OF statement of `entity` has a datavalue
    whose string form is listed in `items_lists.ASTRONOMICAL_OBJECTS`.

    Returns `False` when the entity has no INSTANCE_OF statements.
    """
    instance_of_statements = entity.properties.get(properties.INSTANCE_OF, [])
    return any(
        str(statement.claim.datavalue) in items_lists.ASTRONOMICAL_OBJECTS
        for statement in instance_of_statements
    )


def _process_item_completeness(current_properties, properties_suggested):
current_properties = set(current_properties.keys())

Expand Down Expand Up @@ -173,10 +182,15 @@ def _process_item_completeness(current_properties, properties_suggested):
items.SCHOLARLY_ARTICLE,
name=name + '.revision.is_scholarlyarticle'
)

# Feature declared to return a bool, processed by
# `_process_is_astronomical_object` and depending on the revision's entity
# datasource.
is_astronomical_object = Feature(
name + '.revision.page.is_astronomical_object',
_process_is_astronomical_object,
returns=bool,
depends_on=[wikibase_.revision.datasources.entity])

local_wiki = [
is_scholarlyarticle,
is_astronomical_object,
is_human,
is_blp,
aggregators.len(complete_translations),
Expand Down
Empty file.
12 changes: 12 additions & 0 deletions articlequality/feature_lists/wikidatawiki_data/items_lists.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# This list was generated using the query at https://w.wiki/aaD
# Only the top 50 entries are kept, because they cover 97% of cases.
# Item ids (Q-ids), kept in their original order.
ASTRONOMICAL_OBJECTS = [
    'Q523', 'Q318', 'Q1931185', 'Q1457376', 'Q2247863',
    'Q3863', 'Q83373', 'Q2154519', 'Q726242', 'Q1153690',
    'Q204107', 'Q71963409', 'Q67206691', 'Q1151284', 'Q67206701',
    'Q66619666', 'Q72802727', 'Q2168098', 'Q6243', 'Q72802508',
    'Q11282', 'Q72803170', 'Q1332364', 'Q72802977', 'Q6999',
    'Q1491746', 'Q272447', 'Q497654', 'Q204194', 'Q130019',
    'Q744691', 'Q71798532', 'Q46587', 'Q11276', 'Q71965429',
    'Q5871', 'Q72803622', 'Q72803426', 'Q3937', 'Q72803708',
    'Q168845', 'Q24452', 'Q67201574', 'Q2557101', 'Q691269',
    'Q13632', 'Q10451997', 'Q28738741', 'Q22247',
]
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# Wikidata feature lists

# To add/delete new properties from the `wikidatawiki.py` feature list we need to manually
# update this file.
# To add properties to, or delete properties from, the `wikidatawiki.py`
# feature list, this file must be updated manually.

# Currently the file contains a list of `NON_EXTERNAL_IDENTIFIERS` that we want to extract
# as features. `NON_EXTERNAL_IDENTIFIERS` is used instead of `EXTERNAL_IDENTIFIERS` because
# external identifiers comprise about 5000+ of the total 8000+ currently on the database.
# Using NON_EXTERNAL_IDENTIFIERS (~2500) reduces the processing time.
# Currently the file contains a list of `NONEXTERNAL_IDENTIFIERS` that we
# want to extract as features. `NONEXTERNAL_IDENTIFIERS` is used instead of
# `EXTERNAL_IDENTIFIERS` because external identifiers make up about 5000+ of
# the total 8000+ currently in the database.
# Using NONEXTERNAL_IDENTIFIERS (~2500) reduces the processing time.

# This list was obtained by making this query: https://quarry.wmflabs.org/query/47645
# This list was obtained by making this query:
# https://quarry.wmflabs.org/query/47645
# and then doing a search and replace to format it as a python list.

NONEXTERNAL_IDENTIFIERS = [
Expand Down
36 changes: 36 additions & 0 deletions tests/feature_lists/test_wikidatawiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,39 @@ def test_references_features(q7251):
def test_external_identifiers(q7251):
    """The q7251 fixture should solve to 79 external identifiers."""
    identifier_count = solve(
        aggregators.len(wikidatawiki.external_identifiers),
        cache={entity: q7251},
    )
    assert identifier_count == 79


def test_is_astronomical_object(q7251):
    """
    An entity with a P31 statement pointing at Q1931185 solves to True;
    the q7251 fixture solves to False.
    """
    # Build the minimal entity JSON from the inside out.
    instance_of_value = {
        'entity-type': 'item',
        'numeric-id': 1931185,
        'id': 'Q1931185'
    }
    instance_of_snak = {
        'snaktype': 'value',
        'property': 'P31',
        'datavalue': {
            'value': instance_of_value,
            'type': 'wikibase-entityid'
        },
        'datatype': 'wikibase-item'
    }
    instance_of_statement = {
        'mainsnak': instance_of_snak,
        'type': 'statement',
        'id': 'Q10934$46D9E951-DD5A-4832-A2D2-DB0CE0425E0E',
        'rank': 'normal'
    }
    crab_nebula_doc = {
        'title': 'Q207436',
        'id': 'Q207436',
        'claims': {'P31': [instance_of_statement]}
    }

    crab_nebula = mwbase.Entity.from_json(crab_nebula_doc)

    nebula_result = solve(wikidatawiki.is_astronomical_object,
                          cache={entity: crab_nebula})
    assert nebula_result is True

    q7251_result = solve(wikidatawiki.is_astronomical_object,
                         cache={entity: q7251})
    assert q7251_result is False