diff --git a/pyproject.toml b/pyproject.toml index a702c4e..146ffd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ loguru = "^0.6.0" orjson = "^3.8.2" drepr = "^2.10.0" rsoup = "^2.5.1" +nh3 = "^0.2.13" lat_lon_parser = "^1.3.0" diff --git a/sand/config.py b/sand/config.py index f91939e..89665a9 100644 --- a/sand/config.py +++ b/sand/config.py @@ -31,6 +31,7 @@ "ont_classes": { "constructor": "sand.extensions.wikidata.get_ontclass_db", "uri2id": "sand.extensions.wikidata.uri2id", + "id2uri": "sm.namespaces.prelude.WikidataNamespace.get_entity_abs_uri", "args": { "dbfile": "/tmp/wdclasses.db", "proxy": True, @@ -41,6 +42,7 @@ "ont_props": { "constructor": "sand.extensions.wikidata.get_ontprop_db", "uri2id": "sand.extensions.wikidata.uri2id", + "id2uri": "sm.namespaces.prelude.WikidataNamespace.get_prop_abs_uri", "args": { "dbfile": "/tmp/wdprops.db", "proxy": True, diff --git a/sand/extensions/search/wikidata_search.py b/sand/extensions/search/wikidata_search.py index 3b23609..5518f05 100644 --- a/sand/extensions/search/wikidata_search.py +++ b/sand/extensions/search/wikidata_search.py @@ -1,12 +1,15 @@ import requests from flask import request, jsonify +import nh3 from sand.controllers.search import ISearch - +from sand.models.entity import Entity +from sand.models.ontology import OntClass, OntProperty class WikidataSearch(ISearch): def __init__(self): self.wikidata_url = "https://www.wikidata.org/w/api.php" + self.local_class_idsearch_uri = "http://0.0.0.0:5525/api/classes/" self.PARAMS = { "action": "query", "format": "json", @@ -17,6 +20,12 @@ def __init__(self): "srlimit": 10, "srprop": "snippet|titlesnippet" } + self.search_item_template = { + "label": "", + "id": "", + "description": "", + "uri": "", + } def get_class_search_params(self, search_text): class_params = self.PARAMS.copy() @@ -24,6 +33,10 @@ def get_class_search_params(self, search_text): class_params['srsearch'] = f"haswbstatement:P279 {search_text}" return class_params + def 
get_local_class_properties(self, id): + api_data = requests.get(self.local_class_idsearch_uri+str(id)) + return api_data.json() + def get_entity_search_params(self, search_text): entity_params = self.PARAMS.copy() entity_params["srnamespace"] = 0 @@ -38,15 +51,43 @@ def get_props_search_params(self, search_text): def find_class_by_name(self, search_text): request_params = self.get_class_search_params(search_text) - data = requests.get(self.wikidata_url, request_params) - return data.json() + api_data = requests.get(self.wikidata_url, request_params) + search_items = api_data.json()['query']['search'] + payload = {"items": []} + for search_item in search_items: + item = self.search_item_template.copy() + item['id'] = search_item['title'] + local_class_props = self.get_local_class_properties(item['id']) + item['label'] = local_class_props['label'] + item['description'] = local_class_props['description'] + item['uri'] = OntClass.id2uri(item['id']) + payload['items'].append(item) + return payload def find_entity_by_name(self, search_text): request_params = self.get_entity_search_params(search_text) - data = requests.get(self.wikidata_url, request_params) - return data.json()['query']['search'] + api_data = requests.get(self.wikidata_url, request_params) + search_items = api_data.json()['query']['search'] + payload = {"items":[]} + for search_item in search_items: + item = self.search_item_template.copy() + item['label'] = nh3.clean(search_item['titlesnippet'], tags=set()) + item['id'] = search_item['title'] + item['description'] = nh3.clean(search_item['snippet'], tags=set()) + item['uri'] = Entity.id2uri(item['id']) + payload['items'].append(item) + return payload def find_props_by_name(self, search_text): request_params = self.get_props_search_params(search_text) - data = requests.get(self.wikidata_url, request_params) - return data.json()['query']['search'] + api_data = requests.get(self.wikidata_url, request_params) + search_items = 
api_data.json()['query']['search'] + payload = {"items": []} + for search_item in search_items: + item = self.search_item_template.copy() + item['label'] = nh3.clean(search_item['titlesnippet'], tags=set()) + item['id'] = search_item['title'].split(":")[1] + item['description'] = nh3.clean(search_item['snippet'], tags=set()) + item['uri'] = OntProperty.id2uri(item['id']) + payload['items'].append(item) + return payload diff --git a/sand/models/ontology.py b/sand/models/ontology.py index 3cd64d8..c156126 100644 --- a/sand/models/ontology.py +++ b/sand/models/ontology.py @@ -24,9 +24,15 @@ def readable_label(self): def uri2id(uri: str) -> str: """Convert class URI to entity ID.""" raise NotImplementedError( - "The method is set when its store is initialized. Check the call order to ensure `OntClassAR` is called first" + "The method is set when its store is initialized. Check the call order to ensure `OntClassAR` is called first" ) + @staticmethod + def id2uri(id: str) -> str: + """Convert class ID to class URI.""" + raise NotImplementedError( + "The method is set when its store is initialized. Check the call order to ensure `OntClassAR` is called first" + ) OntPropertyDataType = Literal[ "monolingualtext", @@ -62,6 +68,12 @@ def uri2id(uri: str) -> str: "The method is set when its store is initialized. Check the call order to ensure `OntPropertyAR` is called first" ) + @staticmethod + def id2uri(id: str) -> str: + """Convert property ID to property URI.""" + raise NotImplementedError( + "The method is set when its store is initialized. 
Check the call order to ensure `OntPropertyAR` is called first" + ) PROP_AR = None CLASS_AR = None @@ -89,6 +101,7 @@ def OntPropertyAR() -> Mapping[str, OntProperty]: func = import_func(cfg["constructor"]) PROP_AR = func(**cfg["args"]) OntProperty.uri2id = import_func(cfg["uri2id"]) + OntProperty.id2uri = import_func(cfg["id2uri"]) return PROP_AR @@ -102,5 +115,5 @@ def OntClassAR() -> Mapping[str, OntClass]: func = import_func(cfg["constructor"]) CLASS_AR = func(**cfg["args"]) OntClass.uri2id = import_func(cfg["uri2id"]) - + OntClass.id2uri = import_func(cfg["id2uri"]) return CLASS_AR