diff --git a/pyproject.toml b/pyproject.toml index a702c4e..146ffd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ loguru = "^0.6.0" orjson = "^3.8.2" drepr = "^2.10.0" rsoup = "^2.5.1" +nh3 = "^0.2.13" lat_lon_parser = "^1.3.0" diff --git a/sand/app.py b/sand/app.py index a51b30b..5be571a 100644 --- a/sand/app.py +++ b/sand/app.py @@ -9,6 +9,7 @@ from sand.controllers.assistant import assistant_bp from sand.controllers.project import project_bp from sand.controllers.table import table_bp, table_row_bp +from sand.controllers.search import search_bp from sand.controllers.settings import setting_bp from sand.deserializer import deserialize_graph from sand.models import EntityAR, SemanticModel @@ -27,6 +28,7 @@ assistant_bp, table_row_bp, setting_bp, + search_bp, generate_api( SemanticModel, deserializers={"data": deserialize_graph}, @@ -35,25 +37,18 @@ generate_readonly_api_4dict( "entities", serialize=serialize_entity, - id2ent=ChainedMapping( - EntityAR(), import_attr(SETTINGS["entity"]["default"]) - ), + id2ent=EntityAR(), ), generate_readonly_api_4dict( "classes", serialize=serialize_class, - id2ent=ChainedMapping( - OntClassAR(), import_attr(SETTINGS["ont_classes"]["default"]) - ), + id2ent=OntClassAR(), unique_field_funcs={"uri": OntClass.uri2id}, ), generate_readonly_api_4dict( "properties", serialize=serialize_property, - id2ent=ChainedMapping( - OntPropertyAR(), - import_attr(SETTINGS["ont_props"]["default"]), - ), + id2ent=OntPropertyAR(), unique_field_funcs={"uri": OntProperty.uri2id}, ), ], diff --git a/sand/config.py b/sand/config.py index 4fb8e88..89665a9 100644 --- a/sand/config.py +++ b/sand/config.py @@ -31,6 +31,7 @@ "ont_classes": { "constructor": "sand.extensions.wikidata.get_ontclass_db", "uri2id": "sand.extensions.wikidata.uri2id", + "id2uri": "sm.namespaces.prelude.WikidataNamespace.get_entity_abs_uri", "args": { "dbfile": "/tmp/wdclasses.db", "proxy": True, @@ -41,6 +42,7 @@ "ont_props": { "constructor": "sand.extensions.wikidata.get_ontprop_db", "uri2id": "sand.extensions.wikidata.uri2id", + "id2uri": "sm.namespaces.prelude.WikidataNamespace.get_prop_abs_uri", "args": { "dbfile": "/tmp/wdprops.db", "proxy": True, @@ -62,6 +64,11 @@ "mtab": "sand.extensions.assistants.mtab.MTabAssistant", # "default": "mtab", }, + "search": { + "entities": "sand.extensions.search.wikidata_search.WikidataSearch", + "classes": "sand.extensions.search.wikidata_search.WikidataSearch", + "props": "sand.extensions.search.wikidata_search.WikidataSearch" + }, "exports": { "drepr": "sand.extensions.export.drepr.main.DreprExport", "default": "sand.extensions.export.drepr.main.DreprExport" diff --git a/sand/controllers/search.py b/sand/controllers/search.py new file mode 100644 index 0000000..fee8a66 --- /dev/null +++ b/sand/controllers/search.py @@ -0,0 +1,61 @@ +import threading +from typing import Dict, List, Union, Literal +from flask.blueprints import Blueprint +from sm.misc.funcs import import_func +from sand.config import SETTINGS +from flask import request, jsonify + +from sand.extension_interface.search import IEntitySearch, IOntologySearch +from sand.models.search import SearchResult +from gena.serializer import get_dataclass_serializer + +search_bp = Blueprint("search", "search") + +GetSearchCache = threading.local() +serializer = get_dataclass_serializer(SearchResult) + + +def get_search(name: Literal['classes', 'entities', 'props']) -> Union[IEntitySearch, IOntologySearch]: + """ + Returns an implementation of an ISearch Interface from the + configuration file. + """ + global GetSearchCache + + if not hasattr(GetSearchCache, "search"): + GetSearchCache.search = {} + search_config = SETTINGS["search"] + constructor = search_config[name] + GetSearchCache.search[name] = import_func(constructor)() + + return GetSearchCache.search[name] + + +@search_bp.route(f"/{search_bp.name}/classes", methods=["GET"]) +def search_classes(): + """API Route to search for classes with their names""" + search_text = request.args.get('q') + wikidata_search = get_search('classes') + search_results = wikidata_search.find_class_by_name(search_text) + serialized_payload = [serializer(item) for item in search_results] + return jsonify({'items': serialized_payload}) + + +@search_bp.route(f"/{search_bp.name}/entities", methods=["GET"]) +def search_entities(): + """API Route to search for entities with their names""" + search_text = request.args.get('q') + wikidata_search = get_search('entities') + search_results = wikidata_search.find_entity_by_name(search_text) + serialized_payload = [serializer(item) for item in search_results] + return jsonify({'items': serialized_payload}) + + +@search_bp.route(f"/{search_bp.name}/props", methods=["GET"]) +def search_props(): + """API Route to search for properties with their names""" + search_text = request.args.get('q') + wikidata_search = get_search('props') + search_results = wikidata_search.find_props_by_name(search_text) + serialized_payload = [serializer(item) for item in search_results] + return jsonify({'items': serialized_payload}) diff --git a/sand/extension_interface/search.py b/sand/extension_interface/search.py index 861b1cd..fd7333d 100644 --- a/sand/extension_interface/search.py +++ b/sand/extension_interface/search.py @@ -1,21 +1,29 @@ from abc import ABC, abstractmethod +from typing import List +from sand.models.search import SearchResult -class ISearch(ABC): - """ Search Interface to support searches from multiple +class IEntitySearch(ABC): + """ Entity Search Interface to support searches from multiple KG datastores. """ @abstractmethod - def find_class_by_name(self): - """Search Class using name""" + def find_entity_by_name(self, search_text: str) -> List[SearchResult]: + """Search Entity using name""" pass + +class IOntologySearch(ABC): + """ Class and Property Ontology Search Interface to support searches from multiple + KG datastores. + """ + @abstractmethod - def find_entity_by_name(self): - """Search Entity using name""" + def find_class_by_name(self, search_text: str) -> List[SearchResult]: + """Search Class using name""" pass @abstractmethod - def find_props_by_name(self): + def find_props_by_name(self, search_text: str) -> List[SearchResult]: """Search properties using name""" - pass + pass \ No newline at end of file diff --git a/sand/extensions/search/wikidata_search.py b/sand/extensions/search/wikidata_search.py new file mode 100644 index 0000000..a72f1f3 --- /dev/null +++ b/sand/extensions/search/wikidata_search.py @@ -0,0 +1,103 @@ +import requests +from typing import Dict, List +import nh3 +from sand.extension_interface.search import IEntitySearch, IOntologySearch +from sand.models.entity import Entity +from sand.models.ontology import OntClass, OntProperty, OntClassAR +from sand.models.search import SearchResult + + +class WikidataSearch(IEntitySearch, IOntologySearch): + + def __init__(self): + self.wikidata_url = "https://www.wikidata.org/w/api.php" + self.PARAMS = { + "action": "query", + "format": "json", + "list": "search", + "srsearch": "", + "utf8": "", + "srnamespace": 0, + "srlimit": 10, + "srprop": "snippet|titlesnippet" + } + self.ont_class_ar = None + + def get_class_search_params(self, search_text: str) -> Dict: + """Updates class search parameters for wikidata API""" + class_params = self.PARAMS.copy() + class_params["srnamespace"] = 0 + class_params['srsearch'] = f"haswbstatement:P279 {search_text}" + return class_params + + def get_local_class_properties(self, id: str) -> OntClass: + """Calls local class search API to fetch all class metadata using class ID""" + if self.ont_class_ar is None: + self.ont_class_ar = OntClassAR() + return self.ont_class_ar[id] + + def get_entity_search_params(self, search_text: str) -> Dict: + """Updates entity search parameters for wikidata API""" + entity_params = self.PARAMS.copy() + entity_params["srnamespace"] = 0 + entity_params['srsearch'] = search_text + return entity_params + + def get_props_search_params(self, search_text: str) -> Dict: + """Updates property search parameters for wikidata API""" + props_params = self.PARAMS.copy() + props_params["srnamespace"] = 120 + props_params['srsearch'] = search_text + return props_params + + def find_class_by_name(self, search_text: str) -> List[SearchResult]: + """ + Uses Wikidata API to search for classes using their name/text. + Uses local ID based class search to fetch label and description data. + """ + request_params = self.get_class_search_params(search_text) + api_data = requests.get(self.wikidata_url, request_params) + search_results = api_data.json()['query']['search'] + payload_results = [] + for search_result in search_results: + local_class_props = self.get_local_class_properties(search_result['title']) + item = SearchResult( + label=local_class_props.label, + id=search_result['title'], + description=local_class_props.description, + uri=OntClass.id2uri(search_result['title']) + ) + payload_results.append(item) + return payload_results + + def find_entity_by_name(self, search_text: str) -> List[SearchResult]: + """Uses Wikidata API to search for entities using their name/text.""" + request_params = self.get_entity_search_params(search_text) + api_data = requests.get(self.wikidata_url, request_params) + search_results = api_data.json()['query']['search'] + payload_results = [] + for search_result in search_results: + item = SearchResult( + label=nh3.clean(search_result['titlesnippet'], tags=set()), + id=search_result['title'], + description=nh3.clean(search_result['snippet'], tags=set()), + uri=Entity.id2uri(search_result['title']) + ) + payload_results.append(item) + return payload_results + + def find_props_by_name(self, search_text: str) -> List[SearchResult]: + """Uses Wikidata API to search for properties using their name/text.""" + request_params = self.get_props_search_params(search_text) + api_data = requests.get(self.wikidata_url, request_params) + search_results = api_data.json()['query']['search'] + payload_results = [] + for search_result in search_results: + item = SearchResult( + label=nh3.clean(search_result['titlesnippet'], tags=set()), + id=search_result['title'].split(":")[1], + description=nh3.clean(search_result['snippet'], tags=set()), + uri=OntProperty.id2uri(search_result['title'].split(":")[1]) + ) + payload_results.append(item) + return payload_results diff --git a/sand/models/ontology.py b/sand/models/ontology.py index 3cd64d8..5c61f5f 100644 --- a/sand/models/ontology.py +++ b/sand/models/ontology.py @@ -27,6 +27,12 @@ def uri2id(uri: str) -> str: "The method is set when its store is initialized. Check the call order to ensure `OntClassAR` is called first" ) + @staticmethod + def id2uri(id: str) -> str: + """Convert class ID to class URI.""" + raise NotImplementedError( + "The method is set when its store is initialized. Check the call order to ensure `OntClassAR` is called first" + ) OntPropertyDataType = Literal[ "monolingualtext", @@ -62,6 +68,12 @@ def uri2id(uri: str) -> str: "The method is set when its store is initialized. Check the call order to ensure `OntPropertyAR` is called first" ) + @staticmethod + def id2uri(id: str) -> str: + """Convert property ID to property URI.""" + raise NotImplementedError( + "The method is set when its store is initialized. Check the call order to ensure `OntPropertyAR` is called first" + ) PROP_AR = None CLASS_AR = None @@ -89,6 +101,7 @@ def OntPropertyAR() -> Mapping[str, OntProperty]: func = import_func(cfg["constructor"]) PROP_AR = func(**cfg["args"]) OntProperty.uri2id = import_func(cfg["uri2id"]) + OntProperty.id2uri = import_func(cfg["id2uri"]) return PROP_AR @@ -102,5 +115,5 @@ def OntClassAR() -> Mapping[str, OntClass]: func = import_func(cfg["constructor"]) CLASS_AR = func(**cfg["args"]) OntClass.uri2id = import_func(cfg["uri2id"]) - + OntClass.id2uri = import_func(cfg["id2uri"]) return CLASS_AR diff --git a/sand/models/search.py b/sand/models/search.py new file mode 100644 index 0000000..aaad64c --- /dev/null +++ b/sand/models/search.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass + + +@dataclass +class SearchResult: + """ + Search Result dataclass to save the values of each search result in a search + """ + label: str + id: str + description: str + uri: str +