-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Wikidata Elastic Search - Backend Implementation (#14)
* Added wikidata search extension, and search API routes * Updated search config, added individual search options for classes, entities and props * Added transforms to search payload, id2uri function for classes and properties * Added function level documentation and typing to the search implementation * Updated OntClass uri2id method comment * Updated Search extension_interface, ISearch to IEntitySearch and IOntologySearch * Fixed API read directly from local implementation * Enhanced typing to search payload, introduced search payload dataclass, improved typing by creating search models * Removed SearchPayload model and updated all the functions in search interface to return List[SearchItem], Enhanced typing covered in the PR reviews * Updated and renamed SearchItem dataclass to SearchResult * Updated SearchResult dataclass docs comment
- Loading branch information
1 parent
4acc6b8
commit 55ebb0a
Showing
8 changed files
with
220 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import threading | ||
from typing import Dict, List, Union, Literal | ||
from flask.blueprints import Blueprint | ||
from sm.misc.funcs import import_func | ||
from sand.config import SETTINGS | ||
from flask import request, jsonify | ||
|
||
from sand.extension_interface.search import IEntitySearch, IOntologySearch | ||
from sand.models.search import SearchResult | ||
from gena.serializer import get_dataclass_serializer | ||
|
||
search_bp = Blueprint("search", "search") | ||
|
||
GetSearchCache = threading.local() | ||
serializer = get_dataclass_serializer(SearchResult) | ||
|
||
|
||
def get_search(name: Literal['classes', 'entities', 'props']) -> Union[IEntitySearch, IOntologySearch]:
    """
    Return the search implementation configured for ``name``.

    The constructor path is read from ``SETTINGS["search"][name]``, resolved
    with ``import_func`` and called without arguments. The resulting instance
    is cached in the thread-local ``GetSearchCache`` under ``name``, so each
    implementation is constructed at most once per thread.

    Args:
        name: Which configured search backend to return.

    Returns:
        The cached (or newly constructed) search implementation for ``name``.

    Raises:
        Whatever ``SETTINGS["search"][name]`` raises when ``name`` has no
        configuration entry (presumably ``KeyError`` -- confirm against the
        type of ``SETTINGS``).
    """
    # The per-thread dict is created on first use in each thread.
    if not hasattr(GetSearchCache, "search"):
        GetSearchCache.search = {}

    # Populate the cache per name, not only when the dict is first created;
    # otherwise the second distinct name requested on a thread is never
    # stored and the final lookup fails.
    cache = GetSearchCache.search
    if name not in cache:
        constructor = SETTINGS["search"][name]
        cache[name] = import_func(constructor)()

    return cache[name]
|
||
|
||
@search_bp.route(f"/{search_bp.name}/classes", methods=["GET"])
def search_classes():
    """API route to search for classes by name.

    Reads the search text from the ``q`` query parameter and responds with a
    JSON object ``{"items": [...]}`` holding one serialized ``SearchResult``
    per match. A missing or blank ``q`` yields an empty ``items`` list.
    """
    search_text = request.args.get('q')
    if search_text is None or not search_text.strip():
        # ``request.args.get`` returns None when ``q`` is absent; passing that
        # on would search for the literal text "None".
        return jsonify({'items': []})

    wikidata_search = get_search('classes')
    search_results = wikidata_search.find_class_by_name(search_text)
    serialized_payload = [serializer(item) for item in search_results]
    return jsonify({'items': serialized_payload})
|
||
|
||
@search_bp.route(f"/{search_bp.name}/entities", methods=["GET"])
def search_entities():
    """API route to search for entities by name.

    Reads the search text from the ``q`` query parameter and responds with a
    JSON object ``{"items": [...]}`` holding one serialized ``SearchResult``
    per match. A missing or blank ``q`` yields an empty ``items`` list.
    """
    search_text = request.args.get('q')
    if search_text is None or not search_text.strip():
        # ``request.args.get`` returns None when ``q`` is absent; there is
        # nothing meaningful to send to the search backend in that case.
        return jsonify({'items': []})

    wikidata_search = get_search('entities')
    search_results = wikidata_search.find_entity_by_name(search_text)
    serialized_payload = [serializer(item) for item in search_results]
    return jsonify({'items': serialized_payload})
|
||
|
||
@search_bp.route(f"/{search_bp.name}/props", methods=["GET"])
def search_props():
    """API route to search for properties by name.

    Reads the search text from the ``q`` query parameter and responds with a
    JSON object ``{"items": [...]}`` holding one serialized ``SearchResult``
    per match. A missing or blank ``q`` yields an empty ``items`` list.
    """
    search_text = request.args.get('q')
    if search_text is None or not search_text.strip():
        # ``request.args.get`` returns None when ``q`` is absent; there is
        # nothing meaningful to send to the search backend in that case.
        return jsonify({'items': []})

    wikidata_search = get_search('props')
    search_results = wikidata_search.find_props_by_name(search_text)
    serialized_payload = [serializer(item) for item in search_results]
    return jsonify({'items': serialized_payload})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,29 @@ | ||
from abc import ABC, abstractmethod | ||
from typing import List | ||
from sand.models.search import SearchResult | ||
|
||
|
||
class ISearch(ABC): | ||
""" Search Interface to support searches from multiple | ||
class IEntitySearch(ABC): | ||
""" Entity Search Interface to support searches from multiple | ||
KG datastores. | ||
""" | ||
@abstractmethod | ||
def find_class_by_name(self): | ||
"""Search Class using name""" | ||
def find_entity_by_name(self, search_text: str) -> List[SearchResult]: | ||
"""Search Entity using name""" | ||
pass | ||
|
||
|
||
class IOntologySearch(ABC): | ||
""" Class and Property Ontology Search Interface to support searches from multiple | ||
KG datastores. | ||
""" | ||
|
||
@abstractmethod | ||
def find_entity_by_name(self): | ||
"""Search Entity using name""" | ||
def find_class_by_name(self, search_text: str) -> List[SearchResult]: | ||
"""Search Class using name""" | ||
pass | ||
|
||
@abstractmethod | ||
def find_props_by_name(self): | ||
def find_props_by_name(self, search_text: str) -> List[SearchResult]: | ||
"""Search properties using name""" | ||
pass | ||
pass |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import requests | ||
from typing import Dict, List | ||
import nh3 | ||
from sand.extension_interface.search import IEntitySearch, IOntologySearch | ||
from sand.models.entity import Entity | ||
from sand.models.ontology import OntClass, OntProperty, OntClassAR | ||
from sand.models.search import SearchResult | ||
|
||
|
||
class WikidataSearch(IEntitySearch, IOntologySearch):
    """Entity, class and property search backed by the Wikidata search API."""

    # Seconds to wait on the Wikidata API before giving up. Without a timeout
    # ``requests`` waits indefinitely and a stalled call would block the
    # request handler that triggered it.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.wikidata_url = "https://www.wikidata.org/w/api.php"
        # Base query parameters; the get_*_search_params methods copy this
        # dict and override "srnamespace" / "srsearch" per search kind.
        self.PARAMS = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": "",
            "utf8": "",
            "srnamespace": 0,
            "srlimit": 10,
            "srprop": "snippet|titlesnippet"
        }
        # Created lazily on first class lookup (see get_local_class_properties).
        self.ont_class_ar = None

    def get_class_search_params(self, search_text: str) -> Dict:
        """Return the wikidata API parameters for a class search.

        The query is prefixed with ``haswbstatement:P279`` so that only items
        carrying a P279 statement are matched.
        """
        class_params = self.PARAMS.copy()
        class_params["srnamespace"] = 0
        class_params['srsearch'] = f"haswbstatement:P279 {search_text}"
        return class_params

    def get_local_class_properties(self, id: str) -> OntClass:
        """Fetch the locally stored class metadata for the given class ID.

        The ``OntClassAR`` instance is created on first use and reused after.
        """
        if self.ont_class_ar is None:
            self.ont_class_ar = OntClassAR()
        return self.ont_class_ar[id]

    def get_entity_search_params(self, search_text: str) -> Dict:
        """Return the wikidata API parameters for an entity search."""
        entity_params = self.PARAMS.copy()
        entity_params["srnamespace"] = 0
        entity_params['srsearch'] = search_text
        return entity_params

    def get_props_search_params(self, search_text: str) -> Dict:
        """Return the wikidata API parameters for a property search.

        Uses namespace 120 (presumably Wikidata's ``Property:`` namespace --
        result titles are split on ":" in find_props_by_name).
        """
        props_params = self.PARAMS.copy()
        props_params["srnamespace"] = 120
        props_params['srsearch'] = search_text
        return props_params

    def _run_search(self, request_params: Dict) -> List[Dict]:
        """Send one search request and return the raw list of result dicts.

        Raises:
            requests.RequestException: On timeout, connection failure or an
                HTTP error status.
            KeyError: If the JSON response has no ``query.search`` entry.
        """
        api_data = requests.get(
            self.wikidata_url,
            params=request_params,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Surface HTTP failures explicitly instead of trying to parse the body.
        api_data.raise_for_status()
        return api_data.json()['query']['search']

    def find_class_by_name(self, search_text: str) -> List[SearchResult]:
        """
        Uses Wikidata API to search for classes using their name/text.
        Uses local ID based class search to fetch label and description data.
        """
        payload_results = []
        for search_result in self._run_search(self.get_class_search_params(search_text)):
            class_id = search_result['title']
            local_class_props = self.get_local_class_properties(class_id)
            payload_results.append(
                SearchResult(
                    label=local_class_props.label,
                    id=class_id,
                    description=local_class_props.description,
                    uri=OntClass.id2uri(class_id),
                )
            )
        return payload_results

    def find_entity_by_name(self, search_text: str) -> List[SearchResult]:
        """Uses Wikidata API to search for entities using their name/text."""
        search_results = self._run_search(self.get_entity_search_params(search_text))
        return [
            SearchResult(
                # tags=set() allows no HTML tags, stripping the markup the
                # API puts into its snippets.
                label=nh3.clean(search_result['titlesnippet'], tags=set()),
                id=search_result['title'],
                description=nh3.clean(search_result['snippet'], tags=set()),
                uri=Entity.id2uri(search_result['title']),
            )
            for search_result in search_results
        ]

    def find_props_by_name(self, search_text: str) -> List[SearchResult]:
        """Uses Wikidata API to search for properties using their name/text."""
        payload_results = []
        for search_result in self._run_search(self.get_props_search_params(search_text)):
            # The ID is the part of the title after the first ":".
            prop_id = search_result['title'].split(":")[1]
            payload_results.append(
                SearchResult(
                    label=nh3.clean(search_result['titlesnippet'], tags=set()),
                    id=prop_id,
                    description=nh3.clean(search_result['snippet'], tags=set()),
                    uri=OntProperty.id2uri(prop_id),
                )
            )
        return payload_results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from dataclasses import dataclass | ||
|
||
|
||
@dataclass
class SearchResult:
    """
    One hit from a search, carrying the values reported for that hit.
    """
    # Display name of the hit.
    label: str
    # Identifier of the hit.
    id: str
    # Short descriptive text for the hit.
    description: str
    # URI of the hit.
    uri: str
|