Skip to content

Commit

Permalink
Added transforms to search payload, id2uri function for classes and properties
Browse files Browse the repository at this point in the history
  • Loading branch information
punith300i committed Jun 5, 2023
1 parent 23948d4 commit 41d964f
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 9 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ loguru = "^0.6.0"
orjson = "^3.8.2"
drepr = "^2.10.0"
rsoup = "^2.5.1"
nh3 = "^0.2.13"

lat_lon_parser = "^1.3.0"

Expand Down
2 changes: 2 additions & 0 deletions sand/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"ont_classes": {
"constructor": "sand.extensions.wikidata.get_ontclass_db",
"uri2id": "sand.extensions.wikidata.uri2id",
"id2uri": "sm.namespaces.prelude.WikidataNamespace.get_entity_abs_uri",
"args": {
"dbfile": "/tmp/wdclasses.db",
"proxy": True,
Expand All @@ -41,6 +42,7 @@
"ont_props": {
"constructor": "sand.extensions.wikidata.get_ontprop_db",
"uri2id": "sand.extensions.wikidata.uri2id",
"id2uri": "sm.namespaces.prelude.WikidataNamespace.get_prop_abs_uri",
"args": {
"dbfile": "/tmp/wdprops.db",
"proxy": True,
Expand Down
55 changes: 48 additions & 7 deletions sand/extensions/search/wikidata_search.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import requests
from flask import request, jsonify
import nh3
from sand.controllers.search import ISearch

from sand.models.entity import Entity
from sand.models.ontology import OntClass, OntProperty

class WikidataSearch(ISearch):

def __init__(self):
self.wikidata_url = "https://www.wikidata.org/w/api.php"
self.local_class_idsearch_uri = "http://0.0.0.0:5525/api/classes/"
self.PARAMS = {
"action": "query",
"format": "json",
Expand All @@ -17,13 +20,23 @@ def __init__(self):
"srlimit": 10,
"srprop": "snippet|titlesnippet"
}
self.search_item_template = {
"label": "",
"id": "",
"description": "",
"uri": "",
}

def get_class_search_params(self, search_text):
    """Build the request parameters for a class search.

    The result is a new dict made from ``self.PARAMS`` (which is left
    untouched) with ``srnamespace`` set to 0 and ``srsearch`` set to the
    search text prefixed by the ``haswbstatement:P279`` filter.
    """
    return {
        **self.PARAMS,
        "srnamespace": 0,
        "srsearch": f"haswbstatement:P279 {search_text}",
    }

def get_local_class_properties(self, id, timeout=10):
    """Fetch the record of one class from the local class endpoint.

    Args:
        id: Class identifier; it is appended to
            ``self.local_class_idsearch_uri`` to form the request URL.
        timeout: Seconds to wait for the local service before giving up.
            Without a timeout ``requests`` waits indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with an error status,
            instead of handing an error body to the caller as class data.
        requests.Timeout: If the service does not answer within ``timeout``.
    """
    response = requests.get(f"{self.local_class_idsearch_uri}{id}", timeout=timeout)
    response.raise_for_status()
    return response.json()

def get_entity_search_params(self, search_text):
entity_params = self.PARAMS.copy()
entity_params["srnamespace"] = 0
Expand All @@ -38,15 +51,43 @@ def get_props_search_params(self, search_text):

def find_class_by_name(self, search_text):
    """Search Wikidata for classes matching ``search_text``.

    The title of every search hit is used as the class id. Label and
    description come from ``get_local_class_properties`` and the URI from
    ``OntClass.id2uri``.

    Returns:
        ``{"items": [...]}`` where each item is a copy of
        ``self.search_item_template`` (label, id, description, uri)
        filled in for one hit.
    """
    request_params = self.get_class_search_params(search_text)
    # A leftover early `return data.json()` used to sit here; it issued the
    # request twice and made the payload transform below unreachable.
    # The timeout keeps an unresponsive API from hanging the caller forever.
    api_data = requests.get(self.wikidata_url, request_params, timeout=10)
    search_items = api_data.json()['query']['search']

    payload = {"items": []}
    for search_item in search_items:
        class_id = search_item['title']
        # One local lookup per hit: label/description are taken from the
        # local class endpoint rather than from the search snippet.
        local_class_props = self.get_local_class_properties(class_id)

        item = self.search_item_template.copy()
        item['id'] = class_id
        item['label'] = local_class_props['label']
        item['description'] = local_class_props['description']
        item['uri'] = OntClass.id2uri(class_id)
        payload['items'].append(item)
    return payload

def find_entity_by_name(self, search_text):
    """Search Wikidata for entities matching ``search_text``.

    The title of every search hit is used as the entity id, and the URI
    comes from ``Entity.id2uri``. Label and description are the hit's
    ``titlesnippet`` and ``snippet`` passed through ``nh3.clean`` with an
    empty set of allowed tags.

    Returns:
        ``{"items": [...]}`` where each item is a copy of
        ``self.search_item_template`` (label, id, description, uri)
        filled in for one hit.
    """
    request_params = self.get_entity_search_params(search_text)
    # A leftover early `return data.json()['query']['search']` used to sit
    # here; it issued the request twice and made the transform unreachable.
    # The timeout keeps an unresponsive API from hanging the caller forever.
    api_data = requests.get(self.wikidata_url, request_params, timeout=10)
    search_items = api_data.json()['query']['search']

    payload = {"items": []}
    for search_item in search_items:
        entity_id = search_item['title']

        item = self.search_item_template.copy()
        item['label'] = nh3.clean(search_item['titlesnippet'], tags=set())
        item['id'] = entity_id
        item['description'] = nh3.clean(search_item['snippet'], tags=set())
        item['uri'] = Entity.id2uri(entity_id)
        payload['items'].append(item)
    return payload

def find_props_by_name(self, search_text):
    """Search Wikidata for properties matching ``search_text``.

    The property id is the part of the hit's title after the first ``:``,
    and the URI comes from ``OntProperty.id2uri``. Label and description
    are the hit's ``titlesnippet`` and ``snippet`` passed through
    ``nh3.clean`` with an empty set of allowed tags.

    Returns:
        ``{"items": [...]}`` where each item is a copy of
        ``self.search_item_template`` (label, id, description, uri)
        filled in for one hit.
    """
    request_params = self.get_props_search_params(search_text)
    # A leftover early `return data.json()['query']['search']` used to sit
    # here; it issued the request twice and made the transform unreachable.
    # The timeout keeps an unresponsive API from hanging the caller forever.
    api_data = requests.get(self.wikidata_url, request_params, timeout=10)
    search_items = api_data.json()['query']['search']

    payload = {"items": []}
    for search_item in search_items:
        title = search_item['title']
        # Drop the namespace prefix before the colon. A title without a
        # colon is used as-is instead of raising IndexError.
        _, colon, remainder = title.partition(":")
        prop_id = remainder if colon else title

        item = self.search_item_template.copy()
        item['label'] = nh3.clean(search_item['titlesnippet'], tags=set())
        item['id'] = prop_id
        item['description'] = nh3.clean(search_item['snippet'], tags=set())
        item['uri'] = OntProperty.id2uri(prop_id)
        payload['items'].append(item)
    return payload
17 changes: 15 additions & 2 deletions sand/models/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,15 @@ def readable_label(self):
def uri2id(uri: str) -> str:
    """Convert class URI to entity ID.

    This is a placeholder: the body always raises. The real converter is
    assigned to ``OntClass.uri2id`` by ``OntClassAR``.

    Raises:
        NotImplementedError: Always, until the placeholder is replaced.
    """
    # Exactly one message literal: two adjacent literals would be joined by
    # Python and the text would be emitted twice.
    raise NotImplementedError(
        "The method is set when its store is initialized. Check the call order to ensure `OntClassAR` is called first"
    )

@staticmethod
def id2uri(id: str) -> str:
    """Convert class ID to class URI.

    Placeholder only: calling it always raises ``NotImplementedError``.
    The error message directs the caller to invoke ``OntClassAR`` first.
    """
    reason = "The method is set when its store is initialized. Check the call order to ensure `OntClassAR` is called first"
    raise NotImplementedError(reason)

OntPropertyDataType = Literal[
"monolingualtext",
Expand Down Expand Up @@ -62,6 +68,12 @@ def uri2id(uri: str) -> str:
"The method is set when its store is initialized. Check the call order to ensure `OntPropertyAR` is called first"
)

@staticmethod
def id2uri(id: str) -> str:
    """Convert property ID to property URI.

    Placeholder only: calling it always raises ``NotImplementedError``.
    The error message directs the caller to invoke ``OntPropertyAR`` first.
    """
    reason = "The method is set when its store is initialized. Check the call order to ensure `OntPropertyAR` is called first"
    raise NotImplementedError(reason)

PROP_AR = None
CLASS_AR = None
Expand Down Expand Up @@ -89,6 +101,7 @@ def OntPropertyAR() -> Mapping[str, OntProperty]:
func = import_func(cfg["constructor"])
PROP_AR = func(**cfg["args"])
OntProperty.uri2id = import_func(cfg["uri2id"])
OntProperty.id2uri = import_func(cfg["id2uri"])

return PROP_AR

Expand All @@ -102,5 +115,5 @@ def OntClassAR() -> Mapping[str, OntClass]:
func = import_func(cfg["constructor"])
CLASS_AR = func(**cfg["args"])
OntClass.uri2id = import_func(cfg["uri2id"])

OntClass.id2uri = import_func(cfg["id2uri"])
return CLASS_AR

0 comments on commit 41d964f

Please sign in to comment.