diff --git a/pyproject.toml b/pyproject.toml index ce2bf17..d1dc62e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,11 @@ [tool.poetry] name = "web-sand" -version = "2.1.15" +version = "2.1.16" description = "UI for browsing/editing semantic descriptions" authors = ["Binh Vu "] repository = "https://github.com/usc-isi-i2/sand" license = "MIT" -packages = [ - { include = "sand" } -] +packages = [{ include = "sand" }] readme = "README.md" include = ["sand/www/**/*"] @@ -17,8 +15,8 @@ sand = 'sand.__main__:cli' [tool.poetry.dependencies] # restricting the upper bound python version to 3.12, due to RestrictedPython dependency. python = ">=3.8,<3.12" -kgdata = "^3.8.0" -sem-desc = "^4.4.2" +kgdata = "^5.3.1" +sem-desc = "^5.1.0" peewee = "^3.15.2" Flask = "^2.2.2" python-dotenv = ">= 0.19.0, < 0.20.0" diff --git a/sand/config.bk b/sand/config.bk new file mode 100644 index 0000000..09301c1 --- /dev/null +++ b/sand/config.bk @@ -0,0 +1,75 @@ +import os +from pathlib import Path + +_ROOT_DIR = Path(os.path.abspath(__file__)).parent.parent +PACKAGE_DIR = str(Path(os.path.abspath(__file__)).parent) +FROM_SITEPACKAGES = _ROOT_DIR.name == "site-packages" + +CACHE_SIZE = 10240 + +SETTINGS = { + "entity": { + "constructor": "sand.extensions.wikidata.get_entity_db", + "uri2id": "sand.extensions.wikidata.uri2id", + "id2uri": "sand.extensions.wikidata.id2uri", + "args": { + "dbfile": "/tmp/entities.db", + "proxy": True, + }, + # extra entities + "default": "sand.models.entity.DEFAULT_ENTITY", + # mapping from entity's namespace to the property id that will be used to indicate `instance_of` relationship + "instanceof": { + "http://www.wikidata.org": "P31", + }, + # id of an nil entity + "nil": {"id": "drepr:nil", "uri": "https://purl.org/drepr/ontology/1.0/nil"}, + # template for new entity uri + "new_entity_template": "http://www.wikidata.org/entity/{id}", + }, + "ont_classes": { + "constructor": "sand.extensions.wikidata.get_ontclass_db", + "uri2id": "sand.extensions.wikidata.uri2id", + "id2uri": "sand.extensions.wikidata.id2uri", + "args": { + "dbfile": "/tmp/classes.db", + "proxy": True, + }, + # extra classes + "default": "sand.extensions.wikidata.WD_ONT_CLASSES", + }, + "ont_props": { + "constructor": "sand.extensions.wikidata.get_ontprop_db", + "uri2id": "sand.extensions.wikidata.uri2id", + "id2uri": "sand.extensions.wikidata.id2uri", + "args": { + "dbfile": "/tmp/props.db", + "proxy": True, + }, + # extra props + "default": "sand.models.ontology.DEFAULT_ONT_PROPS", + }, + "semantic_model": { + # list of properties' uris that when a column is tagged with one of them, the column is an entity column + "identifiers": [ + "http://www.w3.org/2000/01/rdf-schema#label", + ], + # list of uri of classes that are used as intermediate nodes to represent n-ary relationships, e.g., wikidata's statement + "statements": ["http://wikiba.se/ontology#Statement"], + }, + "assistants": { + # list of assistants' names and their models + # "grams": "sand.extensions.grams.GRAMSAssistant", + "mtab": "sand.extensions.assistants.mtab.MTabAssistant", + # "default": "mtab", + }, + "search": { + "entities": "sand.extensions.search.wikidata_search.extended_wikidata_search", + "classes": "sand.extensions.search.wikidata_search.extended_wikidata_search", + "props": "sand.extensions.search.wikidata_search.extended_wikidata_search", + }, + "exports": { + "drepr": "sand.extensions.export.drepr.main.DreprExport", + "default": "sand.extensions.export.drepr.main.DreprExport", + }, +} diff --git a/sand/config.py b/sand/config.py index 5192c34..442872a 100644 --- a/sand/config.py +++ b/sand/config.py @@ -1,6 +1,9 @@ import os from pathlib import Path +from minmod.db import MNDR_NS, MNDRNamespace +from rdflib import RDF + _ROOT_DIR = Path(os.path.abspath(__file__)).parent.parent PACKAGE_DIR = str(Path(os.path.abspath(__file__)).parent) FROM_SITEPACKAGES = _ROOT_DIR.name == "site-packages" @@ -9,42 +12,37 @@ SETTINGS = { "entity": { - "constructor": "sand.extensions.wikidata.get_entity_db", - "uri2id": "sand.extensions.wikidata.uri2id", - "id2uri": "sand.extensions.wikidata.id2uri", + "constructor": "minmod.sand.get_entity_db", + "uri2id": "minmod.sand.WrappedEntity.uri2id", + "id2uri": "minmod.sand.WrappedEntity.id2uri", "args": { - "dbfile": "/tmp/wdentities.db", - "proxy": True, + "dbfile": "/Volumes/research/gramsplus/libraries/minmod/data/databases/entities.db", }, # extra entities "default": "sand.models.entity.DEFAULT_ENTITY", # mapping from entity's namespace to the property id that will be used to indicate `instance_of` relationship - "instanceof": { - "http://www.wikidata.org": "P31", - }, + "instanceof": {MNDR_NS: MNDRNamespace.create().get_rel_uri(str(RDF.type))}, # id of an nil entity "nil": {"id": "drepr:nil", "uri": "https://purl.org/drepr/ontology/1.0/nil"}, # template for new entity uri "new_entity_template": "http://www.wikidata.org/entity/{id}", }, "ont_classes": { - "constructor": "sand.extensions.wikidata.get_ontclass_db", - "uri2id": "sand.extensions.wikidata.uri2id", - "id2uri": "sand.extensions.wikidata.id2uri", + "constructor": "minmod.sand.get_ontclass_db", + "uri2id": "minmod.sand.WrappedOntClass.uri2id", + "id2uri": "minmod.sand.WrappedOntClass.id2uri", "args": { - "dbfile": "/tmp/wdclasses.db", - "proxy": True, + "dbfile": "/Volumes/research/gramsplus/libraries/minmod/data/databases/classes.db", }, # extra classes - "default": "sand.extensions.wikidata.WD_ONT_CLASSES", + "default": "sand.models.ontology.DEFAULT_ONT_CLASSES", }, "ont_props": { - "constructor": "sand.extensions.wikidata.get_ontprop_db", - "uri2id": "sand.extensions.wikidata.uri2id", - "id2uri": "sand.extensions.wikidata.id2uri", + "constructor": "minmod.sand.get_ontprop_db", + "uri2id": "minmod.sand.WrappedOntClass.uri2id", + "id2uri": "minmod.sand.WrappedOntClass.id2uri", "args": { - "dbfile": "/tmp/wdprops.db", - "proxy": True, + "dbfile": "/Volumes/research/gramsplus/libraries/minmod/data/databases/props.db", }, # extra props "default": "sand.models.ontology.DEFAULT_ONT_PROPS", @@ -60,13 +58,14 @@ "assistants": { # list of assistants' names and their models # "grams": "sand.extensions.grams.GRAMSAssistant", - "mtab": "sand.extensions.assistants.mtab.MTabAssistant", + # "mtab": "sand.extensions.assistants.mtab.MTabAssistant", + "mtab": "minmod.sand.GramsMinModAssistant", # "default": "mtab", }, "search": { - "entities": "sand.extensions.search.wikidata_search.extended_wikidata_search", - "classes": "sand.extensions.search.wikidata_search.extended_wikidata_search", - "props": "sand.extensions.search.wikidata_search.extended_wikidata_search", + "entities": "minmod.sand.mndr_search", + "classes": "minmod.sand.mndr_search", + "props": "minmod.sand.mndr_search", }, "exports": { "drepr": "sand.extensions.export.drepr.main.DreprExport", diff --git a/sand/deserializer.py b/sand/deserializer.py index 178f029..13b74e3 100644 --- a/sand/deserializer.py +++ b/sand/deserializer.py @@ -1,7 +1,9 @@ -from gena.deserializer import get_dataclass_deserializer import sm.outputs.semantic_model as O +from gena.deserializer import get_dataclass_deserializer + +# from sand.extensions.wikidata import get_rel_uri +from minmod.sand import get_rel_uri from sm.outputs.semantic_model import LiteralNodeDataType -from sand.extensions.wikidata import get_rel_uri def deserialize_graph(value) -> O.SemanticModel: diff --git a/sand/extensions/assistants/mtab.py b/sand/extensions/assistants/mtab.py index b7ce6dd..1a3c4bb 100644 --- a/sand/extensions/assistants/mtab.py +++ b/sand/extensions/assistants/mtab.py @@ -1,15 +1,15 @@ import copy -from rdflib import RDFS import subprocess from pathlib import Path from typing import Dict, List, Tuple + import serde.prelude as serde -from sm.namespaces.wikidata import WikidataNamespace import sm.outputs.semantic_model as O +from rdflib import RDFS from sand.controllers.assistant import IAssistant from sand.models.base import init_db - from sand.models.table import CandidateEntity, Link, Table, TableRow +from sm.namespaces.wikidata import WikidataNamespace class MTabAssistant(IAssistant): @@ -142,15 +142,15 @@ def create_sm_from_cta_cpa( # somehow, they may end-up predict multiple classes, we need to select one if qnode_id.find(" ") != -1: qnode_id = qnode_id.split(" ")[0] - curl = self.wdns.get_entity_abs_uri(qnode_id) + curl = self.wdns.id_to_uri(qnode_id) try: cnode_label = f"{self.id2label[qnode_id]} ({qnode_id})" except KeyError: - cnode_label = self.wdns.get_entity_rel_uri(qnode_id) + cnode_label = self.wdns.get_rel_uri(self.wdns.id_to_uri(qnode_id)) cnode = O.ClassNode( abs_uri=curl, - rel_uri=self.wdns.get_entity_rel_uri(qnode_id), + rel_uri=self.wdns.get_rel_uri(self.wdns.id_to_uri(qnode_id)), readable_label=cnode_label, ) sm.add_node(dnode) @@ -196,8 +196,8 @@ def create_sm_from_cta_cpa( O.Edge( source=source.id, target=target.id, - abs_uri=self.wdns.get_prop_abs_uri(prop), - rel_uri=self.wdns.get_prop_rel_uri(prop), + abs_uri=(tmp_abs_uri := self.wdns.id_to_uri(prop)), + rel_uri=self.wdns.get_rel_uri(tmp_abs_uri), readable_label=f"{self.id2label[prop]} ({prop})", ) ) diff --git a/sand/extensions/export/drepr/semanticmodel.py b/sand/extensions/export/drepr/semanticmodel.py index 3aeba90..a76cbe8 100644 --- a/sand/extensions/export/drepr/semanticmodel.py +++ b/sand/extensions/export/drepr/semanticmodel.py @@ -7,12 +7,11 @@ import orjson import sm.misc as M import sm.outputs.semantic_model as O -from sm.namespaces.wikidata import WikidataNamespace - from sand.config import SETTINGS from sand.models.entity import NIL_ENTITY_ID, Entity from sand.models.ontology import OntProperty, OntPropertyAR, OntPropertyDataType from sand.models.table import Table, TableRow +from sm.namespaces.wikidata import WikidataNamespace prefixes = WikidataNamespace.create().prefix2ns.copy() prefixes.update(drepr_sm.SemanticModel.get_default_prefixes()) @@ -94,6 +93,8 @@ def get_drepr_sm( label=edge.rel_uri, ) + print(edges) + # add drepr:uri relationship for node in get_entity_data_nodes(sm): new_node_id = str(node.id) + ":ents" diff --git a/sand/extensions/wikidata.py b/sand/extensions/wikidata.py index afdbc6f..912dbb7 100644 --- a/sand/extensions/wikidata.py +++ b/sand/extensions/wikidata.py @@ -5,8 +5,6 @@ from hugedict.misc import identity from kgdata.wikidata import db from kgdata.wikidata.models import WDClass, WDEntity, WDProperty, WDValue -from sm.namespaces.wikidata import WikidataNamespace - from sand.models.base import StoreWrapper from sand.models.entity import DEFAULT_ENTITY, Entity, Statement, Value from sand.models.ontology import ( @@ -16,13 +14,14 @@ OntProperty, OntPropertyDataType, ) +from sm.namespaces.wikidata import WikidataNamespace wdns = WikidataNamespace.create() WD_ONT_CLASSES = { - wdns.get_rel_uri(wdns.STATEMENT_URI): OntClass( - id=wdns.get_rel_uri(wdns.STATEMENT_URI), - uri=wdns.STATEMENT_URI, - label=wdns.get_rel_uri(wdns.STATEMENT_URI), + wdns.get_rel_uri(wdns.statement_uri): OntClass( + id=wdns.get_rel_uri(wdns.statement_uri), + uri=wdns.statement_uri, + label=wdns.get_rel_uri(wdns.statement_uri), aliases=[], description="Describes the claim of a statement and list references for this claim", parents=[], @@ -91,7 +90,7 @@ def get_entity_db(dbfile: str, proxy: bool): def get_ontclass_db(dbfile: str, proxy: bool): return StoreWrapper( - db.get_wdclass_db(dbfile, proxy=proxy, read_only=not proxy), + db.get_class_db(dbfile, proxy=proxy, read_only=not proxy), key_deser=get_wdclass_id, val_deser=ont_class_deser, ) @@ -99,7 +98,7 @@ def get_ontclass_db(dbfile: str, proxy: bool): def get_ontprop_db(dbfile: str, proxy: bool): return StoreWrapper( - db.get_wdprop_db(dbfile, proxy=proxy, read_only=not proxy), + db.get_prop_db(dbfile, proxy=proxy, read_only=not proxy), key_deser=get_wdprop_id, val_deser=ont_prop_deser, ) @@ -123,7 +122,7 @@ def qnode_deser(qnode: WDEntity): return WrapperWDEntity( id=qnode.id, - uri=wdns.get_entity_abs_uri(qnode.id), + uri=wdns.id_to_uri(qnode.id), label=qnode.label, aliases=qnode.aliases, description=qnode.description, @@ -134,7 +133,7 @@ def qnode_deser(qnode: WDEntity): def ont_class_deser(item: WDClass): return WrapperWDClass( id=item.id, - uri=WikidataNamespace.get_entity_abs_uri(item.id), + uri=WikidataNamespace.id_to_uri(item.id), aliases=item.aliases, label=item.label, description=item.description, @@ -147,7 +146,7 @@ def ont_prop_deser(item: WDProperty): global WD_DATATYPE_MAPPING return WrapperWDProperty( id=item.id, - uri=WikidataNamespace.get_prop_abs_uri(item.id), + uri=WikidataNamespace.id_to_uri(item.id), aliases=item.aliases, label=item.label, description=item.description, @@ -182,9 +181,9 @@ def get_wdclass_id(uri_or_id: str): def uri2id(uri: str): if uri.startswith("http://www.wikidata.org/prop/"): - return WikidataNamespace.get_prop_id(uri) + return WikidataNamespace.uri_to_id(uri) if uri.startswith("http://www.wikidata.org/entity/"): - return WikidataNamespace.get_entity_id(uri) + return wdns.uri_to_id(uri) if uri in INVERSE_DEFAULT_URI2ID: return INVERSE_DEFAULT_URI2ID[uri] return uri @@ -194,9 +193,9 @@ def id2uri(id: str): if id in DEFAULT_ID2URI: return DEFAULT_ID2URI[id] if id.startswith("P"): - return wdns.get_prop_abs_uri(id) + return wdns.id_to_uri(id) if id.startswith("Q"): - return wdns.get_entity_abs_uri(id) + return wdns.id_to_uri(id) raise ValueError(f"Cannot convert unknown id to uri: {id}") diff --git a/sand/models/base.py b/sand/models/base.py index 13030ba..a24b86a 100644 --- a/sand/models/base.py +++ b/sand/models/base.py @@ -1,12 +1,10 @@ import functools from pathlib import Path -from typing import Mapping, TypeVar, Callable, Any, Union - -from peewee import SqliteDatabase, Model, Field +from typing import Any, Callable, Mapping, TypeVar, Union +from peewee import Field, Model, SqliteDatabase from sand.config import CACHE_SIZE - # TODO: consider moving to APSWDatabase db = SqliteDatabase(None) @@ -44,7 +42,7 @@ def __init__(self, serialize, deserialize, **kwargs): V = TypeVar("V") -class StoreWrapper: +class StoreWrapper(Mapping[K, V]): def __init__( self, store: Mapping[K, V], @@ -55,12 +53,16 @@ def __init__( self.key_deser = key_deser self.val_deser = val_deser - @functools.lru_cache(maxsize=CACHE_SIZE) + # @functools.lru_cache(maxsize=CACHE_SIZE) def __contains__(self, key): + # print(key) + # print(self.key_deser(key)) return self.key_deser(key) in self.store - @functools.lru_cache(maxsize=CACHE_SIZE) + # @functools.lru_cache(maxsize=CACHE_SIZE) def __getitem__(self, key): + # print(key) + # print(self.key_deser(key)) val = self.store[self.key_deser(key)] return self.val_deser(val) @@ -79,6 +81,14 @@ def __len__(self): f"{self.__class__.__name__} does not support __len__ function" ) + def __iter__(self): + raise NotImplementedError( + f"{self.__class__.__name__} does not support __iter__ function" + ) + + def values(self): + return (self.val_deser(v) for v in self.store.values()) + def get(self, key, default=None): if not self.__contains__(key): return default diff --git a/sand/models/entity.py b/sand/models/entity.py index 38bd05e..057aa06 100644 --- a/sand/models/entity.py +++ b/sand/models/entity.py @@ -4,19 +4,15 @@ from typing import Literal, Mapping, Optional, Union from hugedict.chained_mapping import ChainedMapping -from kgdata.wikidata.models.multilingual import ( - MultiLingualString, - MultiLingualStringList, -) +from kgdata.models.multilingual import MultiLingualString, MultiLingualStringList from kgdata.wikidata.models.wdvalue import ( ValueGlobeCoordinate, ValueMonolingualText, ValueQuantity, ValueTime, ) -from sm.misc.funcs import import_attr, import_func - from sand.config import SETTINGS +from sm.misc.funcs import import_attr, import_func # represent that there is no entity NIL_ENTITY_ID = SETTINGS["entity"]["nil"]["id"] diff --git a/sand/models/ontology.py b/sand/models/ontology.py index 0eb6461..42b0540 100644 --- a/sand/models/ontology.py +++ b/sand/models/ontology.py @@ -2,10 +2,9 @@ from typing import Dict, List, Literal, Mapping, Set from hugedict.chained_mapping import ChainedMapping -from rdflib import RDFS -from sm.misc.funcs import import_attr, import_func - +from rdflib import RDF, RDFS from sand.config import SETTINGS +from sm.misc.funcs import import_attr, import_func @dataclass @@ -16,7 +15,7 @@ class OntClass: aliases: List[str] description: str parents: List[str] - ancestors: Set[str] = field(default_factory=set) + ancestors: Dict[str, int] = field(default_factory=dict) @property def readable_label(self): @@ -58,7 +57,7 @@ class OntProperty: aliases: List[str] description: str parents: List[str] - ancestors: Set[str] = field(default_factory=set) + ancestors: Dict[str, int] = field(default_factory=dict) @property def readable_label(self): @@ -91,7 +90,16 @@ def id2uri(id: str) -> str: datatype="string", description="Provides a human-readable version of a resource's name.", parents=[], - ) + ), + "rdf:type": OntProperty( + id="rdf:type", + uri=str(RDF.type), + label="rdf:type", + aliases=[], + datatype="entity", + description="Is used to state that a resource is an instance of a class", + parents=[], + ), } DEFAULT_ONT_CLASSES = {} diff --git a/sand/serializer.py b/sand/serializer.py index a394c09..94a1ac5 100644 --- a/sand/serializer.py +++ b/sand/serializer.py @@ -4,7 +4,6 @@ import sm.outputs.semantic_model as O from playhouse.shortcuts import model_to_dict - from sand.models import SemanticModel, Table from sand.models.entity import Entity, EntityAR from sand.models.ontology import OntClass, OntClassAR, OntProperty, OntPropertyAR @@ -20,7 +19,7 @@ def serialize_property(prop: OntProperty): "description": prop.description, "datatype": prop.datatype, "parents": prop.parents, - "ancestors": list(prop.ancestors), + "ancestors": list(prop.ancestors.keys()), } @@ -33,7 +32,7 @@ def serialize_class(cls: OntClass): "aliases": cls.aliases, "description": cls.description, "parents": cls.parents, - "ancestors": list(cls.ancestors), + "ancestors": list(cls.ancestors.keys()), } @@ -130,6 +129,7 @@ def batch_serialize_sms(sms: List[SemanticModel]): ontprops = OntPropertyAR() ontclasses = OntClassAR() + # TODO: id != uri fix me uri2lbl = partial(get_label, ontprops=ontprops, ontclasses=ontclasses) output = [] @@ -143,12 +143,20 @@ def batch_serialize_sms(sms: List[SemanticModel]): return output +from minmod.sand import kgns + + def get_label( - id: str, + uri_or_id: str, is_class: bool, ontprops: Mapping[str, OntProperty], ontclasses: Mapping[str, OntClass], ) -> Optional[str]: + if uri_or_id.startswith("http"): + id = kgns.get_rel_uri(uri_or_id) + else: + id = uri_or_id + if is_class: if id in ontclasses: return ontclasses[id].readable_label diff --git a/www/package.json b/www/package.json index 7581e38..3071f83 100644 --- a/www/package.json +++ b/www/package.json @@ -55,6 +55,7 @@ "start": "BROWSER=none react-scripts start", "start_nonroot": "PORT=5527 CHOKIDAR_USEPOLLING=true BROWSER=none DANGEROUSLY_DISABLE_HOST_CHECK=true react-scripts start", "build": "python ./scripts/build.py ../sand/www", + "build:macos": "NODE_OPTIONS=--openssl-legacy-provider python ./scripts/build.py ../sand/www", "test": "react-scripts test", "eject": "react-scripts eject", "build:lib": "yarn compile && yarn compile:post",