Skip to content

Commit

Permalink
update kgdata & sem-desc
Browse files Browse the repository at this point in the history
  • Loading branch information
Binh Vu committed Nov 5, 2023
1 parent d2539b6 commit 47387d5
Show file tree
Hide file tree
Showing 12 changed files with 176 additions and 79 deletions.
10 changes: 4 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
[tool.poetry]
name = "web-sand"
version = "2.1.15"
version = "2.1.16"
description = "UI for browsing/editing semantic descriptions"
authors = ["Binh Vu <binh@toan2.com>"]
repository = "https://github.com/usc-isi-i2/sand"
license = "MIT"
packages = [
{ include = "sand" }
]
packages = [{ include = "sand" }]
readme = "README.md"
include = ["sand/www/**/*"]

Expand All @@ -17,8 +15,8 @@ sand = 'sand.__main__:cli'
[tool.poetry.dependencies]
# restrict Python to versions below 3.12 because of the RestrictedPython dependency.
python = ">=3.8,<3.12"
kgdata = "^3.8.0"
sem-desc = "^4.4.2"
kgdata = "^5.3.1"
sem-desc = "^5.1.0"
peewee = "^3.15.2"
Flask = "^2.2.2"
python-dotenv = ">= 0.19.0, < 0.20.0"
Expand Down
75 changes: 75 additions & 0 deletions sand/config.bk
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os
from pathlib import Path

# Directory that holds this file, kept as a plain string.
PACKAGE_DIR = str(Path(os.path.abspath(__file__)).parent)
# Parent of the package directory.
_ROOT_DIR = Path(PACKAGE_DIR).parent
# True when the package directory sits directly inside a "site-packages" folder.
FROM_SITEPACKAGES = "site-packages" == _ROOT_DIR.name

CACHE_SIZE = 10 * 1024

# Application settings, assembled one section at a time. Values that look like
# dotted paths ("package.module.attr") are stored here as plain strings only.
SETTINGS = {}

# --- entity store -----------------------------------------------------------
SETTINGS["entity"] = {
    "constructor": "sand.extensions.wikidata.get_entity_db",
    "uri2id": "sand.extensions.wikidata.uri2id",
    "id2uri": "sand.extensions.wikidata.id2uri",
    "args": {
        "dbfile": "/tmp/entities.db",
        "proxy": True,
    },
    # additional entities
    "default": "sand.models.entity.DEFAULT_ENTITY",
    # per entity namespace: the property id that expresses the `instance_of`
    # relationship
    "instanceof": {
        "http://www.wikidata.org": "P31",
    },
    # id and uri of the nil entity
    "nil": {
        "id": "drepr:nil",
        "uri": "https://purl.org/drepr/ontology/1.0/nil",
    },
    # uri template for newly created entities
    "new_entity_template": "http://www.wikidata.org/entity/{id}",
}

# --- ontology classes -------------------------------------------------------
SETTINGS["ont_classes"] = {
    "constructor": "sand.extensions.wikidata.get_ontclass_db",
    "uri2id": "sand.extensions.wikidata.uri2id",
    "id2uri": "sand.extensions.wikidata.id2uri",
    "args": {
        "dbfile": "/tmp/classes.db",
        "proxy": True,
    },
    # additional classes
    "default": "sand.extensions.wikidata.WD_ONT_CLASSES",
}

# --- ontology properties ----------------------------------------------------
SETTINGS["ont_props"] = {
    "constructor": "sand.extensions.wikidata.get_ontprop_db",
    "uri2id": "sand.extensions.wikidata.uri2id",
    "id2uri": "sand.extensions.wikidata.id2uri",
    "args": {
        "dbfile": "/tmp/props.db",
        "proxy": True,
    },
    # additional properties
    "default": "sand.models.ontology.DEFAULT_ONT_PROPS",
}

# --- semantic model ---------------------------------------------------------
SETTINGS["semantic_model"] = {
    # uris of properties that mark a column as an entity column when the
    # column is tagged with one of them
    "identifiers": [
        "http://www.w3.org/2000/01/rdf-schema#label",
    ],
    # uris of classes used as intermediate nodes for n-ary relationships,
    # e.g., wikidata's statement
    "statements": ["http://wikiba.se/ontology#Statement"],
}

# --- assistants -------------------------------------------------------------
SETTINGS["assistants"] = {
    # assistant name -> model
    # "grams": "sand.extensions.grams.GRAMSAssistant",
    "mtab": "sand.extensions.assistants.mtab.MTabAssistant",
    # "default": "mtab",
}

# --- search -----------------------------------------------------------------
SETTINGS["search"] = {
    "entities": "sand.extensions.search.wikidata_search.extended_wikidata_search",
    "classes": "sand.extensions.search.wikidata_search.extended_wikidata_search",
    "props": "sand.extensions.search.wikidata_search.extended_wikidata_search",
}

# --- exports ----------------------------------------------------------------
SETTINGS["exports"] = {
    "drepr": "sand.extensions.export.drepr.main.DreprExport",
    "default": "sand.extensions.export.drepr.main.DreprExport",
}
45 changes: 22 additions & 23 deletions sand/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
from pathlib import Path

from minmod.db import MNDR_NS, MNDRNamespace
from rdflib import RDF

_ROOT_DIR = Path(os.path.abspath(__file__)).parent.parent
PACKAGE_DIR = str(Path(os.path.abspath(__file__)).parent)
FROM_SITEPACKAGES = _ROOT_DIR.name == "site-packages"
Expand All @@ -9,42 +12,37 @@

SETTINGS = {
"entity": {
"constructor": "sand.extensions.wikidata.get_entity_db",
"uri2id": "sand.extensions.wikidata.uri2id",
"id2uri": "sand.extensions.wikidata.id2uri",
"constructor": "minmod.sand.get_entity_db",
"uri2id": "minmod.sand.WrappedEntity.uri2id",
"id2uri": "minmod.sand.WrappedEntity.id2uri",
"args": {
"dbfile": "/tmp/wdentities.db",
"proxy": True,
"dbfile": "/Volumes/research/gramsplus/libraries/minmod/data/databases/entities.db",
},
# extra entities
"default": "sand.models.entity.DEFAULT_ENTITY",
# mapping from entity's namespace to the property id that will be used to indicate `instance_of` relationship
"instanceof": {
"http://www.wikidata.org": "P31",
},
"instanceof": {MNDR_NS: MNDRNamespace.create().get_rel_uri(str(RDF.type))},
# id of a nil entity
"nil": {"id": "drepr:nil", "uri": "https://purl.org/drepr/ontology/1.0/nil"},
# template for new entity uri
"new_entity_template": "http://www.wikidata.org/entity/{id}",
},
"ont_classes": {
"constructor": "sand.extensions.wikidata.get_ontclass_db",
"uri2id": "sand.extensions.wikidata.uri2id",
"id2uri": "sand.extensions.wikidata.id2uri",
"constructor": "minmod.sand.get_ontclass_db",
"uri2id": "minmod.sand.WrappedOntClass.uri2id",
"id2uri": "minmod.sand.WrappedOntClass.id2uri",
"args": {
"dbfile": "/tmp/wdclasses.db",
"proxy": True,
"dbfile": "/Volumes/research/gramsplus/libraries/minmod/data/databases/classes.db",
},
# extra classes
"default": "sand.extensions.wikidata.WD_ONT_CLASSES",
"default": "sand.models.ontology.DEFAULT_ONT_CLASSES",
},
"ont_props": {
"constructor": "sand.extensions.wikidata.get_ontprop_db",
"uri2id": "sand.extensions.wikidata.uri2id",
"id2uri": "sand.extensions.wikidata.id2uri",
"constructor": "minmod.sand.get_ontprop_db",
"uri2id": "minmod.sand.WrappedOntClass.uri2id",
"id2uri": "minmod.sand.WrappedOntClass.id2uri",
"args": {
"dbfile": "/tmp/wdprops.db",
"proxy": True,
"dbfile": "/Volumes/research/gramsplus/libraries/minmod/data/databases/props.db",
},
# extra props
"default": "sand.models.ontology.DEFAULT_ONT_PROPS",
Expand All @@ -60,13 +58,14 @@
"assistants": {
# list of assistants' names and their models
# "grams": "sand.extensions.grams.GRAMSAssistant",
"mtab": "sand.extensions.assistants.mtab.MTabAssistant",
# "mtab": "sand.extensions.assistants.mtab.MTabAssistant",
"mtab": "minmod.sand.GramsMinModAssistant",
# "default": "mtab",
},
"search": {
"entities": "sand.extensions.search.wikidata_search.extended_wikidata_search",
"classes": "sand.extensions.search.wikidata_search.extended_wikidata_search",
"props": "sand.extensions.search.wikidata_search.extended_wikidata_search",
"entities": "minmod.sand.mndr_search",
"classes": "minmod.sand.mndr_search",
"props": "minmod.sand.mndr_search",
},
"exports": {
"drepr": "sand.extensions.export.drepr.main.DreprExport",
Expand Down
6 changes: 4 additions & 2 deletions sand/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from gena.deserializer import get_dataclass_deserializer
import sm.outputs.semantic_model as O
from gena.deserializer import get_dataclass_deserializer

# from sand.extensions.wikidata import get_rel_uri
from minmod.sand import get_rel_uri
from sm.outputs.semantic_model import LiteralNodeDataType
from sand.extensions.wikidata import get_rel_uri


def deserialize_graph(value) -> O.SemanticModel:
Expand Down
16 changes: 8 additions & 8 deletions sand/extensions/assistants/mtab.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import copy
from rdflib import RDFS
import subprocess
from pathlib import Path
from typing import Dict, List, Tuple

import serde.prelude as serde
from sm.namespaces.wikidata import WikidataNamespace
import sm.outputs.semantic_model as O
from rdflib import RDFS
from sand.controllers.assistant import IAssistant
from sand.models.base import init_db

from sand.models.table import CandidateEntity, Link, Table, TableRow
from sm.namespaces.wikidata import WikidataNamespace


class MTabAssistant(IAssistant):
Expand Down Expand Up @@ -142,15 +142,15 @@ def create_sm_from_cta_cpa(
# somehow, they may end-up predict multiple classes, we need to select one
if qnode_id.find(" ") != -1:
qnode_id = qnode_id.split(" ")[0]
curl = self.wdns.get_entity_abs_uri(qnode_id)
curl = self.wdns.id_to_uri(qnode_id)

try:
cnode_label = f"{self.id2label[qnode_id]} ({qnode_id})"
except KeyError:
cnode_label = self.wdns.get_entity_rel_uri(qnode_id)
cnode_label = self.wdns.get_rel_uri(self.wdns.id_to_uri(qnode_id))
cnode = O.ClassNode(
abs_uri=curl,
rel_uri=self.wdns.get_entity_rel_uri(qnode_id),
rel_uri=self.wdns.get_rel_uri(self.wdns.id_to_uri(qnode_id)),
readable_label=cnode_label,
)
sm.add_node(dnode)
Expand Down Expand Up @@ -196,8 +196,8 @@ def create_sm_from_cta_cpa(
O.Edge(
source=source.id,
target=target.id,
abs_uri=self.wdns.get_prop_abs_uri(prop),
rel_uri=self.wdns.get_prop_rel_uri(prop),
abs_uri=(tmp_abs_uri := self.wdns.id_to_uri(prop)),
rel_uri=self.wdns.get_rel_uri(tmp_abs_uri),
readable_label=f"{self.id2label[prop]} ({prop})",
)
)
Expand Down
5 changes: 3 additions & 2 deletions sand/extensions/export/drepr/semanticmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
import orjson
import sm.misc as M
import sm.outputs.semantic_model as O
from sm.namespaces.wikidata import WikidataNamespace

from sand.config import SETTINGS
from sand.models.entity import NIL_ENTITY_ID, Entity
from sand.models.ontology import OntProperty, OntPropertyAR, OntPropertyDataType
from sand.models.table import Table, TableRow
from sm.namespaces.wikidata import WikidataNamespace

prefixes = WikidataNamespace.create().prefix2ns.copy()
prefixes.update(drepr_sm.SemanticModel.get_default_prefixes())
Expand Down Expand Up @@ -94,6 +93,8 @@ def get_drepr_sm(
label=edge.rel_uri,
)

print(edges)

# add drepr:uri relationship
for node in get_entity_data_nodes(sm):
new_node_id = str(node.id) + ":ents"
Expand Down
29 changes: 14 additions & 15 deletions sand/extensions/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from hugedict.misc import identity
from kgdata.wikidata import db
from kgdata.wikidata.models import WDClass, WDEntity, WDProperty, WDValue
from sm.namespaces.wikidata import WikidataNamespace

from sand.models.base import StoreWrapper
from sand.models.entity import DEFAULT_ENTITY, Entity, Statement, Value
from sand.models.ontology import (
Expand All @@ -16,13 +14,14 @@
OntProperty,
OntPropertyDataType,
)
from sm.namespaces.wikidata import WikidataNamespace

wdns = WikidataNamespace.create()
WD_ONT_CLASSES = {
wdns.get_rel_uri(wdns.STATEMENT_URI): OntClass(
id=wdns.get_rel_uri(wdns.STATEMENT_URI),
uri=wdns.STATEMENT_URI,
label=wdns.get_rel_uri(wdns.STATEMENT_URI),
wdns.get_rel_uri(wdns.statement_uri): OntClass(
id=wdns.get_rel_uri(wdns.statement_uri),
uri=wdns.statement_uri,
label=wdns.get_rel_uri(wdns.statement_uri),
aliases=[],
description="Describes the claim of a statement and list references for this claim",
parents=[],
Expand Down Expand Up @@ -91,15 +90,15 @@ def get_entity_db(dbfile: str, proxy: bool):

def get_ontclass_db(dbfile: str, proxy: bool):
return StoreWrapper(
db.get_wdclass_db(dbfile, proxy=proxy, read_only=not proxy),
db.get_class_db(dbfile, proxy=proxy, read_only=not proxy),
key_deser=get_wdclass_id,
val_deser=ont_class_deser,
)


def get_ontprop_db(dbfile: str, proxy: bool):
return StoreWrapper(
db.get_wdprop_db(dbfile, proxy=proxy, read_only=not proxy),
db.get_prop_db(dbfile, proxy=proxy, read_only=not proxy),
key_deser=get_wdprop_id,
val_deser=ont_prop_deser,
)
Expand All @@ -123,7 +122,7 @@ def qnode_deser(qnode: WDEntity):

return WrapperWDEntity(
id=qnode.id,
uri=wdns.get_entity_abs_uri(qnode.id),
uri=wdns.id_to_uri(qnode.id),
label=qnode.label,
aliases=qnode.aliases,
description=qnode.description,
Expand All @@ -134,7 +133,7 @@ def qnode_deser(qnode: WDEntity):
def ont_class_deser(item: WDClass):
return WrapperWDClass(
id=item.id,
uri=WikidataNamespace.get_entity_abs_uri(item.id),
uri=WikidataNamespace.id_to_uri(item.id),
aliases=item.aliases,
label=item.label,
description=item.description,
Expand All @@ -147,7 +146,7 @@ def ont_prop_deser(item: WDProperty):
global WD_DATATYPE_MAPPING
return WrapperWDProperty(
id=item.id,
uri=WikidataNamespace.get_prop_abs_uri(item.id),
uri=WikidataNamespace.id_to_uri(item.id),
aliases=item.aliases,
label=item.label,
description=item.description,
Expand Down Expand Up @@ -182,9 +181,9 @@ def get_wdclass_id(uri_or_id: str):

def uri2id(uri: str):
    """Convert a URI into the id used to look it up.

    URIs starting with the Wikidata property or entity prefix are converted
    through the module-level ``wdns`` namespace. URIs present in
    ``INVERSE_DEFAULT_URI2ID`` map to the id registered there. Any other
    value is returned unchanged.
    """
    if uri.startswith(
        ("http://www.wikidata.org/prop/", "http://www.wikidata.org/entity/")
    ):
        # Both prefixes go through the shared namespace instance, the same way
        # id2uri and qnode_deser use `wdns`, instead of calling the method on
        # the WikidataNamespace class for one prefix and on the instance for
        # the other.
        return wdns.uri_to_id(uri)
    if uri in INVERSE_DEFAULT_URI2ID:
        return INVERSE_DEFAULT_URI2ID[uri]
    return uri
Expand All @@ -194,9 +193,9 @@ def id2uri(id: str):
if id in DEFAULT_ID2URI:
return DEFAULT_ID2URI[id]
if id.startswith("P"):
return wdns.get_prop_abs_uri(id)
return wdns.id_to_uri(id)
if id.startswith("Q"):
return wdns.get_entity_abs_uri(id)
return wdns.id_to_uri(id)
raise ValueError(f"Cannot convert unknown id to uri: {id}")


Expand Down

0 comments on commit 47387d5

Please sign in to comment.