In [1]:
import pandas as pd
from linkml_runtime.linkml_model import (
    SchemaDefinition,
    ClassDefinition,
    SlotDefinition,
    Annotation,
)
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime.utils.schemaview import SchemaView
from strsimpy import Cosine

from dataclasses import dataclass

In [2]:
# Path of the schema file handed to SchemaView; the NMDC path below is a
# commented-out alternative input.
# model_file = "../nmdc-schema/src/schema/nmdc.yaml"
model_file = "../mixs-source/model/schema/mixs.yaml"
# Name of the class that seeds the traversal (added to "pending_ranges").
selected_class = "soil MIMS"

In [3]:
# Wrap the schema file in a SchemaView; every later lookup goes through it.
model_sv = SchemaView(model_file)

In [4]:
# model_sv.class_descendants("soil")

In [5]:
# All subsets known to the schema view; copied into the accumulated schema later.
mvs = model_sv.all_subsets()

In [6]:
# Prefix map taken from the loaded schema object; copied into the accumulated
# schema later.
mvp = model_sv.schema.prefixes
# print(mvp)

In [7]:
# Slot lookup indexed by slot name; exhaust_class reads each slot's range and
# domain from it.
all_slots_dict = model_sv.all_slots()

In [8]:
# Class definitions keyed by name, and the bare names used to recognise ranges
# that refer to classes.
all_classes = model_sv.all_classes()
all_class_names = list(all_classes)

In [9]:
# Type definitions keyed by name, and the bare names used to recognise ranges
# that refer to types.
all_types = model_sv.all_types()
all_type_names = list(all_types)

In [10]:
# Enum definitions keyed by name, and the bare names used to recognise ranges
# that refer to enums.
all_enums = model_sv.all_enums()
all_enum_names = list(all_enums)

In [11]:
# Template for the traversal state: one independent, initially empty set per
# bucket ("pending_*" still to visit, "exhausted_*" already visited).
class_slot_dict = {
    bucket: set()
    for bucket in (
        "pending_ranges",
        "pending_slots",
        "exhausted_ranges",
        "exhausted_slots",
        "exhausted_enums",
        "exhausted_types",
    )
}

In [12]:
# Build the working state with a fresh set per key. dict.copy() is shallow: it
# would hand back the template's own set objects, so every .add() below (and
# inside exhaust_class) would also change class_slot_dict and make this cell
# non-idempotent on re-run.
accum = {key: set(members) for key, members in class_slot_dict.items()}
accum["pending_ranges"].add(selected_class)

In [13]:
# use a dataclass instead of a dict?
# or do better testing of the dict
def exhaust_class(dict_to_exhaust):
    """Expand the pending class and slot sets until nothing is left to visit.

    Each pass does two things:

    * every pending class is marked exhausted; its induced slots, its
      ancestors and any class used as a ``slot_usage`` range are queued;
    * every pending slot is marked exhausted; its ancestors, its domain and
      (when it is a class) its range are queued, while ranges that are types
      or enums are recorded.

    Types are recorded together with their whole ``typeof`` chain, and enum
    ranges introduced through ``slot_usage`` are recorded as well, so that the
    collected names are self-contained.

    Relies on the notebook globals ``model_sv``, ``all_slots_dict``,
    ``all_class_names``, ``all_type_names`` and ``all_enum_names``.

    Args:
        dict_to_exhaust: dict holding sets under the keys ``pending_ranges``,
            ``pending_slots``, ``exhausted_ranges``, ``exhausted_slots``,
            ``exhausted_enums`` and ``exhausted_types``. It is updated in place.

    Returns:
        The same ``dict_to_exhaust`` object, with both pending sets empty.
    """

    def _record_type(type_name):
        # Record a type plus every ancestor reachable through ``typeof``.
        # ``seen`` guards against a cyclic typeof chain.
        seen = set()
        while type_name is not None and type_name not in seen:
            seen.add(type_name)
            dict_to_exhaust["exhausted_types"].add(type_name)
            type_def = model_sv.get_type(type_name)
            type_name = None if type_def is None else type_def.typeof

    # Iterative form of the original self-recursion: one loop turn per pass.
    while dict_to_exhaust["pending_ranges"] or dict_to_exhaust["pending_slots"]:
        class_parents = set()
        usage_ranges = set()
        for pending_class in dict_to_exhaust["pending_ranges"]:
            dict_to_exhaust["exhausted_ranges"].add(pending_class)
            induced_names = {
                slot.name for slot in model_sv.class_induced_slots(pending_class)
            }
            dict_to_exhaust["pending_slots"] = dict_to_exhaust["pending_slots"].union(
                induced_names
            )
            class_parents.update(model_sv.class_ancestors(pending_class))
            class_def = model_sv.get_class(pending_class)
            for usage in class_def.slot_usage.values():
                if usage.range is not None:
                    usage_ranges.add(usage.range)

        # Everything that was pending has now been visited.
        dict_to_exhaust["pending_ranges"] = (
            dict_to_exhaust["pending_ranges"] - dict_to_exhaust["exhausted_ranges"]
        )
        for parent_class in class_parents:
            if parent_class not in dict_to_exhaust["exhausted_ranges"]:
                dict_to_exhaust["pending_ranges"].add(parent_class)

        for usage_range in usage_ranges:
            if (
                usage_range in all_class_names
                and usage_range not in dict_to_exhaust["exhausted_ranges"]
            ):
                dict_to_exhaust["pending_ranges"].add(usage_range)
            if usage_range in all_type_names:
                _record_type(usage_range)
            if usage_range in all_enum_names:
                # Without this an enum used only via slot_usage would be
                # referenced by the collected classes but never collected.
                dict_to_exhaust["exhausted_enums"].add(usage_range)

        slot_parents = set()
        for pending_slot in dict_to_exhaust["pending_slots"]:
            if pending_slot in dict_to_exhaust["exhausted_slots"]:
                continue
            slot_def = all_slots_dict[pending_slot]
            slot_parents.update(model_sv.slot_ancestors(pending_slot))

            slot_range = slot_def.range
            if slot_range is not None:
                if slot_range in all_type_names:
                    _record_type(slot_range)
                if slot_range in all_enum_names:
                    dict_to_exhaust["exhausted_enums"].add(slot_range)
                if (
                    slot_range in all_class_names
                    and slot_range not in dict_to_exhaust["exhausted_ranges"]
                ):
                    dict_to_exhaust["pending_ranges"].add(slot_range)

            # The slot's domain is queued like any other class reference.
            slot_domain = slot_def.domain
            if (
                slot_domain is not None
                and slot_domain not in dict_to_exhaust["exhausted_ranges"]
            ):
                dict_to_exhaust["pending_ranges"].add(slot_domain)

            dict_to_exhaust["exhausted_slots"].add(pending_slot)

        dict_to_exhaust["pending_slots"] = (
            dict_to_exhaust["pending_slots"] - dict_to_exhaust["exhausted_slots"]
        )
        # Ancestors already exhausted are re-queued here and dropped, unvisited,
        # by the next pass.
        for parent_slot in slot_parents:
            dict_to_exhaust["pending_slots"].add(parent_slot)

    return dict_to_exhaust

In [14]:
# Run the traversal. exhaust_class returns the dict it was given, so
# `exhausted` and `accum` refer to the same object.
exhausted = exhaust_class(accum)

In [15]:
# New schema that the following cells fill with the collected classes, slots,
# types, enums, subsets and prefixes.
accum_sd = SchemaDefinition(name="accumulated", id="http://example.com/accumulated")

In [16]:
# Copy every visited class into the accumulated schema. The names live in a
# set, whose iteration order for strings can differ between interpreter runs,
# so sort them to keep the emitted schema identical from run to run.
for class_name in sorted(exhausted["exhausted_ranges"]):
    accum_sd.classes[class_name] = model_sv.get_class(class_name)

In [17]:
# Copy every visited slot into the accumulated schema. The names live in a
# set, whose iteration order for strings can differ between interpreter runs,
# so sort them to keep the emitted schema identical from run to run.
for slot_name in sorted(exhausted["exhausted_slots"]):
    accum_sd.slots[slot_name] = model_sv.get_slot(slot_name)

In [18]:
# Copy every recorded type into the accumulated schema. The names live in a
# set, whose iteration order for strings can differ between interpreter runs,
# so sort them to keep the emitted schema identical from run to run.
for type_name in sorted(exhausted["exhausted_types"]):
    accum_sd.types[type_name] = model_sv.get_type(type_name)

In [19]:
# Copy every recorded enum into the accumulated schema. The names live in a
# set, whose iteration order for strings can differ between interpreter runs,
# so sort them to keep the emitted schema identical from run to run.
for enum_name in sorted(exhausted["exhausted_enums"]):
    accum_sd.enums[enum_name] = model_sv.get_enum(enum_name)

In [20]:
# limit these to those subsets claimed by any element?
for subset_name, subset_def in mvs.items():
    # carried over unconditionally, whether or not a collected element uses it
    accum_sd.subsets[subset_name] = subset_def

In [21]:
# limit these to those prefixes claimed by any element?
for prefix_name, prefix_def in mvp.items():
    # carried over unconditionally, whether or not a collected element uses it
    accum_sd.prefixes[prefix_name] = prefix_def

In [22]:
# Serialise the accumulated schema to a YAML string and print it for inspection.
print(yaml_dumper.dumps(accum_sd))

name: accumulated
id: http://example.com/accumulated
prefixes:
  linkml:
    prefix_prefix: linkml
    prefix_reference: https://w3id.org/linkml/
  mixs.vocab:
    prefix_prefix: mixs.vocab
    prefix_reference: https://w3id.org/mixs/vocab/
  MIXS:
    prefix_prefix: MIXS
    prefix_reference: https://w3id.org/mixs/terms/
  MIGS:
    prefix_prefix: MIGS
    prefix_reference: https://w3id.org/mixs/migs/
default_prefix: http://example.com/accumulated/
subsets:
  checklist:
    name: checklist
    description: A MIxS checklist. These can be combined with packages
    from_schema: http://w3id.org/mixs/terms
  package:
    name: package
    description: A MIxS package. These can be combined with checklists
    from_schema: http://w3id.org/mixs/terms
  checklist_package_combination:
    name: checklist_package_combination
    description: A combination of a checklist and a package
    from_schema: http://w3id.org/mixs/terms
types:
  double:
    name: double
    description: A real number tha

In [24]:
# yaml_dumper.dump(accum_sd, "accum_sd.yaml")