In [57]:
import rdflib
import json
import glob
# File path settings.
# `ntriples_file` is passed to glob.glob(), so it may be a wildcard pattern
# (e.g. 'data/*.nt') that matches several N-Triples files.
owl_file = 'path/to/owl'
ntriples_file = 'path/to/ntriples'
dir_path = 'path/to/output/directory'

# Load the OWL ontology (parsed as RDF/XML).
owl_graph = rdflib.Graph()
owl_graph.parse(owl_file, format='xml')

# Load the instance data (N-Triples) into a single RDF graph.
g = rdflib.Graph()

# glob.glob() returns its matches in arbitrary order; sorting makes the
# order in which files are parsed reproducible between runs and machines.
ntriples_paths = sorted(glob.glob(ntriples_file))
if not ntriples_paths:
    # Without this check a wrong pattern silently produces an empty graph
    # and, further down, empty output files. The ontology parse above already
    # fails on a missing file, so the data load now behaves the same way.
    raise FileNotFoundError(
        "No N-Triples files matched the pattern: {!r}".format(ntriples_file)
    )

for nt_path in ntriples_paths:
    g.parse(nt_path, format='nt')
    



In [58]:
# Extract class and property declarations from the ontology.
# Each dict maps the full URI (as a string) of a declared term to the short
# name that is later used as a label:
#   class_labels     - subjects typed owl:Class
#   dataTypeProperty - subjects typed owl:DatatypeProperty
#   objectProperty   - subjects typed owl:ObjectProperty or owl:TransitiveProperty
class_labels = {}
dataTypeProperty = {}
objectProperty = {}


def _to_http(term):
    """Return `term` with a leading 'https://' rewritten to 'http://'.

    Only the scheme prefix is rewritten; a later 'https://' embedded in the
    same URI (e.g. inside a query string) is left untouched. Terms that do
    not start with 'https://' are returned unchanged.
    """
    text = str(term)
    if text.startswith('https://'):
        return rdflib.URIRef('http://' + text[len('https://'):])
    return term


def _local_name(uri):
    """Return the short name of `uri`.

    For URIs containing '#' this is the text after the last '#'. For URIs
    without '#' (slash-style namespaces) it is the last non-empty path
    segment, falling back to the whole URI if no segment can be found.
    """
    if '#' in uri:
        return uri.split('#')[-1]
    tail = uri.rstrip('/').rsplit('/', 1)[-1]
    return tail or uri


for subj, pred, obj in owl_graph:
    # Normalise the scheme so that https:// and http:// spellings of the
    # same term compare equal.
    subj, pred, obj = _to_http(subj), _to_http(pred), _to_http(obj)

    # Only rdf:type declarations are of interest here.
    if pred != rdflib.RDF.type:
        continue

    uri = str(subj)
    if obj == rdflib.OWL.Class:
        class_labels[uri] = _local_name(uri)
    elif obj == rdflib.OWL.DatatypeProperty:
        dataTypeProperty[uri] = _local_name(uri)
    elif obj in (rdflib.OWL.ObjectProperty, rdflib.OWL.TransitiveProperty):
        # Transitive properties are stored together with object properties.
        objectProperty[uri] = _local_name(uri)

print(class_labels)
print(dataTypeProperty)
print(objectProperty)

{'http://swat.cse.lehigh.edu/onto/univ-bench.owl#PostDoc': 'PostDoc', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#UnofficialPublication': 'UnofficialPublication', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Faculty': 'Faculty', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Student': 'Student', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Employee': 'Employee', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Course': 'Course', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Dean': 'Dean', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#ConferencePaper': 'ConferencePaper', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Institute': 'Institute', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Book': 'Book', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#Organization': 'Organization', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#College': 'College', 'http://swat.cse.lehigh.edu/onto/univ-bench.owl#TechnicalReport': 'TechnicalReport', 'http://swat.cse.lehigh.edu/onto/univ-be

In [60]:
# Containers for the property graph: id -> node record / edge record.
node_map = {}
edge_map = {}
# Reverse lookups from data to id:
#   node_id_map: node URI string -> node id
#   edge_id_map: (predicate URI, source node id, destination node id) -> edge id
node_id_map = {}
edge_id_map = {}
next_node_id = 1
next_edge_id = 1

# Full URI of rdf:type, compared exactly against each predicate below.
RDF_TYPE_URI = str(rdflib.RDF.type)


def _term_to_str(term):
    """Return `term` as a string, normalising a leading https:// to http://.

    The scheme is rewritten only for URI references and only at the start of
    the string, so literal values (for example a URL stored as text) are
    kept exactly as they appear in the data.
    """
    text = str(term)
    if isinstance(term, rdflib.URIRef) and text.startswith("https://"):
        return "http://" + text[len("https://"):]
    return text


for subj, pred, obj in g:
    subj = _term_to_str(subj)
    pred = _term_to_str(pred)
    obj = _term_to_str(obj)

    # Skip any triple that mentions a file:// URI.
    if "file://" in subj or "file://" in pred or "file://" in obj:
        continue

    # Look up the subject node, creating it the first time it is seen.
    if subj not in node_id_map:
        node_id_map[subj] = next_node_id
        node_map[next_node_id] = {"id": next_node_id, "property": {"uri": subj}}
        sub_id = next_node_id
        next_node_id += 1
    else:
        sub_id = node_id_map[subj]

    if pred in objectProperty:
        # The predicate is an object property: it becomes an edge.
        # Look up the object node, creating it the first time it is seen.
        if obj not in node_id_map:
            node_id_map[obj] = next_node_id
            node_map[next_node_id] = {"id": next_node_id, "property": {"uri": obj}}
            obj_id = next_node_id
            next_node_id += 1
        else:
            obj_id = node_id_map[obj]

        # Emit each (predicate, source, destination) edge only once.
        edge_key = (pred, sub_id, obj_id)
        if edge_key not in edge_id_map:
            edge_id_map[edge_key] = next_edge_id
            edge_map[next_edge_id] = {
                "id": next_edge_id,
                "label": objectProperty[pred],
                "src": sub_id,
                "dst": obj_id,
                "property": {"uri": pred},
            }
            next_edge_id += 1
    elif pred in dataTypeProperty:
        # The predicate is a datatype property: store it on the subject node.
        # NOTE: a repeated property on the same subject overwrites the
        # previously stored value.
        node_map[sub_id]["property"][dataTypeProperty[pred]] = obj
    elif pred == RDF_TYPE_URI:
        # rdf:type becomes a node label. The predicate is matched exactly so
        # that unrelated predicates whose URI merely contains "type" are not
        # mistaken for rdf:type.
        # Classes that were not declared in the ontology fall back to the
        # text after the last '#' instead of raising KeyError.
        label = class_labels.get(obj, obj.split("#")[-1])
        labels = node_map[sub_id].setdefault("label", [])
        if label not in labels:
            labels.append(label)
    else:
        # Unhandled predicate: report the triple.
        print("sub:", subj, "pred:", pred, "obj:", obj)


In [56]:
import json
import os

# Create the output directory if it does not exist yet; exist_ok=True makes
# this a no-op when it is already there.
os.makedirs(dir_path, exist_ok=True)

# Convert node_map to a list of node records.
node_list = list(node_map.values())

# Convert edge_map to a list of edge records.
edge_list = list(edge_map.values())

# Write node_list as JSON.
# ensure_ascii=False emits non-ASCII characters as-is, so the file is opened
# explicitly as UTF-8 rather than with the platform's default encoding.
with open(os.path.join(dir_path, "nodes.json"), "w", encoding="utf-8") as nodes_file:
    json.dump(node_list, nodes_file, ensure_ascii=False, indent=4)

# Write edge_list as JSON (same encoding considerations as above).
with open(os.path.join(dir_path, "edges.json"), "w", encoding="utf-8") as edges_file:
    json.dump(edge_list, edges_file, ensure_ascii=False, indent=4)
