# Tracking a Term through the Collection & Thesaurus

Taking the term 'marron' (which refers to groups of people in the Americas; [Thesaurus link](https://hdl.handle.net/20.500.11840/termmaster3534). [Wikipedia link](https://nl.wikipedia.org/wiki/Marrons)) as an example, this notebook explores how a term can be tracked across both the collection and the thesaurus.

What do we want to do here? *to be expanded*

 - 

In [11]:
import glob
from tqdm import tqdm

import numpy.random as rand

import rdflib
from rdflib import Graph
from rdflib import URIRef

def load_graph_from_dir(d, until=-1, file_ext="rdf", randomise=False):
    file_listing = glob.glob(f"{d}/*.{file_ext}")
    file_listing = rand.permutation(file_listing) if randomise else sorted(file_listing)
    file_listing = file_listing[:until] # there are 1570 files in /objects, loop below has 1.5 it/s so takes 15+min
        
    if len(file_listing) == 0:
        raise ValueError(f"taking {until} files from directory /{d}/ somehow not possible, listing empty!")
    
    graph = Graph()
    for path in tqdm(file_listing, 
                     desc=f"Parsing{' random' if randomise else ''} files from /{d}"): 
        graph.parse(path, format="xml")
    return graph

In [21]:
obj_graph = load_graph_from_dir("objects", until=10, randomise=False)
thesaurus = load_graph_from_dir("thesaurus", randomise=False)

Parsing files from /objects: 100%|██████████| 10/10 [00:07<00:00,  1.40it/s]
Parsing files from /thesaurus: 100%|██████████| 43/43 [00:16<00:00,  2.62it/s]


In [22]:
granman_photo = rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/206868')

granman_triples = list(obj_graph.triples((granman_photo, None, None))) + list(obj_graph.triples((None, None, granman_photo)))

In [34]:
marron = rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/termmaster3534')

marron_obj_graph = list(obj_graph.triples((marron, None, None))) + list(obj_graph.triples((None, None, marron)))
marron_thesaurus = list(thesaurus.triples((marron, None, None))) + list(thesaurus.triples((None, None, marron)))


In [35]:
marron_obj_graph

[(rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/206834'),
  rdflib.term.URIRef('http://purl.org/dc/elements/1.1/subject'),
  rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/termmaster3534')),
 (rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/206922'),
  rdflib.term.URIRef('http://purl.org/dc/elements/1.1/subject'),
  rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/termmaster3534')),
 (rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/206864'),
  rdflib.term.URIRef('http://purl.org/dc/elements/1.1/subject'),
  rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/termmaster3534')),
 (rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/206899'),
  rdflib.term.URIRef('http://purl.org/dc/elements/1.1/subject'),
  rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/termmaster3534')),
 (rdflib.term.URIRef('https://hdl.handle.net/20.500.11840/206919'),
  rdflib.term.URIRef('http://purl.org/dc/elements/1.1/subject'),
  rdflib.term.URIRef('https