In [42]:
from convert_base import str_to_uri, RdfRepresent, QuoteOptions
from rdflib import Namespace, Literal, Graph, BNode
import pandas as pd
import time

# Base namespace of the vocabulary used to describe directly-follows graphs.
DFG = Namespace("http://rdf.org/dfg#")

# Vocabulary terms keyed by their local name. Item access (DFG[name]) is used
# for every term because 'from' is a Python keyword and cannot be written as
# an attribute (DFG.from).
dfg_terms = {
    local_name: DFG[local_name]
    for local_name in ('freq', 'from', 'to', 'next', 'nil')
}

def convert_dfg_rdf(dfg, save_to, ns, format):
    """Convert a directly-follows graph (DFG) into RDF and write it to a file.

    Every ``(src, tgt) -> freq`` entry of ``dfg`` becomes a link between the
    two activities (see ``add_link``). Activities that never occur as the
    source of an edge are treated as endpoints and get one extra link to
    ``dfg_terms['nil']``.

    Conversion, save and total wall-clock times are printed in milliseconds.

    Args:
        dfg: mapping whose keys are ``(src, tgt)`` pairs and whose values are
            the frequency of that edge.
        save_to: destination handed to ``Graph.serialize``.
        ns: namespace handed to ``str_to_uri`` when minting activity URIs.
        format: an ``RdfRepresent`` member selecting how links are modelled;
            forwarded to ``add_link``. This is NOT the serialization format.
    """
    start = time.time_ns()
    start_conv = time.time_ns()

    g = Graph()

    # Activities with at least one outgoing edge. An activity that is not in
    # this set is an endpoint. (The previous `tgt not in df['src']` check
    # tested the Series *index*, i.e. integer row labels, so it was true for
    # every activity name and produced a nil link for every single edge.)
    sources = {src for ((src, _tgt), _freq) in dfg.items()}

    # Endpoints that already received their nil link, so that an endpoint
    # reached through several edges is only terminated once.
    terminated = set()

    for ((src, tgt), freq) in dfg.items():
        src_activ = str_to_uri(src, ns, QuoteOptions.REMOVE_SPECIAL)
        tgt_activ = str_to_uri(tgt, ns, QuoteOptions.REMOVE_SPECIAL)

        add_link(src_activ, tgt_activ, g, format, freq=freq)

        if tgt not in sources and tgt not in terminated:
            terminated.add(tgt)
            add_link(tgt_activ, dfg_terms['nil'], g, format)

    end_conv = time.time_ns()
    print("conversion time (ms):", (end_conv-start_conv)/1000000)

    start_save = time.time_ns()

    # NOTE(review): no serialization format is passed, so rdflib's default is
    # used regardless of the extension of `save_to` - confirm this is intended.
    g.serialize(destination=save_to)

    end_save = time.time_ns()
    print("save time (ms):", (end_save-start_save)/1000000)

    end = time.time_ns()
    print("total time (ms):", (end-start)/1000000)
        
        
def add_link(prior_activ, next_activ, g, format, freq=False):
    """Add one directly-follows link from ``prior_activ`` to ``next_activ`` to ``g``.

    The shape of the link depends on ``format``:

    * ``RdfRepresent.LINK_REIFIED``: a fresh blank node carries the
      ``from`` / ``to`` terms and, when ``freq`` is truthy, a ``freq`` literal.
    * ``RdfRepresent.LINK_PRED``: a single ``prior_activ next next_activ``
      triple; ``freq`` is ignored.

    Any other ``format`` value leaves the graph untouched.
    """
    if format == RdfRepresent.LINK_REIFIED:
        edge = BNode()
        g.add((edge, dfg_terms['from'], prior_activ))
        g.add((edge, dfg_terms['to'], next_activ))
        if not freq:
            return
        g.add((edge, dfg_terms['freq'], Literal(freq)))
        return

    if format == RdfRepresent.LINK_PRED:
        # cannot represent frequency in this case
        g.add((prior_activ, dfg_terms['next'], next_activ))

In [4]:
# Location of the exported event dictionary (a Python expression stored in a file).
dfg_path = "/Users/wvw/git/pm/compare_logs/tc pilot medical/data/Event Dictionaries/PMed_Reallocation_New2_Event Dictionary.py"

# NOTE(review): eval() executes whatever code the file contains - only use it
# on trusted files. If the file holds nothing but a plain dict literal,
# ast.literal_eval would be a safer replacement; confirm before switching.
# The context manager makes sure the file handle is closed after reading.
with open(dfg_path, "r") as dfg_file:
    dfg = eval(dfg_file.read())

In [43]:
from convert_log import convert_log_rdf, RdfRepresent
from rdflib import Namespace

# Namespace under which the activity URIs of this log are minted.
TC = Namespace("http://canada.org/tc#")

# Convert the loaded DFG using the reified-link representation and write the
# result to the logs folder.
convert_dfg_rdf(
    dfg=dfg,
    save_to="../logs/pilot_medical.n3",
    ns=TC,
    format=RdfRepresent.LINK_REIFIED,
)

quoted: Completed-AssessmentComplete
quoted: InProgress-Letter-Defer
quoted: Completed-AssessmentComplete
quoted: InProgress-Letter-Fit
quoted: InProgress-AssessorReviewing
quoted: Completed-AssessmentComplete
quoted: InProgress-AssessorReviewing
quoted: OnHoldPending-AwaitApplicant
quoted: InProgress-Assigned
quoted: ForAMO/RAMO
quoted: InProgress-ForFollow-upReview
quoted: Completed-AssessmentComplete
quoted: InProgress-ForFollow-upReview
quoted: InProgress-Letter-Defer
quoted: InProgress-Letter-Defer
quoted: Completed-AssessmentComplete
quoted: InProgress-Letter-Defer
quoted: OnHoldPending-AwaitApplicant
quoted: InProgress-Letter-Fit
quoted: Completed-AssessmentComplete
quoted: InProgress-Letter-Fit
quoted: OnHoldPending-AwaitApplicant
quoted: MedicalExamDate
quoted: CreateFile
quoted: MedicalExamDate
quoted: Received
quoted: CreateFile
quoted: Received
quoted: ForAMO/RAMO
quoted: Completed-AssessmentComplete
quoted: ForAMO/RAMO
quoted: InProgress-AssessorReviewing
quoted: ForAMO/RA