In [120]:
# Imports
import json
import logging
import os
import uuid
from collections import OrderedDict, defaultdict
from pprint import pprint
from urllib.parse import quote, unquote

from rdflib import Graph, Namespace
from rdflib.namespace import RDF, RDFS, OWL, XSD
from rdflib.term import Literal, URIRef#, _castPythonToLiteral

In [50]:
### Constants
# Input files. The defaults are the original developer-machine locations; set
# VERIS_INCIDENT_FILE / VERIS_SCHEMA_FILE in the environment to run the
# notebook on another machine without editing this cell.
incident_filename = os.environ.get(
    "VERIS_INCIDENT_FILE",
    "/Users/v685573/Documents/Development/VCDB/data/json/validated/61E18813-E1B8-424E-BD4A-BA4D665A4A6F.json",
)
schema_filename = os.environ.get(
    "VERIS_SCHEMA_FILE",
    "/Users/v685573/Documents/Development/vzrisk/flow/verisc-owl.json",
)
# Base IRIs used to mint attack-flow and VERIS terms.
attack_flow_namespace = "https://vz-risk.github.io/flow/attack-flow#"
veris_namespace = "https://veriscommunity.net/attack-flow#"



In [174]:
### Functions
def recurse_instances(d, lbl, owl=None):
    """Create one named individual in ``owl`` per enumerated ``variety`` value.

    Walks ``d`` depth-first. Nested dicts, and dicts stored inside lists, are
    descended into with their key appended to ``lbl``. Every non-dict item of
    a list stored under the key ``"variety"`` becomes an individual named
    ``<item>_<n>`` (URL-quoted; ``n`` counts occurrences of ``item`` through
    ``enum_iterator``) and receives these triples:

    * ``rdf:type owl:NamedIndividual``
    * ``rdf:type`` the class for the dotted ``lbl`` path -- ``anchor_map`` is
      consulted first, otherwise the path is minted in ``veris_ns``
    * ``af_ns['flow']`` pointing at ``flowURI``
    * when ``lbl`` starts with ``"action"``: a ``description.action`` literal
      taken from the ``notes`` entry next to the variety list, and a
      ``logic_operator`` literal ``"OR"``

    Scalars and non-dict list items under any other key are ignored.

    Reads the module-level names ``i_ns``, ``af_ns``, ``veris_ns``,
    ``anchor_map``, ``enum_iterator`` and ``flowURI``; they must be defined
    before this function is called.

    Args:
        d: Dictionary (incident or sub-tree of one) to walk.
        lbl: Tuple of keys leading from the incident root to ``d``.
        owl: Graph-like object exposing ``add(triple)``. When omitted a new
            ``Graph`` is created.

    Returns:
        The graph that received the triples.

    Raises:
        Exception: anything raised while handling an entry is re-raised after
            the offending label/key/value has been printed.
    """
    if owl is None:
        # Resolved at call time: the former ``owl=owl`` default required a
        # global ``owl`` to exist when this cell was first executed.
        owl = Graph()

    class_path = ".".join(lbl)
    child_is_action = bool(lbl) and lbl[0] == "action"

    for k, v in d.items():
        try:
            if isinstance(v, dict):
                recurse_instances(v, lbl + (k,), owl)
            elif isinstance(v, list):
                for item in v:
                    if isinstance(item, dict):
                        recurse_instances(item, lbl + (k,), owl)
                    elif k == "variety":
                        # Number the individual so repeated values stay distinct.
                        enum_iterator[item] += 1
                        instance_name = quote(item + "_" + str(enum_iterator[item]))
                        individual = i_ns[instance_name]

                        # Declare the individual and type it with its parent class.
                        owl.add((individual, RDF.type, OWL.NamedIndividual))
                        owl.add((
                            individual,
                            RDF.type,
                            anchor_map.get(class_path, veris_ns[quote(class_path)]),
                        ))

                        # Connect the individual to the flow.
                        owl.add((individual, af_ns['flow'], flowURI))

                        if child_is_action:
                            # ``d`` is the dict that owns this variety list, so
                            # its ``notes`` are read directly rather than via
                            # the global ``incident`` (which may be a different
                            # incident than the one being converted).
                            # The fallback text is kept as originally spelled.
                            owl.add((
                                individual,
                                af_ns["description.action"],
                                Literal(d.get("notes", "no decription")),
                            ))
                            owl.add((individual, af_ns['logic_operator'], Literal("OR")))
        except Exception:
            # Print the position in the incident, then let the error propagate.
            print("label: {0}, key: {1}, value: {2}".format(lbl, k, v))
            raise

    return owl
                        
                        
def recurse_properties(d, lbl, owl=None):
    """Add the incident's property values to ``owl``.

    Walks ``d`` depth-first; ``lbl`` is the tuple of keys leading to ``d`` and
    the dotted form of ``lbl + (key,)`` names each property.

    * dict values, and dicts inside lists, are descended into;
    * ``variety`` entries are skipped (``recurse_instances`` handles them);
    * each other list item is linked to ``flowURI`` and, when exactly one
      instance is registered in ``instances`` for the parent path, attached to
      that instance through the property;
    * scalar values whose dotted path is in ``exclusions`` are skipped;
    * remaining scalars are written as object-property or datatype-property
      triples depending on whether the quoted path appears in ``obj_props``
      or ``data_props``; anything else is logged as a warning.

    Reads the module-level names ``veris_ns``, ``af_ns``, ``flowURI``,
    ``instances``, ``obj_props`` and ``data_props``; they must be defined
    before this function is called.

    Args:
        d: Dictionary (incident or sub-tree of one) to walk.
        lbl: Tuple of keys leading from the incident root to ``d``.
        owl: Graph-like object exposing ``add(triple)``. When omitted a new
            ``Graph`` is created.

    Returns:
        The graph that received the triples.

    Raises:
        Exception: anything raised while handling an entry is re-raised after
            the offending label/key/value has been printed.
    """
    exclusions = ["incident_id", "plus.master_id", "plus.created", "plus.analyst", "summary"]

    if owl is None:
        # Resolved at call time: the former ``owl=owl`` default required a
        # global ``owl`` to exist when this cell was first executed.
        owl = Graph()

    # Everything below depends only on ``lbl``, so resolve it once per call.
    parent_path = ".".join(lbl)
    owners = instances.get(str(veris_ns[quote(parent_path)]), ())
    # Only an unambiguous (single) instance can own the properties found here.
    # URIRef() makes the subject a proper RDF term even if a plain string was
    # stored in ``instances``.
    sole_instance = URIRef(owners[0]) if len(owners) == 1 else None

    for k, v in d.items():
        try:
            prop_path = ".".join(lbl + (k,))

            if isinstance(v, dict):
                owl = recurse_properties(v, lbl + (k,), owl)

            elif k == "variety":
                pass  # varieties are all instances and should already be handled

            elif isinstance(v, list):
                for item in v:
                    if isinstance(item, dict):
                        recurse_properties(item, lbl + (k,), owl)
                    else:
                        value_node = veris_ns[quote(".".join(lbl + (k, str(item))))]
                        # define its flow
                        owl.add((value_node, af_ns['flow'], flowURI))
                        # if we know what instance it goes to, connect it.
                        if sole_instance is not None:
                            owl.add((sole_instance, veris_ns[quote(prop_path)], value_node))

            elif prop_path in exclusions:
                # Compare the property path only; including the value in the
                # path (as before) meant no exclusion could ever match.
                pass

            else:
                prop_name = quote(prop_path)
                prop = veris_ns[prop_name]
                if prop_name in obj_props:
                    value_node = veris_ns[quote(".".join(lbl + (k, str(v))))]
                    if sole_instance is not None:
                        owl.add((sole_instance, prop, value_node))
                    else:
                        # NOTE(review): this fallback hangs the value off the
                        # grandparent path (lbl[:-1]) while the datatype branch
                        # below uses lbl itself -- kept as found, confirm intent.
                        subject = veris_ns[quote(".".join(lbl[:-1]))]
                        owl.add((subject, af_ns['flow'], flowURI))
                        owl.add((subject, prop, value_node))
                elif prop_name in data_props:
                    if sole_instance is not None:
                        owl.add((sole_instance, prop, Literal(v)))
                    else:
                        subject = veris_ns[quote(parent_path)]
                        owl.add((subject, af_ns['flow'], flowURI))
                        owl.add((subject, prop, Literal(v)))
                else:
                    logging.warning("{0} is not in the object property or datatype property lists.".format(prop_path))

        except Exception:
            # Print the position in the incident, then let the error propagate.
            print("label: {0}, key: {1}, value: {2}".format(lbl, k, v))
            raise

    return owl
                              
                              

In [175]:
def incident_to_owl(
    incident,
    schema_filename="/Users/v685573/Documents/Development/vzrisk/flow/verisc-owl.jsonLD",
    attack_flow_namespace="https://vz-risk.github.io/flow/attack-flow#",
    veris_namespace="https://veriscommunity.net/attack-flow#",
):
    """Convert one incident dictionary into an rdflib graph of attack-flow triples.

    Steps:
      1. Build the instance, attack-flow and VERIS namespaces.
      2. Parse the schema at ``schema_filename`` and collect the local names
         (the part after ``#``) of its object and datatype properties.
      3. Create the flow individual with its name, created, author and
         description literals.
      4. Run ``recurse_instances`` to add the ``variety`` individuals.
      5. Index those individuals by class and run ``recurse_properties``.

    Side effects: assigns the module-level names ``obj_props``, ``data_props``,
    ``enum_iterator``, ``anchor_map``, ``flowURI``, ``instances``, ``i_ns``,
    ``af_ns`` and ``veris_ns``, because ``recurse_instances`` and
    ``recurse_properties`` read them as globals.

    NOTE(review): the default ``schema_filename`` ends in ``.jsonLD`` while the
    notebook-level constant of the same name ends in ``.json`` -- the value
    this function always used is kept as the default; confirm which is right.

    Args:
        incident: Incident dictionary. ``victim``, ``plus.master_id``,
            ``incident_id`` and ``summary`` are accessed unconditionally.
        schema_filename: Path of the schema file to parse.
        attack_flow_namespace: Base IRI for attack-flow terms.
        veris_namespace: Base IRI for VERIS terms.

    Returns:
        The ``Graph`` holding the incident's triples.
    """
    global obj_props, data_props, enum_iterator, anchor_map, flowURI, instances
    # The recursive helpers look these up as globals too.
    global i_ns, af_ns, veris_ns

    # create namespace from victim_id
    victim_id = incident['victim'].get('victim_id')
    if victim_id is None:
        victim_id = uuid.uuid4()
    # str(): a generated UUID object has no .lower() of its own.
    i_ns = Namespace("urn:absolute:" + quote(str(victim_id).lower()) + "#")
    af_ns = Namespace(attack_flow_namespace)
    veris_ns = Namespace(veris_namespace)

    # Prefixes used by the SPARQL below, passed explicitly so the queries do
    # not depend on what each graph happens to have bound.
    sparql_prefixes = {"rdf": RDF, "owl": OWL}

    # open veris schema
    veris = Graph()
    veris.parse(schema_filename)
    # Get object and data properties so we know which are which when parsing
    # them out of the incident.
    obj_rows = veris.query(
        """SELECT DISTINCT ?p WHERE { ?p rdf:type owl:ObjectProperty . }""",
        initNs=sparql_prefixes,
    )
    obj_props = [row[0].split("#")[1] for row in obj_rows]
    data_rows = veris.query(
        """SELECT DISTINCT ?p WHERE { ?p rdf:type owl:DatatypeProperty . }""",
        initNs=sparql_prefixes,
    )
    data_props = [row[0].split("#")[1] for row in data_rows]
    # all we needed were the property lists
    del veris

    # to number instances
    enum_iterator = defaultdict(int)

    # to map from veris_ns to attack flow ns
    anchor_map = {
        "action": af_ns["action"],
        "asset": af_ns["asset"],
        "extra": af_ns["property"]
    }

    # start the incident's graph
    owl = Graph()

    ### create any mandatory fields in AF
    # Create flow instance, flow id
    flowURI = i_ns[incident['plus']['master_id']]
    owl.add((flowURI, RDF.type, OWL.NamedIndividual))
    owl.add((flowURI, RDF.type, af_ns['attack-flow']))
    # flow name literal
    owl.add((flowURI, af_ns['name.attack-flow'], Literal(incident['incident_id'])))
    # flow created literal
    owl.add((flowURI, af_ns['created'], Literal(incident['plus'].get("created", "1970-01-01T01:00:00Z"))))
    # flow author literal
    owl.add((flowURI, af_ns['author'], Literal(incident['plus'].get("analyst", "Unknown"))))
    # flow description literal
    owl.add((flowURI, af_ns['description.attack-flow'], Literal(incident['summary'])))

    recurse_instances(incident, (), owl)

    # Index the individuals just created by their (non-NamedIndividual) class.
    # The graph being built is queried -- not a graph left over from an earlier
    # run of the notebook.
    typed_individuals = owl.query(
        """SELECT DISTINCT ?inst ?thing
        WHERE {
          ?inst rdf:type owl:NamedIndividual .
          ?inst rdf:type ?thing .
          FILTER (?thing != owl:NamedIndividual)
        }""",
        initNs=sparql_prefixes,
    )
    by_class = defaultdict(set)
    for inst, thing in typed_individuals:
        # Keep the term itself so it can be used directly as a triple subject.
        by_class[str(thing)].add(inst)
    instances = {k: list(v) for k, v in by_class.items()}

    recurse_properties(incident, (), owl)

    # Determine causal linkages between actions if possible (use value.chain and or single-action)
    # TODO

    return owl

In [176]:
# Read the incident JSON. The encoding is given explicitly so the result does
# not depend on the platform's default locale encoding.
with open(incident_filename, 'r', encoding="utf-8") as filehandle:
    incident = json.load(filehandle)

In [177]:
# Convert the loaded incident dictionary; incident_to_owl returns the graph it built.
oincident = incident_to_owl(incident)



In [178]:
# Let rdflib write the RDF/XML file itself: depending on the rdflib version,
# serialize() without a destination returns bytes or str, and bytes cannot be
# written to a text-mode file handle. The trailing ';' hides the return value.
oincident.serialize(
    destination="/Users/v685573/Documents/Development/vzrisk/flow/61E18813-E1B8-424E-BD4A-BA4D665A4A6F.owl",
    format="xml",
);

In [24]:
# Parse the file named by schema_filename into a fresh graph called `schema`.
schema = Graph()
schema.parse(schema_filename)

<Graph identifier=N745bdef112eb4a909071cbf235a1de1d (<class 'rdflib.graph.Graph'>)>

In [20]:
# NOTE(review): recurse_keys is not defined in any cell visible in this part of
# the notebook -- presumably it came from a removed or later cell; confirm
# before relying on Restart & Run All.
recurse_keys(incident, ())

{(),
 ('action', 'hacking'),
 ('actor', 'external'),
 ('asset',),
 ('attribute', 'availability'),
 ('attribute', 'confidentiality'),
 ('attribute', 'integrity'),
 ('discovery_method', 'external'),
 ('impact',),
 ('plus',),
 ('plus', 'attribute', 'confidentiality'),
 ('plus', 'timeline', 'notification'),
 ('timeline', 'incident'),
 ('victim',)}

In [15]:
# Display the incident dictionary loaded from incident_filename.
incident

{'action': {'hacking': {'variety': ['Unknown'],
   'vector': ['Web application']}},
 'actor': {'external': {'country': ['SY'],
   'motive': ['Ideology'],
   'notes': 'RedHack',
   'region': ['142145'],
   'variety': ['Activist']}},
 'asset': {'assets': [{'variety': 'S - Web application'}],
  'cloud': ['Unknown']},
 'attribute': {'availability': {'variety': ['Loss']},
  'confidentiality': {'data': [{'variety': 'Unknown'}],
   'data_disclosure': 'Unknown',
   'state': ['Unknown']},
  'integrity': {'notes': 'defacement',
   'variety': ['Modify data', 'Defacement']}},
 'discovery_method': {'external': {'variety': ['Actor disclosure']}},
 'impact': {'overall_rating': 'Unknown'},
 'incident_id': '61E18813-E1B8-424E-BD4A-BA4D665A4A6F',
 'plus': {'analysis_status': 'First pass',
  'analyst': 'whbaker',
  'attribute': {'confidentiality': {'credit_monitoring': 'Unknown'}},
  'created': '2013-09-19T15:55:00Z',
  'github': '219',
  'master_id': '61E18813-E1B8-424E-BD4A-BA4D665A4A6F',
  'modified':