Skip to content

Commit

Permalink
Merge branch 'master' of github.com:trungdong/prov
Browse files Browse the repository at this point in the history
  • Loading branch information
trungdong committed Aug 21, 2014
2 parents 9eef38e + e521df2 commit f8b1e33
Show file tree
Hide file tree
Showing 55 changed files with 2,805 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Expand Up @@ -11,7 +11,7 @@ before_install:
# Install packages
install:
- pip install -r requirements.txt
- pip install coverage coveralls pydot
- pip install coverage coveralls pydot lxml

# Run test
script:
Expand Down
41 changes: 41 additions & 0 deletions cla/krischer.rst
@@ -0,0 +1,41 @@
========================================
Individual Contributor License Agreement
========================================

Thank you for Your interest in contributing to `the prov package <https://pypi.python.org/pypi/prov>`_. This document clarifies the terms under which You, the person listed below, may make Contributions which may include without limitation, software, bug fixes, configuration changes, documentation, or any other materials to `the PROV Python package project <https://github.com/trungdong/prov>`_ owned or managed by the University of Southampton.

Please complete the following information about **You** and the **Contributions**. If You have questions about these terms, please contact us at tdh@ecs.soton.ac.uk.

You accept and agree to the following terms and conditions for Your present and future Contributions submitted to `the PROV Python package project <https://github.com/trungdong/prov>`_. Except for the license granted herein to the University of Southampton, You reserve all right, title, and interest in and to Your Contributions.

********
Licenses
********

The PROV Python package project (code, documentation, and any other materials) is released under the terms of the MIT license as specified in the project's LICENSE file.

*****************
You certify that:
*****************

(a) Your Contributions are created in whole or in part by You and You have the right to submit it under the designated license; or

(b) Your Contributions are based upon previous work that, to the best of your knowledge, is covered under an appropriate open source license and You have the right under that license to submit that work with modifications, whether created in whole or in part by You, under the designated license; or

(c) Your Contributions are provided directly to You by some other person who certified (a) or (b) and You have not modified them.

(d) You understand and agree that the PROV Python package and Your Contributions are public and that a record of the Contributions (including all metadata and personal information You submit with them) is maintained indefinitely and may be redistributed consistent with the University of Southampton's policies and the requirements of the MIT license where they are relevant.

(e) You are granting Your Contributions to the University of Southampton under the terms of the MIT open source license. Please complete the following information to indicate your agreement.


Full name: Lion Krischer
************************
Github account: `krischer <https://github.com/krischer>`_
*********************************************************

Please type "I AGREE" below to indicate you agree to these terms. Your full name and Github account will be publicly available.

Confirmation:
*********************
I AGREE
49 changes: 49 additions & 0 deletions prov/constants.py
Expand Up @@ -8,6 +8,7 @@

XSD = Namespace('xsd', 'http://www.w3.org/2001/XMLSchema#')
PROV = Namespace('prov', 'http://www.w3.org/ns/prov#')
XSI = Namespace('xsi', 'http://www.w3.org/2001/XMLSchema-instance')

# C1. Entities/Activities
PROV_ENTITY = PROV['Entity']
Expand Down Expand Up @@ -59,6 +60,54 @@
PROV_BUNDLE: u'bundle',
}

# Records that are defined as subtypes in PROV-N but appear as top-level
# types in, for example, PROV-XML also need a mapping to a PROV-N name.
# NOTE: the key for software agents is prov:SoftwareAgent; the previous
# spelling 'SofwareAgent' could never match that qualified name.
ADDITIONAL_N_MAP = {
    PROV['Revision']: u'wasRevisionOf',
    PROV['Quotation']: u'wasQuotedFrom',
    PROV['PrimarySource']: u'hadPrimarySource',
    PROV['SoftwareAgent']: u'softwareAgent',
    PROV['Person']: u'person',
    PROV['Organization']: u'organization',
    PROV['Plan']: u'plan',
    PROV['Collection']: u'collection',
    PROV['EmptyCollection']: u'emptyCollection',
}

# Maps qualified names from the PROV namespace to their base class. A name
# that has no base class maps to itself. This is needed, for example, by the
# PROV-XML (de)serializer, where extended types are used a lot.
PROV_BASE_CLS = {
    PROV_ENTITY: PROV_ENTITY,
    PROV_ACTIVITY: PROV_ACTIVITY,
    PROV_GENERATION: PROV_GENERATION,
    PROV_USAGE: PROV_USAGE,
    PROV_COMMUNICATION: PROV_COMMUNICATION,
    PROV_START: PROV_START,
    PROV_END: PROV_END,
    PROV_INVALIDATION: PROV_INVALIDATION,
    PROV_DERIVATION: PROV_DERIVATION,
    # Specialised derivations.
    PROV['Revision']: PROV_DERIVATION,
    PROV['Quotation']: PROV_DERIVATION,
    PROV['PrimarySource']: PROV_DERIVATION,
    PROV_AGENT: PROV_AGENT,
    # Specialised agents.
    PROV['SoftwareAgent']: PROV_AGENT,
    PROV['Person']: PROV_AGENT,
    PROV['Organization']: PROV_AGENT,
    PROV_ATTRIBUTION: PROV_ATTRIBUTION,
    PROV_ASSOCIATION: PROV_ASSOCIATION,
    PROV['Plan']: PROV_ENTITY,
    PROV_DELEGATION: PROV_DELEGATION,
    PROV_INFLUENCE: PROV_INFLUENCE,
    PROV_ALTERNATE: PROV_ALTERNATE,
    PROV_SPECIALIZATION: PROV_SPECIALIZATION,
    PROV_MENTION: PROV_MENTION,
    PROV['Collection']: PROV_ENTITY,
    PROV['EmptyCollection']: PROV_ENTITY,
    PROV_MEMBERSHIP: PROV_MEMBERSHIP,
    PROV_BUNDLE: PROV_ENTITY,
}

# Identifiers for PROV's attributes
PROV_ATTR_ENTITY = PROV['entity']
PROV_ATTR_ACTIVITY = PROV['activity']
Expand Down
36 changes: 31 additions & 5 deletions prov/dot.py
Expand Up @@ -16,14 +16,14 @@
__email__ = 'trungdong@donggiang.com'

import cgi
from datetime import datetime
import pydot

from prov.model import (
ProvBundle, PROV_ACTIVITY, PROV_AGENT, PROV_ALTERNATE, PROV_ASSOCIATION, PROV_ATTRIBUTION, PROV_BUNDLE,
PROV_COMMUNICATION, PROV_DERIVATION, PROV_DELEGATION, PROV_ENTITY, PROV_GENERATION, PROV_INFLUENCE,
PROV_INVALIDATION, PROV_END, PROV_MEMBERSHIP, PROV_MENTION, PROV_SPECIALIZATION, PROV_START, PROV_USAGE,
Identifier, PROV_ATTRIBUTE_QNAMES
)
Identifier, PROV_ATTRIBUTE_QNAMES, sorted_attributes)


# Visual styles for various elements (nodes) and relations (edges)
Expand Down Expand Up @@ -105,12 +105,17 @@ def _attach_attribute_annotation(node, record):
if not attributes:
return # No attribute to display

# Sort the attributes.
attributes = sorted_attributes(record.get_type(), attributes)

ann_rows = [ANNOTATION_START_ROW]
ann_rows.extend(
ANNOTATION_ROW_TEMPLATE % (
attr.uri, cgi.escape(unicode(attr)),
' href=\"%s\"' % value.uri if isinstance(value, Identifier) else '',
cgi.escape(unicode(value)))
cgi.escape(unicode(value)
if not isinstance(value, datetime) else
unicode(value.isoformat())))
for attr, value in attributes
)
ann_rows.append(ANNOTATION_END_ROW)
Expand All @@ -123,7 +128,18 @@ def _add_bundle(bundle):
count[2] += 1
subdot = pydot.Cluster(graph_name='c%d' % count[2], URL='"%s"' % bundle.identifier.uri)
if use_labels:
subdot.set_label('"%s"' % unicode(bundle.label))
if bundle.label == bundle.identifier:
bundle_label = '"%s"' % unicode(bundle.label)
else:
# Fancier label if both are different. The label will be
# the main node text, whereas the identifier will be a
# kind of suptitle.
bundle_label = ('<%s<br />'
'<font color="#333333" point-size="10">'
'%s</font>>')
bundle_label = bundle_label % (unicode(bundle.label),
unicode(bundle.identifier))
subdot.set_label('"%s"' % unicode(bundle_label))
else:
subdot.set_label('"%s"' % unicode(bundle.identifier))
_bundle_to_dot(subdot, bundle)
Expand All @@ -134,7 +150,17 @@ def _add_node(record):
count[0] += 1
node_id = 'n%d' % count[0]
if use_labels:
node_label = '"%s"' % unicode(record.label)
if record.label == record.identifier:
node_label = '"%s"' % unicode(record.label)
else:
# Fancier label if both are different. The label will be
# the main node text, whereas the identifier will be a
# kind of suptitle.
node_label = ('<%s<br />'
'<font color="#333333" point-size="10">'
'%s</font>>')
node_label = node_label % (unicode(record.label),
unicode(record.identifier))
else:
node_label = '"%s"' % unicode(record.identifier)

Expand Down
70 changes: 66 additions & 4 deletions prov/model.py
Expand Up @@ -47,6 +47,14 @@ def parse_xsd_datetime(value):
pass
return None

def parse_boolean(value):
    """
    Interpret the textual form of a boolean, ignoring letter case.

    :param value: The string to interpret.
    :return: ``False`` for "false" or "0", ``True`` for "true" or "1", and
        ``None`` for any other text.
    """
    known_literals = {
        "false": False,
        "0": False,
        "true": True,
        "1": True,
    }
    # dict.get() yields None for unrecognised text.
    return known_literals.get(value.lower())

DATATYPE_PARSERS = {
datetime.datetime: parse_xsd_datetime,
}
Expand All @@ -58,7 +66,7 @@ def parse_xsd_datetime(value):
XSD_DOUBLE: float,
XSD_LONG: long,
XSD_INT: int,
XSD_BOOLEAN: bool,
XSD_BOOLEAN: parse_boolean,
XSD_DATETIME: parse_xsd_datetime,
XSD_ANYURI: Identifier
}
Expand Down Expand Up @@ -97,7 +105,10 @@ def __init__(self, value, datatype=None, langtag=None):
if datatype is None:
logger.debug('Assuming prov:InternationalizedString as the type of "%s"@%s' % (value, langtag))
datatype = PROV["InternationalizedString"]
elif datatype != PROV["InternationalizedString"] and datatype != XSD_STRING:
# PROV JSON states that the type field must not be set when
# using the lang attribute and PROV XML requires it to be an
# internationalized string.
elif datatype != PROV["InternationalizedString"]:
logger.warn(
'Invalid data type (%s) for "%s"@%s, overridden as prov:InternationalizedString.' %
(datatype, value, langtag)
Expand Down Expand Up @@ -265,6 +276,15 @@ def add_attributes(self, attributes):
if isinstance(attributes, dict):
# Converting the dictionary into a list of tuples (i.e. attribute-value pairs)
attributes = attributes.items()

# Check if one of the attributes specifies that the current type
# is a collection. In that case multiple attributes of the same
# type are allowed.
if PROV_ATTR_COLLECTION in [_i[0] for _i in attributes]:
is_collection = True
else:
is_collection = False

for attr_name, original_value in attributes:
if original_value is None:
continue
Expand All @@ -285,7 +305,8 @@ def add_attributes(self, attributes):
if value is None:
raise ProvException(u'Invalid value for attribute %s: %s' % (attr, original_value))

if attr in PROV_ATTRIBUTES and self._attributes[attr]:
if not is_collection and attr in PROV_ATTRIBUTES and \
self._attributes[attr]:
existing_value = first(self._attributes[attr])
if value != existing_value:
raise ProvException(u'Cannot have more than one value for attribute %s' % attr)
Expand Down Expand Up @@ -596,7 +617,7 @@ def get_type(self):
}


DEFAULT_NAMESPACES = {'prov': PROV, 'xsd': XSD}
DEFAULT_NAMESPACES = {'prov': PROV, 'xsd': XSD, 'xsi': XSI}


# Bundle
Expand Down Expand Up @@ -1424,3 +1445,44 @@ def deserialize(source=None, content=None, format='json', **args):
else:
with open(source) as f:
return serializer.deserialize(f, **args)


def sorted_attributes(element, attributes):
    """
    Return the attributes arranged in the order required by PROV-XML.

    The formal attributes of the record type come first (in their declared
    order), followed by label, location, role, type and value. Anything
    left over is appended in alphabetical order.

    :param element: The prov element used to look up the record class and
        therefore the formal attribute order.
    :param attributes: Iterable of (attribute, value) pairs to sort. It is
        copied and not modified.
    :return: A new list holding the sorted (attribute, value) pairs.
    """
    def sort_key(pair):
        # The PROV-XML specification talks about alphabetical sorting. It is
        # interpreted here as sorting by the tag (including its prefix)
        # first and then by the text, which also includes the namespace
        # prefix if one is given.
        name, value = pair[0], pair[1]
        return (unicode(name),
                unicode(value.value if hasattr(value, "value") else value))

    remaining = list(attributes)
    order = list(PROV_REC_CLS[element].FORMAL_ATTRIBUTES)

    # Label, location, role, type and value follow the formal attributes;
    # this is the same for every element.
    order += [PROV_LABEL, PROV_LOCATION, PROV_ROLE, PROV_TYPE, PROV_VALUE]

    result = []
    for wanted in order:
        # Split what is left into the pairs for this attribute and the rest.
        matching, leftover = [], []
        for pair in remaining:
            if pair[0] != wanted:
                leftover.append(pair)
            else:
                matching.append(pair)
        remaining = leftover
        result.extend(sorted(matching, key=sort_key))

    # According to the spec, the other attributes have a fixed alphabetical
    # order.
    result.extend(sorted(remaining, key=sort_key))
    return result
4 changes: 3 additions & 1 deletion prov/serializers/__init__.py
Expand Up @@ -20,9 +20,11 @@ class Registry:
@staticmethod
def load_serializers():
from prov.serializers.provjson import ProvJSONSerializer
from prov.serializers.provxml import ProvXMLSerializer

Registry.serializers = {
'json': ProvJSONSerializer
'json': ProvJSONSerializer,
'xml': ProvXMLSerializer
}


Expand Down
16 changes: 16 additions & 0 deletions prov/serializers/provjson.py
Expand Up @@ -9,7 +9,10 @@

from collections import defaultdict
import datetime
import io
import json
import platform
import StringIO
from prov import Serializer, Error
from prov.constants import *
from prov.model import Literal, Identifier, QualifiedName, XSDQName, Namespace, ProvDocument, ProvBundle, \
Expand Down Expand Up @@ -44,6 +47,19 @@ def get_anon_id(self, obj, local_prefix="id"):

class ProvJSONSerializer(Serializer):
def serialize(self, stream, **kwargs):
    """
    Write ``self.document`` to ``stream`` as PROV-JSON.

    :param stream: Target stream. ``io.StringIO`` and ``io.BytesIO``
        objects are filled through an intermediate buffer; any other
        object is handed directly to ``json.dump``.
    :param kwargs: Extra keyword arguments forwarded to ``json.dump``.
    """
    if not isinstance(stream, (io.StringIO, io.BytesIO)):
        json.dump(self.document, stream, cls=ProvJSONEncoder, **kwargs)
        return

    # NOTE(review): presumably the io streams reject the string chunks that
    # json.dump writes under Python 2, hence the temporary buffer - confirm.
    buf = StringIO.StringIO()
    try:
        json.dump(self.document, buf, cls=ProvJSONEncoder, **kwargs)
        buf.seek(0, 0)
        if isinstance(stream, io.BytesIO):
            # Byte streams receive the UTF-8 encoded text.
            payload = buf.read().encode('utf-8')
        else:
            # Text streams receive a unicode object.
            payload = unicode(buf.read())
        stream.write(payload)
    finally:
        buf.close()

def deserialize(self, stream, **kwargs):
Expand Down

0 comments on commit f8b1e33

Please sign in to comment.