Merge branch 'master' of github.com:trungdong/prov

trungdong · Aug 21, 2014 · f8b1e33 · f8b1e33
2 parents 9eef38e + e521df2
commit f8b1e33
Show file tree

Hide file tree

Showing 55 changed files with 2,805 additions and 16 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -11,7 +11,7 @@ before_install:
 # Install packages
 install:
   - pip install -r requirements.txt
-  - pip install coverage coveralls pydot
+  - pip install coverage coveralls pydot lxml
 
 # Run test
 script:

diff --git a/cla/krischer.rst b/cla/krischer.rst
@@ -0,0 +1,41 @@
+========================================
+Individual Contributor License Agreement
+========================================
+
+Thank you for Your interest in contributing to `the prov package <https://pypi.python.org/pypi/prov>`_. This document clarifies the terms under which You, the person listed below, may make Contributions which may include without limitation, software, bug fixes, configuration changes, documentation, or any other materials to `the PROV Python package project <https://github.com/trungdong/prov>`_ owned or managed by the University of Southampton.
+
+Please complete the following information about **You** and the **Contributions**. If You have questions about these terms, please contact us at tdh@ecs.soton.ac.uk.
+
+You accept and agree to the following terms and conditions for Your present and future Contributions submitted to `the PROV Python package project <https://github.com/trungdong/prov>`_. Except for the license granted herein to the University of Southampton, You reserve all right, title, and interest in and to Your Contributions.
+
+********
+Licenses
+********
+
+The PROV Python package project (code, documentation, and any other materials) are released under the terms of the MIT license as specified in the project's LICENSE file.
+
+*****************
+You certify that:
+*****************
+
+(a) Your Contributions are created in whole or in part by You and You have the right to submit it under the designated license; or
+
+(b) Your Contributions are based upon previous work that, to the best of your knowledge, is covered under an appropriate open source license and You have the right under that license to submit that work with modifications, whether created in whole or in part by You, under the designated license; or
+
+(c) Your Contributions are provided directly to You by some other person who certified (a) or (b) and You have not modified them.
+
+(d) You understand and agree that the PROV Python package and Your Contributions are public and that a record of the Contributions (including all metadata and personal information You submit with them) is maintained indefinitely and may be redistributed consistent with the University of Southampton's policies and the requirements of the MIT license where they are relevant.
+
+(e) You are granting Your Contributions to the University of Southampton under the terms of the MIT open source license. Please complete the following information to indicate your agreement.
+
+
+Full name: Lion Krischer
+************************
+Github account: `krischer <https://github.com/krischer>`_
+*********************************************************
+
+Please type "I AGREE" below to indicate you agree to these terms. Your full name and Github account will be publicly available.
+
+Confirmation:
+*********************
+I AGREE
diff --git a/prov/constants.py b/prov/constants.py
@@ -8,6 +8,7 @@
 
 XSD = Namespace('xsd', 'http://www.w3.org/2001/XMLSchema#')
 PROV = Namespace('prov', 'http://www.w3.org/ns/prov#')
+XSI = Namespace('xsi', 'http://www.w3.org/2001/XMLSchema-instance')
 
 #  C1. Entities/Activities
 PROV_ENTITY = PROV['Entity']
@@ -59,6 +60,54 @@
     PROV_BUNDLE:               u'bundle',
 }
 
+# Records defined as subtypes in PROV-N but as top-level types in, for
+# example, PROV XML also need a mapping.
+ADDITIONAL_N_MAP = {
+    PROV['Revision']:          u'wasRevisionOf',
+    PROV['Quotation']:         u'wasQuotedFrom',
+    PROV['PrimarySource']:     u'hadPrimarySource',
+    PROV['SoftwareAgent']:     u'softwareAgent',
+    PROV['Person']:            u'person',
+    PROV['Organization']:      u'organization',
+    PROV['Plan']:              u'plan',
+    PROV['Collection']:        u'collection',
+    PROV['EmptyCollection']:   u'emptyCollection',
+}
+
+# Maps qualified names from the PROV namespace to their base class. A name
+# without a base class maps to itself. This is needed, for example, by the
+# PROV XML (de)serializer, where extended types are used a lot.
+PROV_BASE_CLS = {
+    PROV_ENTITY:               PROV_ENTITY,
+    PROV_ACTIVITY:             PROV_ACTIVITY,
+    PROV_GENERATION:           PROV_GENERATION,
+    PROV_USAGE:                PROV_USAGE,
+    PROV_COMMUNICATION:        PROV_COMMUNICATION,
+    PROV_START:                PROV_START,
+    PROV_END:                  PROV_END,
+    PROV_INVALIDATION:         PROV_INVALIDATION,
+    PROV_DERIVATION:           PROV_DERIVATION,
+    PROV['Revision']:          PROV_DERIVATION,
+    PROV['Quotation']:         PROV_DERIVATION,
+    PROV['PrimarySource']:     PROV_DERIVATION,
+    PROV_AGENT:                PROV_AGENT,
+    PROV['SoftwareAgent']:     PROV_AGENT,
+    PROV['Person']:            PROV_AGENT,
+    PROV['Organization']:      PROV_AGENT,
+    PROV_ATTRIBUTION:          PROV_ATTRIBUTION,
+    PROV_ASSOCIATION:          PROV_ASSOCIATION,
+    PROV['Plan']:              PROV_ENTITY,
+    PROV_DELEGATION:           PROV_DELEGATION,
+    PROV_INFLUENCE:            PROV_INFLUENCE,
+    PROV_ALTERNATE:            PROV_ALTERNATE,
+    PROV_SPECIALIZATION:       PROV_SPECIALIZATION,
+    PROV_MENTION:              PROV_MENTION,
+    PROV['Collection']:        PROV_ENTITY,
+    PROV['EmptyCollection']:   PROV_ENTITY,
+    PROV_MEMBERSHIP:           PROV_MEMBERSHIP,
+    PROV_BUNDLE:               PROV_ENTITY
+}
+
 # Identifiers for PROV's attributes
 PROV_ATTR_ENTITY = PROV['entity']
 PROV_ATTR_ACTIVITY = PROV['activity']

diff --git a/prov/dot.py b/prov/dot.py
@@ -16,14 +16,14 @@
 __email__ = 'trungdong@donggiang.com'
 
 import cgi
+from datetime import datetime
 import pydot
 
 from prov.model import (
     ProvBundle, PROV_ACTIVITY, PROV_AGENT, PROV_ALTERNATE, PROV_ASSOCIATION, PROV_ATTRIBUTION, PROV_BUNDLE,
     PROV_COMMUNICATION, PROV_DERIVATION, PROV_DELEGATION, PROV_ENTITY, PROV_GENERATION, PROV_INFLUENCE,
     PROV_INVALIDATION, PROV_END, PROV_MEMBERSHIP, PROV_MENTION, PROV_SPECIALIZATION, PROV_START, PROV_USAGE,
-    Identifier, PROV_ATTRIBUTE_QNAMES
-)
+    Identifier, PROV_ATTRIBUTE_QNAMES, sorted_attributes)
 
 
 # Visual styles for various elements (nodes) and relations (edges)
@@ -105,12 +105,17 @@ def _attach_attribute_annotation(node, record):
             if not attributes:
                 return  # No attribute to display
 
+            # Sort the attributes.
+            attributes = sorted_attributes(record.get_type(), attributes)
+
             ann_rows = [ANNOTATION_START_ROW]
             ann_rows.extend(
                 ANNOTATION_ROW_TEMPLATE % (
                     attr.uri, cgi.escape(unicode(attr)),
                     ' href=\"%s\"' % value.uri if isinstance(value, Identifier) else '',
-                    cgi.escape(unicode(value)))
+                    cgi.escape(unicode(value)
+                               if not isinstance(value, datetime) else
+                               unicode(value.isoformat())))
                 for attr, value in attributes
             )
             ann_rows.append(ANNOTATION_END_ROW)
@@ -123,7 +128,18 @@ def _add_bundle(bundle):
             count[2] += 1
             subdot = pydot.Cluster(graph_name='c%d' % count[2], URL='"%s"' % bundle.identifier.uri)
             if use_labels:
-                subdot.set_label('"%s"' % unicode(bundle.label))
+                if bundle.label == bundle.identifier:
+                    bundle_label = '"%s"' % unicode(bundle.label)
+                else:
+                    # Fancier label if both are different. The label will be
+                    # the main node text, whereas the identifier will be a
+                    # kind of subtitle.
+                    bundle_label = ('<%s<br />'
+                                    '<font color="#333333" point-size="10">'
+                                    '%s</font>>')
+                    bundle_label = bundle_label % (unicode(bundle.label),
+                                                   unicode(bundle.identifier))
+                # Both branches above already produce a finished label: a
+                # double-quoted string or an angle-bracket-delimited markup
+                # label. Wrapping it in another pair of quotes would
+                # double-quote the former and turn the latter into literal
+                # text, so it is passed through unchanged.
+                subdot.set_label(bundle_label)
             else:
                 subdot.set_label('"%s"' % unicode(bundle.identifier))
             _bundle_to_dot(subdot, bundle)
@@ -134,7 +150,17 @@ def _add_node(record):
             count[0] += 1
             node_id = 'n%d' % count[0]
             if use_labels:
-                node_label = '"%s"' % unicode(record.label)
+                if record.label == record.identifier:
+                    node_label = '"%s"' % unicode(record.label)
+                else:
+                    # Fancier label if both are different. The label will be
+                    # the main node text, whereas the identifier will be a
+                    # kind of subtitle rendered below it in a smaller,
+                    # grey font.
+                    node_label = ('<%s<br />'
+                                  '<font color="#333333" point-size="10">'
+                                  '%s</font>>')
+                    node_label = node_label % (unicode(record.label),
+                                               unicode(record.identifier))
             else:
                 node_label = '"%s"' % unicode(record.identifier)
 

diff --git a/prov/model.py b/prov/model.py
@@ -47,6 +47,14 @@ def parse_xsd_datetime(value):
         pass
     return None
 
+def parse_boolean(value):
+    """
+    Convert a textual boolean into a Python ``bool``.
+
+    Accepts ``"true"``/``"1"`` and ``"false"``/``"0"``, ignoring case and
+    surrounding whitespace. A value that already is a ``bool`` is returned
+    unchanged.
+
+    :param value: The string (or ``bool``) to interpret.
+    :returns: ``True`` or ``False``, or ``None`` if the value cannot be
+        interpreted as a boolean.
+    """
+    if isinstance(value, bool):
+        return value
+    try:
+        token = value.strip().lower()
+    except AttributeError:
+        # Not a string-like object: report it as unparsable instead of
+        # letting the AttributeError escape.
+        return None
+    if token in ("true", "1"):
+        return True
+    if token in ("false", "0"):
+        return False
+    return None
+
 DATATYPE_PARSERS = {
     datetime.datetime: parse_xsd_datetime,
 }
@@ -58,7 +66,7 @@ def parse_xsd_datetime(value):
     XSD_DOUBLE: float,
     XSD_LONG: long,
     XSD_INT: int,
-    XSD_BOOLEAN: bool,
+    XSD_BOOLEAN: parse_boolean,
     XSD_DATETIME: parse_xsd_datetime,
     XSD_ANYURI: Identifier
 }
@@ -97,7 +105,10 @@ def __init__(self, value, datatype=None, langtag=None):
             if datatype is None:
                 logger.debug('Assuming prov:InternationalizedString as the type of "%s"@%s' % (value, langtag))
                 datatype = PROV["InternationalizedString"]
-            elif datatype != PROV["InternationalizedString"] and datatype != XSD_STRING:
+            # PROV JSON states that the type field must not be set when
+            # using the lang attribute and PROV XML requires it to be an
+            # internationalized string.
+            elif datatype != PROV["InternationalizedString"]:
                 logger.warn(
                     'Invalid data type (%s) for "%s"@%s, overridden as prov:InternationalizedString.' %
                     (datatype, value, langtag)
@@ -265,6 +276,15 @@ def add_attributes(self, attributes):
             if isinstance(attributes, dict):
                 #  Converting the dictionary into a list of tuples (i.e. attribute-value pairs)
                 attributes = attributes.items()
+
+            # Materialise the pairs once, so that a one-shot iterable is not
+            # exhausted by the scan below before the main loop runs.
+            attributes = list(attributes)
+
+            # Check if one of the attributes specifies that the current type
+            # is a collection. In that case multiple attributes of the same
+            # type are allowed.
+            is_collection = any(
+                PROV_ATTR_COLLECTION == pair[0] for pair in attributes)
+
             for attr_name, original_value in attributes:
                 if original_value is None:
                     continue
@@ -285,7 +305,8 @@ def add_attributes(self, attributes):
                 if value is None:
                     raise ProvException(u'Invalid value for attribute %s: %s' % (attr, original_value))
 
-                if attr in PROV_ATTRIBUTES and self._attributes[attr]:
+                if not is_collection and attr in PROV_ATTRIBUTES and \
+                        self._attributes[attr]:
                     existing_value = first(self._attributes[attr])
                     if value != existing_value:
                         raise ProvException(u'Cannot have more than one value for attribute %s' % attr)
@@ -596,7 +617,7 @@ def get_type(self):
 }
 
 
-DEFAULT_NAMESPACES = {'prov': PROV, 'xsd': XSD}
+DEFAULT_NAMESPACES = {'prov': PROV, 'xsd': XSD, 'xsi': XSI}
 
 
 #  Bundle
@@ -1424,3 +1445,44 @@ def deserialize(source=None, content=None, format='json', **args):
             else:
                 with open(source) as f:
                     return serializer.deserialize(f, **args)
+
+
+def sorted_attributes(element, attributes):
+    """
+    Return ``attributes`` as a list arranged in the order used for PROV-XML.
+
+    Pairs whose attribute is one of the formal attributes of the record
+    class registered for ``element`` come first, in the declared order,
+    followed by label, location, role, type and value. All other pairs are
+    placed at the end. Pairs within one group are sorted by attribute and
+    then by value, both compared as unicode strings.
+
+    :param element: Key into ``PROV_REC_CLS`` selecting the record class
+        whose ``FORMAL_ATTRIBUTES`` define the leading order.
+    :param attributes: Iterable of ``(attribute, value)`` pairs to sort.
+    :returns: A new list containing the same pairs in sorted order.
+    """
+    def _sort_key(pair):
+        # The PROV XML specification talks about alphabetical sorting. It
+        # is interpreted here as sorting by the attribute first and then by
+        # the text; a value exposing ``.value`` is compared by that.
+        name, value = pair[0], pair[1]
+        if hasattr(value, "value"):
+            value = value.value
+        return (unicode(name), unicode(value))
+
+    remaining = list(attributes)
+
+    # Formal attributes first, then the attributes that are universal
+    # amongst all elements.
+    leading = list(PROV_REC_CLS[element].FORMAL_ATTRIBUTES)
+    leading.extend([PROV_LABEL, PROV_LOCATION, PROV_ROLE, PROV_TYPE,
+                    PROV_VALUE])
+
+    ordered = []
+    for attr_name in leading:
+        # Split the not-yet-placed pairs into those carrying this
+        # attribute and the rest, which stay in the pool.
+        matched, rest = [], []
+        for pair in remaining:
+            if pair[0] != attr_name:
+                rest.append(pair)
+            else:
+                matched.append(pair)
+        remaining = rest
+        ordered.extend(sorted(matched, key=_sort_key))
+
+    # Whatever is left over goes to the end, sorted with the same key.
+    ordered.extend(sorted(remaining, key=_sort_key))
+    return ordered
diff --git a/prov/serializers/__init__.py b/prov/serializers/__init__.py
@@ -20,9 +20,11 @@ class Registry:
     @staticmethod
     def load_serializers():
         from prov.serializers.provjson import ProvJSONSerializer
+        from prov.serializers.provxml import ProvXMLSerializer
 
         Registry.serializers = {
-            'json': ProvJSONSerializer
+            'json': ProvJSONSerializer,
+            'xml': ProvXMLSerializer
         }
 
 

diff --git a/prov/serializers/provjson.py b/prov/serializers/provjson.py
@@ -9,7 +9,10 @@
 
 from collections import defaultdict
 import datetime
+import io
 import json
+import platform
+import StringIO
 from prov import Serializer, Error
 from prov.constants import *
 from prov.model import Literal, Identifier, QualifiedName, XSDQName, Namespace, ProvDocument, ProvBundle, \
@@ -44,6 +47,19 @@ def get_anon_id(self, obj, local_prefix="id"):
 
 class ProvJSONSerializer(Serializer):
     def serialize(self, stream, **kwargs):
+        """
+        Write ``self.document`` to ``stream`` as JSON.
+
+        For ``io.StringIO`` and ``io.BytesIO`` targets the JSON is first
+        rendered into a temporary ``StringIO.StringIO`` and then written in
+        one piece: UTF-8 encoded for ``io.BytesIO``, converted with
+        ``unicode`` for ``io.StringIO``. Any other stream is handed to
+        ``json.dump`` directly.
+
+        :param stream: The stream object to write to.
+        :param kwargs: Extra keyword arguments forwarded to ``json.dump``.
+        """
+        if isinstance(stream, (io.StringIO, io.BytesIO)):
+            scratch = StringIO.StringIO()
+            try:
+                json.dump(self.document, scratch, cls=ProvJSONEncoder,
+                          **kwargs)
+                payload = scratch.getvalue()
+                # Convert to the text type the target stream accepts.
+                if isinstance(stream, io.BytesIO):
+                    payload = payload.encode('utf-8')
+                else:
+                    payload = unicode(payload)
+                stream.write(payload)
+            finally:
+                scratch.close()
+            return
         json.dump(self.document, stream, cls=ProvJSONEncoder, **kwargs)
 
     def deserialize(self, stream, **kwargs):