Skip to content

Commit

Permalink
Merge 32cab89 into 008828b
Browse files Browse the repository at this point in the history
  • Loading branch information
zimeon committed Apr 27, 2021
2 parents 008828b + 32cab89 commit 6b041ff
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 21 deletions.
18 changes: 15 additions & 3 deletions ocfl/data/validation-errors.json
Original file line number Diff line number Diff line change
Expand Up @@ -415,17 +415,29 @@
}
},
"E066b": {
"params": ["version_dir" , "prior_head", "where"],
"params": ["version" , "prior_head", "where"],
"description": {
"en": "OCFL Object inventory manifest for %s in %s doesn't have a subset of manifest entries of inventory for %s"
}
},
"E066c": {
"params": ["prior_head", "version_dir", "file", "prior_content", "where", "current_content"],
"params": ["prior_head", "version", "file", "prior_content", "where", "current_content"],
"description": {
"en": "OCFL Object %s inventory %s version state has file %s that maps to different content files (%s) than in the %s inventory (%s)"
}
},
"E066d": {
"params": ["where", "version", "digest", "logical_files", "prior_head"],
"description": {
"en": "OCFL Object %s inventory %s version state has digest %s (mapping to logical files %s) that does not appear in the %s inventory"
}
},
"E066e": {
"params": ["prior_head", "version", "digest", "logical_files", "where"],
"description": {
"en": "OCFL Object %s inventory %s version state has digest %s (mapping to logical files %s) that does not appear in the %s inventory"
}
},
"E067": {
"params": ["entry"],
"description": {
Expand Down Expand Up @@ -614,7 +626,7 @@
"spec": "In addition to the inventory in the OCFL Object Root, every version directory SHOULD include an inventory file that is an Inventory of all content for versions up to and including that particular version"
},
"W011": {
"params": ["key", "version_dir" , "prior_head", "where"],
"params": ["key", "version" , "prior_head", "where"],
"description": {
"en": "OCFL Object version metadata '%s' for %s in %s inventory does not match that in %s inventory"
},
Expand Down
73 changes: 56 additions & 17 deletions ocfl/inventory_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@
from .w3c_datetime import str_to_datetime


def get_file_map(inventory, version_dir):
"""Get a map of files in state to files on disk for version_dir in inventory."""
state = inventory['versions'][version_dir]['state']
def get_file_map(inventory, version):
"""Get a map of files in state to files on disk for version in inventory.
Returns a dictionary: file_in_state -> set(content_files)
The set of content_files may include references to duplicate files in
later versions than the version being described.
"""
state = inventory['versions'][version]['state']
manifest = inventory['manifest']
file_map = {}
for digest in state:
Expand Down Expand Up @@ -432,32 +438,65 @@ def check_content_path(self, path, content_paths, content_directories):
return True

def validate_as_prior_version(self, prior):
"""Check that prior is a valid InventoryValidator for a prior version of the current inventory object.
"""Check that prior is a valid prior version of the current inventory object.
Both inventories are assumed to have been checked for internal consistency.
The input prior is also expected to be an InventoryValidator object and
both self and prior inventories are assumed to have been checked for
internal consistency.
"""
# Must have a subset of versions which also check zero padding format etc.
if not set(prior.all_versions) < set(self.all_versions):
self.error('E066a', prior_head=prior.head)
else:
# Check references to files but realize that there might be different
# digest algorithms between versions
version_dir = 'no-version'
for version_dir in prior.all_versions:
prior_map = get_file_map(prior.inventory, version_dir)
self_map = get_file_map(self.inventory, version_dir)
version = 'no-version'
for version in prior.all_versions:
# If the digest algorithm is the same then we can make a
# direct check on whether the state blocks match
if prior.digest_algorithm == self.digest_algorithm:
self.compare_states_for_version(prior, version)
# Now check the mappings from state to content files which must
# be consistent even if the digestAlgorithm is different between
# versions
prior_map = get_file_map(prior.inventory, version)
self_map = get_file_map(self.inventory, version)
if prior_map.keys() != self_map.keys():
self.error('E066b', version_dir=version_dir, prior_head=prior.head)
self.error('E066b', version=version, prior_head=prior.head)
else:
# Check them all...
for file in prior_map:
if not prior_map[file].issubset(self_map[file]):
self.error('E066c', version_dir=version_dir, prior_head=prior.head,
self.error('E066c', version=version, prior_head=prior.head,
file=file, prior_content=','.join(prior_map[file]),
current_content=','.join(self_map[file]))
# Check metadata
prior_version = prior.inventory['versions'][version_dir]
self_version = self.inventory['versions'][version_dir]
for key in ('created', 'message', 'user'):
if prior_version.get(key) != self_version.get(key):
self.warning('W011', version_dir=version_dir, prior_head=prior.head, key=key)
# Check metadata
prior_version = prior.inventory['versions'][version]
self_version = self.inventory['versions'][version]
for key in ('created', 'message', 'user'):
if prior_version.get(key) != self_version.get(key):
self.warning('W011', version=version, prior_head=prior.head, key=key)

def compare_states_for_version(self, prior, version):
    """Compare state blocks for version between self and prior.

    The digest algorithm must be the same in both, do not call otherwise!

    Looks only for digests that appear in one state block but not in the
    other. The code in validate_as_prior_version(..) does a check for
    whether the same sets of logical files appear and we don't want to
    duplicate an error message about that.

    While the mapping checks in validate_as_prior_version(..) do all that is
    necessary to detect an error, the additional errors that may be generated
    here provide more detailed diagnostics in the case that the digest
    algorithm is the same across versions being compared.

    Arguments:
        prior: object for the prior inventory; its .inventory and .head
            attributes are read here
        version: key into inventory['versions'] of both self and prior

    Reports E066d for each digest present only in the state of self and
    E066e for each digest present only in the state of prior. Nothing is
    returned.
    """
    self_state = self.inventory['versions'][version]['state']
    prior_state = prior.inventory['versions'][version]['state']
    # Walk the combined digests in sorted order. Iterating the bare set would
    # give an order that changes from run to run (string hashing is
    # randomized), making the sequence of reported errors irreproducible.
    for digest in sorted(set(self_state).union(prior_state)):
        if digest not in prior_state:
            # Digest only in the current inventory's state for this version
            self.error('E066d', version=version, prior_head=prior.head,
                       digest=digest, logical_files=', '.join(self_state[digest]))
        elif digest not in self_state:
            # Digest only in the prior inventory's state for this version
            self.error('E066e', version=version, prior_head=prior.head,
                       digest=digest, logical_files=', '.join(prior_state[digest]))
27 changes: 27 additions & 0 deletions tests/test_inventory_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,11 +300,13 @@ def test_validate_as_prior_version(self):
log.clear()
# Good inventory in spite of different digests
iv.all_versions = ['v1', 'v2']
iv.digest_algorithm = 'a1'
iv.inventory = {"manifest": {"a1d1": ["v1/content/f1"],
"a1d2": ["v1/content/f2"],
"a1d3": ["v2/content/f3"]},
"versions": {"v1": {"state": {"a1d1": ["f1"], "a1d2": ["f2"]}},
"v2": {"state": {"a1d1": ["f1"], "a1d3": ["f3"]}}}}
prior.digest_algorithm = 'a2'
prior.inventory = {"manifest": {"a2d1": ["v1/content/f1"],
"a2d2": ["v1/content/f2"]},
"versions": {"v1": {"state": {"a2d1": ["f1"], "a2d2": ["f2"]}}}}
Expand All @@ -322,6 +324,31 @@ def test_validate_as_prior_version(self):
iv.validate_as_prior_version(prior)
self.assertEqual(log.errors, ["E066c"])

def test_compare_states_for_version(self):
    """Check the errors compare_states_for_version logs for matching and mismatched states."""
    log = TLogger()
    iv = InventoryValidator(log=log)
    prior = InventoryValidator(log=TLogger())
    # The prior inventory stays fixed; only the state in iv varies per case
    prior.inventory = {
        "versions": {"v99": {"state": {"a1d1": ["f1"], "a1d2": ["f2", "f3"]}}}}
    # Each entry: (state block placed in iv for v99, error codes expected in log)
    scenarios = (
        # Same digests
        ({"a1d1": ["f1"], "a1d2": ["f2", "f3"]}, []),
        # Extra in iv
        ({"a1d1": ["f1"], "a1d2": ["f2", "f3"], "a1d3": ["f4"]}, ['E066d']),
        # Extra in prior
        ({"a1d2": ["f2", "f3"]}, ['E066e']),
    )
    for position, (iv_state, expected_errors) in enumerate(scenarios):
        if position > 0:
            # Drop errors recorded by the previous scenario
            log.clear()
        iv.inventory = {"versions": {"v99": {"state": iv_state}}}
        iv.compare_states_for_version(prior, 'v99')
        self.assertEqual(log.errors, expected_errors)

def test_check_content_path(self):
"""Test check_content_path method."""
log = TLogger()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test01_bad(self):
'E061_invalid_sidecar': ['E061'],
'E063_no_inv': ['E063'],
'E064_different_root_and_latest_inventories': ['E064'],
'E066_E092_old_manifest_digest_incorrect': ['E092a'],
'E066_E092_old_manifest_digest_incorrect': ['E066d', 'E066e', 'E092a'],
'E066_algorithm_change_state_mismatch': ['E066b'],
'E066_inconsistent_version_state': ['E066b'],
'E067_file_in_extensions_dir': ['E067'],
Expand Down

0 comments on commit 6b041ff

Please sign in to comment.