diff --git a/.travis.yml b/.travis.yml index c465dd7..71a1c7a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,10 @@ -dist: xenial +dist: bionic language: python python: - "3.6" - "3.7" - "3.8" + - "3.9" # Install sub-module using public HTTPS not SSH git: submodules: false @@ -11,11 +12,13 @@ before_install: - sed -i 's/git@github.com:/https:\/\/github.com\//' .gitmodules - git submodule update --init --recursive install: - - pip install coveralls pycodestyle pep257 + - pip install bagit #pylint is unhappy with bagit install via setup.py, not sure why + - pip install coveralls pycodestyle pydocstyle pylint - python setup.py install script: - - pycodestyle --ignore=E501,W503 ocfl/*.py tests/*.py *.py - - pep257 ocfl/*.py tests/*.py *.py + - pycodestyle --ignore=E501,W503 *.py ocfl tests + - pydocstyle *.py ocfl tests + - pylint --disable=unneeded-not,line-too-long,unnecessary-semicolon,trailing-whitespace,missing-final-newline,bad-indentation,multiple-statements,bare-except,missing-module-docstring,missing-class-docstring,missing-function-docstring,W0511,W0622,W0707,C0103,R0902,R0911,R0912,R0913,R0914,R0915,R1702 *.py ocfl tests - python setup.py test after_success: - python setup.py coverage diff --git a/CHANGES.md b/CHANGES.md index e1e48be..69cbb8e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,13 @@ # ocfl-py changelog +## 2021-04-18 v1.2.1 + + * Add use of `pylint` in addition to `pycodestyle` and `pydocstyle` (was `pep257`). Numerous minor fixes as a result of errors/warnings reported.
+ * Use additional fixtures in https://github.com/OCFL/fixtures for tests + * Validation: + * Correct missing root inventory from E034 to E063 + * Add tests for digests in prior version manifests + ## 2021-03-24 v1.2.0 * Add ability for `ocfl-validate.py` to validate a standalone inventory @@ -44,7 +52,7 @@ ## 2020-05-18 v0.0.7 * Validator now checks fixity block structure, additional fixity values in fixity block - * Validator now checks for repeated digests in manifest, fixity and state blocks (https://ocfl.io/1.0/spec/#E096, https://ocfl.io/1.0/spec/#E097, https://ocfl.io/1.0/spec/#E098) + * Validator now checks for repeated digests in manifest, fixity and state blocks (https://ocfl.io/1.0/spec/#E096, https://ocfl.io/1.0/spec/#E097, https://ocfl.io/1.0/spec/#E098) * Move all the many README_*.md demos into docs folder * Add build_demo_docs.sh to build demo descriptions in docs folder @@ -60,7 +68,7 @@ ## 2020-05-05 v0.0.5 - * Renumber errors to align somewhat with the canonical code set extracted + * Renumber errors to align somewhat with the canonical code set extracted at https://github.com/OCFL/spec/blob/main/validation/validation-codes.md * Add --version parameter to scripts to show version number diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fa59d70..e3d9291 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,10 +12,18 @@ If you propose a major change or new feature, please submit an issue to discuss If submitting a pull request: - * Understand that this code and any merged contributions are covered by the [MIT license](LICENSE.txt). - * Please discuss in a issue before submitting a pull request for significant changes. - * Please submit pull requests against the `main` branch (at this early stage of development changes are merged to `main`, this may change at some stage). - * Please follow [PEP8](https://www.python.org/dev/peps/pep-0008/) and [PEP257](https://www.python.org/dev/peps/pep-0257/) style rules. 
PEP8 line length (E501) is not enforced, just be reasonable. - * The warning W504 is enabled, but W503 is disables -- line breaks should occur before binary operators - * Please don't repeat code. - * Please cover the code with tests. + * Understand that this code and any merged contributions are covered by the [MIT license](LICENSE.txt) + * Please discuss in an issue before submitting a pull request for significant changes + * Please submit pull requests against the `develop` branch + * Please write code that passes the linting tests in `.travis.yml`, these include: + * `pycodestyle` implements [PEP8](https://www.python.org/dev/peps/pep-0008/) with the following warnings disabled: + * Line length is not enforced (E501), just be reasonable + * Where necessary, line breaks should occur before binary operators (warning W504 is enabled, but W503 is disabled) + * `pydocstyle` implements [PEP257](https://www.python.org/dev/peps/pep-0257/) style rules: + * Nothing is disabled + * `pylint` implements more complex static analysis and looks for code smells, some rules are disabled including: + * [Checks already implemented in `pycodestyle` and `pydocstyle`](http://pylint.pycqa.org/en/latest/faq.html#i-am-using-another-popular-linter-alongside-pylint-which-messages-should-i-disable-to-avoid-duplicates) + * A FIXME doesn't generate an error, but please avoid anyway (W0511) + * See `.travis.yml` for current exclusions + * Please don't repeat code + * Please cover the code with tests diff --git a/docs/demo_build_spec_examples.md b/docs/demo_build_spec_examples.md index 4313129..288f5e9 100644 --- a/docs/demo_build_spec_examples.md +++ b/docs/demo_build_spec_examples.md @@ -10,8 +10,7 @@ The digest type sha512-spec-ex is sha512 with most of the content stripped out a ``` > python ocfl-object.py --build --src fixtures/1.0/content/spec-ex-minimal --id http://example.org/minimal --digest sha512-spec-ex --created 2018-10-02T12:00:00Z --message One file --name Alice --address
alice@example.org -v -WARNING:ocfl.object:### Inventory for v1 -{ +WARNING:ocfl.object:### Inventory for v1{ "digestAlgorithm": "sha512-spec-ex", "head": "v1", "id": "http://example.org/minimal", @@ -37,6 +36,7 @@ WARNING:ocfl.object:### Inventory for v1 } } } + ``` @@ -48,8 +48,7 @@ This is inventory should match the example with 3 versions in python ocfl-object.py --build --src fixtures/1.0/content/spec-ex-full --id ark:/12345/bcd987 --fixity md5 --fixity sha1 --digest sha512-spec-ex -v -WARNING:ocfl.object:### Inventory for v1 -{ +WARNING:ocfl.object:### Inventory for v1{ "digestAlgorithm": "sha512-spec-ex", "fixity": { "md5": { @@ -91,7 +90,7 @@ WARNING:ocfl.object:### Inventory for v1 "type": "https://ocfl.io/1.0/spec/#inventory", "versions": { "v1": { - "created": "2021-03-24T20:22:33.307165Z", + "created": "2021-04-18T15:14:51.784558Z", "state": { "7dcc352f96c56dc...c31": [ "foo/bar.xml" @@ -106,8 +105,8 @@ WARNING:ocfl.object:### Inventory for v1 } } } -WARNING:ocfl.object:### Inventory for v2 -{ + +WARNING:ocfl.object:### Inventory for v2{ "digestAlgorithm": "sha512-spec-ex", "fixity": { "md5": { @@ -158,7 +157,7 @@ WARNING:ocfl.object:### Inventory for v2 "type": "https://ocfl.io/1.0/spec/#inventory", "versions": { "v1": { - "created": "2021-03-24T20:22:33.307165Z", + "created": "2021-04-18T15:14:51.784558Z", "state": { "7dcc352f96c56dc...c31": [ "foo/bar.xml" @@ -172,7 +171,7 @@ WARNING:ocfl.object:### Inventory for v2 } }, "v2": { - "created": "2021-03-24T20:22:33.308961Z", + "created": "2021-04-18T15:14:51.786202Z", "state": { "4d27c86b026ff70...b53": [ "foo/bar.xml" @@ -185,8 +184,8 @@ WARNING:ocfl.object:### Inventory for v2 } } } -WARNING:ocfl.object:### Inventory for v3 -{ + +WARNING:ocfl.object:### Inventory for v3{ "digestAlgorithm": "sha512-spec-ex", "fixity": { "md5": { @@ -237,7 +236,7 @@ WARNING:ocfl.object:### Inventory for v3 "type": "https://ocfl.io/1.0/spec/#inventory", "versions": { "v1": { - "created": "2021-03-24T20:22:33.307165Z", + 
"created": "2021-04-18T15:14:51.784558Z", "state": { "7dcc352f96c56dc...c31": [ "foo/bar.xml" @@ -251,7 +250,7 @@ WARNING:ocfl.object:### Inventory for v3 } }, "v2": { - "created": "2021-03-24T20:22:33.308961Z", + "created": "2021-04-18T15:14:51.786202Z", "state": { "4d27c86b026ff70...b53": [ "foo/bar.xml" @@ -263,7 +262,7 @@ WARNING:ocfl.object:### Inventory for v3 } }, "v3": { - "created": "2021-03-24T20:22:33.310220Z", + "created": "2021-04-18T15:14:51.787447Z", "state": { "4d27c86b026ff70...b53": [ "foo/bar.xml" @@ -278,6 +277,7 @@ WARNING:ocfl.object:### Inventory for v3 } } } + ``` @@ -289,8 +289,7 @@ This is inventory should match the example showing how content paths may differ ``` > python ocfl-object.py --build --src fixtures/1.0/content/spec-ex-diff-paths --id http://example.org/diff-paths --digest sha512-spec-ex --normalization md5 --created 2019-03-14T20:31:00Z -v -WARNING:ocfl.object:### Inventory for v1 -{ +WARNING:ocfl.object:### Inventory for v1{ "digestAlgorithm": "sha512-spec-ex", "head": "v1", "id": "http://example.org/diff-paths", @@ -317,5 +316,6 @@ WARNING:ocfl.object:### Inventory for v1 } } } + ``` diff --git a/docs/demo_ocfl_object_script.md b/docs/demo_ocfl_object_script.md index 2a24122..ed19443 100644 --- a/docs/demo_ocfl_object_script.md +++ b/docs/demo_ocfl_object_script.md @@ -10,7 +10,7 @@ The `--version` argument will show version number and exit ``` > python ocfl-object.py --version -ocfl-object.py is part of ocfl-py version 1.2.0 +ocfl-object.py is part of ocfl-py version 1.2.1 ``` @@ -22,8 +22,7 @@ Without an `--objdir` argument the script just writes out the inventory for the ``` > python ocfl-object.py --create --id http://example.org/obj1 --src fixtures/1.0/content/cf1/v1 -WARNING:ocfl.object:### Inventory for v1 -{ +WARNING:ocfl.object:### Inventory for v1{ "digestAlgorithm": "sha512", "head": "v1", "id": "http://example.org/obj1", @@ -35,7 +34,7 @@ WARNING:ocfl.object:### Inventory for v1 "type": 
"https://ocfl.io/1.0/spec/#inventory", "versions": { "v1": { - "created": "2021-03-24T20:22:34.576958Z", + "created": "2021-04-18T15:14:53.278161Z", "state": { "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ "a_file.txt" @@ -44,6 +43,7 @@ WARNING:ocfl.object:### Inventory for v1 } } } + ``` @@ -53,8 +53,7 @@ Without an `--objdir` argument the script just writes out the inventory for each ``` > python ocfl-object.py --build --id http://example.org/obj2 --src fixtures/1.0/content/cf3 -WARNING:ocfl.object:### Inventory for v1 -{ +WARNING:ocfl.object:### Inventory for v1{ "digestAlgorithm": "sha512", "head": "v1", "id": "http://example.org/obj2", @@ -66,7 +65,7 @@ WARNING:ocfl.object:### Inventory for v1 "type": "https://ocfl.io/1.0/spec/#inventory", "versions": { "v1": { - "created": "2021-03-24T20:22:34.974946Z", + "created": "2021-04-18T15:14:53.721542Z", "state": { "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ "a_file.txt" @@ -75,8 +74,8 @@ WARNING:ocfl.object:### Inventory for v1 } } } -WARNING:ocfl.object:### Inventory for v2 -{ + +WARNING:ocfl.object:### Inventory for v2{ "digestAlgorithm": "sha512", "head": "v2", "id": "http://example.org/obj2", @@ -91,7 +90,7 @@ WARNING:ocfl.object:### Inventory for v2 "type": "https://ocfl.io/1.0/spec/#inventory", "versions": { "v1": { - "created": "2021-03-24T20:22:34.974946Z", + "created": "2021-04-18T15:14:53.721542Z", "state": { "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ "a_file.txt" @@ -99,7 +98,7 @@ WARNING:ocfl.object:### Inventory for v2 } }, "v2": { - "created": "2021-03-24T20:22:34.975605Z", + "created": "2021-04-18T15:14:53.722350Z", "state": { 
"296e72b8fd5f7f0ac1473993600ae34953d5dab646f17e7b182b8648aff830d7bf01b56490777cb3e72b33fcc1ae520506badea1032252d1a55fd7362e269975": [ "a_file.txt" @@ -108,8 +107,8 @@ WARNING:ocfl.object:### Inventory for v2 } } } -WARNING:ocfl.object:### Inventory for v3 -{ + +WARNING:ocfl.object:### Inventory for v3{ "digestAlgorithm": "sha512", "head": "v3", "id": "http://example.org/obj2", @@ -124,7 +123,7 @@ WARNING:ocfl.object:### Inventory for v3 "type": "https://ocfl.io/1.0/spec/#inventory", "versions": { "v1": { - "created": "2021-03-24T20:22:34.974946Z", + "created": "2021-04-18T15:14:53.721542Z", "state": { "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ "a_file.txt" @@ -132,7 +131,7 @@ WARNING:ocfl.object:### Inventory for v3 } }, "v2": { - "created": "2021-03-24T20:22:34.975605Z", + "created": "2021-04-18T15:14:53.722350Z", "state": { "296e72b8fd5f7f0ac1473993600ae34953d5dab646f17e7b182b8648aff830d7bf01b56490777cb3e72b33fcc1ae520506badea1032252d1a55fd7362e269975": [ "a_file.txt" @@ -140,7 +139,7 @@ WARNING:ocfl.object:### Inventory for v3 } }, "v3": { - "created": "2021-03-24T20:22:34.976141Z", + "created": "2021-04-18T15:14:53.723184Z", "state": { "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ "a_file.txt" @@ -149,6 +148,7 @@ WARNING:ocfl.object:### Inventory for v3 } } } + ``` diff --git a/docs/demo_ocfl_sidecar_script.md b/docs/demo_ocfl_sidecar_script.md index bb41d8a..f9f2e04 100644 --- a/docs/demo_ocfl_sidecar_script.md +++ b/docs/demo_ocfl_sidecar_script.md @@ -10,7 +10,7 @@ The `--version` argument will show version number and exit ``` > python ocfl-sidecar.py --version -ocfl-sidecar.py is part of ocfl-py version 1.2.0 +ocfl-sidecar.py is part of ocfl-py version 1.2.1 ``` diff --git a/docs/demo_ocfl_store_script.md b/docs/demo_ocfl_store_script.md index 0727955..7ac1fd5 100644 --- 
a/docs/demo_ocfl_store_script.md +++ b/docs/demo_ocfl_store_script.md @@ -10,7 +10,7 @@ The `--version` argument will show version number and exit (but we still tave to ``` > python ocfl-store.py --version --root=tmp/root --list -ocfl-store.py is part of ocfl-py version 1.2.0 +ocfl-store.py is part of ocfl-py version 1.2.1 ``` diff --git a/docs/demo_using_bagit_bags.md b/docs/demo_using_bagit_bags.md index ec0c686..e6b45ab 100644 --- a/docs/demo_using_bagit_bags.md +++ b/docs/demo_using_bagit_bags.md @@ -161,8 +161,8 @@ Taking the newly created OCFL object `/tmp/obj` we can `--extract` the `v4` cont INFO:ocfl.object:Extracted v4 into tmp/extracted_v4 INFO:bagit:Creating bag for directory tmp/extracted_v4 INFO:bagit:Creating data directory -INFO:bagit:Moving my_content to /privatetmp/extracted_v4/tmpmv9q6tbn/my_content -INFO:bagit:Moving /privatetmp/extracted_v4/tmpmv9q6tbn to data +INFO:bagit:Moving my_content to /privatetmp/extracted_v4/tmp1z_q233f/my_content +INFO:bagit:Moving /privatetmp/extracted_v4/tmp1z_q233f to data INFO:bagit:Using 1 processes to generate manifests: sha512 INFO:bagit:Generating manifest lines for file data/my_content/dracula.txt INFO:bagit:Generating manifest lines for file data/my_content/dunwich.txt @@ -184,12 +184,12 @@ We note that the OCFL object had only one `content` file in `v4` but the extract diff -r tmp/extracted_v4/bag-info.txt tests/testdata/bags/uaa_v4/bag-info.txt 1,2c1 < Bag-Software-Agent: bagit.py v1.8.1 -< Bagging-Date: 2021-03-24 +< Bagging-Date: 2021-04-18 --- > Bagging-Date: 2020-01-04 diff -r tmp/extracted_v4/tagmanifest-sha512.txt tests/testdata/bags/uaa_v4/tagmanifest-sha512.txt 2c2 -< 4354c74d41f895abfcb617379a7a4114dcff7193a8f30ae27f8d5553fdecce4c533cfdf6f8f94ad2ab15adff22f0e5768ee8961f5ac6aa5156c79ae8a6642651 bag-info.txt +< b28abe8d128a679ebfa949ac900b4f63119d5734ab4cf60c102aacdf69f486cb69ff143a3f95d0be9d0d942ee7a69b7587d72abf61d2c281dc10d9f62b1b0339 bag-info.txt --- > 
10624e6d45462def7af66d1a0d977606c7b073b01809c1d42258cfab5c34a275480943cbe78044416aee1f23822cc3762f92247b8f39b5c6ddc5ae32a8f94ce5 bag-info.txt ``` diff --git a/docs/validation_status.md b/docs/validation_status.md index 2747e9e..d0afb50 100644 --- a/docs/validation_status.md +++ b/docs/validation_status.md @@ -7,21 +7,21 @@ The following tables show the implementation status of all errors and warnings i | Code | Specification text (or suffixed code) | Implementation status and message/links | | --- | --- | --- | | [E001](https://ocfl.io/1.0/spec#E001) | 'The OCFL Object Root must not contain files or directories other than those specified in the following sections.' | _See multiple cases identified with suffixes below_ | -| | E001a | OCFL Object root contains unexpected file: %s \[[ocfl/validator.py#L164](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L164)\] | -| | E001b | OCFL Object root contains unexpected directory: %s \[[ocfl/validator.py#L171](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L171)\] | -| | E001c | OCFL Object root contains unexpected entry that isn't a file or directory: %s \[[ocfl/validator.py#L173](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L173)\] | +| | E001a | OCFL Object root contains unexpected file: %s \[[ocfl/validator.py#L171](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L171)\] | +| | E001b | OCFL Object root contains unexpected directory: %s \[[ocfl/validator.py#L178](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L178)\] | +| | E001c | OCFL Object root contains unexpected entry that isn't a file or directory: %s \[[ocfl/validator.py#L180](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L180)\] | | [E002](https://ocfl.io/1.0/spec#E002) | 'The version declaration must be formatted according to the NAMASTE specification.' | NOTE - E002 is redundant to more specific errors E003, E004, E005, E006. 
\[_Not implemented_\] | | [E003](https://ocfl.io/1.0/spec#E003) | '[The version declaration] must be a file in the base directory of the OCFL Object Root giving the OCFL version in the filename.' | _See multiple cases identified with suffixes below_ | -| | E003a | OCFL Object version declaration file is missing \[[ocfl/validator.py#L76](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L76)\] | -| | E003b | OCFL Object includes more that one file that looks like an object declaration (got %s) \[[ocfl/validator.py#L78](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L78)\] | -| | E003c | No OCFL Object to validate at path %s. The root of an OCFL Object must be a directory containing an object declaration \[[ocfl/validator.py#L71](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L71)\] | -| | E003d | OCFL Storage Root hierarchy includes directory %s with more that one file that looks like an object declaration, ignoring \[[ocfl/store.py#L166](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L166)\] | +| | E003a | OCFL Object version declaration file is missing \[[ocfl/validator.py#L83](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L83)\] | +| | E003b | OCFL Object includes more that one file that looks like an object declaration (got %s) \[[ocfl/validator.py#L85](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L85)\] | +| | E003c | No OCFL Object to validate at path %s. 
The root of an OCFL Object must be a directory containing an object declaration \[[ocfl/validator.py#L78](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L78)\] | +| | E003d | OCFL Storage Root hierarchy includes directory %s with more that one file that looks like an object declaration, ignoring \[[ocfl/store.py#L158](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L158)\] | | [E004](https://ocfl.io/1.0/spec#E004) | 'The [version declaration] filename MUST conform to the pattern T=dvalue, where T must be 0, and dvalue must be ocfl_object_, followed by the OCFL specification version number.' | _See multiple cases identified with suffixes below_ | -| | E004a | OCFL Storage Root hierarchy includes directory %s with an object declaration giving unknown version %s, ignoring \[[ocfl/store.py#L173](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L173)\] | -| | E004b | OCFL Storage Root hierarchy includes directory %s with an unrecognized object declaration %s, ignoring \[[ocfl/store.py#L175](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L175)\] | +| | E004a | OCFL Storage Root hierarchy includes directory %s with an object declaration giving unknown version %s, ignoring \[[ocfl/store.py#L165](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L165)\] | +| | E004b | OCFL Storage Root hierarchy includes directory %s with an unrecognized object declaration %s, ignoring \[[ocfl/store.py#L167](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L167)\] | | [E005](https://ocfl.io/1.0/spec#E005) | 'The [version declaration] filename must conform to the pattern T=dvalue, where T MUST be 0, and dvalue must be ocfl_object_, followed by the OCFL specification version number.' | _Not implemented_ | | [E006](https://ocfl.io/1.0/spec#E006) | 'The [version declaration] filename must conform to the pattern T=dvalue, where T must be 0, and dvalue MUST be ocfl_object_, followed by the OCFL specification version number.' 
| _Not implemented_ | -| [E007](https://ocfl.io/1.0/spec#E007) | 'The text contents of the [version declaration] file must be the same as dvalue, followed by a newline (\n).' | OCFL Object declaration file contents do not match file name without leading 0= (the 'dvalue') \[[ocfl/validator.py#L80](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L80)\] | +| [E007](https://ocfl.io/1.0/spec#E007) | 'The text contents of the [version declaration] file must be the same as dvalue, followed by a newline (\n).' | OCFL Object declaration file contents do not match file name without leading 0= (the 'dvalue') \[[ocfl/validator.py#L87](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L87)\] | | [E008](https://ocfl.io/1.0/spec#E008) | 'OCFL Object content must be stored as a sequence of one or more versions.' | OCFL Object %s inventory versions block does not contain any versions, there must be at least version 1 \[[ocfl/inventory_validator.py#L208](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L208)\] | | [E009](https://ocfl.io/1.0/spec#E009) | 'The version number sequence MUST start at 1 and must be continuous without missing integers.' | OCFL Object %s inventory versions block does not contain v1 or a zero padded equivalent \[[ocfl/inventory_validator.py#L228](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L228)\] | | [E010](https://ocfl.io/1.0/spec#E010) | 'The version number sequence must start at 1 and MUST be continuous without missing integers.' 
| OCFL Object %s inventory versions block includes an out-of-sequence version \[[ocfl/inventory_validator.py#L239](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L239)\] | @@ -29,7 +29,7 @@ The following tables show the implementation status of all errors and warnings i | [E012](https://ocfl.io/1.0/spec#E012) | 'All version directories of an object must use the same naming convention: either a non-padded version directory number, or a zero-padded version directory number of consistent length.' | _Not implemented_ | | [E013](https://ocfl.io/1.0/spec#E013) | 'Operations that add a new version to an object must follow the version directory naming convention established by earlier versions.' | _Not implemented_ | | [E014](https://ocfl.io/1.0/spec#E014) | 'In all cases, references to files inside version directories from inventory files must use the actual version directory names.' | _Not implemented_ | -| [E015](https://ocfl.io/1.0/spec#E015) | 'There must be no other files as children of a version directory, other than an inventory file and a inventory digest.' | OCFL Object version directory %s includes an illegal file (%s) \[[ocfl/validator.py#L255](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L255)\] | +| [E015](https://ocfl.io/1.0/spec#E015) | 'There must be no other files as children of a version directory, other than an inventory file and a inventory digest.' | OCFL Object version directory %s includes an illegal file (%s) \[[ocfl/validator.py#L275](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L275)\] | | [E016](https://ocfl.io/1.0/spec#E016) | 'Version directories must contain a designated content sub-directory if the version contains files to be preserved, and should not contain this sub-directory otherwise.' | _Not implemented_ | | [E017](https://ocfl.io/1.0/spec#E017) | 'The contentDirectory value MUST NOT contain the forward slash (/) path separator and must not be either one or two periods (. 
or ..).' | _Not implemented_ | | [E018](https://ocfl.io/1.0/spec#E018) | 'The contentDirectory value must not contain the forward slash (/) path separator and MUST NOT be either one or two periods (. or ..).' | Content directory must not contain a forward slash (/) or be . or .. \[[ocfl/inventory_validator.py#L83](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L83)\] | @@ -38,9 +38,9 @@ The following tables show the implementation status of all errors and warnings i | [E021](https://ocfl.io/1.0/spec#E021) | 'If the key contentDirectory is not present in the inventory file then the name of the designated content sub-directory must be content.' | _Not implemented_ | | [E022](https://ocfl.io/1.0/spec#E022) | 'OCFL-compliant tools (including any validators) must ignore all directories in the object version directory except for the designated content directory.' | _Not implemented_ | | [E023](https://ocfl.io/1.0/spec#E023) | 'Every file within a version\'s content directory must be referenced in the manifest section of the inventory.' | _See multiple cases identified with suffixes below_ | -| | E023a | OCFL Object %s inventory manifest refers to a file path that is not present in the object (%s) \[[ocfl/validator.py#L264](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L264)\] | -| | E023b | OCFL Object includes one or more files that are not mentioned in the %s inventory manifest (%s) \[[ocfl/validator.py#L282](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L282)\] | -| [E024](https://ocfl.io/1.0/spec#E024) | 'There must not be empty directories within a version\'s content directory.' 
| OCFL Object version %s content directory includes empty path %s \[[ocfl/validator.py#L246](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L246)\] | +| | E023a | OCFL Object %s inventory manifest refers to a file path that is not present in the object (%s) \[[ocfl/validator.py#L283](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L283)\] | +| | E023b | OCFL Object includes one or more files that are not mentioned in the %s inventory manifest (%s) \[[ocfl/validator.py#L311](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L311)\] | +| [E024](https://ocfl.io/1.0/spec#E024) | 'There must not be empty directories within a version\'s content directory.' | OCFL Object version %s content directory includes empty path %s \[[ocfl/validator.py#L266](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L266)\] | | [E025](https://ocfl.io/1.0/spec#E025) | 'For content-addressing, OCFL Objects must use either sha512 or sha256, and should use sha512.' | _See multiple cases identified with suffixes below_ | | | E025a | OCFL Object %s inventory manifest block includes a digest (%s) that doesn't have the correct form for the %s algorithm \[[ocfl/inventory_validator.py#L130](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L130)\] | | [E026](https://ocfl.io/1.0/spec#E026) | 'For storage of additional fixity values, or to support legacy content migration, implementers must choose from the following controlled vocabulary of digest algorithms, or from a list of additional algorithms given in the [Digest-Algorithms-Extension].' | _See multiple cases identified with suffixes below_ | @@ -51,8 +51,8 @@ The following tables show the implementation status of all errors and warnings i | [E030](https://ocfl.io/1.0/spec#E030) | 'SHA-256 algorithm defined by [FIPS-180-4] and must be encoded using hex (base16) encoding [RFC4648].' 
| _Not implemented_ | | [E031](https://ocfl.io/1.0/spec#E031) | 'SHA-512 algorithm defined by [FIPS-180-4] and must be encoded using hex (base16) encoding [RFC4648].' | _Not implemented_ | | [E032](https://ocfl.io/1.0/spec#E032) | '[blake2b-512] must be encoded using hex (base16) encoding [RFC4648].' | _Not implemented_ | -| [E033](https://ocfl.io/1.0/spec#E033) | 'An OCFL Object Inventory MUST follow the [JSON] structure described in this section and must be named inventory.json.' | OCFL Object %s inventory is not valid JSON (%s) \[[ocfl/validator.py#L117](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L117) [ocfl/object.py#L553](https://github.com/zimeon/ocfl-py/blob/main/ocfl/object.py#L553)\] | -| [E034](https://ocfl.io/1.0/spec#E034) | 'An OCFL Object Inventory must follow the [JSON] structure described in this section and MUST be named inventory.json.' | OCFL Object root inventory is missing \[[ocfl/validator.py#L84](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L84)\] | +| [E033](https://ocfl.io/1.0/spec#E033) | 'An OCFL Object Inventory MUST follow the [JSON] structure described in this section and must be named inventory.json.' | OCFL Object %s inventory is not valid JSON (%s) \[[ocfl/object.py#L544](https://github.com/zimeon/ocfl-py/blob/main/ocfl/object.py#L544) [ocfl/validator.py#L124](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L124)\] | +| [E034](https://ocfl.io/1.0/spec#E034) | 'An OCFL Object Inventory must follow the [JSON] structure described in this section and MUST be named inventory.json.' | _Not implemented_ | | [E035](https://ocfl.io/1.0/spec#E035) | 'The forward slash (/) path separator must be used in content paths in the manifest and fixity blocks within the inventory.' 
| _Not implemented_ | | [E036](https://ocfl.io/1.0/spec#E036) | 'An OCFL Object Inventory must include the following keys: [id, type, digestAlgorithm, head]' | _See multiple cases identified with suffixes below_ | | | E036a | OCFL Object %s inventory missing `id` attribute \[[ocfl/inventory_validator.py#L63](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L63)\] | @@ -71,9 +71,9 @@ The following tables show the implementation status of all errors and warnings i | [E043](https://ocfl.io/1.0/spec#E043) | 'An OCFL Object Inventory must include a block for storing versions.' | _Not implemented_ | | [E044](https://ocfl.io/1.0/spec#E044) | 'This block MUST have the key of versions within the inventory, and it must be a JSON object.' | OCFL Object %s inventory versions block is not a JSON object \[[ocfl/inventory_validator.py#L205](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L205)\] | | [E045](https://ocfl.io/1.0/spec#E045) | 'This block must have the key of versions within the inventory, and it MUST be a JSON object.' | _Not implemented_ | -| [E046](https://ocfl.io/1.0/spec#E046) | 'The keys of [the versions object] must correspond to the names of the version directories used.' | OCFL Object root inventory describes versions %s but no corresponding version directory is present \[[ocfl/validator.py#L258](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L258)\] | +| [E046](https://ocfl.io/1.0/spec#E046) | 'The keys of [the versions object] must correspond to the names of the version directories used.' | OCFL Object root inventory describes versions %s but no corresponding version directory is present \[[ocfl/validator.py#L277](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L277)\] | | [E047](https://ocfl.io/1.0/spec#E047) | 'Each value [of the versions object] must be another JSON object that characterizes the version, as described in the 3.5.3.1 Version section.' 
| _Not implemented_ | -| [E048](https://ocfl.io/1.0/spec#E048) | 'A JSON object to describe one OCFL Version, which must include the following keys: [created, state, message, user]' | OCFL Object %s inventory %s version block does not include a created date or it is malformed \[[ocfl/inventory_validator.py#L266](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L266)\] | +| [E048](https://ocfl.io/1.0/spec#E048) | 'A JSON object to describe one OCFL Version, which must include the following keys: [created, state]' | OCFL Object %s inventory %s version block does not include a created date or it is malformed \[[ocfl/inventory_validator.py#L266](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L266)\] | | | E048c | OCFL Object %s inventory %s version block does not include a state block \[[ocfl/inventory_validator.py#L282](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L282)\] | | [E049](https://ocfl.io/1.0/spec#E049) | '[the value of the "created" key] must be expressed in the Internet Date/Time Format defined by [RFC3339].' 
| _See multiple cases identified with suffixes below_ | | | E049a | OCFL Object %s inventory %s version block created date SHOULD include a timezone designator \[[ocfl/inventory_validator.py#L274](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L274)\] | @@ -104,25 +104,25 @@ The following tables show the implementation status of all errors and warnings i | | E057c | OCFL Object %s inventory fixity block entry for digest algorithm %s, digest %s is not a JSON list \[[ocfl/inventory_validator.py#L181](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L181)\] | | | E057d | OCFL Object %s inventory fixity block entry for digest algorithm %s, digest %s includes a content path %s that is not in the manifest \[[ocfl/inventory_validator.py#L194](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L194)\] | | [E058](https://ocfl.io/1.0/spec#E058) | 'Every occurrence of an inventory file must have an accompanying sidecar file stating its digest.' | _See multiple cases identified with suffixes below_ | -| | E058a | OCFL Object %s inventory is missing sidecar digest file at %s \[[ocfl/validator.py#L128](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L128)\] | -| | E058b | Cannot extract digest type from inventory digest file name %s \[[ocfl/validator.py#L151](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L151)\] | +| | E058a | OCFL Object %s inventory is missing sidecar digest file at %s \[[ocfl/validator.py#L135](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L135)\] | +| | E058b | Cannot extract digest type from inventory digest file name %s \[[ocfl/validator.py#L158](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L158)\] | | [E059](https://ocfl.io/1.0/spec#E059) | 'This value must match the value given for the digestAlgorithm key in the inventory.' 
| _Not implemented_ | -| [E060](https://ocfl.io/1.0/spec#E060) | 'The digest sidecar file must contain the digest of the inventory file.' | Mismatch between actual and recorded inventory digests for %s (calcuated %s but read %s from %s) \[[ocfl/validator.py#L147](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L147)\] | -| [E061](https://ocfl.io/1.0/spec#E061) | '[The digest sidecar file] must follow the format: DIGEST inventory.json' | Cannot extract digest from inventory digest file (%s) \[[ocfl/validator.py#L149](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L149)\] | +| [E060](https://ocfl.io/1.0/spec#E060) | 'The digest sidecar file must contain the digest of the inventory file.' | Mismatch between actual and recorded inventory digests for %s (calcuated %s but read %s from %s) \[[ocfl/validator.py#L154](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L154)\] | +| [E061](https://ocfl.io/1.0/spec#E061) | '[The digest sidecar file] must follow the format: DIGEST inventory.json' | Cannot extract digest from inventory digest file (%s) \[[ocfl/validator.py#L156](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L156)\] | | [E062](https://ocfl.io/1.0/spec#E062) | 'The digest of the inventory must be computed only after all changes to the inventory have been made, and thus writing the digest sidecar file is the last step in the versioning process.' | _Not implemented_ | -| [E063](https://ocfl.io/1.0/spec#E063) | 'Every OCFL Object must have an inventory file within the OCFL Object Root, corresponding to the state of the OCFL Object at the current version.' | _Not implemented_ | -| [E064](https://ocfl.io/1.0/spec#E064) | 'Where an OCFL Object contains inventory.json in version directories, the inventory file in the OCFL Object Root must be the same as the file in the most recent version.' 
| Object root inventory and copy in last version MUST be identical but are not (%s and %s) \[[ocfl/validator.py#L211](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L211)\] | +| [E063](https://ocfl.io/1.0/spec#E063) | 'Every OCFL Object must have an inventory file within the OCFL Object Root, corresponding to the state of the OCFL Object at the current version.' | OCFL Object root inventory is missing \[[ocfl/validator.py#L91](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L91)\] | +| [E064](https://ocfl.io/1.0/spec#E064) | 'Where an OCFL Object contains inventory.json in version directories, the inventory file in the OCFL Object Root must be the same as the file in the most recent version.' | Object root inventory and copy in last version MUST be identical but are not (%s and %s) \[[ocfl/validator.py#L217](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L217)\] | | [E066](https://ocfl.io/1.0/spec#E066) | 'Each version block in each prior inventory file must represent the same object state as the corresponding version block in the current inventory file.' | _See multiple cases identified with suffixes below_ | | | E066a | OCFL Object inventory for %s doesn't have a subset of version blocks of inventory for %s \[[ocfl/inventory_validator.py#L405](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L405)\] | -| | E066b | OCFL Object inventory manifest for %s in %s doesn't have a subset of manifest entries of inventory for %s \[[ocfl/inventory_validator.py#L413](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L413)\] | -| | E066c | OCFL Object %s inventory %s version block has no state description \[[ocfl/inventory_validator.py#L418](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L418)\] | -| [E067](https://ocfl.io/1.0/spec#E067) | 'The extensions directory must not contain any files, and no sub-directories other than extension sub-directories.' 
| OCFL Object extensions direct contains an unexpected non-directory entry: %s \[[ocfl/validator.py#L189](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L189)\] | +| | E066b | OCFL Object inventory manifest for %s in %s doesn't have a subset of manifest entries of inventory for %s \[[ocfl/inventory_validator.py#L414](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L414)\] | +| | E066c | OCFL Object %s inventory %s version block has no state description \[[ocfl/inventory_validator.py#L419](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L419)\] | +| [E067](https://ocfl.io/1.0/spec#E067) | 'The extensions directory must not contain any files, and no sub-directories other than extension sub-directories.' | OCFL Object extensions direct contains an unexpected non-directory entry: %s \[[ocfl/validator.py#L196](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L196)\] | | [E068](https://ocfl.io/1.0/spec#E068) | 'The specific structure and function of the extension, as well as a declaration of the registered extension name must be defined in one of the following locations: The OCFL Extensions repository OR The Storage Root, as a plain text document directly in the Storage Root.' | _Not implemented_ | | [E069](https://ocfl.io/1.0/spec#E069) | 'An OCFL Storage Root MUST contain a Root Conformance Declaration identifying it as such.' | _Not implemented_ | | [E070](https://ocfl.io/1.0/spec#E070) | 'If present, [the ocfl_layout.json document] MUST include the following two keys in the root JSON object: [key, description]' | _Not implemented_ | | [E071](https://ocfl.io/1.0/spec#E071) | 'The value of the [ocfl_layout.json] extension key must be the registered extension name for the extension defining the arrangement under the storage root.' 
| _Not implemented_ | -| [E072](https://ocfl.io/1.0/spec#E072) | 'The directory hierarchy used to store OCFL Objects MUST NOT contain files that are not part of an OCFL Object.' | OCFL storage root hierarchy include directory %s with at least one file but no object declaration. Such additional files are not allowed \[[ocfl/store.py#L177](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L177)\] | -| [E073](https://ocfl.io/1.0/spec#E073) | 'Empty directories MUST NOT appear under a storage root.' | OCFL storage root hierarchy contains an empty directory: %s \[[ocfl/store.py#L159](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L159)\] | +| [E072](https://ocfl.io/1.0/spec#E072) | 'The directory hierarchy used to store OCFL Objects MUST NOT contain files that are not part of an OCFL Object.' | OCFL storage root hierarchy include directory %s with at least one file but no object declaration. Such additional files are not allowed \[[ocfl/store.py#L169](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L169)\] | +| [E073](https://ocfl.io/1.0/spec#E073) | 'Empty directories MUST NOT appear under a storage root.' | OCFL storage root hierarchy contains an empty directory: %s \[[ocfl/store.py#L151](https://github.com/zimeon/ocfl-py/blob/main/ocfl/store.py#L151)\] | | [E074](https://ocfl.io/1.0/spec#E074) | 'Although implementations may require multiple OCFL Storage Roots - that is, several logical or physical volumes, or multiple "buckets" in an object store - each OCFL Storage Root MUST be independent.' | _Not implemented_ | | [E075](https://ocfl.io/1.0/spec#E075) | 'The OCFL version declaration MUST be formatted according to the NAMASTE specification.' | _Not implemented_ | | [E076](https://ocfl.io/1.0/spec#E076) | '[The OCFL version declaration] MUST be a file in the base directory of the OCFL Storage Root giving the OCFL version in the filename.' 
| _Not implemented_ | @@ -141,8 +141,8 @@ The following tables show the implementation status of all errors and warnings i | [E089](https://ocfl.io/1.0/spec#E089) | 'If the preservation of non-OCFL-compliant features is required then the content MUST be wrapped in a suitable disk or filesystem image format which OCFL can treat as a regular file.' | _Not implemented_ | | [E090](https://ocfl.io/1.0/spec#E090) | 'Hard and soft (symbolic) links are not portable and MUST NOT be used within OCFL Storage hierachies.' | NOTE - E090 is a processing instruction and can't be tested for. \[_Not implemented_\] | | [E091](https://ocfl.io/1.0/spec#E091) | 'Filesystems MUST preserve the case of OCFL filepaths and filenames.' | OCFL Object %s inventory manifest file list for digest %s is not a JSON array \[_Not implemented_\] | -| [E092](https://ocfl.io/1.0/spec#E092) | 'The value for each key in the manifest must be an array containing the content paths of files in the OCFL Object that have content with the given digest.' | OCFL Object %s inventory manifest has digest %s for file %s which doesn't match calculated digest %s for that file \[[ocfl/validator.py#L269](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L269) [ocfl/inventory_validator.py#L132](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L132)\] | -| [E093](https://ocfl.io/1.0/spec#E093) | 'Where included in the fixity block, the digest values given must match the digests of the files at the corresponding content paths.' | OCFL Object %s inventory fixity block for digest algorithm %s has digest %s for file %s which doesn't match calculated digest %s for that file \[[ocfl/validator.py#L279](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L279)\] | +| [E092](https://ocfl.io/1.0/spec#E092) | 'The value for each key in the manifest must be an array containing the content paths of files in the OCFL Object that have content with the given digest.' 
| OCFL Object %s inventory manifest has digest %s for file %s which doesn't match calculated digest %s for that file \[[ocfl/inventory_validator.py#L132](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L132) [ocfl/validator.py#L297](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L297) [ocfl/validator.py#L288](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L288)\] | +| [E093](https://ocfl.io/1.0/spec#E093) | 'Where included in the fixity block, the digest values given must match the digests of the files at the corresponding content paths.' | OCFL Object %s inventory fixity block for digest algorithm %s has digest %s for file %s which doesn't match calculated digest %s for that file \[[ocfl/validator.py#L308](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L308)\] | | [E094](https://ocfl.io/1.0/spec#E094) | 'The value of [the message] key is freeform text, used to record the rationale for creating this version. It must be a JSON string.' | OCFL Object %s inventory %s version block has message key with value that isn't a string \[[ocfl/inventory_validator.py#L286](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L286)\] | | [E095](https://ocfl.io/1.0/spec#E095) | 'Within a version, logical paths must be unique and non-conflicting, so the logical path for a file cannot appear as the initial part of another logical path.' | OCFL Object %s inventory version %s state has logical path %s used as both a directory and a file path. \[[ocfl/inventory_validator.py#L334](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L334)\] | | [E096](https://ocfl.io/1.0/spec#E096) | 'As JSON keys are case sensitive, while digests may not be, there is an additional requirement that each digest value must occur only once in the manifest regardless of case.' 
| OCFL Object %s inventory manifest block includes digest %s more than once with different normalizations \[[ocfl/inventory_validator.py#L138](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L138)\] | @@ -158,8 +158,8 @@ The following tables show the implementation status of all errors and warnings i | Code | Specification text (or suffixed code) | Implementation status and message/links | | --- | --- | --- | | [W001](https://ocfl.io/1.0/spec#W001) | 'Implementations SHOULD use version directory names constructed without zero-padding the version number, ie. v1, v2, v3, etc.'' | OCFL Object %s inventory version numbers SHOULD NOT be zero-padded \[[ocfl/inventory_validator.py#L231](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L231)\] | -| [W002](https://ocfl.io/1.0/spec#W002) | 'The version directory SHOULD NOT contain any directories other than the designated content sub-directory. Once created, the contents of a version directory are expected to be immutable.' | OCFL Object version directory %s SHOULD NOT contain any directory except the designated content directory (found %s) \[[ocfl/validator.py#L253](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L253)\] | -| [W003](https://ocfl.io/1.0/spec#W003) | 'Version directories must contain a designated content sub-directory if the version contains files to be preserved, and SHOULD NOT contain this sub-directory otherwise.' | OCFL Object version directory %s SHOULD NOT contain an empty content directory \[[ocfl/validator.py#L251](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L251)\] | +| [W002](https://ocfl.io/1.0/spec#W002) | 'The version directory SHOULD NOT contain any directories other than the designated content sub-directory. Once created, the contents of a version directory are expected to be immutable.' 
| OCFL Object version directory %s SHOULD NOT contain any directory except the designated content directory (found %s) \[[ocfl/validator.py#L273](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L273)\] | +| [W003](https://ocfl.io/1.0/spec#W003) | 'Version directories must contain a designated content sub-directory if the version contains files to be preserved, and SHOULD NOT contain this sub-directory otherwise.' | OCFL Object version directory %s SHOULD NOT contain an empty content directory \[[ocfl/validator.py#L271](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L271)\] | | [W004](https://ocfl.io/1.0/spec#W004) | 'For content-addressing, OCFL Objects SHOULD use sha512.' | OCFL Object %s inventory SHOULD use sha512 but uses sha256 as the DigestAlgorithm \[[ocfl/inventory_validator.py#L75](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L75)\] | | [W005](https://ocfl.io/1.0/spec#W005) | 'The OCFL Object Inventory id SHOULD be a URI.' | OCFL Object %s inventory id SHOULD be a URI (got %s) \[[ocfl/inventory_validator.py#L61](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L61)\] | | [W007](https://ocfl.io/1.0/spec#W007) | 'In the OCFL Object Inventory, the JSON object describing an OCFL Version, SHOULD include the message and user keys.' | _See multiple cases identified with suffixes below_ | @@ -167,11 +167,11 @@ The following tables show the implementation status of all errors and warnings i | | W007b | OCFL Object %s inventory %s version block SHOULD include a user key \[[ocfl/inventory_validator.py#L288](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L288)\] | | [W008](https://ocfl.io/1.0/spec#W008) | 'In the OCFL Object Inventory, in the version block, the value of the user key SHOULD contain an address key, address.' 
| OCFL Object %s inventory %s version block user description SHOULD have an address \[[ocfl/inventory_validator.py#L297](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L297)\] | | [W009](https://ocfl.io/1.0/spec#W009) | 'In the OCFL Object Inventory, in the version block, the address value SHOULD be a URI: either a mailto URI [RFC6068] with the e-mail address of the user or a URL to a personal identifier, e.g., an ORCID iD.' | OCFL Object %s inventory %s version block user description SHOULD be a mailto: or person identifier URI \[[ocfl/inventory_validator.py#L301](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L301)\] | -| [W010](https://ocfl.io/1.0/spec#W010) | 'In addition to the inventory in the OCFL Object Root, every version directory SHOULD include an inventory file that is an Inventory of all content for versions up to and including that particular version.' | OCFL Object %s SHOULD have an inventory file but does not \[[ocfl/validator.py#L200](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L200)\] | -| [W011](https://ocfl.io/1.0/spec#W011) | 'In the case that prior version directories include an inventory file, the values of the created, message and user keys in each version block in each prior inventory file SHOULD have the same values as the corresponding keys in the corresponding version block in the current inventory file.' | OCFL Object version metadata '%s' for %s in %s inventory does not match that in %s inventory \[[ocfl/inventory_validator.py#L424](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L424)\] | +| [W010](https://ocfl.io/1.0/spec#W010) | 'In addition to the inventory in the OCFL Object Root, every version directory SHOULD include an inventory file that is an Inventory of all content for versions up to and including that particular version.' 
| OCFL Object %s SHOULD have an inventory file but does not \[[ocfl/validator.py#L211](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L211)\] | +| [W011](https://ocfl.io/1.0/spec#W011) | 'In the case that prior version directories include an inventory file, the values of the created, message and user keys in each version block in each prior inventory file SHOULD have the same values as the corresponding keys in the corresponding version block in the current inventory file.' | OCFL Object version metadata '%s' for %s in %s inventory does not match that in %s inventory \[[ocfl/inventory_validator.py#L425](https://github.com/zimeon/ocfl-py/blob/main/ocfl/inventory_validator.py#L425)\] | | [W012](https://ocfl.io/1.0/spec#W012) | 'Implementers SHOULD use the logs directory, if present, for storing files that contain a record of actions taken on the object.' | _Not implemented_ | -| [W013](https://ocfl.io/1.0/spec#W013) | 'In an OCFL Object, extension sub-directories SHOULD be named according to a registered extension name.' | OCFL Object includes unregistered extension directory '%s' \[[ocfl/validator.py#L187](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L187)\] | +| [W013](https://ocfl.io/1.0/spec#W013) | 'In an OCFL Object, extension sub-directories SHOULD be named according to a registered extension name.' | OCFL Object includes unregistered extension directory '%s' \[[ocfl/validator.py#L194](https://github.com/zimeon/ocfl-py/blob/main/ocfl/validator.py#L194)\] | | [W014](https://ocfl.io/1.0/spec#W014) | 'Storage hierarchies within the same OCFL Storage Root SHOULD use just one layout pattern.' | _Not implemented_ | | [W015](https://ocfl.io/1.0/spec#W015) | 'Storage hierarchies within the same OCFL Storage Root SHOULD consistently use either a directory hierarchy of OCFL Objects or top-level OCFL Objects.' 
| _Not implemented_ | -_Generated by `extract_codes.py` at 2021-03-24 16:21:53.209724_ \ No newline at end of file +_Generated by `extract_codes.py` at 2021-04-18 11:14:45.025954_ \ No newline at end of file diff --git a/extra_fixtures/bad-objects/E041_manifest_not_object/0=ocfl_object_1.0 b/extra_fixtures/bad-objects/E041_manifest_not_object/0=ocfl_object_1.0 new file mode 100644 index 0000000..4d1d62c --- /dev/null +++ b/extra_fixtures/bad-objects/E041_manifest_not_object/0=ocfl_object_1.0 @@ -0,0 +1 @@ +ocfl_object_1.0 diff --git a/extra_fixtures/bad-objects/E041_manifest_not_object/inventory.json b/extra_fixtures/bad-objects/E041_manifest_not_object/inventory.json new file mode 100644 index 0000000..e2f8a44 --- /dev/null +++ b/extra_fixtures/bad-objects/E041_manifest_not_object/inventory.json @@ -0,0 +1,19 @@ +{ + "digestAlgorithm": "sha512", + "head": "v1", + "id": "http://example.org/E041_no_manifest", + "manifest": "this should be a JSON object not a string!", + "type": "https://ocfl.io/1.0/spec/#inventory", + "versions": { + "v1": { + "created": "2019-01-01T02:03:04Z", + "message": "a message", + "state": { + }, + "user": { + "address": "https://example.org/a_person", + "name": "A Person" + } + } + } +} diff --git a/extra_fixtures/bad-objects/E041_manifest_not_object/inventory.json.sha512 b/extra_fixtures/bad-objects/E041_manifest_not_object/inventory.json.sha512 new file mode 100644 index 0000000..9fc5c13 --- /dev/null +++ b/extra_fixtures/bad-objects/E041_manifest_not_object/inventory.json.sha512 @@ -0,0 +1 @@ +a3e526a026fc9cd3e39673c7218355cea677b7696153116d4ee42e9b96b6bf91965ba48aa6f2c97c082f152338015ba82e09a55d0d461e5e574259ee58b66edd inventory.json diff --git a/extra_fixtures/bad-objects/E041_manifest_not_object/v1/inventory.json b/extra_fixtures/bad-objects/E041_manifest_not_object/v1/inventory.json new file mode 100644 index 0000000..e2f8a44 --- /dev/null +++ b/extra_fixtures/bad-objects/E041_manifest_not_object/v1/inventory.json @@ -0,0 +1,19 @@ +{ 
+ "digestAlgorithm": "sha512", + "head": "v1", + "id": "http://example.org/E041_no_manifest", + "manifest": "this should be a JSON object not a string!", + "type": "https://ocfl.io/1.0/spec/#inventory", + "versions": { + "v1": { + "created": "2019-01-01T02:03:04Z", + "message": "a message", + "state": { + }, + "user": { + "address": "https://example.org/a_person", + "name": "A Person" + } + } + } +} diff --git a/extra_fixtures/bad-objects/E041_manifest_not_object/v1/inventory.json.sha512 b/extra_fixtures/bad-objects/E041_manifest_not_object/v1/inventory.json.sha512 new file mode 100644 index 0000000..9fc5c13 --- /dev/null +++ b/extra_fixtures/bad-objects/E041_manifest_not_object/v1/inventory.json.sha512 @@ -0,0 +1 @@ +a3e526a026fc9cd3e39673c7218355cea677b7696153116d4ee42e9b96b6bf91965ba48aa6f2c97c082f152338015ba82e09a55d0d461e5e574259ee58b66edd inventory.json diff --git a/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/0=ocfl_object_1.0 b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/0=ocfl_object_1.0 new file mode 100644 index 0000000..4d1d62c --- /dev/null +++ b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/0=ocfl_object_1.0 @@ -0,0 +1 @@ +ocfl_object_1.0 diff --git a/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/inventory.json b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/inventory.json new file mode 100644 index 0000000..c51115b --- /dev/null +++ b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/inventory.json @@ -0,0 +1,26 @@ +{ + "digestAlgorithm": "sha512", + "head": "v1", + "id": "uri:something451", + "manifest": { + "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ + "v001/content/a_file.txt" + ] + }, + "type": "https://ocfl.io/1.0/spec/#inventory", + "versions": { + "v1": { + "created": "2019-01-01T01:01:01Z", + "state": { + 
"43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ + "a_file.txt" + ] + }, + "message": "Intial import", + "user": { + "name": "A person", + "address": "https://orcid.org/0000-0000-0000-0000" + } + } + } +} diff --git a/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/inventory.json.sha512 b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/inventory.json.sha512 new file mode 100644 index 0000000..11f2322 --- /dev/null +++ b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/inventory.json.sha512 @@ -0,0 +1 @@ +a662f8139848c99b4045c7955e438a1ecd0c6586c64e2dbfdc028c336ab9dd24695981a8c6e55031a2f55e0b5a1ce38a2e4f6ee4dd3f6462a071c49b8137eda3 inventory.json diff --git a/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/content/a_file.txt b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/content/a_file.txt new file mode 100644 index 0000000..2baefe4 --- /dev/null +++ b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/content/a_file.txt @@ -0,0 +1 @@ +Hello! I am a file. 
diff --git a/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/inventory.json b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/inventory.json new file mode 100644 index 0000000..c51115b --- /dev/null +++ b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/inventory.json @@ -0,0 +1,26 @@ +{ + "digestAlgorithm": "sha512", + "head": "v1", + "id": "uri:something451", + "manifest": { + "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ + "v001/content/a_file.txt" + ] + }, + "type": "https://ocfl.io/1.0/spec/#inventory", + "versions": { + "v1": { + "created": "2019-01-01T01:01:01Z", + "state": { + "43a43fe8a8a082d3b5343dfaf2fd0c8b8e370675b1f376e92e9994612c33ea255b11298269d72f797399ebb94edeefe53df243643676548f584fb8603ca53a0f": [ + "a_file.txt" + ] + }, + "message": "Intial import", + "user": { + "name": "A person", + "address": "https://orcid.org/0000-0000-0000-0000" + } + } + } +} diff --git a/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/inventory.json.sha512 b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/inventory.json.sha512 new file mode 100644 index 0000000..11f2322 --- /dev/null +++ b/extra_fixtures/bad-objects/E046_versions_keys_not_zero_padded/v001/inventory.json.sha512 @@ -0,0 +1 @@ +a662f8139848c99b4045c7955e438a1ecd0c6586c64e2dbfdc028c336ab9dd24695981a8c6e55031a2f55e0b5a1ce38a2e4f6ee4dd3f6462a071c49b8137eda3 inventory.json diff --git a/extract_codes.py b/extract_codes.py index 40cbc75..0841980 100755 --- a/extract_codes.py +++ b/extract_codes.py @@ -1,12 +1,11 @@ #!/usr/bin/env python """Extract list of currently implemented error and warning codes.""" - import datetime -import fs import logging import os.path import re import requests +import fs from ocfl.validation_logger import ValidationLogger @@ -16,7 +15,7 @@ VALIDATION_STATUS_MD = 'docs/validation_status.md' -class 
Code(object): +class Code(): """Class for details of one error or warning code.""" def __init__(self, code, desc): @@ -37,16 +36,16 @@ def add_suffix(self, suffix, link=None, desc=None): self.suffixes[suffix]['desc'] = desc def desc_links(self, suffix): - """Description and link for this suffix in code.""" + """Generate description and link for this suffix in code.""" if len(self.suffixes[suffix]['link']) > 0: links = ' '.join(self.suffixes[suffix]['link']) else: links = "_Not implemented_" desc = self.suffixes[suffix]['desc'] or "**Missing description**" - return("%s \\[%s\\]" % (desc, links)) + return "%s \\[%s\\]" % (desc, links) def as_str(self): - """String output for markdown.""" + """Create string output for markdown.""" status = "_See multiple cases identified with suffixes below_" if len(self.suffixes) == 0: status = "_Not implemented_" @@ -63,10 +62,10 @@ def as_str(self): if suffix == '': continue s += '| | %s%s | %s |\n' % (self.code, suffix, self.desc_links(suffix)) - return(s) + return s -class Codes(object): +class Codes(): """Class for the complete set of error and warning codes.""" def __init__(self): @@ -83,64 +82,70 @@ def add_impl(self, code, suffix, link=None, desc=None): self.codes[code] = Code(code, None) self.codes[code].add_suffix(suffix, link=link, desc=desc) - def as_str(self, filter=''): - """String output for markdown.""" + def as_str(self, exclude=''): + """Create string output for markdown.""" s = '' for code in sorted(self.codes.keys()): - if code.startswith(filter): + if code.startswith(exclude): s += self.codes[code].as_str() - return(s) - - -# 0. Assemble all data in codes -codes = Codes() - -# 1. Get validation codes from github -md = requests.get(VALIDATION_CODES_URL).text -for line in md.split('\n'): - m = re.match(r'''\|\s*([EW]\d\d\d)\s*\|\s*([^\|]+)\|''', line) - if m: - code = m.group(1) - desc = m.group(2).rstrip() - codes.add_spec(code, desc) - -# 2. 
Get validation codes and messages from strings file -vl = ValidationLogger() -for code_suffix in vl.validation_codes: - try: - desc = vl.validation_codes[code_suffix]['description']['en'] - except KeyError: - desc = "MISSING ENGLISH DESCRIPTION" - m = re.match(r'''([EW]\d\d\d)(\w?)$''', code_suffix) - if m: - codes.add_impl(m.group(1), m.group(2), desc=desc) - else: - logging.error("Bad entry for code+suffix '%s' in strings file" % (code_suffix)) - -# 3. Get validation codes from ocfl-py Python codes -code_fs = fs.open_fs('ocfl') -for file in code_fs.walk.files(filter=['*.py']): - with code_fs.open(file) as fh: - n = 0 - for line in fh: - n += 1 - m = re.search(r'''(["'])([EW]\d\d\d)(\w)?\1''', line) - if m: - file_line = 'ocfl%s#L%d' % (file, n) - link = '[' + file_line + '](' + GITHUB_REPO + '/blob/main/' + file_line + ')' - codes.add_impl(m.group(2), m.group(3), link=link) - -# 4. Write table of what is implemented and raise warnings -logging.info("Writing summary to %s" % (VALIDATION_STATUS_MD)) -with open(VALIDATION_STATUS_MD, "w") as fh: - fh.write("# Implementation status for errors and warnings\n\n") - fh.write("The following tables show the implementation status of all errors and warnings in the OCFL v1.0 specification, with links to the specification and into the code repository.\n\n") - fh.write("## Errors\n\n") - fh.write("| Code | Specification text (or suffixed code) | Implementation status and message/links |\n") - fh.write("| --- | --- | --- |\n") - fh.write(codes.as_str(filter='E') + "\n") - fh.write("## Warnings\n\n") - fh.write("| Code | Specification text (or suffixed code) | Implementation status and message/links |\n") - fh.write("| --- | --- | --- |\n") - fh.write(codes.as_str(filter='W') + "\n") - fh.write("_Generated by `%s` at %s_" % (os.path.basename(__file__), datetime.datetime.now())) + return s + + +def main(): + """Run from command line.""" + # 0. Assemble all data in codes + codes = Codes() + + # 1. 
Get validation codes from github + md = requests.get(VALIDATION_CODES_URL).text + for line in md.split('\n'): + m = re.match(r'''\|\s*([EW]\d\d\d)\s*\|\s*([^\|]+)\|''', line) + if m: + code = m.group(1) + desc = m.group(2).rstrip() + codes.add_spec(code, desc) + + # 2. Get validation codes and messages from strings file + vl = ValidationLogger() + for code_suffix in vl.validation_codes: + try: + desc = vl.validation_codes[code_suffix]['description']['en'] + except KeyError: + desc = "MISSING ENGLISH DESCRIPTION" + m = re.match(r'''([EW]\d\d\d)(\w?)$''', code_suffix) + if m: + codes.add_impl(m.group(1), m.group(2), desc=desc) + else: + logging.error("Bad entry for code+suffix '%s' in strings file", code_suffix) + + # 3. Get validation codes from ocfl-py Python codes + code_fs = fs.open_fs('ocfl') + for file in code_fs.walk.files(filter=['*.py']): + with code_fs.open(file) as fh: + n = 0 + for line in fh: + n += 1 + m = re.search(r'''(["'])([EW]\d\d\d)(\w)?\1''', line) + if m: + file_line = 'ocfl%s#L%d' % (file, n) + link = '[' + file_line + '](' + GITHUB_REPO + '/blob/main/' + file_line + ')' + codes.add_impl(m.group(2), m.group(3), link=link) + + # 4. 
Write table of what is implemented and raise warnings + logging.info("Writing summary to %s", VALIDATION_STATUS_MD) + with open(VALIDATION_STATUS_MD, "w") as fh: + fh.write("# Implementation status for errors and warnings\n\n") + fh.write("The following tables show the implementation status of all errors and warnings in the OCFL v1.0 specification, with links to the specification and into the code repository.\n\n") + fh.write("## Errors\n\n") + fh.write("| Code | Specification text (or suffixed code) | Implementation status and message/links |\n") + fh.write("| --- | --- | --- |\n") + fh.write(codes.as_str(exclude='E') + "\n") + fh.write("## Warnings\n\n") + fh.write("| Code | Specification text (or suffixed code) | Implementation status and message/links |\n") + fh.write("| --- | --- | --- |\n") + fh.write(codes.as_str(exclude='W') + "\n") + fh.write("_Generated by `%s` at %s_" % (os.path.basename(__file__), datetime.datetime.now())) + + +if __name__ == "__main__": + main() diff --git a/fixtures b/fixtures index ba07688..042b5bf 160000 --- a/fixtures +++ b/fixtures @@ -1 +1 @@ -Subproject commit ba0768868c90f721aca6d368a2fabbd285acb8d9 +Subproject commit 042b5bfd9ec21c7fb91c3fd564690f9481e9260f diff --git a/ocfl-object.py b/ocfl-object.py index e38e4ad..5e50f54 100755 --- a/ocfl-object.py +++ b/ocfl-object.py @@ -2,15 +2,14 @@ """OCFL Object and Inventory Builder.""" import argparse import logging -import ocfl import sys +import ocfl + class FatalError(Exception): """Exception class for conditions that should abort with message.""" - pass - def parse_arguments(): """Parse command line arguments.""" @@ -77,7 +76,7 @@ def parse_arguments(): def do_object_operation(args): """Implement object operations in a way that can be reused by ocfl-store.py.""" - obj = ocfl.Object(id=args.id, + obj = ocfl.Object(identifier=args.id, digest_algorithm=args.digest, filepath_normalization=args.normalization, forward_delta=not args.no_forward_delta, @@ -140,9 +139,9 @@ def 
do_object_operation(args): if __name__ == "__main__": try: - args = parse_arguments() - logging.basicConfig(level=logging.INFO if args.verbose else logging.WARN) - do_object_operation(args) + aargs = parse_arguments() + logging.basicConfig(level=logging.INFO if aargs.verbose else logging.WARN) + do_object_operation(aargs) except (FatalError, ocfl.ObjectException) as e: # Show message but otherwise exit quietly print('Error - ' + str(e)) diff --git a/ocfl-sidecar.py b/ocfl-sidecar.py index 65bc273..d0ea82a 100755 --- a/ocfl-sidecar.py +++ b/ocfl-sidecar.py @@ -2,9 +2,10 @@ """OCFL inventory sidecar generator and updater.""" import argparse import logging -import ocfl import os.path +import ocfl + INVENTORY_NAME = "inventory.json" @@ -22,36 +23,41 @@ def parse_arguments(): return args -def create_sidecar(dir): +def create_sidecar(args, directory): """Create sidecar for inventory in dir.""" - inventory_path = os.path.join(dir, INVENTORY_NAME) + inventory_path = os.path.join(directory, INVENTORY_NAME) if not os.path.isfile(inventory_path): - logging.error("Ignoring path %s because there is no inventory file %s." 
% (dir, inventory_path)) + logging.error("Ignoring path %s because there is no inventory file %s.", directory, inventory_path) else: - object = ocfl.Object(path=dir) + obj = ocfl.Object(path=directory) if args.digest is not None: - object.digest_algorithm = args.digest + obj.digest_algorithm = args.digest else: # Read inventory in the hope of setting digest_algoritm try: - object.parse_inventory() + obj.parse_inventory() except ocfl.ObjectException as e: - logging.warning("Failed to read inventory in directory %s (%s)" % (dir, str(e))) - sidecar = object.write_inventory_sidecar() - logging.info("Written sidecar file %s" % (sidecar)) + logging.warning("Failed to read inventory in directory %s (%s)", directory, e) + sidecar = obj.write_inventory_sidecar() + logging.info("Written sidecar file %s", sidecar) -if __name__ == "__main__": +def main(): + """Run from command line.""" args = parse_arguments() logging.basicConfig(level=logging.INFO if args.verbose else logging.WARN) paths = ["."] if len(args.path) == 0 else args.path for path in paths: - logging.info("Looking at path %s" % (path)) + logging.info("Looking at path %s", path) if os.path.isdir(path): - create_sidecar(path) + create_sidecar(args, path) else: - (dir, filename) = os.path.split(path) + (directory, filename) = os.path.split(path) if filename == INVENTORY_NAME: - create_sidecar(dir) + create_sidecar(args, directory) else: logging.error("Ignoring path %s with filename that is not inventory.json") + + +if __name__ == "__main__": + main() print("Done.") diff --git a/ocfl-store.py b/ocfl-store.py index f0dbe1e..b0ba03b 100755 --- a/ocfl-store.py +++ b/ocfl-store.py @@ -2,9 +2,10 @@ """OCFL Storage Root Tool.""" import argparse import logging -import ocfl import sys +import ocfl + parser = argparse.ArgumentParser(description='Manpulate or validate an OCFL Storage Root.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--root', required=True, @@ -83,7 +84,7 @@ else: 
logging.error("create/build/validate not implemented") else: - logging.warn("Nuttin' happenin' 'round 'ere.") + logging.warning("No command, nothing to do.") except (ocfl.StoreException, ocfl.ObjectException) as e: logging.error(str(e)) sys.exit(1) diff --git a/ocfl-validate.py b/ocfl-validate.py index 0edc40c..30e1d9c 100755 --- a/ocfl-validate.py +++ b/ocfl-validate.py @@ -2,9 +2,10 @@ """Validate an OCFL Object.""" import argparse import logging -import ocfl import sys +import ocfl + parser = argparse.ArgumentParser( description='Validate one or more OCFL objects, storage roots or standalone ' 'inventory files. By default shows any errors or warnings, and final ' @@ -40,7 +41,7 @@ num += 1 path_type = ocfl.find_path_type(path) if path_type == 'object': - log.info("Validating OCFL Object at " + path) + log.info("Validating OCFL Object at %s", path) obj = ocfl.Object(lax_digests=args.lax_digests) if obj.validate(path, show_warnings=show_warnings, @@ -48,7 +49,7 @@ check_digests=not args.no_check_digests): num_good += 1 elif path_type == 'root': - log.info("Validating OCFL Storage Root at " + path) + log.info("Validating OCFL Storage Root at %s", path) store = ocfl.Store(root=path, lax_digests=args.lax_digests) if store.validate(show_warnings=show_warnings, @@ -56,15 +57,15 @@ check_digests=not args.no_check_digests): num_good += 1 elif path_type == 'file': - log.info("Validating separate OCFL Inventory at " + path) + log.info("Validating separate OCFL Inventory at %s", path) obj = ocfl.Object(lax_digests=args.lax_digests) if obj.validate_inventory(path, show_warnings=show_warnings, show_errors=not args.very_quiet): num_good += 1 else: - log.error("Bad path %s (%s)" % (path, path_type)) + log.error("Bad path %s (%s)", path, path_type) if num_paths > 1: - log.info(" [%d / %d paths validated, %d / %d VALID]\n" % (num, num_paths, num_good, num)) + log.info(" [%d / %d paths validated, %d / %d VALID]\n", num, num_paths, num_good, num) if num_good != num: sys.exit(1) 
diff --git a/ocfl/_version.py b/ocfl/_version.py index 72f5f25..c16ca30 100644 --- a/ocfl/_version.py +++ b/ocfl/_version.py @@ -1,2 +1,2 @@ """Version number for this Python implementation of OCFL.""" -__version__ = '1.2.0' +__version__ = '1.2.1' diff --git a/ocfl/bagger.py b/ocfl/bagger.py index ee5034b..040deb5 100644 --- a/ocfl/bagger.py +++ b/ocfl/bagger.py @@ -15,15 +15,13 @@ suited to transferring content that might be used to update an OCLF object or disseminate a particular version. """ -import bagit import os.path +import bagit class BaggerError(Exception): """Exception class for conditions that should abort with message.""" - pass - def bag_as_source(srcbag, metadata): """Validate and read metadata from srcbag as input. @@ -76,4 +74,4 @@ def bag_extracted_version(dst, metadata): tags['Contact-Name'] = metadata.name if metadata.address and metadata.address.startswith('mailto:'): tags['Contact-Email'] = metadata.address[7:] - bag = bagit.make_bag(dst, bag_info=tags, checksums=['sha512']) + bagit.make_bag(dst, bag_info=tags, checksums=['sha512']) diff --git a/ocfl/data/validation-errors.json b/ocfl/data/validation-errors.json index 40987c1..056a025 100644 --- a/ocfl/data/validation-errors.json +++ b/ocfl/data/validation-errors.json @@ -127,11 +127,6 @@ "en": "OCFL Object %s inventory is not valid JSON (%s)" } }, - "E034": { - "description": { - "en": "OCFL Object root inventory is missing" - } - }, "E036a": { "params": ["where"], "description": { @@ -383,6 +378,11 @@ "en": "Cannot extract digest from inventory digest file (%s)" } }, + "E063": { + "description": { + "en": "OCFL Object root inventory is missing" + } + }, "E064": { "params": ["root_inv_file", "inv_file"], "description": { @@ -477,7 +477,7 @@ } }, "E097": { - "params": ["where", "algoritm", "digest"], + "params": ["where", "algorithm", "digest"], "description": { "en": "OCFL Object %s inventory fixity block for digest algorithm %s, includes digest %s more than once with different 
normalizations" } diff --git a/ocfl/digest.py b/ocfl/digest.py index 16970b8..31c1812 100755 --- a/ocfl/digest.py +++ b/ocfl/digest.py @@ -1,6 +1,6 @@ """Digest handling for OCFL.""" -import fs import hashlib +import fs from .pyfs import open_fs BUFSIZE = 64 * 1024 # 64kB for want of better info... @@ -43,32 +43,31 @@ def file_digest(filename, digest_type='sha512', pyfs=None): # From spec if digest_type == 'sha512': return _file_digest(pyfs, filename, hashlib.sha512()) - elif digest_type == 'sha256': + if digest_type == 'sha256': return _file_digest(pyfs, filename, hashlib.sha256()) - elif digest_type == 'sha1': + if digest_type == 'sha1': return _file_digest(pyfs, filename, hashlib.sha1()) - elif digest_type == 'md5': + if digest_type == 'md5': return _file_digest(pyfs, filename, hashlib.md5()) - elif digest_type == 'blake2b-512': + if digest_type == 'blake2b-512': return _file_digest(pyfs, filename, hashlib.blake2b()) # From extensions - elif digest_type == 'blake2b-160': + if digest_type == 'blake2b-160': return _file_digest(pyfs, filename, hashlib.blake2b(digest_size=20)) - elif digest_type == 'blake2b-256': + if digest_type == 'blake2b-256': return _file_digest(pyfs, filename, hashlib.blake2b(digest_size=32)) - elif digest_type == 'blake2b-384': + if digest_type == 'blake2b-384': return _file_digest(pyfs, filename, hashlib.blake2b(digest_size=48)) # Specification examples: 15/6 chars ... 3 chars. The truncated # sha512 is twice as many chars as the truncated sha256 to give # a appropriate impression in examples - elif digest_type == 'sha512-spec-ex': + if digest_type == 'sha512-spec-ex': d = _file_digest(pyfs, filename, hashlib.sha512()) return d[:15] + '...' + d[-3:] - elif digest_type == 'sha256-spec-ex': + if digest_type == 'sha256-spec-ex': d = _file_digest(pyfs, filename, hashlib.sha256()) return d[:6] + '...' 
+ d[-3:] - else: - raise ValueError("Unsupport digest type %s" % (digest_type)) + raise ValueError("Unsupport digest type %s" % (digest_type)) DIGEST_REGEXES = { @@ -94,11 +93,11 @@ def digest_regex(digest_type='sha512'): def normalized_digest(digest, digest_type='sha512'): - """Normalized version of the digest that enables string comparison. + """Normalize the digest to return version that enables string comparison. All forms (except the spec example forms) are case insensitive. We use lowercase as the normalized form. """ - if digest_type != 'sha512-spec-ex' and digest_type != 'sha256-spec-ex': - return digest.lower() - return digest + if digest_type in ('sha512-spec-ex', 'sha256-spec-ex'): + return digest + return digest.lower() diff --git a/ocfl/disposition.py b/ocfl/disposition.py index 5faeb23..51949c7 100644 --- a/ocfl/disposition.py +++ b/ocfl/disposition.py @@ -8,13 +8,12 @@ def get_dispositor(disposition=None): """Find Dispositor object for the given disposition.""" if disposition == 'pairtree': return Ntree(n=2) - elif disposition == 'tripletree': + if disposition == 'tripletree': return Ntree(n=3) - elif disposition == 'quadtree': + if disposition == 'quadtree': return Ntree(n=4) - elif disposition == 'uuid_quadtree': + if disposition == 'uuid_quadtree': return UUIDQuadtree() - elif disposition == 'identity': + if disposition == 'identity': return Identity() - else: - raise Exception("Unsupported disposition %s, aborting!" % (disposition)) + raise Exception("Unsupported disposition %s, aborting!" 
% (disposition)) diff --git a/ocfl/dispositor.py b/ocfl/dispositor.py index bf0aedf..a3bc68c 100644 --- a/ocfl/dispositor.py +++ b/ocfl/dispositor.py @@ -7,35 +7,30 @@ class Dispositor: """Base class for disposition handlers -- let's call them Dispositors.""" - def __init__(self): - """Initialize Dispositor.""" - pass - - def strip_root(self, path, root): + def strip_root(self, path, root): # pylint: disable=no-self-use """Remove root from path, throw exception on failure.""" root = root.rstrip(os.sep) # ditch any trailing path separator if os.path.commonprefix((path, root)) == root: return os.path.relpath(path, start=root) - else: - raise Exception("Path %s is not in root %s" % (path, root)) + raise Exception("Path %s is not in root %s" % (path, root)) - def is_valid(self, identifier): - """True if identifier is valid, always True in this base implementation.""" + def is_valid(self, identifier): # pylint: disable=unused-argument,no-self-use + """Return True if identifier is valid, always True in this base implementation.""" return True - def encode(self, identifier): + def encode(self, identifier): # pylint: disable=no-self-use """Encode identifier to get rid of unsafe chars.""" return quote_plus(identifier) - def decode(self, identifier): + def decode(self, identifier): # pylint: disable=no-self-use """Decode identifier to put back unsafe chars.""" return unquote_plus(identifier) - def identifier_to_path(self, identifier): + def identifier_to_path(self, identifier): # pylint: disable=no-self-use """Convert identifier to path relative to some root.""" raise Exception("No yet implemented") - def relative_path_to_identifier(self, path): + def relative_path_to_identifier(self, path): # pylint: disable=no-self-use """Convert relative path to identifier.""" raise Exception("No yet implemented") diff --git a/ocfl/identity.py b/ocfl/identity.py index 7d60ca5..7abcdc2 100644 --- a/ocfl/identity.py +++ b/ocfl/identity.py @@ -7,10 +7,6 @@ class Identity(Dispositor): 
"""Class to support trivial identity disposition.""" - def __init__(self): - """Initialize Dispositor.""" - super(Identity, self).__init__() - def identifier_to_path(self, identifier): """Convert identifier to path relative to root.""" return self.encode(identifier) diff --git a/ocfl/inventory_validator.py b/ocfl/inventory_validator.py index 4053212..44a15b4 100644 --- a/ocfl/inventory_validator.py +++ b/ocfl/inventory_validator.py @@ -22,7 +22,7 @@ def get_file_map(inventory, version_dir): return file_map -class InventoryValidator(object): +class InventoryValidator(): """Class for OCFL Inventory Validator.""" def __init__(self, log=None, where='???', @@ -55,7 +55,7 @@ def validate(self, inventory): self.inventory = inventory if 'id' in inventory: iid = inventory['id'] - if type(iid) != str or iid == '': + if not isinstance(iid, str) or iid == '': self.error("E037") elif not re.match(r'''(\w+):.+''', iid): self.warning("W005", id=iid) @@ -79,7 +79,7 @@ def validate(self, inventory): if 'contentDirectory' in inventory: # Careful only to set self.content_directory if value is safe cd = inventory['contentDirectory'] - if type(cd) != str or '/' in cd or cd in ['.', '..']: + if not isinstance(cd, str) or '/' in cd or cd in ['.', '..']: self.error("E018") else: self.content_directory = cd @@ -119,7 +119,7 @@ def validate_manifest(self, manifest): manifest_files = {} unnormalized_digests = set() manifest_digests = set() - if type(manifest) != dict: + if not isinstance(manifest, dict): self.error('E041c') else: content_paths = set() @@ -128,7 +128,7 @@ def validate_manifest(self, manifest): m = re.match(self.digest_regex(), digest) if not m: self.error('E025a', digest=digest, algorithm=self.digest_algorithm) # wrong form of digest - elif type(manifest[digest]) != list: + elif not isinstance(manifest[digest], list): self.error('E092', digest=digest) # must have path list value else: unnormalized_digests.add(digest) @@ -154,7 +154,7 @@ def validate_fixity(self, fixity, 
manifest_files): Check the structure of the fixity block and makes sure that only files listed in the manifest are referenced. """ - if type(fixity) != dict: + if not isinstance(fixity, dict): self.error('E056a') else: for digest_algorithm in fixity: @@ -169,7 +169,7 @@ def validate_fixity(self, fixity, manifest_files): regex = r'''^.*$''' known_digest = False fixity_algoritm_block = fixity[digest_algorithm] - if type(fixity_algoritm_block) != dict: + if not isinstance(fixity_algoritm_block, dict): self.error('E057a', algorithm=self.digest_algorithm) else: digests_seen = set() @@ -177,7 +177,7 @@ def validate_fixity(self, fixity, manifest_files): m = re.match(regex, digest) if not m: self.error('E057b', digest=digest, algorithm=digest_algorithm) # wrong form of digest - elif type(fixity_algoritm_block[digest]) != list: + elif not isinstance(fixity_algoritm_block[digest], list): self.error('E057c', digest=digest, algorithm=digest_algorithm) # must have path list value else: if known_digest: @@ -201,10 +201,10 @@ def validate_version_sequence(self, versions): not part of the valid sequence if an error is thrown. 
""" all_versions = [] - if type(versions) != dict: + if not isinstance(versions, dict): self.error("E044") return all_versions - elif len(versions) == 0: + if len(versions) == 0: self.error("E008") return all_versions # Validate version sequence @@ -264,12 +264,12 @@ def validate_versions(self, versions, all_versions, unnormalized_digests): version = versions[v] if 'created' not in version: self.error('E048', version=v) # No created - elif type(versions[v]['created']) != str: + elif not isinstance(versions[v]['created'], str): self.error('E049d', version=v) # Bad created else: created = versions[v]['created'] try: - dt = str_to_datetime(created) + str_to_datetime(created) # catch ValueError if fails if not re.search(r'''(Z|[+-]\d\d:\d\d)$''', created): # FIXME - kludge self.error('E049a', version=v) if not re.search(r'''T\d\d:\d\d:\d\d''', created): # FIXME - kludge @@ -282,20 +282,20 @@ def validate_versions(self, versions, all_versions, unnormalized_digests): self.error('E048c', version=v) if 'message' not in version: self.warning('W007a', version=v) - elif type(version['message']) != str: + elif not isinstance(version['message'], str): self.error('E094', version=v) if 'user' not in version: self.warning('W007b', version=v) else: user = version['user'] - if type(user) != dict: + if not isinstance(user, dict): self.error('E054a', version=v) else: - if 'name' not in user or type(user['name']) != str: + if 'name' not in user or not isinstance(user['name'], str): self.error('E054b', version=v) if 'address' not in user: self.warning('W008', version=v) - elif type(user['address']) != str: + elif not isinstance(user['address'], str): self.error('E054c', version=v) elif not re.match(r'''\w{3,6}:''', user['address']): self.warning('W009', version=v) @@ -311,14 +311,14 @@ def validate_state_block(self, state, version, unnormalized_digests): digests = [] logical_paths = set() logical_directories = set() - if type(state) != dict: + if not isinstance(state, dict): 
self.error('E050c', version=version) else: - digest_regex = self.digest_regex() + digest_re = re.compile(self.digest_regex()) for digest in state: - if not re.match(self.digest_regex(), digest): + if not digest_re.match(digest): self.error('E050d', version=version, digest=digest) - elif type(state[digest]) != list: + elif not isinstance(state[digest], list): self.error('E050e', version=version, digest=digest) else: for path in state[digest]: @@ -346,7 +346,7 @@ def check_digests_present_and_used(self, manifest_files, digests_used): self.error("E050b", digests=", ".join(sorted(not_in_state))) def digest_regex(self): - """A regex for validating un-normalized digest format.""" + """Return regex for validating un-normalized digest format.""" try: return digest_regex(self.digest_algorithm) except ValueError: @@ -406,6 +406,7 @@ def validate_as_prior_version(self, prior): else: # Check references to files but realize that there might be different # digest algorithms between versions + version_dir = 'no-version' for version_dir in prior.all_versions: prior_map = get_file_map(prior.inventory, version_dir) self_map = get_file_map(self.inventory, version_dir) diff --git a/ocfl/namaste.py b/ocfl/namaste.py index 0afa357..6dff2b5 100644 --- a/ocfl/namaste.py +++ b/ocfl/namaste.py @@ -4,10 +4,10 @@ See also command line tool: http://github.com/mjgiarlo/namaste """ -import fs import os import os.path import re +import fs def content_to_tvalue(content): @@ -21,20 +21,19 @@ def content_to_tvalue(content): return re.sub(r'''[^\w\.\-:]''', '_', content[:40]) -def find_namastes(d, dir='', pyfs=None, max=10): +def find_namastes(d, dir='', pyfs=None, limit=10): """Find NAMASTE files with tag d in dir, return list of Namaste objects. - max sets a limit on the number of Namaste objects returned, a NamasteException - will be raised if more than max files with tag d are found. 
+ limit sets a limit on the number of Namaste objects returned, a NamasteException + will be raised if more than limit files with tag d are found. """ prefix = str(d) + '=' - namastes = [] if pyfs is not None: filenames = [f for f in pyfs.listdir(dir) if f.startswith(prefix)] else: filenames = [f for f in os.listdir(dir) if f.startswith(prefix)] - if len(filenames) > max: - raise NamasteException("Found too many Namaste files with tag %s in %s" % (str(d), dir)) + if len(filenames) > limit: + raise NamasteException("Found too many Namaste files with tag %s in %s" % (d, dir)) return [Namaste(d, tvalue=filename[len(prefix):]) for filename in filenames] @@ -43,7 +42,7 @@ def get_namaste(d, dir): Raises NamasteException if not exaclty one. """ - namastes = find_namastes(d, dir, max=1) + namastes = find_namastes(d, dir, limit=1) if len(namastes) != 1: raise NamasteException("Failed to find one Namaste file with tag %s in %s" % (str(d), dir)) return namastes[0] @@ -52,10 +51,8 @@ def get_namaste(d, dir): class NamasteException(Exception): """Class for exceptions from Namaste.""" - pass - -class Namaste(object): +class Namaste(): """Class implementing NAMASTE specification.""" def __init__(self, d=0, content='', tvalue=None, tr_func=content_to_tvalue): @@ -80,11 +77,10 @@ def filename(self): @property def tvalue(self): - """tvalue of Namaste file.""" + """Tvalue of Namaste file.""" if self._tvalue is not None: return self._tvalue - else: - return self._tr_func(self.content) + return self._tr_func(self.content) def write(self, dir='', pyfs=None): """Write NAMASTE file to dir, optionally in fs. 
@@ -122,9 +118,9 @@ def check_content(self, dir='', pyfs=None): raise NamasteException("Content of Namaste file %s doesn't match tvalue %s" % (filepath, self.tvalue)) def content_ok(self, dir='', pyfs=None): - """True is check_content() does not raise an exception.""" + """Return True if check_content() does not raise a NamasteException exception.""" try: self.check_content(dir, pyfs) - except Exception: + except NamasteException: return False return True diff --git a/ocfl/ntree.py b/ocfl/ntree.py index ccce8dd..1649b89 100644 --- a/ocfl/ntree.py +++ b/ocfl/ntree.py @@ -24,7 +24,7 @@ class Ntree(Dispositor): def __init__(self, n=2, encapsulate=True): """Initialize Dispositor.""" - super(Ntree, self).__init__() + super().__init__() self.n = n self.encapsulate = encapsulate @@ -39,16 +39,15 @@ def decode(self, identifier): def identifier_to_path(self, identifier): """Convert identifier to path relative to root.""" identifier = self.encode(identifier) - path = '' id_remains = identifier segments = [] while len(id_remains) > self.n: segments.append(id_remains[0:self.n]) id_remains = id_remains[self.n:] - segments.append(id_remains) + segments.append(id_remains) # the statement means that segmets will always have at least one element if self.encapsulate: segments.append(identifier) - return os.path.join(*segments) + return os.path.join(*segments) # pylint: disable=no-value-for-parameter def relative_path_to_identifier(self, path): """Convert relative path to identifier.""" diff --git a/ocfl/object.py b/ocfl/object.py index bced274..83b3e74 100755 --- a/ocfl/object.py +++ b/ocfl/object.py @@ -1,23 +1,20 @@ # -*- coding: utf-8 -*- """Core of OCFL Object library.""" import copy -import fs -import fs.path -import fs.copy import hashlib import json import os.path import re import logging -import sys -try: - from urllib.parse import quote as urlquote # py3 -except ImportError: # pragma: no cover -- py2 - from urllib import quote as urlquote # pragma: no cover -- py2 +from 
urllib.parse import quote as urlquote + +import fs +import fs.path +import fs.copy from .digest import file_digest, normalized_digest from .inventory_validator import InventoryValidator -from .object_utils import remove_first_directory, make_unused_filepath, next_version +from .object_utils import make_unused_filepath, next_version, ObjectException from .pyfs import open_fs from .namaste import Namaste from .validator import Validator, ValidatorAbortException @@ -26,16 +23,21 @@ INVENTORY_FILENAME = 'inventory.json' -class ObjectException(Exception): - """Exception class for OCFL Object.""" +def parse_version_directory(dirname): + """Get version number from version directory name.""" + m = re.match(r'''v(\d{1,5})$''', dirname) + if not m: + raise Exception("Bad version directory name: %s" % (dirname)) + v = int(m.group(1)) + if v == 0: + raise Exception("Bad version directory name: %s, v0 no allowed" % (dirname)) + return v - pass - -class Object(object): +class Object(): """Class for handling OCFL Object data and operations.""" - def __init__(self, id=None, content_directory='content', + def __init__(self, identifier=None, content_directory='content', digest_algorithm='sha512', filepath_normalization='uri', forward_delta=True, dedupe=True, lax_digests=False, fixity=None, verbose=True, @@ -43,7 +45,7 @@ def __init__(self, id=None, content_directory='content', """Initialize OCFL object. 
Parameters relevant to building an object: - id - id for this object + identifier - id for this object content_directory - allow override of the default 'content' digest_algorithm - allow override of the default 'sha512' filepath_normalization = allow override of default 'uri' @@ -65,7 +67,7 @@ def __init__(self, id=None, content_directory='content', path - if set then open a pyfs filesystem at path (alternative to obj_fs) create - set True to allow opening filesystem at path to create a directory """ - self.id = id + self.id = identifier self.content_directory = content_directory self.digest_algorithm = digest_algorithm self.filepath_normalization = filepath_normalization @@ -85,7 +87,7 @@ def open_fs(self, objdir, create=False): try: self.obj_fs = open_fs(fs_url=objdir, create=create) except (fs.opener.errors.OpenerError, fs.errors.CreateFailed) as e: - raise ObjectException("Failed to open object filesystem '%s' (%s)" % (objdir, str(e))) + raise ObjectException("Failed to open object filesystem '%s' (%s)" % (objdir, e)) def copy_into_object(self, src_fs, srcfile, filepath, create_dirs=False): """Copy from srcfile to filepath in object.""" @@ -94,16 +96,6 @@ def copy_into_object(self, src_fs, srcfile, filepath, create_dirs=False): self.obj_fs.makedirs(dstpath) fs.copy.copy_file(src_fs, srcfile, self.obj_fs, filepath) - def parse_version_directory(self, dirname): - """Get version number from version directory name.""" - m = re.match(r'''v(\d{1,5})$''', dirname) - if not m: - raise Exception("Bad version directory name: %s" % (dirname)) - v = int(m.group(1)) - if v == 0: - raise Exception("Bad version directory name: %s, v0 no allowed" % (dirname)) - return v - def digest(self, pyfs, filename): """Digest for file filename in the object filesystem.""" return file_digest(filename, self.digest_algorithm, pyfs=pyfs) @@ -232,7 +224,7 @@ def add_version(self, inventory, src_fs, src_dir, vdir, metadata=None): return manifest_to_srcfile def build_inventory(self, src_fs, 
metadata=None): - """Generator for building an OCFL inventory from a set of source files. + """Generate an OCFL inventory from a set of source files. Parameters: src_fc - pyfs filesystem of source files @@ -249,7 +241,7 @@ def build_inventory(self, src_fs, metadata=None): for vdir in src_fs.listdir('/'): if not src_fs.isdir(vdir): continue - vn = self.parse_version_directory(vdir) + vn = parse_version_directory(vdir) versions[vn] = vdir # Go through versions in order building versions array, deduping if selected for vn in sorted(versions.keys()): @@ -309,11 +301,12 @@ def build(self, srcdir, metadata=None, objdir=None): self.open_fs(objdir, create=True) num_versions = 0 src_fs = open_fs(srcdir) + inventory = None for (vdir, inventory, manifest_to_srcfile) in self.build_inventory(src_fs, metadata): num_versions += 1 if objdir is None: - self.log.warning("### Inventory for %s\n" % (vdir) - + json.dumps(inventory, sort_keys=True, indent=2)) + self.log.warning("### Inventory for %s\n", + vdir + json.dumps(inventory, sort_keys=True, indent=2)) else: self.write_inventory_and_sidecar(inventory, vdir) # Copy files into this version @@ -324,7 +317,7 @@ def build(self, srcdir, metadata=None, objdir=None): # Write object declaration, inventory and sidecar self.write_object_declaration() self.write_inventory_and_sidecar(inventory) - self.log.info("Built object %s with %s versions" % (self.id, num_versions)) + self.log.info("Built object %s with %s versions", self.id, num_versions) def create(self, srcdir, metadata=None, objdir=None): """Create a new OCFL object with v1 content from srcdir. 
@@ -345,8 +338,8 @@ def create(self, srcdir, metadata=None, objdir=None): vdir = 'v1' manifest_to_srcfile = self.add_version(inventory, src_fs, '', vdir, metadata=metadata) if objdir is None: - self.log.warning("### Inventory for %s\n" % (vdir) - + json.dumps(inventory, sort_keys=True, indent=2)) + self.log.warning("### Inventory for %s\n", + vdir + json.dumps(inventory, sort_keys=True, indent=2)) return # Else write out object self.write_inventory_and_sidecar(inventory, vdir) @@ -354,11 +347,11 @@ def create(self, srcdir, metadata=None, objdir=None): self.write_object_declaration() self.write_inventory_and_sidecar(inventory) # Write version files - for digest, paths in inventory['manifest'].items(): + for paths in inventory['manifest'].values(): for path in paths: srcfile = manifest_to_srcfile[path] self.copy_into_object(src_fs, srcfile, path, create_dirs=True) - self.log.info("Created OCFL object %s in %s" % (self.id, objdir)) + self.log.info("Created OCFL object %s in %s", self.id, objdir) def update(self, objdir, srcdir=None, metadata=None): """Update object creating a new version with content matching srcdir. @@ -379,9 +372,8 @@ def update(self, objdir, srcdir=None, metadata=None): inventory = self.parse_inventory() self.id = inventory['id'] old_head = inventory['head'] - versions = inventory['versions'] head = next_version(old_head) - self.log.info("Will update %s %s -> %s" % (self.id, old_head, head)) + self.log.info("Will update %s %s -> %s", self.id, old_head, head) self.obj_fs.makedir(head) # Is this a request to change the digest algorithm? 
old_digest_algorithm = inventory['digestAlgorithm'] @@ -389,8 +381,8 @@ def update(self, objdir, srcdir=None, metadata=None): if digest_algorithm is None: digest_algorithm = old_digest_algorithm elif digest_algorithm != old_digest_algorithm: - self.log.info("New version with use %s instead of %s digestAlgorithm" % - (digest_algorithm, old_digest_algorithm)) + self.log.info("New version with use %s instead of %s digestAlgorithm", + digest_algorithm, old_digest_algorithm) inventory['digestAlgorithm'] = digest_algorithm # Is this a request to change the set of fixity information? fixity = self.fixity @@ -412,10 +404,10 @@ def update(self, objdir, srcdir=None, metadata=None): for digest in old_fixity.difference(fixity): inventory['fixity'].pop(digest) for digest in fixity.difference(old_fixity): - self.log.info("FIXME - need to add fixity with digest %s" % digest) + self.log.info("FIXME - need to add fixity with digest %s", digest) if fixity != old_fixity: - self.log.info("New version will have %s instead of %s fixity" % - (','.join(sorted(fixity)), ','.join(sorted(old_fixity)))) + self.log.info("New version will have %s instead of %s fixity", + ','.join(sorted(fixity)), ','.join(sorted(old_fixity))) # Now look at contents, manifest and state manifest = copy.deepcopy(inventory['manifest']) if digest_algorithm != old_digest_algorithm: @@ -458,30 +450,29 @@ def update(self, objdir, srcdir=None, metadata=None): # Delete old root inventory sidecar if we changed digest algorithm if digest_algorithm != old_digest_algorithm: self.obj_fs.remove(INVENTORY_FILENAME + '.' 
+ old_digest_algorithm) - self.log.info("Updated OCFL object %s in %s by adding %s" % (self.id, objdir, head)) - - def _show_indent(self, level, last=False, last_v=False): - """Indent string for tree view at level for intermediate or last.""" - tree_next = '├── ' - tree_last = '└── ' - tree_pass = '│ ' - tree_indent = ' ' - if level == 0: - return (tree_last if last else tree_next) - else: - return (tree_indent if last else tree_pass) + (tree_last if last_v else tree_next) + self.log.info("Updated OCFL object %s in %s by adding %s", self.id, objdir, head) def show(self, objdir): """Show OCFL object at objdir.""" + def _show_indent(level, last=False, last_v=False): + """Indent string for tree view at level for intermediate or last.""" + tree_next = '├── ' + tree_last = '└── ' + tree_pass = '│ ' + tree_indent = ' ' + if level == 0: + return tree_last if last else tree_next + return (tree_indent if last else tree_pass) + (tree_last if last_v else tree_next) + validator = Validator(show_warnings=False, show_errors=True, check_digests=False, lax_digests=self.lax_digests) passed = validator.validate(objdir) if passed: - self.log.warning("OCFL object at %s has VALID STRUCTURE (DIGESTS NOT CHECKED) " % (objdir)) + self.log.warning("OCFL object at %s has VALID STRUCTURE (DIGESTS NOT CHECKED) ", objdir) else: - self.log.warning("OCFL object at %s is INVALID" % (objdir)) + self.log.warning("OCFL object at %s is INVALID", objdir) tree = '[' + objdir + ']\n' self.open_fs(objdir) entries = sorted(self.obj_fs.listdir('')) @@ -503,7 +494,7 @@ def show(self, objdir): seen_v_sidecar = True elif v_entry == 'content': num_files = 0 - for (v_dirpath, v_dirs, v_files) in self.obj_fs.walk(fs.path.join(entry, v_entry)): + for (v_dirpath, v_dirs, v_files) in self.obj_fs.walk(fs.path.join(entry, v_entry)): # pylint: disable=unused-variable num_files += len(v_files) v_note += '(%d files)' % num_files else: @@ -518,12 +509,12 @@ def show(self, objdir): else: note += '<--- ???' 
last = (n == len(entries)) - tree += self._show_indent(0, last) + note + "\n" + tree += _show_indent(0, last) + note + "\n" nn = 0 for v_note in v_notes: nn += 1 - tree += self._show_indent(1, last, (nn == len(v_notes))) + v_note + "\n" - self.log.warning("Object tree\n" + tree) + tree += _show_indent(1, last, (nn == len(v_notes))) + v_note + "\n" + self.log.warning("Object tree\n%s", tree) def validate(self, objdir, show_warnings=True, show_errors=True, check_digests=True): """Validate OCFL object at objdir.""" @@ -536,9 +527,9 @@ def validate(self, objdir, show_warnings=True, show_errors=True, check_digests=T if messages != '': print(messages) if passed: - self.log.info("OCFL object at %s is VALID" % (objdir)) + self.log.info("OCFL object at %s is VALID", objdir) else: - self.log.info("OCFL object at %s is INVALID" % (objdir)) + self.log.info("OCFL object at %s is INVALID", objdir) return passed def validate_inventory(self, path, show_warnings=True, show_errors=True): @@ -558,9 +549,9 @@ def validate_inventory(self, path, show_warnings=True, show_errors=True): if messages != '': print(messages) if passed: - self.log.info("Standalone OCFL inventory at %s is VALID" % (path)) + self.log.info("Standalone OCFL inventory at %s is VALID", path) else: - self.log.info("Standalone OCFL inventory at %s is INVALID" % (path)) + self.log.info("Standalone OCFL inventory at %s is INVALID", path) return passed def extract(self, objdir, version, dstdir): @@ -575,7 +566,7 @@ def extract(self, objdir, version, dstdir): inv = self.parse_inventory() if version == 'head': version = inv['head'] - self.log.info("Object at %s has head %s" % (objdir, version)) + self.log.info("Object at %s has head %s", objdir, version) elif version not in inv['versions']: raise ObjectException("Object at %s does not include a version '%s'" % (objdir, version)) # Sanity check on destination @@ -586,7 +577,7 @@ def extract(self, objdir, version, dstdir): try: parent_fs = open_fs(parentdir) except 
(fs.opener.errors.OpenerError, fs.errors.CreateFailed) as e: - raise ObjectException("Destination parent %s does not exist or could not be opened (%s)" % (parentdir, str(e))) + raise ObjectException("Destination parent %s does not exist or could not be opened (%s)" % (parentdir, e)) parent_fs.makedir(dir) dst_fs = parent_fs.opendir(dir) # Open a sub-filesystem as our destination # Now extract... @@ -595,10 +586,10 @@ def extract(self, objdir, version, dstdir): for (digest, logical_files) in state.items(): existing_file = manifest[digest][0] # FIXME - pick "best" (closest version?) not first? for logical_file in logical_files: - self.log.debug("Copying %s -> %s" % (digest, logical_file)) + self.log.debug("Copying %s -> %s", digest, logical_file) dst_fs.makedirs(fs.path.dirname(logical_file), recreate=True) fs.copy.copy_file(self.obj_fs, existing_file, dst_fs, logical_file) - self.log.info("Extracted %s into %s" % (version, dstdir)) + self.log.info("Extracted %s into %s", version, dstdir) return VersionMetadata(inventory=inv, version=version) def parse_inventory(self): diff --git a/ocfl/object_utils.py b/ocfl/object_utils.py index 6ad2535..3ff2298 100755 --- a/ocfl/object_utils.py +++ b/ocfl/object_utils.py @@ -1,14 +1,10 @@ # -*- coding: utf-8 -*- """Utility functions to support the OCFL Object library.""" -import fs -import fs.path -import os import re import sys -try: - from urllib.parse import quote as urlquote # py3 -except ImportError: # pragma: no cover -- py2 - from urllib import quote as urlquote # pragma: no cover -- py2 + +import fs +import fs.path from ._version import __version__ from .namaste import find_namastes @@ -18,6 +14,10 @@ NORMALIZATIONS = ['uri', 'md5'] # Must match possibilities in map_filepaths() +class ObjectException(Exception): + """Exception class for OCFL Object.""" + + def add_object_args(parser): """Add Object settings to argparse or argument group instance parser.""" # Disk scanning @@ -65,13 +65,12 @@ def next_version(version): next 
= int(m.group(1)) + 1 if m.group(2) == '0': # Zero-padded version - next_version = ('v0%0' + str(len(version) - 2) + 'd') % next - if len(next_version) != len(version): - raise ObjectException("Version number overflow for zero-padded version %d to %d" % (version, next_version)) - return next_version - else: - # Not zero-padded - return 'v' + str(next) + next_v = ('v0%0' + str(len(version) - 2) + 'd') % next + if len(next_v) != len(version): + raise ObjectException("Version number overflow for zero-padded version %d to %d" % (version, next_v)) + return next_v + # Not zero-padded + return 'v' + str(next) def remove_first_directory(path): @@ -86,11 +85,10 @@ def remove_first_directory(path): rpath = '' while True: (head, tail) = fs.path.split(path) - if head == path or tail == path: + if path in (head, tail): break - else: - path = head - rpath = tail if rpath == '' else fs.path.join(tail, rpath) + path = head + rpath = tail if rpath == '' else fs.path.join(tail, rpath) return rpath @@ -117,28 +115,28 @@ def find_path_type(path): """ try: pyfs = open_fs(path, create=False) - except (fs.opener.errors.OpenerError, fs.errors.CreateFailed) as e: + except (fs.opener.errors.OpenerError, fs.errors.CreateFailed): # Failed to open path as a filesystem, try enclosing directory # in case path is a file (parent, filename) = fs.path.split(path) try: pyfs = open_fs(parent, create=False) except (fs.opener.errors.OpenerError, fs.errors.CreateFailed) as e: - return("path cannot be opened, and nor can parent (" + str(e) + ")") + return "path cannot be opened, and nor can parent (" + str(e) + ")" # Can open parent, is filename a file there? 
try: info = pyfs.getinfo(filename) except fs.errors.ResourceNotFound: - return("path does not exist") + return "path does not exist" if info.is_dir: - return("directory that could not be opened as a filesystem, this should not happen") # pragma: no cover - return('file') + return "directory that could not be opened as a filesystem, this should not happen" # pragma: no cover + return 'file' namastes = find_namastes(0, pyfs=pyfs) if len(namastes) == 0: - return("no 0= declaration file") - elif len(namastes) > 1: - return("more than one 0= declaration file") + return "no 0= declaration file" + if len(namastes) > 1: + return "more than one 0= declaration file" m = re.match(r'''ocfl(_object)?_(\d+\.\d+)$''', namastes[0].tvalue) if m: - return('root' if m.group(1) is None else 'object') - return("unrecognized 0= declaration file 0=%s" % (namastes[0].tvalue)) + return 'root' if m.group(1) is None else 'object' + return "unrecognized 0= declaration file 0=%s" % (namastes[0].tvalue) diff --git a/ocfl/pyfs.py b/ocfl/pyfs.py index ac96136..d897698 100644 --- a/ocfl/pyfs.py +++ b/ocfl/pyfs.py @@ -41,7 +41,7 @@ def open_fs(fs_url, **kwargs): # but adjust the handling of strict to default to strict=False bucket_name, _, dir_path = parse_result.resource.partition("/") if not bucket_name: - raise OpenerError("invalid bucket name in '{}'".format(fs_url)) + raise fs.opener.errors.OpenerError("invalid bucket name in '{}'".format(fs_url)) # Instead of allowing this to be turned on by a strict=1 in the # URL query params, allow it to be turned off by strict!=1 strict = ( @@ -58,11 +58,11 @@ def open_fs(fs_url, **kwargs): acl=parse_result.params.get("acl", None), cache_control=parse_result.params.get("cache_control", None), strict=strict) - s3fs.getinfo = s3fs._getinfo # Patch in version of method that doesn't check parent directory + # Patch in version of getinfo method that doesn't check parent directory + s3fs.getinfo = s3fs._getinfo # pylint: disable=protected-access return s3fs - 
else: - # Non-S3 URL - return fs.open_fs(fs_url, **kwargs) + # Non-S3 URL + return fs.open_fs(fs_url, **kwargs) def ocfl_walk(f, dir='/', is_storage_root=False): @@ -111,7 +111,7 @@ def ocfl_walk(f, dir='/', is_storage_root=False): def ocfl_opendir(pyfs, dir, **kwargs): - """A version of opendir that handles the case of S3 without directory objects. + """Open directory while handling the case of S3 without directory objects. FIXME - DIRTY HACK """ @@ -120,7 +120,7 @@ def ocfl_opendir(pyfs, dir, **kwargs): # isn't a directory object (even with strict=False) new_dir_path = fs.path.join(pyfs.dir_path, dir) s3fs = S3FS( - pyfs._bucket_name, + pyfs._bucket_name, # pylint: disable=protected-access dir_path=new_dir_path, aws_access_key_id=pyfs.aws_access_key_id, aws_secret_access_key=pyfs.aws_secret_access_key, @@ -128,8 +128,23 @@ def ocfl_opendir(pyfs, dir, **kwargs): # acl=pyfs.acl, # cache_control=pyfs.cache_control), strict=pyfs.strict) - s3fs.getinfo = s3fs._getinfo + # Patch in version of getinfo method that doesn't check parent directory + s3fs.getinfo = s3fs._getinfo # pylint: disable=protected-access return s3fs - else: - # Just use regular opendir(..) - return pyfs.opendir(dir, **kwargs) + # Not S3, just use regular opendir(..) + return pyfs.opendir(dir, **kwargs) + + +def ocfl_files_identical(pyfs, file1, file2): + """Compare file1 and file2 on the filesystem pyfs. + + Returns True if the files are identical, False otherwise. + + FIXME - Make this more efficient by comparing stat info first, then only + comparing content in chunks if necessary. + """ + with pyfs.open(file1, 'r') as fh1: + c1 = fh1.read() + with pyfs.open(file2, 'r') as fh2: + c2 = fh2.read() + return c1 == c2 diff --git a/ocfl/store.py b/ocfl/store.py index a6909b4..71ec5ec 100644 --- a/ocfl/store.py +++ b/ocfl/store.py @@ -3,18 +3,12 @@ This code uses PyFilesystem (import fs) exclusively for access to files. This should enable application beyond the operating system filesystem. 
""" -import fs -from fs.copy import copy_dir -import hashlib import json -import re import logging -try: - from urllib.parse import quote_plus # py3 -except ImportError: # pragma: no cover -- py2 - from urllib import quote_plus # pragma: no cover -- py2 +import re +import fs +from fs.copy import copy_dir -from .digest import file_digest from .disposition import get_dispositor from .namaste import find_namastes, Namaste from .object import Object @@ -26,10 +20,8 @@ class StoreException(Exception): """Exception class for OCFL Storage Root.""" - pass - -class Store(object): +class Store(): """Class for handling OCFL Storage Root and include OCFL Objects.""" def __init__(self, root=None, disposition=None, lax_digests=False): @@ -71,7 +63,7 @@ def traversal_error(self, code, **kwargs): self.num_traversal_errors += 1 if self.log is None: # FIXME - What to do in non-validator context? args = ', '.join('{0}={1!r}'.format(k, v) for k, v in kwargs.items()) - logging.error("Traversal error %s - %s" % (code, args)) + logging.error("Traversal error %s - %s", code, args) else: self.log.error(code, **kwargs) @@ -86,7 +78,7 @@ def initialize(self): if parent_fs.exists(root_dir): raise StoreException("OCFL storage root %s already exists, aborting!" % (self.root)) self.root_fs = parent_fs.makedir(root_dir) - logging.debug("Created OCFL storage root at %s" % (self.root)) + logging.debug("Created OCFL storage root at %s", self.root) # Create root declaration Namaste(d=0, content=self.declaration_tvalue).write(pyfs=self.root_fs) # Create a layout declaration @@ -95,7 +87,7 @@ def initialize(self): layout = {'key': self.disposition, 'description': "Non-standard layout from ocfl-py disposition -- FIXME"} json.dump(layout, fh, sort_keys=True, indent=2) - logging.info("Created OCFL storage root %s" % (self.root)) + logging.info("Created OCFL storage root %s", self.root) def check_root_structure(self): """Check the OCFL storage root structure. 
@@ -107,11 +99,11 @@ def check_root_structure(self): namastes = find_namastes(0, pyfs=self.root_fs) if len(namastes) == 0: raise StoreException("Storage root %s lacks required 0= declaration file" % (self.root)) - elif len(namastes) > 1: + if len(namastes) > 1: raise StoreException("Storage root %s has more than one 0= style declaration file" % (self.root)) - elif namastes[0].tvalue != self.declaration_tvalue: + if namastes[0].tvalue != self.declaration_tvalue: raise StoreException("Storage root %s declaration file not as expected, got %s" % (self.root, namastes[0].filename)) - elif not namastes[0].content_ok(pyfs=self.root_fs): + if not namastes[0].content_ok(pyfs=self.root_fs): raise StoreException("Storage root %s required declaration file %s has invalid content" % (self.root, namastes[0].filename)) # Specification file and layout file if self.root_fs.exists(self.spec_file) and not self.root_fs.isfile(self.spec_file): @@ -132,10 +124,10 @@ def parse_layout_file(self): try: with self.root_fs.open(self.layout_file) as fh: layout = json.load(fh) - if type(layout) != dict: + if not isinstance(layout, dict): raise StoreException("Storage root %s has layout file that isn't a JSON object" % (self.root)) - elif ('extension' not in layout or type(layout['extension']) != str - or 'description' not in layout or type(layout['description']) != str): + if ('extension' not in layout or not isinstance(layout['extension'], str) + or 'description' not in layout or not isinstance(layout['description'], str)): raise StoreException("Storage root %s has layout file doesn't have required extension and description string entries" % (self.root)) return layout['extension'], layout['description'] except Exception as e: # FIXME - more specific? @@ -144,7 +136,7 @@ def parse_layout_file(self): return None, None def object_paths(self): - """Generator for object paths for every obect in the OCFL storage root. + """Generate object paths for every obect in the OCFL storage root. 
Yields (dirpath) that is the path to the directory for each object located, relative to the OCFL storage root and without a preceding /. @@ -168,7 +160,7 @@ def object_paths(self): declaration = zero_eqs[0] match = re.match(r'''0=ocfl_object_(\d+\.\d+)''', declaration) if match and match.group(1) == '1.0': - yield (dirpath.lstrip('/')) + yield dirpath.lstrip('/') elif match: self.traversal_error("E004a", path=dirpath, version=match.group(1)) else: @@ -188,7 +180,7 @@ def list(self): print("%s -- id=%s" % (dirpath, id)) num_objects += 1 # FIXME - maybe do some more stuff in here - logging.info("Found %d OCFL Objects under root %s" % (num_objects, self.root)) + logging.info("Found %d OCFL Objects under root %s", num_objects, self.root) def validate_hierarchy(self, validate_objects=True, check_digests=True, show_warnings=False): """Validate storage root hierarchy. @@ -207,7 +199,7 @@ def validate_hierarchy(self, validate_objects=True, check_digests=True, show_war if validator.validate(ocfl_opendir(self.root_fs, dirpath)): good_objects += 1 else: - logging.info("Object at %s in INVALID" % (dirpath)) + logging.info("Object at %s in INVALID", dirpath) messages = validator.__str__(prefix='[[' + dirpath + ']]') # FIXME - how to show warnings sensibly? 
if messages != '': print(messages) @@ -224,20 +216,20 @@ def validate(self, validate_objects=True, check_digests=True, show_warnings=Fals logging.info("Storage root structure is VALID") except StoreException as e: valid = False - logging.info("Storage root structure is INVALID (%s)" % (str(e))) + logging.info("Storage root structure is INVALID (%s)", str(e)) num_objects, good_objects = self.validate_hierarchy(validate_objects=validate_objects, check_digests=check_digests, show_warnings=show_warnings) if validate_objects: if good_objects == num_objects: - logging.info("Objects checked: %d / %d are VALID" % (good_objects, num_objects)) + logging.info("Objects checked: %d / %d are VALID", good_objects, num_objects) else: valid = False - logging.info("Objects checked: %d / %d are INVALID" % (num_objects - good_objects, num_objects)) + logging.info("Objects checked: %d / %d are INVALID", num_objects - good_objects, num_objects) else: logging.info("Not checking OCFL objects") if self.num_traversal_errors > 0: valid = False print(str(self.log)) - logging.info("Encountered %d errors traversing storage root" % (self.num_traversal_errors)) + logging.info("Encountered %d errors traversing storage root", self.num_traversal_errors) # FIXME - do some stuff in here if valid: logging.info("Storage root %s is VALID", self.root) @@ -256,10 +248,10 @@ def add(self, object_path): identifier = inventory['id'] # Now copy path = self.object_path(identifier) - logging.info("Copying from %s to %s" % (object_path, fs.path.join(self.root, path))) + logging.info("Copying from %s to %s", object_path, fs.path.join(self.root, path)) try: copy_dir(o.obj_fs, '/', self.root_fs, path) logging.info("Copied") except Exception as e: - logging.error("Copy failed: " + str(e)) + logging.error("Copy failed: %s", str(e)) raise StoreException("Add object failed!") diff --git a/ocfl/uuid_quadtree.py b/ocfl/uuid_quadtree.py index b9bb74e..8515c6e 100644 --- a/ocfl/uuid_quadtree.py +++ b/ocfl/uuid_quadtree.py 
@@ -14,7 +14,7 @@ class UUIDQuadtree(Dispositor): def __init__(self, prefix='urn:uuid:'): """Initialize Dispositor.""" - super(UUIDQuadtree, self).__init__() + super().__init__() self.prefix = prefix def encode(self, identifier): @@ -30,14 +30,13 @@ def identifier_to_path(self, identifier): Must match prefix:6ba7b810-9dad-11d1-80b4-00c04fd430c8 """ - path = '' if identifier.startswith(self.prefix): identifier = identifier[len(self.prefix):] else: raise Exception("UUIDQuadtree identifier %s does not start with prefix %s" % (identifier, self.prefix)) match = re.match(r'''([\da-f]{4})([\da-f]{4})\-([\da-f]{4})\-([\da-f]{4})\-([\da-f]{4})\-([\da-f]{4})([\da-f]{4})([\da-f]{4})$''', identifier) if not match: - raise Exception("UUIDQuadtree identifier %s not valid" % (identifer)) + raise Exception("UUIDQuadtree identifier %s not valid" % (identifier)) return os.path.join(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6), match.group(7), match.group(8)) diff --git a/ocfl/validation_logger.py b/ocfl/validation_logger.py index ba2b4b8..2306fd9 100644 --- a/ocfl/validation_logger.py +++ b/ocfl/validation_logger.py @@ -6,10 +6,9 @@ import os import os.path import re -import logging -class ValidationLogger(object): +class ValidationLogger(): """Class for OCFL ValidationLogger.""" validation_codes = None @@ -78,7 +77,7 @@ def warning(self, code, **args): self.num_warnings += 1 def __str__(self, prefix=''): - """String of validator status.""" + """Return string of validator status.""" s = '' for message in sorted(self.messages): s += prefix + message + '\n' diff --git a/ocfl/validator.py b/ocfl/validator.py index 9ba7bc5..53b51e7 100644 --- a/ocfl/validator.py +++ b/ocfl/validator.py @@ -9,25 +9,22 @@ This code uses PyFilesystem (import fs) exclusively for access to files. This should enable application beyond the operating system filesystem. 
""" -import fs import json import re +import fs from .digest import file_digest, normalized_digest from .inventory_validator import InventoryValidator -from .namaste import find_namastes, NamasteException -from .pyfs import open_fs, ocfl_walk +from .namaste import find_namastes +from .pyfs import open_fs, ocfl_walk, ocfl_files_identical from .validation_logger import ValidationLogger -from .w3c_datetime import str_to_datetime class ValidatorAbortException(Exception): """Exception class to bail out of validation.""" - pass - -class Validator(object): +class Validator(): """Class for OCFL Validator.""" def __init__(self, log=None, show_warnings=False, show_errors=True, check_digests=True, lax_digests=False, lang='en'): @@ -37,7 +34,17 @@ def __init__(self, log=None, show_warnings=False, show_errors=True, check_digest self.lax_digests = lax_digests if self.log is None: self.log = ValidationLogger(show_warnings=show_warnings, show_errors=show_errors, lang=lang) - self.registered_extensions = ['FIXME'] # FIXME - add names when something registered + self.registered_extensions = [ + '0001-digest-algorithms', '0002-flat-direct-storage-layout', + '0003-hash-and-id-n-tuple-storage-layout', '0004-hashed-n-tuple-storage-layout', + '0005-mutable-head' + ] + # The following actually initialized in initialize() method + self.digest_algorithm = None + self.content_directory = None + self.inventory_digest_files = None + self.root_inv_validator = None + self.obj_fs = None self.initialize() def initialize(self): @@ -52,7 +59,7 @@ def initialize(self): self.obj_fs = None def __str__(self, prefix=''): - """String representation of validation log.""" + """Make string representation of validation log.""" return self.log.__str__(prefix=prefix) def validate(self, path): @@ -62,7 +69,7 @@ def validate(self, path): """ self.initialize() try: - if type(path) == str: + if isinstance(path, str): self.obj_fs = open_fs(path) else: self.obj_fs = path @@ -81,7 +88,7 @@ def validate(self, path): # 
Object root inventory file inv_file = 'inventory.json' if not self.obj_fs.exists(inv_file): - self.log.error('E034') + self.log.error('E063') return False try: inventory, inv_validator = self.validate_inventory(inv_file) @@ -96,9 +103,9 @@ def validate(self, path): # Object root self.validate_object_root(all_versions) # Version inventory files - self.validate_version_inventories(inventory, all_versions) + prior_manifest_digests = self.validate_version_inventories(all_versions) # Object content - self.validate_content(inventory, all_versions) + self.validate_content(inventory, all_versions, prior_manifest_digests) except ValidatorAbortException: pass return self.log.num_errors == 0 @@ -145,7 +152,7 @@ def validate_inventory_digest_match(self, inv_file, inv_digest_file): digest_actual = file_digest(inv_file, digest_algorithm, pyfs=self.obj_fs) if digest_actual != digest_recorded: self.log.error("E060", inv_file=inv_file, actual=digest_actual, recorded=digest_recorded, inv_digest_file=inv_digest_file) - except Exception as e: + except Exception as e: # pylint: disable=broad-except self.log.error("E061", description=str(e)) else: self.log.error("E058b", inv_digest_file=inv_digest_file) @@ -188,11 +195,15 @@ def validate_extensions_dir(self): else: self.log.error('E067', entry=entry.name) - def validate_version_inventories(self, inventory, version_dirs): - """Each version SHOULD have an inventory up to that point.""" + def validate_version_inventories(self, version_dirs): + """Each version SHOULD have an inventory up to that point. + + Also keep a record of any content digests different from those in the root inventory + so that we can also check them when validating the content. 
+ """ + prior_manifest_digests = {} # file -> algorithm -> digest -> [versions] if len(version_dirs) == 0: - return - inv_digest_files = {} # index by version_dir + return prior_manifest_digests last_version = version_dirs[-1] for version_dir in version_dirs: inv_file = fs.path.join(version_dir, 'inventory.json') @@ -202,31 +213,40 @@ def validate_version_inventories(self, inventory, version_dirs): # Don't validate in this case. Per the spec the inventory in the last version # MUST be identical to the copy in the object root root_inv_file = 'inventory.json' - # FIXME -- how to diff efficiently? - with self.obj_fs.open(inv_file, 'r') as ifh: - inv = ifh.read() - with self.obj_fs.open(root_inv_file, 'r') as rifh: - root_inv = rifh.read() - if inv != root_inv: + if not ocfl_files_identical(self.obj_fs, inv_file, root_inv_file): self.log.error('E064', root_inv_file=root_inv_file, inv_file=inv_file) else: - # FIXME - could just compare digest files... + # We could also just compare digest files but this gives a more helpful error for + # which file has the incorrect digest if they don't match self.validate_inventory_digest(inv_file, self.digest_algorithm, where=version_dir) self.inventory_digest_files[version_dir] = 'inventory.json.' + self.digest_algorithm else: # Note that inventories in prior versions may use different digest algorithms version_inventory, inv_validator = self.validate_inventory(inv_file, where=version_dir) - self.validate_inventory_digest(inv_file, inv_validator.digest_algorithm, where=version_dir) - self.inventory_digest_files[version_dir] = 'inventory.json.' + inv_validator.digest_algorithm + digest_algorithm = inv_validator.digest_algorithm + self.validate_inventory_digest(inv_file, digest_algorithm, where=version_dir) + self.inventory_digest_files[version_dir] = 'inventory.json.' 
+ digest_algorithm + # Record all prior digests + if 'manifest' in version_inventory: + for digest in version_inventory['manifest']: + for filepath in version_inventory['manifest'][digest]: + if filepath not in prior_manifest_digests: + prior_manifest_digests[filepath] = {} + if digest_algorithm not in prior_manifest_digests[filepath]: + prior_manifest_digests[filepath][digest_algorithm] = {} + if digest not in prior_manifest_digests[filepath][digest_algorithm]: + prior_manifest_digests[filepath][digest_algorithm][digest] = [] + prior_manifest_digests[filepath][digest_algorithm][digest].append(version_dir) # Is this inventory an appropriate prior version of the object root inventory? if self.root_inv_validator is not None: self.root_inv_validator.validate_as_prior_version(inv_validator) + return prior_manifest_digests - def validate_content(self, inventory, version_dirs): + def validate_content(self, inventory, version_dirs, prior_manifest_digests): """Validate file presence and content against inventory. - The inventory is assumed to be valid and safe to use for construction - of file paths etc.. + The root inventory in `inventory` is assumed to be valid and safe to use + for construction of file paths etc.. 
""" files_seen = set() # Check files in each version directory @@ -253,8 +273,7 @@ def validate_content(self, inventory, version_dirs): self.log.warning("W002", where=version_dir, entry=entry) else: self.log.error("E015", where=version_dir, entry=entry) - except (fs.errors.ResourceNotFound, fs.errors.DirectoryExpected) as e: - print(str(e)) + except (fs.errors.ResourceNotFound, fs.errors.DirectoryExpected): self.log.error('E046', version_dir=version_dir) # Check all files in root manifest if 'manifest' in inventory: @@ -267,6 +286,16 @@ def validate_content(self, inventory, version_dirs): content_digest = file_digest(filepath, digest_type=self.digest_algorithm, pyfs=self.obj_fs) if content_digest != normalized_digest(digest, digest_type=self.digest_algorithm): self.log.error('E092', where='root', digest=digest, content_path=filepath, content_digest=content_digest) + # Are there other digests for this same file from other inventories? + # If so then check those also + if filepath in prior_manifest_digests: + for digest_algorithm in prior_manifest_digests[filepath]: + for other_digest in prior_manifest_digests[filepath][digest_algorithm]: + content_digest = file_digest(filepath, digest_type=digest_algorithm, pyfs=self.obj_fs) + if content_digest != normalized_digest(other_digest, digest_type=digest_algorithm): + where = ','.join(prior_manifest_digests[filepath][digest_algorithm][other_digest]) + self.log.error('E092', where=where, digest=other_digest, content_path=filepath, content_digest=content_digest) + # FIXME - Also other fixity blocks files_seen.discard(filepath) # Check any additional digests in root fixity block if 'fixity' in inventory and self.check_digests: @@ -292,6 +321,6 @@ def read_inventory_digest(self, inv_digest_file): m = re.match(r'''(\w+)\s+(\S+)\s*$''', line) if not m: raise Exception("Bad inventory digest file %s, wrong format" % (inv_digest_file)) - elif m.group(2) != 'inventory.json': + if m.group(2) != 'inventory.json': raise Exception("Bad 
inventory name in inventory digest file %s" % (inv_digest_file)) return m.group(1) diff --git a/ocfl/version_metadata.py b/ocfl/version_metadata.py index 17b091d..97215fc 100644 --- a/ocfl/version_metadata.py +++ b/ocfl/version_metadata.py @@ -1,5 +1,4 @@ """Metadata for a specific version of OCFL Object's content.""" -import logging from .w3c_datetime import datetime_to_str @@ -19,16 +18,14 @@ def add_version_metadata_args(parser): class VersionMetadataException(Exception): """Exception class for OCFL Object.""" - pass - -class VersionMetadata(object): +class VersionMetadata(): """Class for metadata for a specific version of an OCFL Object.""" def __init__(self, args=None, inventory=None, version=None, - id=None, created=None, message=None, name=None, address=None): + identifier=None, created=None, message=None, name=None, address=None): """Initialize by various means, including command line arguments from argparse.""" - self.id = id + self.id = identifier self.created = created self.message = message self.name = name @@ -74,7 +71,7 @@ def from_inventory(self, inventory, version=None): self.address = inv_version['user']['address'] def as_dict(self, **kwargs): - """Dictionary object with version metedata.""" + """Return dictionary object with version metedata.""" m = {} self.add_to_dict(m, **kwargs) return m diff --git a/ocfl/w3c_datetime.py b/ocfl/w3c_datetime.py index cb34388..e9cf5f0 100644 --- a/ocfl/w3c_datetime.py +++ b/ocfl/w3c_datetime.py @@ -7,15 +7,15 @@ The timestamp is assumed to be stored in UTC. """ +import re import time from calendar import timegm from datetime import datetime from dateutil import parser as dateutil_parser -import re def datetime_to_str(dt='now', no_fractions=False): - """The Last-Modified data in ISO8601 syntax, Z notation. + """Return Last-Modified data in ISO8601 syntax, Z notation. The lastmod is stored as unix timestamp which is already in UTC. 
At preesent this code will return 6 decimal digits @@ -29,7 +29,7 @@ def datetime_to_str(dt='now', no_fractions=False): """ if dt is None: return None - elif dt == 'now': + if dt == 'now': dt = time.time() if no_fractions: dt = int(dt) @@ -76,7 +76,7 @@ def str_to_datetime(s, context='datetime'): """ t = None if s is None: - return(t) + return t if s == '': raise ValueError('Attempt to set empty %s' % (context)) # Make a date into a full datetime @@ -107,8 +107,7 @@ def str_to_datetime(s, context='datetime'): r"(\d\d):(\d\d))?$", s) if m is None: raise ValueError("Bad datetime format (%s)" % s) - str = m.group(1) + 'Z' - dt = dateutil_parser.parse(str) + dt = dateutil_parser.parse(m.group(1) + 'Z') offset_seconds = 0 if m.group(3) and m.group(3) != 'Z': hh = int(m.group(5)) @@ -120,4 +119,4 @@ def str_to_datetime(s, context='datetime'): offset_seconds = -offset_seconds # timetuple() ignores timezone information so we have to add in # the offset here, and any fractional component of the seconds - return(timegm(dt.timetuple()) + offset_seconds + fractional_seconds) + return timegm(dt.timetuple()) + offset_seconds + fractional_seconds diff --git a/setup.py b/setup.py index a2c4412..7bdfd09 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ """Setup for ocfl-py.""" -from setuptools import setup, Command import os import re +from setuptools import setup, Command # Extract version number verfile = open("ocfl/_version.py", "rt").read() @@ -20,11 +20,9 @@ class ShellCommand(Command): def initialize_options(self): """Empty initialize_options.""" - pass def finalize_options(self): """Empty finalize_options.""" - pass class Coverage(ShellCommand): @@ -32,7 +30,7 @@ class Coverage(ShellCommand): description = "run coverage" - def run(self): + def run(self): # pylint: disable=no-self-use """Run coverage program.""" os.system("coverage run --source=ocfl setup.py test") os.system("coverage report") @@ -62,11 +60,11 @@ def run(self): description='ocfl-py - A Python 
implementation of OCFL', long_description=open('README').read(), install_requires=[ - 'bagit>=1.7.0', + 'bagit>=1.8.1', 'dateutils>=0.6.6', 'fs>2.4.0', - 'pairtree>=0.8.1', - 'fs_s3fs>=1.1.1' + 'fs_s3fs>=1.1.1', + 'pairtree>=0.8.1' ], test_suite="tests", cmdclass={ diff --git a/tests/test_bagger.py b/tests/test_bagger.py index 774b25b..ac5fd65 100644 --- a/tests/test_bagger.py +++ b/tests/test_bagger.py @@ -1,8 +1,10 @@ """Bagger tests.""" import unittest -import bagit import os.path import tempfile + +import bagit + from ocfl.bagger import BaggerError, bag_as_source, bag_extracted_version from ocfl.version_metadata import VersionMetadata diff --git a/tests/test_demo_build_spec_examples.py b/tests/test_demo_build_spec_examples.py index 7ee69ca..7d76488 100644 --- a/tests/test_demo_build_spec_examples.py +++ b/tests/test_demo_build_spec_examples.py @@ -1,13 +1,5 @@ # -*- coding: utf-8 -*- """Tests/demo building spec examples.""" -import json -import os -import re -import shutil -import subprocess -import sys -import tempfile - from testlib import DemoTestCase diff --git a/tests/test_demo_ocfl_object_script.py b/tests/test_demo_ocfl_object_script.py index fceb78c..edb68fa 100644 --- a/tests/test_demo_ocfl_object_script.py +++ b/tests/test_demo_ocfl_object_script.py @@ -1,12 +1,6 @@ # -*- coding: utf-8 -*- """Tests/demo of ocfl-object.py client.""" -import json -import os -import re -import shutil -import subprocess -import sys -import tempfile +import os.path from testlib import DemoTestCase @@ -78,6 +72,7 @@ def test04_extract(self): # ├── [ 102] foo # │   └── [ 272] bar.xml # └── [ 2021] image.tiff + self.assertIn('Extracted content for v1 in', out) self.assertEqual(os.path.getsize(os.path.join(self.tmpdir, "v1/empty.txt")), 0) self.assertFalse(os.path.exists(os.path.join(self.tmpdir, "v1/empty2.txt"))) self.assertEqual(os.path.getsize(os.path.join(self.tmpdir, "v1/foo/bar.xml")), 272) @@ -94,6 +89,7 @@ def test04_extract(self): # ├── [ 0] empty2.txt # └── [ 102] 
foo # └── [ 272] bar.xml + self.assertIn('Extracted content for v2 in', out) self.assertEqual(os.path.getsize(os.path.join(self.tmpdir, "v2/empty.txt")), 0) self.assertEqual(os.path.getsize(os.path.join(self.tmpdir, "v2/empty2.txt")), 0) self.assertEqual(os.path.getsize(os.path.join(self.tmpdir, "v2/foo/bar.xml")), 272) diff --git a/tests/test_demo_ocfl_sidecar_script.py b/tests/test_demo_ocfl_sidecar_script.py index 8d253bc..d212546 100644 --- a/tests/test_demo_ocfl_sidecar_script.py +++ b/tests/test_demo_ocfl_sidecar_script.py @@ -1,13 +1,5 @@ # -*- coding: utf-8 -*- """Tests/demo of ocfl-sidecar.py client.""" -import json -import os -import re -import shutil -import subprocess -import sys -import tempfile - from testlib import DemoTestCase diff --git a/tests/test_demo_ocfl_store_script.py b/tests/test_demo_ocfl_store_script.py index 8d86c6a..cb2f520 100644 --- a/tests/test_demo_ocfl_store_script.py +++ b/tests/test_demo_ocfl_store_script.py @@ -1,13 +1,5 @@ # -*- coding: utf-8 -*- """Tests/demo of ocfl-store.py client.""" -import json -import os -import re -import shutil -import subprocess -import sys -import tempfile - from testlib import DemoTestCase diff --git a/tests/test_demo_ocfl_validate_script.py b/tests/test_demo_ocfl_validate_script.py index 06d619e..bac578d 100644 --- a/tests/test_demo_ocfl_validate_script.py +++ b/tests/test_demo_ocfl_validate_script.py @@ -1,13 +1,5 @@ # -*- coding: utf-8 -*- """Tests/demo of ocfl-validate.py client.""" -import json -import os -import re -import shutil -import subprocess -import sys -import tempfile - from testlib import DemoTestCase diff --git a/tests/test_demo_using_bagit_bags.py b/tests/test_demo_using_bagit_bags.py index 039e169..1936d8f 100644 --- a/tests/test_demo_using_bagit_bags.py +++ b/tests/test_demo_using_bagit_bags.py @@ -1,13 +1,5 @@ # -*- coding: utf-8 -*- """Tests/demo building spec examples.""" -import json -import os -import re -import shutil -import subprocess -import sys -import tempfile - from 
testlib import DemoTestCase diff --git a/tests/test_digest.py b/tests/test_digest.py index c0f9eeb..39d6fb3 100644 --- a/tests/test_digest.py +++ b/tests/test_digest.py @@ -1,7 +1,9 @@ """Digest tests.""" -import fs import unittest import sys + +import fs + from ocfl.digest import file_digest, digest_regex, normalized_digest diff --git a/tests/test_inventory_validator.py b/tests/test_inventory_validator.py index e402711..bc87693 100644 --- a/tests/test_inventory_validator.py +++ b/tests/test_inventory_validator.py @@ -1,21 +1,20 @@ """Identity dispositor tests.""" -import os.path import unittest from ocfl.inventory_validator import InventoryValidator -class TLogger(object): +class TLogger(): """Simplified logger to replace ValidationLogger.""" def __init__(self): """Initialize.""" self.clear() - def error(self, code, **args): + def error(self, code, **args): # pylint: disable=unused-argument """Add error code, discard args.""" self.errors.append(code) - def warning(self, code, **args): + def warning(self, code, **args): # pylint: disable=unused-argument """Add warn code, discard args.""" self.warns.append(code) diff --git a/tests/test_namaste.py b/tests/test_namaste.py index 3fb922e..a2e6fd5 100644 --- a/tests/test_namaste.py +++ b/tests/test_namaste.py @@ -1,9 +1,11 @@ """Namaste tests.""" -import fs -import fs.tempfs import os.path import tempfile import unittest + +import fs +import fs.tempfs + from ocfl.namaste import content_to_tvalue, find_namastes, get_namaste, Namaste, NamasteException @@ -24,13 +26,13 @@ def test02_find_namastes(self): """Test find_namastes.""" # With plain filesystem namastes1 = find_namastes(0, 'tests/testdata/namaste') - self.assertEqual(set([x.tvalue for x in namastes1]), set(['frog', 'bison', 'snake'])) - self.assertRaises(NamasteException, find_namastes, 0, 'tests/testdata/namaste', max=2) + self.assertEqual({x.tvalue for x in namastes1}, {'frog', 'bison', 'snake'}) + self.assertRaises(NamasteException, find_namastes, 0, 
'tests/testdata/namaste', limit=2) # With pysf filesystem tdfs = fs.open_fs('tests/testdata') namastes2 = find_namastes(0, 'namaste', pyfs=tdfs) - self.assertEqual(set([x.tvalue for x in namastes2]), set(['frog', 'bison', 'snake'])) - self.assertRaises(NamasteException, find_namastes, 0, 'namaste', pyfs=tdfs, max=1) + self.assertEqual({x.tvalue for x in namastes2}, {'frog', 'bison', 'snake'}) + self.assertRaises(NamasteException, find_namastes, 0, 'namaste', pyfs=tdfs, limit=1) def test03_get_namaste(self): """Test get_namaste.""" @@ -45,7 +47,7 @@ def test11_init(self): """Test initialization.""" n = Namaste() self.assertEqual(n.d, 0) - self.assertEqual(n._tr_func, content_to_tvalue) + self.assertEqual(n._tr_func, content_to_tvalue) # pylint: disable=protected-access n = Namaste(0, 'myspec') self.assertEqual(n.d, 0) self.assertEqual(n.content, 'myspec') diff --git a/tests/test_object.py b/tests/test_object.py index 6a64a42..0cfcc39 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -1,25 +1,35 @@ -# -*- coding: utf-8 -*- """Object tests.""" -import fs -import fs.tempfs import io import json import logging import os -import sys import tempfile import unittest -from ocfl.object import Object, ObjectException + +import fs +import fs.tempfs + +from ocfl.object import Object, ObjectException, parse_version_directory from ocfl.version_metadata import VersionMetadata class TestAll(unittest.TestCase): """TestAll class to run tests.""" - if sys.version_info < (3, 2): - def assertRegex(self, *args, **kwargs): - """Hack for Python 2.7.""" - return self.assertRegexpMatches(*args, **kwargs) + def test02_parse_version_directory(self): + """Test parse_version_directory function.""" + self.assertEqual(parse_version_directory('v1'), 1) + self.assertEqual(parse_version_directory('v00001'), 1) + self.assertEqual(parse_version_directory('v99999'), 99999) + # Bad + self.assertRaises(Exception, parse_version_directory, None) + self.assertRaises(Exception, 
parse_version_directory, '') + self.assertRaises(Exception, parse_version_directory, '1') + self.assertRaises(Exception, parse_version_directory, 'v0') + self.assertRaises(Exception, parse_version_directory, 'v-1') + self.assertRaises(Exception, parse_version_directory, 'v0000') + self.assertRaises(Exception, parse_version_directory, 'vv') + self.assertRaises(Exception, parse_version_directory, 'v000001') def test00_init(self): """Test Object init.""" @@ -27,7 +37,7 @@ def test00_init(self): self.assertEqual(oo.id, None) self.assertEqual(oo.digest_algorithm, 'sha512') self.assertEqual(oo.fixity, None) - oo = Object(id='a:b', digest_algorithm='sha1', + oo = Object(identifier='a:b', digest_algorithm='sha1', fixity=['md5', 'crc16']) self.assertEqual(oo.id, 'a:b') self.assertEqual(oo.digest_algorithm, 'sha1') @@ -41,22 +51,6 @@ def test01_open_fs(self): self.assertNotEqual(oo.obj_fs, None) self.assertRaises(ObjectException, oo.open_fs, 'tests/testdata/i_do_not_exist') - def test02_parse_version_directory(self): - """Test parse_version_directory.""" - oo = Object() - self.assertEqual(oo.parse_version_directory('v1'), 1) - self.assertEqual(oo.parse_version_directory('v00001'), 1) - self.assertEqual(oo.parse_version_directory('v99999'), 99999) - # Bad - self.assertRaises(Exception, oo.parse_version_directory, None) - self.assertRaises(Exception, oo.parse_version_directory, '') - self.assertRaises(Exception, oo.parse_version_directory, '1') - self.assertRaises(Exception, oo.parse_version_directory, 'v0') - self.assertRaises(Exception, oo.parse_version_directory, 'v-1') - self.assertRaises(Exception, oo.parse_version_directory, 'v0000') - self.assertRaises(Exception, oo.parse_version_directory, 'vv') - self.assertRaises(Exception, oo.parse_version_directory, 'v000001') - def test03_digest(self): """Test digest wrapper mathod.""" oo = Object(digest_algorithm='md5') @@ -68,7 +62,7 @@ def test03_digest(self): def test04_start_inventory(self): """Test start_inventory mehthod 
stub.""" - oo = Object(id="info:a", digest_algorithm="sha256") + oo = Object(identifier="info:a", digest_algorithm="sha256") inventory = oo.start_inventory() self.assertEqual(inventory['id'], "info:a") self.assertEqual(inventory['digestAlgorithm'], "sha256") @@ -76,12 +70,12 @@ def test04_start_inventory(self): self.assertEqual(inventory['manifest'], {}) self.assertNotIn('contentDirectory', inventory) self.assertNotIn('fixity', inventory) - oo = Object(id="info:b", digest_algorithm="sha256", + oo = Object(identifier="info:b", digest_algorithm="sha256", fixity=['md5', 'sha1']) inventory = oo.start_inventory() self.assertEqual(inventory['fixity'], {'md5': {}, 'sha1': {}}) # - oo = Object(id="info:b", content_directory="stuff") + oo = Object(identifier="info:b", content_directory="stuff") inventory = oo.start_inventory() self.assertEqual(inventory['id'], "info:b") self.assertEqual(inventory['contentDirectory'], "stuff") @@ -195,8 +189,9 @@ def test06_build_inventory(self): """Test build_inventory.""" oo = Object(digest_algorithm="md5") src_fs = fs.open_fs('fixtures/1.0/content/spec-ex-full') - for (vdir, inventory, manifest_to_srcfile) in oo.build_inventory(src_fs, - metadata=VersionMetadata()): + inventory = None + for (dummy_vdir, inventory, dummy_manifest_to_srcfile) in oo.build_inventory(src_fs, + metadata=VersionMetadata()): pass self.assertEqual(inventory['type'], 'https://ocfl.io/1.0/spec/#inventory') self.assertEqual(inventory['head'], 'v3') diff --git a/tests/test_object_utils.py b/tests/test_object_utils.py index 826107a..acfe3fd 100644 --- a/tests/test_object_utils.py +++ b/tests/test_object_utils.py @@ -60,7 +60,7 @@ def test_check_shared_args(self): """Test check of shared args.""" parser = argparse.ArgumentParser() add_shared_args(parser) - args = parser.parse_args(['--version', '-v']) + parser.parse_args(['--version', '-v']) check_shared_args(parser.parse_args(['-v'])) self.assertRaises(SystemExit, check_shared_args, parser.parse_args(['--version'])) 
diff --git a/tests/test_store.py b/tests/test_store.py index dc14931..ea65fbb 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1,13 +1,11 @@ # -*- coding: utf-8 -*- """Store tests.""" -import fs import io -import json import logging import os -import sys import tempfile import unittest + from ocfl.store import Store, StoreException from ocfl.identity import Identity from ocfl.validation_logger import ValidationLogger @@ -100,7 +98,7 @@ def test_check_root_structure(self): fh.close() self.assertTrue(s.check_root_structure()) # Spec "file" a directory - spec = os.mkdir(os.path.join(tempdir, "ocfl_1.0.txt")) + os.mkdir(os.path.join(tempdir, "ocfl_1.0.txt")) self.assertRaises(StoreException, s.check_root_structure) def test_parse_layout_file(self): diff --git a/tests/test_validation_logger.py b/tests/test_validation_logger.py index 2c04b5f..22523df 100644 --- a/tests/test_validation_logger.py +++ b/tests/test_validation_logger.py @@ -1,5 +1,4 @@ """Version tests.""" -import argparse import unittest from ocfl.validation_logger import ValidationLogger diff --git a/tests/test_validator.py b/tests/test_validator.py index c429a56..0c51f3f 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,8 +1,6 @@ -"""Test validation.""" +"""Test validator module.""" import os import os.path -import shutil -from zipfile import ZipFile import unittest from ocfl.validator import Validator @@ -14,8 +12,8 @@ def extra_fixture_maybe_zip(filepath): else: zippath = filepath + '.zip' if os.path.isfile(zippath): - return('zip://' + zippath) - return(filepath) + return 'zip://' + zippath + return filepath class TestAll(unittest.TestCase): @@ -26,7 +24,8 @@ def test01_bad(self): for bad, codes in {'does_not_even_exist': ['E003c'], 'E001_extra_dir_in_root': ['E001b'], 'E001_extra_file_in_root': ['E001a'], - 'E003_E034_empty': ['E003a', 'E034'], + 'E001_v2_file_in_root': [], + 'E003_E063_empty': ['E003a', 'E063'], 'E003_no_decl': ['E003a'], 
'E008_E036_no_versions_no_head': ['E008', 'E036d'], 'E009_version_two_only': ['E009'], @@ -35,27 +34,37 @@ def test01_bad(self): 'E023_missing_file': ['E023a'], 'E024_empty_dir_in_content': ['E024'], 'E033_inventory_bad_json': ['E033'], - 'E034_no_inv': ['E034'], 'E036_no_id': ['E036a'], 'E040_wrong_head_doesnt_exist': ['E040'], 'E040_wrong_head_format': ['E040'], 'E041_no_manifest': ['E041a'], + "E041_manifest_not_object": ['E041c'], 'E042_bad_manifest_content_path': ['E042'], 'E046_missing_version_dir': ['E046'], + 'E046_versions_keys_not_zero_padded': ['E046'], 'E049_E050_E054_bad_version_block_values': ['E049d', 'E050b', 'E050c', 'E054a', 'E094'], 'E049_created_no_timezone': ['E049a'], 'E049_created_not_to_seconds': ['E049b'], 'E050_file_in_manifest_not_used': ['E050b'], 'E050_state_repeated_digest': ['E050f'], 'E058_no_sidecar': ['E058a'], + 'E063_no_inv': ['E063'], 'E064_different_root_and_latest_inventories': ['E064'], 'E067_file_in_extensions_dir': ['E067'], 'E092_bad_manifest_digest': ['E092'], + 'E092_content_file_digest_mismatch': ['E092'], + # 'E092_algorithm_change_incorrect_digest': ['E092'], FIXME https://github.com/OCFL/fixtures/pull/67 + 'E093_fixity_digest_mismatch': ['E093'], 'E094_message_not_a_string': ['E094'], 'E095_conflicting_logical_paths': ['E095'], + # 'E095_non_unique_logical_paths': ['E095'], FIXME https://github.com/zimeon/ocfl-py/issues/39 'E096_manifest_repeated_digest': ['E096'], + 'E096_manifest_duplicate_digests': ['E096'], 'E097_fixity_repeated_digest': ['E097'], - 'E099_bad_content_path_elements': ['E099']}.items(): + 'E097_fixity_duplicate_digests': ['E097'], + 'E099_bad_content_path_elements': ['E099'], + 'E100_E099_fixity_invalid_content_paths': ['E057d'], # E057 OK, different test approach + 'E100_E099_manifest_invalid_content_paths': ['E099', 'E100']}.items(): v = Validator() filepath = 'fixtures/1.0/bad-objects/' + bad if not os.path.isdir(filepath): @@ -77,7 +86,8 @@ def test02_warn(self): 'W008_user_no_address': 
['W008'], 'W009_user_address_not_uri': ['W009'], 'W010_no_version_inventory': ['W010'], - 'W011_version_inv_diff_metadata': ['W011']}.items(): + 'W011_version_inv_diff_metadata': ['W011'], + 'W013_unregistered_extension': ['W013']}.items(): v = Validator() filepath = 'fixtures/1.0/warn-objects/' + warn if not os.path.isdir(filepath): diff --git a/tests/test_w3c_datetime.py b/tests/test_w3c_datetime.py index 6901289..ba962ca 100644 --- a/tests/test_w3c_datetime.py +++ b/tests/test_w3c_datetime.py @@ -6,7 +6,7 @@ def rt(dts): """Do simple round-trip.""" - return(datetime_to_str(str_to_datetime(dts))) + return datetime_to_str(str_to_datetime(dts)) class TestW3cDatetime(unittest.TestCase): diff --git a/tests/testlib/demo_unittest.py b/tests/testlib/demo_unittest.py index 28572e3..7890bac 100644 --- a/tests/testlib/demo_unittest.py +++ b/tests/testlib/demo_unittest.py @@ -1,11 +1,8 @@ # -*- coding: utf-8 -*- """Modified version of unittest.TestCase that includes demo support.""" -import json -import os import re import shutil import subprocess -import sys import tempfile import unittest import __main__ as main @@ -21,7 +18,7 @@ class DemoTestCase(unittest.TestCase): keep_tmpdirs = False def setUp(self): - """Setup for each test.""" + """Do setup for each test.""" type(self).n += 1 # access class variable not copy self.m = 0 self.tmpdir = tempfile.mkdtemp(prefix='test' + str(self.n) + '_') @@ -33,7 +30,7 @@ def tearDown(self): if self.tmpdir is not None and not self.keep_tmpdirs: shutil.rmtree(self.tmpdir) - def run_script(self, desc, options, text=None, treedir=None): + def run_script(self, desc, options, text=None): """Run the ocfl-store.py script.""" self.m += 1 if self.demo: @@ -62,7 +59,7 @@ def demo_tree(self, treedir, text=None): if self.demo: if text is not None: print(text + "\n") - ree = subprocess.check_output('cd %s; tree -a %s' % (self.tmpdir, treedir), + tree = subprocess.check_output('cd %s; tree -a %s' % (self.tmpdir, treedir), stderr=subprocess.STDOUT, 
shell=True).decode('utf-8') print("```\n" + tree + "```\n")