Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add filename normalization and generate rename example for spec #8

Merged
merged 4 commits into from
Mar 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions README_build_spec_example.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,11 @@ to get <examples/spec-minimal.txt>
```

to get <examples/spec-full.txt>

## Rename example

```
./ocfl-object.py --build --src fixtures/1.0/content/spec-ex-rename --id http://example.org/rename --digest sha512 --normalization uri --created "2019-03-14T20:31:00Z" -v | ./compactify_spec_examples.py > examples/spec-rename.txt
```

to get <examples/spec-rename.txt>
1 change: 1 addition & 0 deletions examples/spec-rename.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

2 changes: 1 addition & 1 deletion fixtures
1 change: 1 addition & 0 deletions ocfl-object.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def do_object_operation(args):
"""Implement object operations in a way that can be reused by ocfl-store.py."""
obj = ocfl.Object(identifier=args.id,
digest_algorithm=args.digest,
filename_normalization=args.normalization,
skips=args.skip,
forward_delta=not args.no_forward_delta,
dedupe=not args.no_dedupe,
Expand Down
33 changes: 30 additions & 3 deletions ocfl/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import logging
from shutil import copyfile
import sys
try:
    from urllib.parse import quote as urlquote  # python3
except ImportError:
    # Fall back only when the python3 module is missing; a bare except would
    # also swallow unrelated errors such as KeyboardInterrupt or SystemExit.
    from urllib import quote as urlquote  # python2

from .digest import file_digest
from .namaste import Namaste
Expand All @@ -19,6 +23,8 @@ def add_object_args(parser):
# Disk scanning
parser.add_argument('--skip', action='append', default=['README.md', '.DS_Store'],
help='directories and files to ignore')
parser.add_argument('--normalization', '--norm', default=None,
help='filename normalization strategy')
# Versioning strategy settings
parser.add_argument('--no-forward-delta', action='store_true',
help='do not use forward deltas')
Expand All @@ -41,8 +47,8 @@ class Object(object):
"""Class for handling OCFL Object data and operations."""

def __init__(self, identifier=None,
digest_algorithm='sha512', skips=None,
forward_delta=True, dedupe=True,
digest_algorithm='sha512', filename_normalization='uri',
skips=None, forward_delta=True, dedupe=True,
ocfl_version='draft', fixity=None, fhout=sys.stdout):
"""Initialize OCFL builder.

Expand All @@ -54,6 +60,7 @@ def __init__(self, identifier=None,
"""
self.identifier = identifier
self.digest_algorithm = digest_algorithm
self.filename_normalization = filename_normalization
self.skips = set() if skips is None else set(skips)
self.forward_delta = forward_delta
self.dedupe = dedupe
Expand All @@ -78,7 +85,13 @@ def digest(self, filename):

def normalize_filename(self, filename):
    """Translate source filename to a normalized (safe and sanitized) name within object.

    The strategy is selected by self.filename_normalization:

    - 'uri': percent-encode the name with urlquote and, if the encoded name
      starts with a period, replace that period with '%2E' to unhide the file.
    - None: return filename unchanged.

    Returns the normalized filename.

    Raises ValueError (a subclass of Exception) if self.filename_normalization
    is neither 'uri' nor None.
    """
    if self.filename_normalization is None:
        return filename
    if self.filename_normalization != 'uri':
        # Report the configured strategy via self; the bare name
        # filename_normalization is not defined in this scope.
        raise ValueError("Unknown filename normalization '%s' requested"
                         % (self.filename_normalization,))
    filename = urlquote(filename)
    # Also encode any leading period to unhide files. startswith() is used
    # rather than filename[0] so an empty filename does not raise IndexError.
    # NOTE(review): only a period at the very start of the whole path is
    # encoded; periods that begin later path segments are left alone --
    # confirm that is intended.
    if filename.startswith('.'):
        filename = '%2E' + filename[1:]
    return filename

def start_inventory(self):
Expand Down Expand Up @@ -129,6 +142,10 @@ def add_version(self, inventory, srcdir, vdir, metadata=None):
sfilepath = os.path.relpath(filepath, srcdir) # path relative to this version
norm_path = self.normalize_filename(sfilepath)
vfilepath = os.path.join(vdir, 'content', norm_path) # path relative to root, inc v#/content
# Check we don't already have this vfilepath from many to one normalization,
# add suffix to distinguish if necessary
if vfilepath in manifest_to_srcfile:
vfilepath = make_unused_filepath(vfilepath, manifest_to_srcfile)
digest = self.digest(filepath)
# Always add file to state
if digest not in state:
Expand Down Expand Up @@ -390,3 +407,13 @@ def remove_first_directory(path):
path = head
rpath = tail if rpath == '' else os.path.join(tail, rpath)
return rpath


def make_unused_filepath(filepath, used, separator='__'):
    """Return filepath with a numeric suffix chosen so the result is not in used.

    Candidates are filepath + separator + N for N = 2, 3, 4, ... and the
    first candidate not found in used is returned. The unsuffixed filepath
    itself is never offered as a candidate.
    """
    counter = 2
    candidate = filepath + separator + str(counter)
    while candidate in used:
        counter += 1
        candidate = filepath + separator + str(counter)
    return candidate
14 changes: 9 additions & 5 deletions ocfl/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ def add_version_metadata_args(parser):
parser.add_argument('--created', default=None,
help='creation time to be used with version(s) added, else '
'current time will be recorded')
parser.add_argument('--message', default='',
parser.add_argument('--message', default=None,
help='message to be recorded with version(s) added')
parser.add_argument('--name', default='someone',
parser.add_argument('--name', default=None,
help='name of user adding version(s) to object')
parser.add_argument('--address', default='somewhere',
parser.add_argument('--address', default=None,
help='address of user adding version(s) to object')


Expand Down Expand Up @@ -83,8 +83,12 @@ def as_dict(self, **kwargs):
def add_to_dict(self, m, **kwargs):
"""Add metadata to dictionary m."""
m['created'] = self.created if self.created else datetime_to_str()
m['message'] = self.message
m['user'] = {'name': self.name, 'address': self.address}
if self.message is not None:
m['message'] = self.message
if self.name is not None or self.address is not None:
m['user'] = {'name': self.name}
if self.address is not None:
m['user']['address'] = self.address
# Add any extra values, and they will override instance variables
for (key, value) in kwargs.items():
m[key] = value
11 changes: 9 additions & 2 deletions tests/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
import tempfile
import unittest
from ocfl.object import Object, ObjectException, remove_first_directory
from ocfl.object import Object, ObjectException, remove_first_directory, make_unused_filepath
from ocfl.version import VersionMetadata


Expand Down Expand Up @@ -235,9 +235,16 @@ def test13_parse_inventory(self):
self.assertRaises(ObjectException, oo.parse_inventory, path='fixtures/1.0/bad-objects/bad02_no_id')

def test90_remove_first_directory(self):
    """Test remove_first_directory function."""
    # Expected values below pin the results for an empty path, a single
    # component, two components, a trailing slash and three components.
    self.assertEqual(remove_first_directory(''), '')
    self.assertEqual(remove_first_directory('a'), '')
    self.assertEqual(remove_first_directory('a/b'), 'b')
    self.assertEqual(remove_first_directory('a/b/'), 'b')
    self.assertEqual(remove_first_directory('a/b/c'), 'b/c')

def test91_make_unused_filepath(self):
    """Check the suffixed paths chosen by make_unused_filepath."""
    # Each case is (positional arguments, expected result); cases run in order.
    cases = [
        (('x/y', []), 'x/y__2'),
        (('x/y', {'x/y__2': 1}), 'x/y__3'),
        (('x/y', {'x/y': 1}, ''), 'x/y2'),
        (('x/y', ['x/y', 'x/y2', 'x/y3'], ''), 'x/y4'),
    ]
    for call_args, expected in cases:
        self.assertEqual(make_unused_filepath(*call_args), expected)