Skip to content

Commit

Permalink
Add filename normalization and generate rename example for spec (#8)
Browse files Browse the repository at this point in the history
Build rename example for OCFL/spec#310
  • Loading branch information
zimeon committed Mar 15, 2019
1 parent 2591476 commit 01ce95f
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 11 deletions.
8 changes: 8 additions & 0 deletions README_build_spec_example.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,11 @@ to get <examples/spec-minimal.txt>
```

to get <examples/spec-full.txt>

## Rename example

```
./ocfl-object.py --build --src fixtures/1.0/content/spec-ex-rename --id http://example.org/rename --digest sha512 --normalization uri --created "2019-03-14T20:31:00Z" -v | ./compactify_spec_examples.py > examples/spec-rename.txt
```

to get <examples/spec-rename.txt>
1 change: 1 addition & 0 deletions examples/spec-rename.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

2 changes: 1 addition & 1 deletion fixtures
1 change: 1 addition & 0 deletions ocfl-object.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def do_object_operation(args):
"""Implement object operations in a way that can be reused by ocfl-store.py."""
obj = ocfl.Object(identifier=args.id,
digest_algorithm=args.digest,
filename_normalization=args.normalization,
skips=args.skip,
forward_delta=not args.no_forward_delta,
dedupe=not args.no_dedupe,
Expand Down
33 changes: 30 additions & 3 deletions ocfl/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import logging
from shutil import copyfile
import sys
# Import the URL percent-encoding helper under one name on both major
# Python versions. Only a missing module should trigger the fallback, so
# catch ImportError rather than using a bare except (which would also hide
# unrelated errors such as KeyboardInterrupt).
try:
    from urllib.parse import quote as urlquote  # python3
except ImportError:
    from urllib import quote as urlquote  # python2

from .digest import file_digest
from .namaste import Namaste
Expand All @@ -19,6 +23,8 @@ def add_object_args(parser):
# Disk scanning
parser.add_argument('--skip', action='append', default=['README.md', '.DS_Store'],
help='directories and files to ignore')
parser.add_argument('--normalization', '--norm', default=None,
help='filename normalization strategy')
# Versioning strategy settings
parser.add_argument('--no-forward-delta', action='store_true',
help='do not use forward deltas')
Expand All @@ -41,8 +47,8 @@ class Object(object):
"""Class for handling OCFL Object data and operations."""

def __init__(self, identifier=None,
digest_algorithm='sha512', skips=None,
forward_delta=True, dedupe=True,
digest_algorithm='sha512', filename_normalization='uri',
skips=None, forward_delta=True, dedupe=True,
ocfl_version='draft', fixity=None, fhout=sys.stdout):
"""Initialize OCFL builder.
Expand All @@ -54,6 +60,7 @@ def __init__(self, identifier=None,
"""
self.identifier = identifier
self.digest_algorithm = digest_algorithm
self.filename_normalization = filename_normalization
self.skips = set() if skips is None else set(skips)
self.forward_delta = forward_delta
self.dedupe = dedupe
Expand All @@ -78,7 +85,13 @@ def digest(self, filename):

def normalize_filename(self, filename):
    """Translate source filename to a normalized (safe and sanitized) name within object.

    When ``self.filename_normalization`` is ``'uri'`` the name is
    percent-encoded with ``urlquote`` and a leading period is additionally
    encoded as ``%2E`` so that the result is not a hidden (dot) file. When it
    is ``None`` the name is returned unchanged.

    Raises:
        Exception: if ``self.filename_normalization`` is any other value.
    """
    if self.filename_normalization == 'uri':
        filename = urlquote(filename)
        # also encode any leading period to unhide files; startswith() is
        # safe for an empty name, where indexing [0] would raise IndexError
        if filename.startswith('.'):
            filename = '%2E' + filename[1:]
    elif self.filename_normalization is not None:
        # Must read the setting from self: there is no local variable of
        # that name, so a bare reference would raise NameError instead of
        # reporting the bad value.
        raise Exception("Unknown filename normalization '%s' requested" % (self.filename_normalization))
    return filename

def start_inventory(self):
Expand Down Expand Up @@ -129,6 +142,10 @@ def add_version(self, inventory, srcdir, vdir, metadata=None):
sfilepath = os.path.relpath(filepath, srcdir) # path relative to this version
norm_path = self.normalize_filename(sfilepath)
vfilepath = os.path.join(vdir, 'content', norm_path) # path relative to root, inc v#/content
# Check we don't already have this vfilepath from many to one normalization,
# add suffix to distinguish if necessary
if vfilepath in manifest_to_srcfile:
vfilepath = make_unused_filepath(vfilepath, manifest_to_srcfile)
digest = self.digest(filepath)
# Always add file to state
if digest not in state:
Expand Down Expand Up @@ -390,3 +407,13 @@ def remove_first_directory(path):
path = head
rpath = tail if rpath == '' else os.path.join(tail, rpath)
return rpath


def make_unused_filepath(filepath, used, separator='__'):
    """Return filepath with a numeric suffix chosen so the result is not in used.

    Candidates are filepath + separator + N for N = 2, 3, 4, ... and the
    first one that is not a member of used is returned. The unsuffixed
    filepath itself is never returned.
    """
    suffix = 2
    candidate = filepath + separator + str(suffix)
    while candidate in used:
        suffix += 1
        candidate = filepath + separator + str(suffix)
    return candidate
14 changes: 9 additions & 5 deletions ocfl/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ def add_version_metadata_args(parser):
parser.add_argument('--created', default=None,
help='creation time to be used with version(s) added, else '
'current time will be recorded')
parser.add_argument('--message', default='',
parser.add_argument('--message', default=None,
help='message to be recorded with version(s) added')
parser.add_argument('--name', default='someone',
parser.add_argument('--name', default=None,
help='name of user adding version(s) to object')
parser.add_argument('--address', default='somewhere',
parser.add_argument('--address', default=None,
help='address of user adding version(s) to object')


Expand Down Expand Up @@ -83,8 +83,12 @@ def as_dict(self, **kwargs):
def add_to_dict(self, m, **kwargs):
"""Add metadata to dictionary m."""
m['created'] = self.created if self.created else datetime_to_str()
m['message'] = self.message
m['user'] = {'name': self.name, 'address': self.address}
if self.message is not None:
m['message'] = self.message
if self.name is not None or self.address is not None:
m['user'] = {'name': self.name}
if self.address is not None:
m['user']['address'] = self.address
# Add any extra values, and they will override instance variables
for (key, value) in kwargs.items():
m[key] = value
11 changes: 9 additions & 2 deletions tests/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
import tempfile
import unittest
from ocfl.object import Object, ObjectException, remove_first_directory
from ocfl.object import Object, ObjectException, remove_first_directory, make_unused_filepath
from ocfl.version import VersionMetadata


Expand Down Expand Up @@ -235,9 +235,16 @@ def test13_parse_inventory(self):
self.assertRaises(ObjectException, oo.parse_inventory, path='fixtures/1.0/bad-objects/bad02_no_id')

def test90_remove_first_directory(self):
    """Test remove_first_directory function."""
    # Empty and single-component paths have nothing left after the first
    # directory is removed.
    self.assertEqual(remove_first_directory(''), '')
    self.assertEqual(remove_first_directory('a'), '')
    # Multi-component paths lose only the first component; a trailing
    # slash does not survive.
    self.assertEqual(remove_first_directory('a/b'), 'b')
    self.assertEqual(remove_first_directory('a/b/'), 'b')
    self.assertEqual(remove_first_directory('a/b/c'), 'b/c')

def test91_make_unused_filepath(self):
    """Check make_unused_filepath with empty, dict and list collections of used paths."""
    # Each case is (positional arguments, expected result); order matches
    # the original sequence of assertions.
    cases = (
        (('x/y', []), 'x/y__2'),
        (('x/y', {'x/y__2': 1}), 'x/y__3'),
        (('x/y', {'x/y': 1}, ''), 'x/y2'),
        (('x/y', ['x/y', 'x/y2', 'x/y3'], ''), 'x/y4'),
    )
    for args, expected in cases:
        self.assertEqual(make_unused_filepath(*args), expected)

0 comments on commit 01ce95f

Please sign in to comment.