Skip to content

Commit

Permalink
Avoid seeking on import (#188)
Browse files Browse the repository at this point in the history
* Don't seek on imports

(other than the possible seek for custom importers)

We were seeking to handle blob markers. This has two major drawbacks:

1. It wasn't possible to use a non-seekable file.  A use case for
   export/import is to copy database data.  An intermediate file, and
   associated I/O, could be avoided using a pipe, but pipes aren't
   seekable.

2. Seeks cause file-buffer data to be discarded, making IO far more
   expensive.

We didn't really need blob markers, because the preceding blob data
records serve as markers.  (Now we're stuck with them for backward
compatibility.)

* Make cp's buffer size larger and configurable.

* Use the storage temporary directory when importing blobs

To avoid an extra copy.

Also, allow the copy (cp) buffer size to be overridden on export.

(I see no obvious way to plumb it on import. :( )

* Oops, need to use a binary literal (Python 3)

* Respond to PR comments
  • Loading branch information
jimfulton committed Feb 15, 2018
1 parent de1f24c commit 2115c90
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
19 changes: 12 additions & 7 deletions src/ZODB/ExportImport.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

class ExportImport(object):

def exportFile(self, oid, f=None):
def exportFile(self, oid, f=None, bufsize=64 * 1024):
if f is None:
f = TemporaryFile(prefix="EXP")
elif isinstance(f, six.string_types):
Expand Down Expand Up @@ -64,7 +64,7 @@ def exportFile(self, oid, f=None):
f.write(blob_begin_marker)
f.write(p64(os.stat(blobfilename).st_size))
blobdata = open(blobfilename, "rb")
cp(blobdata, f)
cp(blobdata, f, bufsize=bufsize)
blobdata.close()

f.write(export_end_marker)
Expand Down Expand Up @@ -158,18 +158,23 @@ def persistent_load(ooid):
oids[ooid] = oid = self._storage.new_oid()
return_oid_list.append(oid)

# Blob support
blob_begin = f.read(len(blob_begin_marker))
if blob_begin == blob_begin_marker:
if (b'blob' in data and
isinstance(self._reader.getGhost(data), Blob)
):
# Blob support

# Make sure we have a (redundant, overly) blob marker.
if f.read(len(blob_begin_marker)) != blob_begin_marker:
raise ValueError("No data for blob object")

# Copy the blob data to a temporary file
# and remember the name
blob_len = u64(f.read(8))
blob_filename = mktemp()
blob_filename = mktemp(self._storage.temporaryDirectory())
blob_file = open(blob_filename, "wb")
cp(f, blob_file, blob_len)
blob_file.close()
else:
f.seek(-len(blob_begin_marker),1)
blob_filename = None

pfile = BytesIO(data)
Expand Down
4 changes: 2 additions & 2 deletions src/ZODB/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def u64(v):
U64 = u64


def cp(f1, f2, length=None):
def cp(f1, f2, length=None, bufsize=64 * 1024):
"""Copy all data from one file to another.
It copies the data from the current position of the input file (f1)
Expand All @@ -106,7 +106,7 @@ def cp(f1, f2, length=None):
"""
read = f1.read
write = f2.write
n = 8192
n = bufsize

if length is None:
old_pos = f1.tell()
Expand Down

0 comments on commit 2115c90

Please sign in to comment.