Avoid seeking on import (#188)

* Don't seek on imports (other than the possible seek for custom importers). We were seeking to handle blob markers. This has two major drawbacks: 1. It wasn't possible to use a non-seekable file. A use case for export/import is to copy database data. An intermediate file, and associated I/O, could be avoided using a pipe, but pipes aren't seekable. 2. Seeks cause file-buffer data to be discarded, making I/O far more expensive. We didn't really need blob markers, because the preceding blob data records serve as markers. (Now we're stuck with them for backward compatibility.) * Make cp's buffer size larger and configurable. * Use the storage temporary directory when importing blobs, to avoid an extra copy. Also, allow the copy (cp) buffer size to be overridden on export. (I see no obvious way to plumb it on import. :( ) * Oops, need to use a binary literal (Python 3) * Respond to PR comments
zopefoundation · Feb 15, 2018 · 2115c90 · 2115c90
1 parent de1f24c
commit 2115c90
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 9 deletions.
diff --git a/src/ZODB/ExportImport.py b/src/ZODB/ExportImport.py
@@ -31,7 +31,7 @@
 
 class ExportImport(object):
 
-    def exportFile(self, oid, f=None):
+    def exportFile(self, oid, f=None, bufsize=64 * 1024):
         if f is None:
             f = TemporaryFile(prefix="EXP")
         elif isinstance(f, six.string_types):
@@ -64,7 +64,7 @@ def exportFile(self, oid, f=None):
                     f.write(blob_begin_marker)
                     f.write(p64(os.stat(blobfilename).st_size))
                     blobdata = open(blobfilename, "rb")
-                    cp(blobdata, f)
+                    cp(blobdata, f, bufsize=bufsize)
                     blobdata.close()
 
         f.write(export_end_marker)
@@ -158,18 +158,23 @@ def persistent_load(ooid):
                 oids[ooid] = oid = self._storage.new_oid()
                 return_oid_list.append(oid)
 
-            # Blob support
-            blob_begin = f.read(len(blob_begin_marker))
-            if blob_begin == blob_begin_marker:
+            if (b'blob' in data and
+                isinstance(self._reader.getGhost(data), Blob)
+                ):
+                # Blob support
+
+                # Make sure we have a (redundant, overly) blob marker.
+                if f.read(len(blob_begin_marker)) != blob_begin_marker:
+                    raise ValueError("No data for blob object")
+
                 # Copy the blob data to a temporary file
                 # and remember the name
                 blob_len = u64(f.read(8))
-                blob_filename = mktemp()
+                blob_filename = mktemp(self._storage.temporaryDirectory())
                 blob_file = open(blob_filename, "wb")
                 cp(f, blob_file, blob_len)
                 blob_file.close()
             else:
-                f.seek(-len(blob_begin_marker),1)
                 blob_filename = None
 
             pfile = BytesIO(data)

diff --git a/src/ZODB/utils.py b/src/ZODB/utils.py
@@ -95,7 +95,7 @@ def u64(v):
 U64 = u64
 
 
-def cp(f1, f2, length=None):
+def cp(f1, f2, length=None, bufsize=64 * 1024):
     """Copy all data from one file to another.
 
     It copies the data from the current position of the input file (f1)
@@ -106,7 +106,7 @@ def cp(f1, f2, length=None):
     """
     read = f1.read
     write = f2.write
-    n = 8192
+    n = bufsize
 
     if length is None:
         old_pos = f1.tell()