Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply some heuristics to make certain datasets look more like a binder #376

Merged
merged 1 commit into from Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
62 changes: 59 additions & 3 deletions plugin_tests/import_test.py
@@ -1,10 +1,18 @@
import mock
import os
import json
import shutil
import tarfile
import tempfile
import time
import vcr
import zipfile
from webdavfs.webdavfs import WebDAVFS
from fs.osfs import OSFS
from fs.copy import copy_fs
from tests import base
from girder import config
from girder.models.token import Token


SCRIPTDIRS_NAME = None
Expand Down Expand Up @@ -56,9 +64,9 @@ def tearDownModule():
base.stopServer()


class TaleTestCase(base.TestCase):
class ImportTaleTestCase(base.TestCase):
def setUp(self):
super(TaleTestCase, self).setUp()
super(ImportTaleTestCase, self).setUp()
users = (
{
'email': 'root@dev.null',
Expand Down Expand Up @@ -109,6 +117,19 @@ def setUp(self):
),
)

from girder.plugins.wt_home_dir import HOME_DIRS_APPS
self.homeDirsApps = HOME_DIRS_APPS # nopep8
for e in self.homeDirsApps.entries():
provider = e.app.providerMap['/']['provider']
provider.updateAssetstore()
self.clearDAVAuthCache()

def clearDAVAuthCache(self):
    """Clear the auth cache of every home-dir DAV app's domain controller.

    The database is wiped on every test, but each DAV domain controller
    keeps its own cache of users/tokens, so it has to be reset explicitly.
    """
    for entry in self.homeDirsApps.entries():
        controller = entry.app.config['domaincontroller']
        controller.clearCache()

@mock.patch('gwvolman.tasks.import_tale')
def testTaleImport(self, it):
with mock.patch(
Expand Down Expand Up @@ -277,8 +298,43 @@ def testTaleImportZip(self):
)
self.model('image', 'wholetale').remove(image)

def test_binder_heuristics(self):
    """Check that sanitize_binder unwraps a nested dir/zip/tar layout.

    Builds ``hidden_binder/tale.zip`` whose only member is
    ``dir_in_zip/tale.tar.gz``, which in turn holds
    ``dir_in_tar/i_am_a_binder``; uploads it to the Tale's WebDAV
    workspace and expects ``i_am_a_binder`` to be the only root entry
    after sanitizing.
    """
    from girder.plugins.wholetale.tasks.import_binder import sanitize_binder

    tale = Tale().createTale(self.image, [], creator=self.user, title="Binder")
    tmpdir = tempfile.mkdtemp()
    # Cleanup lives in ``finally`` so a failed assertion or upload error
    # does not leak the temporary directory or the Tale document.
    try:
        token = Token().createToken(user=self.user, days=0.25)

        payload = os.path.join(tmpdir, "i_am_a_binder")
        with open(payload, "w") as fobj:
            fobj.write("but well hidden!")

        tarball = os.path.join(tmpdir, "tale.tar.gz")
        with tarfile.open(tarball, "w:gz") as tar:
            tar.add(payload, arcname="dir_in_tar/i_am_a_binder")
        os.remove(payload)

        zipball = os.path.join(tmpdir, "tale.zip")
        with zipfile.ZipFile(zipball, "w") as myzip:
            myzip.write(tarball, arcname="dir_in_zip/tale.tar.gz")
        os.remove(tarball)

        hidden_dir = os.path.join(tmpdir, "hidden_binder")
        os.makedirs(hidden_dir)
        os.rename(zipball, os.path.join(hidden_dir, "tale.zip"))

        girder_root = "http://localhost:{}".format(
            config.getConfig()["server.socket_port"]
        )
        with WebDAVFS(
            girder_root,
            login=self.user["login"],
            password="token:{_id}".format(**token),
            root="/tales/{_id}".format(**tale),
        ) as destination_fs, OSFS(tmpdir) as source_fs:
            copy_fs(source_fs, destination_fs)
            sanitize_binder(destination_fs)
            self.assertEqual(destination_fs.listdir("/"), ["i_am_a_binder"])
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
        Tale().remove(tale)

def tearDown(self):
    """Remove the users and image created for the test, then run base teardown."""
    self.model('user').remove(self.user)
    self.model('user').remove(self.admin)
    self.model('image', 'wholetale').remove(self.image)
    # Only one super() call, naming the current class: the test case was
    # renamed from TaleTestCase to ImportTaleTestCase, and a leftover call
    # through the old name would fail or run the base teardown twice.
    super(ImportTaleTestCase, self).tearDown()
32 changes: 32 additions & 0 deletions server/tasks/import_binder.py
Expand Up @@ -17,6 +17,8 @@
from fs.mode import Mode
from fs.path import basename
from fs.permissions import Permissions
from fs.tarfs import ReadTarFS
from fs.zipfs import ReadZipFS
from girderfs.core import WtDmsGirderFS
from girder_client import GirderClient
from girder.constants import AccessType
Expand All @@ -34,6 +36,35 @@
from ..utils import getOrCreateRootFolder


def _pick_unused_name(taken, base):
    """Return ``base`` prefixed with underscores until it is not in ``taken``."""
    candidate = base
    while candidate in taken:
        candidate = "_" + candidate
    return candidate


def _archive_reader_for(name):
    """Return the read-only archive filesystem class for ``name``, or None.

    ``.tar`` must be a real suffix component (``x.tar``, ``x.tar.gz``, ...)
    or the name must end in ``.tgz``; a bare substring test would also match
    unrelated names such as ``build.target``.
    """
    lowered = name.lower()
    if lowered.endswith(".zip"):
        return ReadZipFS
    if lowered.endswith((".tar", ".tgz")) or ".tar." in lowered:
        return ReadTarFS
    return None


def sanitize_binder(root):
    """Flatten a workspace whose root holds one wrapper directory or archive.

    While ``root`` contains exactly one entry that is either a directory or
    a zip/tar archive, that entry is replaced by its own contents. Anything
    else (no entries, several entries, a single non-archive file) is left
    untouched.

    :param root: filesystem object to sanitize in place; it must provide
        ``listdir``, ``isdir``, ``isfile``, ``opendir``, ``openbin``,
        ``move``, ``movedir``, ``remove`` and ``removetree`` and be usable
        as a ``copy_fs`` destination.
    :returns: None
    """
    entries = root.listdir("/")
    if len(entries) != 1:
        return

    name = entries[0]
    path = "/" + name

    if root.isdir(path):
        children = root.listdir(path)
        if name in children:
            # The wrapper holds an entry with its own name (e.g. pkg/pkg/).
            # Hoisting it in place would merge it into the wrapper, and the
            # removetree below would then delete it. Move the wrapper aside.
            staging = "/" + _pick_unused_name(children, name)
            root.movedir(path, staging, create=True)
            path = staging
        with root.opendir(path) as wrapper:
            copy_fs(wrapper, root)
        root.removetree(path)
    elif root.isfile(path):
        reader = _archive_reader_for(name)
        if reader is None:
            return
        # Open the raw handle in the same ``with`` so it is closed too; the
        # archive wrappers are not relied upon to close a file they were given.
        with root.openbin(path) as handle, reader(handle) as archive:
            members = archive.listdir("/")
            clash = name in members
            if not clash:
                copy_fs(archive, root)
        if clash:
            # A top-level member shares the archive's own name; extracting
            # now would overwrite the archive while it is being read.
            staging = "/" + _pick_unused_name(members, name)
            root.move(path, staging)
            path = staging
            with root.openbin(path) as handle, reader(handle) as archive:
                copy_fs(archive, root)
        root.remove(path)
    else:
        return

    # The hoisted contents may again be a single wrapper; keep unwrapping.
    sanitize_binder(root)


def run(job):
jobModel = Job()
jobModel.updateJob(job, status=JobStatus.RUNNING)
Expand Down Expand Up @@ -120,6 +151,7 @@ def run(job):
str(session["_id"]), girder_root + "/api/v1", str(token["_id"])
) as source_fs:
copy_fs(source_fs, destination_fs)
sanitize_binder(destination_fs)

Session().deleteSession(user, session)

Expand Down