diff --git a/.travis.yml b/.travis.yml index e600e8a..089b298 100644 --- a/.travis.yml +++ b/.travis.yml @@ -61,7 +61,7 @@ matrix: # Coverage test, pass the results to coveralls. - os: linux python: 3.5 - env: MAIN_CMD='coverage' SETUP_CMD='run hpsspy/test/hpsspy_test_suite.py' + env: MAIN_CMD='coverage' SETUP_CMD='run setup.py test' # PEP 8 compliance. - os: linux diff --git a/doc/changes.rst b/doc/changes.rst index e8567c5..990424f 100644 --- a/doc/changes.rst +++ b/doc/changes.rst @@ -5,7 +5,9 @@ Release Notes 0.4.1 (unreleased) ------------------ -* No changes yet. +* Handle directory names that contain underscore characters (PR `#4`_). + +.. _`#4`: https://github.com/weaverba137/hpsspy/pull/4 0.4.0 (2017-08-10) ------------------ diff --git a/doc/configuration.rst b/doc/configuration.rst index bcc6260..b80eb2b 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -160,10 +160,12 @@ imposes some additional requirements, conventions and idioms: For example ``batch.tar`` means "archive a batch/ directory". For longer file names, the "suffix" of the file will be used. ``data_d1_batch.tar`` also means "archive a batch/ directory", because - ``data_d1_`` is stripped off. + ``data_d1_`` is stripped off. The directory name will be verified, so + if the directory to back up is actually ``d1_batch/``, ``batch/`` will be + searched for, then ``d1_batch/``. - An archive filename that ends with ``_files.tar``, *e.g.* ``foo/bar_files.tar`` is a signal to :command:`missing_from_hpss` to construct - the archive file in a certain way, not by decending into a directory, + the archive file in a certain way, not by descending into a directory, but by constructing an explicit list of files and building an archive file out of that. @@ -175,7 +177,7 @@ imposes some additional requirements, conventions and idioms: ``"foo/(bar|baz|flub)/.*$" : "foo/foo_\\1.tar"``. The name of the directory matched in parentheses will be substituted into the file name. - Archive arbitrary subdirectories of a *set* of subdirectories: - ``"d1/foo/(ab|bc|cd|de|ef)/([^/]+)/.*$":"d1/foo/\\1/d1_foo_\\1_\\2.tar"`` + ``"d1/foo/(ab|bc|cd|de|ef)/([^/]+)/.*$" : "d1/foo/\\1/d1_foo_\\1_\\2.tar"`` - Match files in a directory, but not any files in any subdirectory: ``"foo/[^/]+$" : "foo_files.tar"``. See also the ``_files.tar`` convention mentioned above. @@ -183,7 +185,8 @@ imposes some additional requirements, conventions and idioms: archive file for efficiency: ``"foo/([0-9])([0-9][0-9])/.*$" : "foo/foo_\\1XX.tar"``. Note the ending of the archive file, and that the directories have to have a very uniform naming convention (three and only three digits - in this example). + in this example). Also, the placeholder ``X`` needs to be at the *end* of + the file name. - Do not create an archive file, just copy the file, as is, to HPSS: ``"d1/README\\.txt$" : "d1/README.txt"``. Similarly, for a set of TXT files: ``"d1/([^/]+\\.txt)$" : "d1/\\1"``. diff --git a/hpsspy/os/__init__.py b/hpsspy/os/__init__.py index 3a0c390..ad8f803 100644 --- a/hpsspy/os/__init__.py +++ b/hpsspy/os/__init__.py @@ -10,7 +10,7 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) # -from . import path +# from . import path from ._os import * import re diff --git a/hpsspy/os/_os.py b/hpsspy/os/_os.py index 3c6fbf1..f30071a 100644 --- a/hpsspy/os/_os.py +++ b/hpsspy/os/_os.py @@ -1,8 +1,8 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst # -*- coding: utf-8 -*- """ -hpsspy._os -~~~~~~~~~~ +hpsspy.os._os +~~~~~~~~~~~~~ Contains the actual functions in :mod:`hpsspy.os`. """ diff --git a/hpsspy/scan.py b/hpsspy/scan.py index c63a4ba..4e37f6a 100644 --- a/hpsspy/scan.py +++ b/hpsspy/scan.py @@ -311,12 +311,27 @@ def process_missing(missing_cache, disk_root, hpss_root, dirmode='2770', fp.write(Lfile_lines) else: Lfile = None - htar_dir = [basename(h).split('_')[-1].split('.')[0]] - if 'X' in htar_dir[0]: - htar_re = re.compile(htar_dir[0].replace('X', '.') + '$') - htar_dir = [d for d in listdir(full_chdir) - if isdir(join(full_chdir, d)) and - htar_re.match(d) is not None] + # + # Be careful, because the directory name may itself + # contain underscore characters, or X characters. + # + htar_base = basename(h).rsplit('.', 1)[0] # remove .tar + htar_dir = [] + for b in iterrsplit(htar_base, '_'): + if b.endswith('X'): + htar_re = re.compile(b.replace('X', '.') + '$') + htar_dir = [d for d in listdir(full_chdir) + if isdir(join(full_chdir, d)) and + htar_re.match(d) is not None] + else: + if isdir(join(full_chdir, b)): + htar_dir = [b] + if len(htar_dir) > 0: + break + if len(htar_dir) == 0: + logger.error(("Could not find directories corresponding " + + "to %s!"), h) + continue logger.debug("chdir('%s')", full_chdir) chdir(full_chdir) h_dir = join(hpss_root, disk_chdir) @@ -368,6 +383,29 @@ def process_missing(missing_cache, disk_root, hpss_root, dirmode='2770', return +def iterrsplit(s, c): + """Split string `s` on `c` and rejoin on `c` from the end of `s`. + + Parameters + ---------- + s : :class:`str` + String to split + c : :class:`str` + Split on this string. + + Returns + ------- + :class:`str` + Iteratively return the joined parts of `s`. + """ + ss = s.split(c) + i = -1 + while abs(i) <= len(ss): + yield c.join(ss[i:]) + i -= 1 + return + + def scan_disk(disk_roots, disk_files_cache, clobber=False): """Scan a directory tree on disk and cache the files found there. diff --git a/hpsspy/test/__init__.py b/hpsspy/test/__init__.py index 7b6f15f..722d477 100644 --- a/hpsspy/test/__init__.py +++ b/hpsspy/test/__init__.py @@ -1,3 +1,33 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst # -*- coding: utf-8 -*- -from __future__ import absolute_import +""" +hpsspy.test +~~~~~~~~~~~ + +Used to initialize the unit test framework via ``python setup.py test``. +""" +from __future__ import (absolute_import, division, + print_function, unicode_literals) +# The line above will help with 2to3 support. +import unittest + + +def hpsspy_test_suite(): + """Returns unittest.TestSuite of hpsspy tests. + + This is factored out separately from runtests() so that it can be used by + ``python setup.py test``. + """ + from os.path import dirname + py_dir = dirname(dirname(__file__)) + return unittest.defaultTestLoader.discover(py_dir, + top_level_dir=dirname(py_dir)) + + +def runtests(): + """Run all tests in hpsspy.test.test_*. + """ + # Load all TestCase classes from hpsspy/test/test_*.py + tests = hpsspy_test_suite() + # Run them + unittest.TextTestRunner(verbosity=2).run(tests) diff --git a/hpsspy/test/hpsspy_test_suite.py b/hpsspy/test/hpsspy_test_suite.py deleted file mode 100644 index 041633e..0000000 --- a/hpsspy/test/hpsspy_test_suite.py +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed under a 3-clause BSD style license - see LICENSE.rst -# -*- coding: utf-8 -*- -""" -hpsspy.test.hpsspy_test_suite -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Used to initialize the unit test framework via ``python setup.py test``. -""" -from __future__ import (absolute_import, division, - print_function, unicode_literals) -# The line above will help with 2to3 support. -import unittest - - -def hpsspy_test_suite(): - """Returns unittest.TestSuite of hpsspy tests. - - This is factored out separately from runtests() so that it can be used by - ``python setup.py test``. - """ - from os.path import dirname - py_dir = dirname(dirname(__file__)) - return unittest.defaultTestLoader.discover(py_dir, - top_level_dir=dirname(py_dir)) - - -def runtests(): - """Run all tests in hpsspy.test.test_*. - """ - # Load all TestCase classes from hpsspy/test/test_*.py - tests = hpsspy_test_suite() - # Run them - unittest.TextTestRunner(verbosity=2).run(tests) - - -if __name__ == "__main__": - runtests() diff --git a/hpsspy/test/test_os.py b/hpsspy/test/test_os.py index 493f2b0..a15ad55 100644 --- a/hpsspy/test/test_os.py +++ b/hpsspy/test/test_os.py @@ -14,29 +14,185 @@ # import json # from pkg_resources import resource_filename import os -from ..os import chmod, lstat, stat +from ..os._os import chmod, listdir, makedirs, mkdir, lstat, stat from ..os.path import isdir, isfile, islink +from .. import HpssOSError from .test_util import MockHpss +mock_available = True +try: + from unittest.mock import call, patch, MagicMock +except ImportError: + mock_available = False + class TestOs(MockHpss): """Test the functions in the os subpackage. """ + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") + def test_chmod(self): + """Test the chmod() function. + """ + with patch('hpsspy.os._os.hsi') as h: + h.return_value = '** Error!' + with self.assertRaises(HpssOSError) as err: + chmod('/home/b/bweaver/foo.txt', 0o664) + self.assertEqual(str(err.exception), "** Error!") + h.assert_called_with('chmod', '436', '/home/b/bweaver/foo.txt') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = 'All good!' + chmod('/home/b/bweaver/foo.txt', 0o664) + h.assert_called_with('chmod', '436', '/home/b/bweaver/foo.txt') + + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") + def test_listdir(self): + """Test the listdir() function. + """ + with patch('hpsspy.os._os.hsi') as h: + h.return_value = '** Error!' + with self.assertRaises(HpssOSError) as err: + files = listdir('/home/b/bweaver') + self.assertEqual(str(err.exception), "** Error!") + h.assert_called_with('ls', '-la', '/home/b/bweaver') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = '/home/b/bweaver:\nGarbage line' + with self.assertRaises(HpssOSError) as err: + files = listdir('/home/b/bweaver') + self.assertEqual(str(err.exception), + "Could not match line!\nGarbage line") + h.assert_called_with('ls', '-la', '/home/b/bweaver') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = ('/home/b/bweaver:\n' + '-rw-rw---- 1 bweaver desi ' + + '29956061184 May 15 2014 cosmos_nvo.tar\n' + + '-rw-rw---- 1 bweaver desi ' + + ' 61184 May 15 2014 cosmos_nvo.tar.idx\n') + files = listdir('/home/b/bweaver') + h.assert_called_with('ls', '-la', '/home/b/bweaver') + self.assertTrue(files[0].ishtar) + + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") + def test_makedirs(self): + """Test the makedirs() function. + """ + with patch('hpsspy.os._os.hsi') as h: + h.return_value = '** Error!' + with self.assertRaises(HpssOSError) as err: + makedirs('/home/b/bweaver', '2775') + self.assertEqual(str(err.exception), "** Error!") + h.assert_called_with('mkdir', '-p', '-m', '2775', + '/home/b/bweaver') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = 'All good!' + makedirs('/home/b/bweaver', '2775') + h.assert_called_with('mkdir', '-p', '-m', '2775', + '/home/b/bweaver') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = 'All good!' + makedirs('/home/b/bweaver') + h.assert_called_with('mkdir', '-p', '/home/b/bweaver') + + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") + def test_mkdir(self): + """Test the mkdir() function. + """ + with patch('hpsspy.os._os.hsi') as h: + h.return_value = '** Error!' + with self.assertRaises(HpssOSError) as err: + mkdir('/home/b/bweaver', '2775') + self.assertEqual(str(err.exception), "** Error!") + h.assert_called_with('mkdir', '-m', '2775', '/home/b/bweaver') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = 'All good!' + mkdir('/home/b/bweaver', '2775') + h.assert_called_with('mkdir', '-m', '2775', '/home/b/bweaver') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = 'All good!' + mkdir('/home/b/bweaver') + h.assert_called_with('mkdir', '/home/b/bweaver') + + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") def test_stat(self): """Test the stat() function. """ - s = stat("desi/cosmos_nvo.tar") - self.assertEqual(s.st_size, 29956061184) - self.assertEqual(s.st_mode, 33200) + with patch('hpsspy.os._os.hsi') as h: + h.return_value = '** Error!' + with self.assertRaises(HpssOSError) as err: + s = stat("desi/cosmos_nvo.tar") + self.assertEqual(str(err.exception), "** Error!") + h.assert_called_with('ls', '-ld', 'desi/cosmos_nvo.tar') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = 'Garbage line' + with self.assertRaises(HpssOSError) as err: + s = stat("desi/cosmos_nvo.tar") + self.assertEqual(str(err.exception), + "Could not match line!\nGarbage line") + h.assert_called_with('ls', '-ld', 'desi/cosmos_nvo.tar') + with patch('hpsspy.os._os.hsi') as h: + h.return_value = ('desi:\n-rw-rw---- 1 bweaver desi ' + + '29956061184 May 15 2014 cosmos_nvo.tar\n') + s = stat("desi/cosmos_nvo.tar") + h.assert_called_with('ls', '-ld', 'desi/cosmos_nvo.tar') + self.assertEqual(s.st_size, 29956061184) + self.assertEqual(s.st_mode, 33200) + with patch('hpsspy.os._os.hsi') as h: + h.return_value = ('desi:\n-rw-rw---- 1 bweaver desi ' + + '29956061184 May 15 2014 cosmos_nvo.tar\n' + + 'desi:\n-rw-rw---- 1 bweaver desi ' + + '29956061184 May 15 2014 cosmos_nvo.tar.idx\n') + with self.assertRaises(HpssOSError) as err: + s = stat("desi/cosmos_nvo.tar") + self.assertEqual(str(err.exception), + "Non-unique response for desi/cosmos_nvo.tar!") + h.assert_called_with('ls', '-ld', 'desi/cosmos_nvo.tar') + with patch('hpsspy.os._os.hsi') as h: + h.side_effect = [('lrwxrwxrwx 1 bweaver bweaver ' + + '21 Aug 22 2014 cosmo@ -> ' + + '/nersc/projects/cosmo\n'), + ('drwxrws--- 6 nugent cosmo ' + + '512 Dec 16 2016 cosmo')] + s = stat("cosmo") + self.assertTrue(s.isdir) + h.assert_has_calls([call('ls', '-ld', 'cosmo'), + call('ls', '-ld', '/nersc/projects/cosmo')]) + # + # This may be pointing to some unexpected behavior. + # + # with patch('hpsspy.os._os.hsi') as h: + # h.side_effect = [('lrwxrwxrwx 1 bweaver bweaver ' + + # '21 Aug 22 2014 cosmo@ -> ' + + # 'cosmo.old\n'), + # ('drwxrws--- 6 nugent cosmo ' + + # '512 Dec 16 2016 cosmo.old')] + # s = stat("cosmo") + # self.assertTrue(s.isdir) + # h.assert_has_calls([call('ls', '-ld', 'cosmo'), + # call('ls', '-ld', 'cosmo.old')]) + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") def test_lstat(self): """Test the lstat() function. """ - s = lstat("cosmo") - self.assertTrue(s.islink) - s = lstat("test") - self.assertFalse(s.islink) + with patch('hpsspy.os._os.hsi') as h: + h.side_effect = [('lrwxrwxrwx 1 bweaver bweaver ' + + '21 Aug 22 2014 cosmo@ -> ' + + '/nersc/projects/cosmo\n'), + ('drwxrws--- 6 nugent cosmo ' + + '512 Dec 16 2016 cosmo')] + s = lstat("cosmo") + self.assertTrue(s.islink) + with patch('hpsspy.os._os.hsi') as h: + h.return_value = ('drwxr-sr-x 3 bweaver bweaver ' + + '512 Oct 4 2010 test') + s = lstat("test") + self.assertFalse(s.islink) def test_isdir(self): """Test the isdir() function. diff --git a/hpsspy/test/test_scan.py b/hpsspy/test/test_scan.py index 4169d94..03d3e28 100644 --- a/hpsspy/test/test_scan.py +++ b/hpsspy/test/test_scan.py @@ -20,7 +20,7 @@ from logging.handlers import MemoryHandler from pkg_resources import resource_filename, resource_stream from ..scan import (compile_map, files_to_hpss, physical_disks, - validate_configuration) + validate_configuration, iterrsplit) class TestHandler(MemoryHandler): @@ -85,6 +85,13 @@ def assertLog(self, index=-1, message=''): self.assertEqual(logger.handlers[0].buffer[index].getMessage(), message) + def test_iterrsplit(self): + """Test reverse re-joining a string. + """ + results = ['d', 'c_d', 'b_c_d', 'a_b_c_d'] + for i, s in enumerate(iterrsplit('a_b_c_d', '_')): + self.assertEqual(s, results[i]) + def test_compile_map(self): """Test compiling regular expressions in the JSON configuration file. """ diff --git a/hpsspy/test/test_util.py b/hpsspy/test/test_util.py index 78f5db1..a42cc1a 100644 --- a/hpsspy/test/test_util.py +++ b/hpsspy/test/test_util.py @@ -20,6 +20,12 @@ from .. import HpssOSError from ..util import HpssFile, get_hpss_dir, get_tmpdir, hsi, htar +mock_available = True +try: + from unittest.mock import patch, MagicMock +except ImportError: + mock_available = False + class MockHpss(unittest.TestCase): """Provide access to mock HPSS commands. @@ -127,6 +133,68 @@ def test_HpssFile(self): else: self.assertIsNone(f.htar_contents()) self.assertEqual(f.st_mtime, int(mtimes[i].strftime('%s'))) + # + # Test funky modes. + # + f = HpssFile(lspath, 's', 'rw-rw----', 1, 'bweaver', 'bweaver', + 1000, 'Feb', 2, '2016', 'fake.socket') + with self.assertRaises(AttributeError) as err: + m = f.st_mode + self.assertEqual(str(err.exception), + "Unknown file type, s, for fake.socket!") + + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") + def test_HpssFile_isdir(self): + """Test the isdir property on symbolic links. + """ + lspath = '/home/b/bweaver' + with patch('hpsspy.os.stat') as s: + m = MagicMock() + m.isdir = True + s.return_value = m + f = HpssFile(lspath, 'l', 'rwxrwxrwx', 1, 'bweaver', 'bweaver', + 21, 'Aug', 22, '2014', + 'cosmo@ -> /nersc/projects/cosmo') + self.assertTrue(f.islink) + self.assertTrue(f.isdir) + s.assert_called_with('/nersc/projects/cosmo') + with patch('hpsspy.os.stat') as s: + m = MagicMock() + m.isdir = False + s.return_value = m + f = HpssFile(lspath, 'l', 'rwxrwxrwx', 1, 'bweaver', 'bweaver', + 21, 'Aug', 22, '2014', 'cosmo@ -> cosmo.txt') + self.assertTrue(f.islink) + self.assertFalse(f.isdir) + s.assert_called_with('/home/b/bweaver/cosmo.txt') + + @unittest.skipUnless(mock_available, + "Skipping test that requires unittest.mock.") + def test_HpssFile_htar_contents(self): + """Test retrieval of htar file contents. + """ + lspath = '/home/b/bweaver' + f = HpssFile(lspath, '-', 'rw-rw-r--', 1, 'bweaver', 'bweaver', + 12345, 'Aug', 22, '2014', 'bundle.tar') + self.assertIsNone(f.htar_contents()) + f.ishtar = True + f._contents = ['foo.txt'] + self.assertListEqual(f.htar_contents(), ['foo.txt']) + f._contents = None + with patch('hpsspy.util.htar') as h: + h.return_value = ("HTAR: -rw-rw-r-- bweaver/bweaver 100 " + + "2012-07-03 12:00 foo.txt\n" + + "HTAR: -rw-rw-r-- bweaver/bweaver 100 " + + "2012-07-03 12:00 bar.txt", '') + self.assertListEqual(f.htar_contents(), + [('-', 'rw-rw-r--', 'bweaver', 'bweaver', + '100', '2012', '07', '03', '12:00', + 'foo.txt'), + ('-', 'rw-rw-r--', 'bweaver', 'bweaver', + '100', '2012', '07', '03', '12:00', + 'bar.txt')]) + h.assert_called_with('-t', '-f', '/home/b/bweaver/bundle.tar') def test_get_hpss_dir(self): """Test searching for the HPSS_DIR variable. @@ -162,12 +230,8 @@ def test_htar(self): """ command = ['-cvf', 'foo/bar.tar', '-H', 'crc:verify=all', 'bar'] out, err = htar(*command) - if self.PY3: - self.assertEqual(out.decode('utf8').strip(), ' '.join(command)) - self.assertEqual(err.decode('utf8').strip(), '') - else: - self.assertEqual(out.strip(), ' '.join(command)) - self.assertEqual(err.strip(), '') + self.assertEqual(out.strip(), ' '.join(command)) + self.assertEqual(err.strip(), '') def test_suite(): diff --git a/hpsspy/util.py b/hpsspy/util.py index a95e150..6fe35a5 100644 --- a/hpsspy/util.py +++ b/hpsspy/util.py @@ -146,7 +146,8 @@ def st_mode(self): try: mode = self._file_modes[self.raw_type] except KeyError: - raise + raise AttributeError(("Unknown file type, {0.raw_type}, " + + "for {0.name}!").format(self)) if self.raw_permission[0] == 'r': mode |= stat.S_IRUSR if self.raw_permission[1] == 'w': @@ -341,4 +342,4 @@ def htar(*args): err = errfile.read() outfile.close() errfile.close() - return (out, err) + return (out.decode('utf8'), err.decode('utf8')) diff --git a/setup.py b/setup.py index 4a8c5a0..58a8dd6 100755 --- a/setup.py +++ b/setup.py @@ -76,7 +76,7 @@ # # Test suite # -setup_keywords['test_suite'] = 'hpsspy.test.hpsspy_test_suite.hpsspy_test_suite' +setup_keywords['test_suite'] = 'hpsspy.test.hpsspy_test_suite' # # Run setup command. #