Skip to content

Commit

Permalink
Merge pull request #854 from benkrikler/BK_add_xrootd_glob
Browse files Browse the repository at this point in the history
Add a first attempt at a glob file
  • Loading branch information
simonmichal committed May 2, 2019
2 parents 5e158d3 + a22f924 commit 13cba48
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 0 deletions.
1 change: 1 addition & 0 deletions bindings/python/libs/client/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import absolute_import, division, print_function

from .glob_funcs import glob, iglob
from .filesystem import FileSystem as FileSystem
from .file import File as File
from .url import URL as URL
Expand Down
129 changes: 129 additions & 0 deletions bindings/python/libs/client/glob_funcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#-------------------------------------------------------------------------------
# Copyright (c) 2012-2018 by European Organization for Nuclear Research (CERN)
# Author: Benjamin Krikler <b.krikler@cern.ch>
#-------------------------------------------------------------------------------
# This file is part of the XRootD software suite.
#
# XRootD is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# XRootD is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with XRootD. If not, see <http://www.gnu.org/licenses/>.
#
# In applying this licence, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
#-------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function

from XRootD.client.filesystem import FileSystem

import glob as gl
import os
import fnmatch
import sys
if sys.version_info[0] > 2:
from urllib.parse import urlparse
else:
from urlparse import urlparse


__all__ = ["glob", "iglob"]


def split_url(url):
    """Split a URL into its server prefix and its path component.

    Args:
        url (str): The URL to split, e.g. ``root://host//some/dir``.

    Returns:
        (tuple): ``(domain, path)`` where ``domain`` is rebuilt as
            ``<scheme>://<netloc>/`` and ``path`` is the path component
            reported by ``urlparse``.
    """
    pieces = urlparse(url)
    server = '{uri.scheme}://{uri.netloc}/'.format(uri=pieces)
    return server, pieces.path


def iglob(pathname, raise_error=False):
    """
    Lazily expand a wild-carded path, falling back to xrootd if needed.

    The standard-library glob is consulted first. Only when it produces no
    match at all is the pattern handed over to ``xrootd_iglob``. Several
    wild-cards may appear in the path.

    Args:
        pathname (str): The wild-carded path to be expanded.
        raise_error (bool): Forwarded to ``xrootd_iglob``. If True, a failed
            xrootd directory listing raises; if False (default), the
            offending directory is skipped, which will likely result in
            nothing being yielded.

    Yields:
        (str): A single path that matches the wild-carded string
    """
    # Give the ordinary python glob the first go.
    matched_locally = False
    for local_path in gl.iglob(pathname):
        matched_locally = True
        yield local_path

    # Any local hit means xrootd is never consulted.
    if matched_locally:
        return

    # Nothing found locally: ask xrootd instead.
    for remote_path in xrootd_iglob(pathname, raise_error=raise_error):
        yield remote_path


def xrootd_iglob(pathname, raise_error):
    """Generate the xrootd paths that match a wild-carded expression.

    The pattern is split into a parent directory and a final component. If
    the parent itself contains wild-cards it is expanded first by a
    recursive call; every resulting directory is then listed through
    ``FileSystem.dirlist`` and its entries are matched (case-sensitively)
    against the final component.

    Args:
        pathname (str): The wild-carded URL to expand.
        raise_error (bool): If True, a failed directory listing raises a
            RuntimeError; if False, that directory is silently skipped.

    Yields:
        (str): The directory joined with each matching entry name.

    Raises:
        RuntimeError: If the FileSystem object is falsy, or if a listing
            fails while ``raise_error`` is True.
    """
    # Separate the last path component (the pattern) from its parent.
    parent, pattern = os.path.split(pathname)

    # Expand wild-cards in the parent eagerly before listing anything.
    if gl.has_magic(parent):
        candidates = list(xrootd_iglob(parent, raise_error))
    else:
        candidates = [parent]

    for dirname in candidates:
        host, path = split_url(dirname)
        query = FileSystem(host)
        if not query:
            raise RuntimeError("Cannot prepare xrootd query")

        status, listing = query.dirlist(path)
        if status.error:
            if raise_error:
                raise RuntimeError(
                    "'{!s}' for path '{}'".format(status, dirname))
            continue

        for entry in listing.dirlist:
            name = entry.name
            # Drop the self/parent pseudo-entries and anything not matching.
            if name not in [".", ".."] and fnmatch.fnmatchcase(name, pattern):
                yield os.path.join(dirname, name)


def glob(pathname, raise_error=False):
    """
    Collect every path matching pathname into a list.

    This is the eager counterpart of ``iglob``: it simply exhausts that
    generator. Several wild-cards may appear in the path.

    Args:
        pathname (str): The wild-carded path to be expanded.
        raise_error (bool): Forwarded to ``iglob``. If True, a failed xrootd
            directory listing raises; if False (default), the offending
            directory is skipped, likely resulting in an empty list.

    Returns:
        (list): All paths (str) that match the wild-carded string; empty if
            nothing matched.
    """
    matches = iglob(pathname, raise_error=raise_error)
    return list(matches)
43 changes: 43 additions & 0 deletions bindings/python/tests/test_glob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pytest
import os
import glob as norm_glob
import XRootD.client.glob_funcs as glob
from pathlib2 import Path


@pytest.fixture
def tmptree(tmpdir):
    """Create a small directory tree inside pytest's ``tmpdir``.

    Layout produced:
        subdir1/a_file_0.txt, a_file_1.txt, a_file_2.txt
        subdir2/  (left empty)

    Returns:
        The ``tmpdir`` object that now contains the tree.
    """
    populated = tmpdir / "subdir1"
    populated.mkdir()
    (tmpdir / "subdir2").mkdir()

    for index in range(3):
        target = populated / ("a_file_%d.txt" % index)
        target.write_text(u"This is file %d\n" % index, encoding="utf-8")

    return tmpdir


def test_glob_local(tmptree):
    """Exercise glob on the local temporary tree built by ``tmptree``.

    Covers agreement with the standard-library glob for a missing path, the
    number of matches for several patterns, and the RuntimeError raised for
    a missing path when ``raise_error=True``.
    """
    missing = str(tmptree / "not-there")

    # A path that does not exist must give the same answer as the stdlib.
    normal_glob_result = norm_glob.glob(missing)
    assert glob.glob(missing) == normal_glob_result

    expected_counts = [
        (tmptree / "not-there", 0),
        (tmptree / "not-there*", 0),
        (tmptree / "sub*", 2),
        (tmptree / "subdir1" / "*txt", 3),
        (tmptree / "subdir*" / "*txt", 3),
    ]
    for pattern, count in expected_counts:
        assert len(glob.glob(str(pattern))) == count

    with pytest.raises(RuntimeError) as excinfo:
        glob.glob(missing, raise_error=True)
    # Inspect the message after the with-block, once the exception has been
    # captured; statements after the raising call inside it would never run.
    message = str(excinfo.value)
    assert "[ERROR]" in message
    assert str(tmptree) in message


def test_glob_remote(tmptree):
    """Exercise glob against root:// URLs on eospublic.cern.ch.

    NOTE(review): presumably needs network access to that server and the
    listed directories to exist -- confirm before running in CI.
    """
    misspelt_dir = glob.glob("root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoad/")
    assert len(misspelt_dir) == 0

    single_dir = glob.glob("root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoa*")
    assert len(single_dir) == 1

    dir_contents = glob.glob("root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/*")
    assert len(dir_contents) > 0

    nested_wildcards = glob.glob("root://eospublic.cern.ch//eos/root-*/cms_opendata_2012_nanoaod/*")
    assert len(nested_wildcards) > 0

    with pytest.raises(RuntimeError) as excinfo:
        glob.glob("root://eospublic.cern.ch//eos/root-NOTREAL/cms_opendata_2012_nanoaod/*", raise_error=True)
    # Checked after the with-block so the assertion actually executes.
    assert "[ERROR]" in str(excinfo.value)

0 comments on commit 13cba48

Please sign in to comment.