https://codereview.stackexchange.com/questions/196017/splitting-urls-into-hierarchy-level-directories

In [1]:
from __future__ import print_function
try:
    # Python 2: the URL helpers live in the top-level ``urlparse`` module.
    import urlparse
except ImportError:
    # Python 3: the same API is exposed as ``urllib.parse``.
    # Catch ImportError rather than ModuleNotFoundError: the latter is a
    # subclass that only exists since Python 3.6, so naming it here would
    # raise NameError on Python 3.0-3.5 instead of falling back.
    import urllib.parse as urlparse
    # ``xrange`` is gone in Python 3; ``range`` is the lazy equivalent.
    xrange = range

In [2]:
# Sample input passed to every implementation in this notebook.
URL = 'http://example.com/a/b/c/'
# Number of calls made inside each timed statement in the %timeit cells.
repetitions = 100

In [3]:
def get_domain_with_protocol(url):
    """Return the root of ``url`` in the form ``'<scheme>://<netloc>/'``.

    Only the scheme and network location of the parsed URL are kept; path,
    query and fragment are dropped. The result ends with ``'/'`` so that
    path segments can be appended to it directly.
    """
    parsed = urlparse.urlparse(url)
    return "{0}://{1}/".format(parsed.scheme, parsed.netloc)

In [4]:
def get_url_directories_op(url):
    """Return every directory level of ``url`` as a list of absolute URLs.

    The URL path is stripped of leading/trailing slashes and split on '/'.
    Each prefix of the resulting segments is re-joined and appended to the
    scheme-and-host root, shallowest level first.
    """
    segments = urlparse.urlparse(url).path.strip('/').split('/')
    root = get_domain_with_protocol(url)
    # ``stop`` is the exclusive slice end, so it runs from 1 to len(segments).
    return [
        root + '/'.join(segments[:stop])
        for stop in xrange(1, len(segments) + 1)
    ]
# Show the levels produced for the sample URL.
print(get_url_directories_op(URL))

['http://example.com/a', 'http://example.com/a/b', 'http://example.com/a/b/c']


In [5]:
def get_url_directories_combinations_enumerate(url):
    """Return every directory level of ``url``, driven by ``enumerate``.

    Same result as slicing by index: the stripped path is split on '/', and
    each prefix of the segments is joined back onto the scheme-and-host
    root. The list is ordered from the shallowest level to the deepest.
    """
    segments = urlparse.urlparse(url).path.strip('/').split('/')
    root = get_domain_with_protocol(url)
    # Counting from 1 makes ``depth`` usable directly as the slice end; the
    # segment itself is not needed, hence the throwaway ``_``.
    return [
        root + '/'.join(segments[:depth])
        for depth, _ in enumerate(segments, 1)
    ]
# Show the levels produced for the sample URL.
print(get_url_directories_combinations_enumerate(URL))

['http://example.com/a', 'http://example.com/a/b', 'http://example.com/a/b/c']


In [6]:
def get_url_directories_iter(url):
    """Lazily yield every directory level of ``url`` as an absolute URL.

    Generator counterpart of the list-building variants: the stripped path
    is split on '/', and each prefix of the segments is joined onto the
    scheme-and-host root, shallowest level first.
    """
    segments = urlparse.urlparse(url).path.strip('/').split('/')
    root = get_domain_with_protocol(url)
    # ``stop`` is the exclusive slice end, so it runs from 1 to len(segments).
    for stop in xrange(1, len(segments) + 1):
        yield root + '/'.join(segments[:stop])
# list() drains the generator so the yielded URLs can be printed.
print(list(get_url_directories_iter(URL)))

['http://example.com/a', 'http://example.com/a/b', 'http://example.com/a/b/c']


In [7]:
def accumulate_parts(parts, sep='/'):
    """Yield the running concatenation of ``parts`` joined by ``sep``.

    The first item is yielded as-is; every later item is appended to the
    previous result with ``sep`` in between.

    >>> list(accumulate_parts('abc'))
    ['a', 'a/b', 'a/b/c']

    :param parts: iterable of strings (any iterable, including iterators).
    :param sep: separator placed between consecutive parts.
    :return: generator of cumulative strings; empty if ``parts`` is empty.
    """
    parts_iter = iter(parts)
    try:
        substring = next(parts_iter)
    except StopIteration:
        # Empty input: finish the generator explicitly. A StopIteration that
        # escapes a generator body is converted to RuntimeError under
        # PEP 479 (the default from Python 3.7).
        return
    yield substring
    for part in parts_iter:
        substring += sep + part
        yield substring

In [8]:
# A string iterates character by character, so 'abc' acts as three parts.
list(accumulate_parts('abc'))

['a', 'a/b', 'a/b/c']

In [9]:
def get_url_directories_accumulate(url):
    """Lazily yield every directory level of ``url`` as an absolute URL.

    Instead of re-joining a slice for each level, the partial paths come
    from ``accumulate_parts``, which extends the previous one by a single
    segment. Each partial path is appended to the scheme-and-host root.
    """
    segments = urlparse.urlparse(url).path.strip('/').split('/')
    root = get_domain_with_protocol(url)
    for partial_path in accumulate_parts(segments):
        yield root + partial_path
# list() drains the generator so the cell displays the yielded URLs.
list(get_url_directories_accumulate(URL))

['http://example.com/a', 'http://example.com/a/b', 'http://example.com/a/b/c']

In [10]:
# The original list-comprehension implementation is the reference; every
# alternative must produce exactly the same list for the sample URL.
result_op = get_url_directories_op(URL)
assert all(
    # list() drains the generator variants and is a harmless copy for the
    # variant that already returns a list.
    list(variant(URL)) == result_op
    for variant in (
        get_url_directories_iter,
        get_url_directories_combinations_enumerate,
        get_url_directories_accumulate,
    )
)

In [11]:
# Benchmark: each timed statement calls the xrange/list-comprehension variant `repetitions` times.
%timeit [get_url_directories_op(URL) for _ in range(repetitions)]

957 µs ± 65.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
# Benchmark: list() drains the generator so the timing includes producing every URL.
%timeit [list(get_url_directories_iter(URL)) for _ in range(repetitions)]

940 µs ± 48.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [13]:
# Benchmark: each timed statement calls the enumerate variant `repetitions` times.
%timeit [get_url_directories_combinations_enumerate(URL) for _ in range(repetitions)]

906 µs ± 5.78 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [14]:
# Benchmark: list() drains the generator so the timing includes producing every URL.
%timeit [list(get_url_directories_accumulate(URL)) for _ in range(repetitions)]

910 µs ± 13.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
