Skip to content

Commit

Permalink
loaders: add to_file_url() for converting between filename and file://,
Browse files Browse the repository at this point in the history
used in live rewrite and tests
  • Loading branch information
ikreymer committed Jan 11, 2015
1 parent ba853a4 commit cf0a215
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 27 deletions.
8 changes: 3 additions & 5 deletions pywb/rewrite/rewrite_live.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
import logging
import os

from urlparse import urlsplit, urljoin
from urllib import pathname2url
from urlparse import urlsplit

from pywb.utils.loaders import is_http, LimitReader, BlockLoader
from pywb.utils.loaders import is_http, LimitReader, BlockLoader, to_file_url
from pywb.utils.loaders import extract_client_cookie
from pywb.utils.timeutils import datetime_to_timestamp
from pywb.utils.statusandheaders import StatusAndHeaders
Expand Down Expand Up @@ -187,8 +186,7 @@ def fetch_request(self, url, urlrewriter,
else:
is_remote = False
if not url.startswith('file:'):
url = os.path.abspath(url)
url = urljoin('file:', pathname2url(url))
url = to_file_url(url)

# explicit urlkey may be passed in (say for testing)
if not urlkey:
Expand Down
21 changes: 16 additions & 5 deletions pywb/utils/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import hmac
import urllib
import urllib2
import urlparse
import time
import pkg_resources
from io import open
Expand All @@ -17,6 +18,15 @@ def is_http(filename):
return filename.startswith(('http://', 'https://'))


#=================================================================
def to_file_url(filename):
    """ Convert a filename to a file:// url

    The filename is made absolute with os.path.abspath(), converted with
    pathname2url() and joined onto the 'file:' scheme with urljoin().

    A value that already starts with 'file:' is returned unchanged, so the
    function can safely be applied to something that is already a file url.

    :param filename: local path (relative or absolute), or a file: url
    :return: the resulting file: url
    """
    # already a file url -- abspath() would treat the scheme as just
    # another path component and produce a bogus url
    if filename.startswith('file:'):
        return filename

    # resolve the helpers locally so they are found under both the
    # Python 2 (urlparse / urllib) and the Python 3 (urllib.parse /
    # urllib.request) module layouts
    try:
        from urlparse import urljoin
        from urllib import pathname2url
    except ImportError:
        from urllib.parse import urljoin
        from urllib.request import pathname2url

    path = os.path.abspath(filename)
    return urljoin('file:', pathname2url(path))


#=================================================================
def load_yaml_config(config_file):
import yaml
Expand Down Expand Up @@ -129,13 +139,14 @@ def load_file_or_resource(self, url, offset=0, length=-1):
# if starting with . or /, can only be a file path..
file_only = url.startswith(('/', '.'))

# convert to filename
if url.startswith('file://'):
file_only = True
url = urllib.url2pathname(url[len('file://'):])

try:
# first, try as file
if url.startswith('file://'):
file_only = True
afile = urllib.urlopen(url)
else:
afile = open(url, 'rb')
afile = open(url, 'rb')

except IOError:
if file_only:
Expand Down
9 changes: 2 additions & 7 deletions pywb/utils/test/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
100
# no length specified, read full amount requested
>>> len(BlockLoader().load(to_local_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
>>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
400
# HMAC Cookie Maker
Expand Down Expand Up @@ -63,11 +63,9 @@
import re
import os
from io import BytesIO
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url
from pywb.utils.loaders import LimitReader, extract_client_cookie

from urllib import pathname2url

from pywb import get_test_dir

test_cdx_dir = get_test_dir() + 'cdx/'
Expand All @@ -84,9 +82,6 @@ def seek_read_full(seekable_reader, offset):
seekable_reader.readline() #skip
return seekable_reader.readline()

def to_local_url(filename):
filename = os.path.abspath(filename)
return 'file://' + pathname2url(filename)

if __name__ == "__main__":
import doctest
Expand Down
14 changes: 4 additions & 10 deletions pywb/warc/test/test_pathresolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@
RedisResolver('redis://myhost.example.com:1234/1')
# a file
>>> r = make_best_resolver(to_local_url(os.path.realpath(__file__)))
>>> r = make_best_resolver(to_file_url(os.path.realpath(__file__)))
>>> r.__class__.__name__
'PathIndexResolver'
# a dir
>>> path = os.path.realpath(__file__)
>>> r = make_best_resolver(to_local_url(os.path.dirname(path)))
>>> r = make_best_resolver(to_file_url(os.path.dirname(path)))
>>> r.__class__.__name__
'PrefixResolver'
Expand All @@ -54,9 +54,9 @@
from pywb import get_test_dir
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
import os
from pywb.utils.loaders import to_file_url

from urllib import pathname2url
import os

from fakeredis import FakeStrictRedis
from mock import patch
Expand All @@ -69,12 +69,6 @@ def init_redis_resolver():
def hset_path(filename, path):
redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path)

def to_local_url(filename):
filename = os.path.abspath(filename)
res = 'file:' + pathname2url(filename)
#print(res)
return res

redis_resolver = init_redis_resolver()

#=================================================================
Expand Down

0 comments on commit cf0a215

Please sign in to comment.