Skip to content

Commit

Permalink
Handle CDXException and respond with HTTP 400 Bad Request (#626)
Browse files Browse the repository at this point in the history
* FrontendApp: forward HTTP status of CDX backend to allow clients
to handle errors more easily

* Handle CDXExceptions properly, returning the exception status code
- make sure that CDXException is raised early so that it can be handled
  in the IndexHandler
  • Loading branch information
sebastian-nagel committed Apr 27, 2021
1 parent 13ea5ba commit 212691b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
19 changes: 17 additions & 2 deletions pywb/warcserver/handlers.py
Expand Up @@ -4,6 +4,7 @@

from warcio.recordloader import ArchiveLoadFailed

from pywb.warcserver.index.cdxobject import CDXException
from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader

Expand Down Expand Up @@ -98,13 +99,27 @@ def __call__(self, params):
content_type, res = handler(cdx_iter, fields, params)
out_headers = {'Content-Type': content_type}

def check_str(lines):
first_line = None
try:
# raise exceptions early so that they can be handled properly
first_line = next(res)
except StopIteration:
pass
except CDXException as e:
errs = dict(last_exc=e)
return None, None, errs

def check_str(first_line, lines):
    """Yield ``first_line`` (unless it is None) and then every item of ``lines``.

    Items that are text (``six.text_type``) are encoded to UTF-8 bytes
    before being yielded; all other items are passed through unchanged.
    """
    def _to_bytes(item):
        # Only text needs encoding; anything else is yielded as-is.
        return item.encode('utf-8') if isinstance(item, six.text_type) else item

    # The first line was already pulled off the iterator by the caller,
    # so it has to be re-emitted ahead of the remaining lines.
    if first_line is not None:
        yield _to_bytes(first_line)

    for entry in lines:
        yield _to_bytes(entry)

return out_headers, check_str(res), errs
return out_headers, check_str(first_line, res), errs


#=============================================================================
Expand Down
8 changes: 8 additions & 0 deletions tests/test_zipnum_auto_dir.py
Expand Up @@ -46,5 +46,13 @@ def test_paged_index_query(self):
assert lines[2] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7}
assert lines[3] == {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8}

def test_paged_index_query_out_of_range(self):
    """Query page 10 of the paged index and expect HTTP 400 with a JSON error message."""
    query_url = '/testzip/cdx?url=http://iana.org/domains/&matchType=domain&output=json&showPagedIndex=true&pageSize=4&page=10'
    # expect_errors=True is passed so the non-2xx response can be inspected below.
    response = self.testapp.get(query_url, expect_errors=True)

    expected_body = {"message": "Page 10 invalid: First Page is 0, Last Page is 9"}
    assert response.status_code == 400
    assert response.json == expected_body



0 comments on commit 212691b

Please sign in to comment.