diff --git a/NEWS b/NEWS
index bda5e48..25ecf0e 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ Changes coming in the next release:
     * Issue 310 (pubDate should map to `published`, not `updated`)
     * Issue 313 (include the compression test files in MANIFEST.in)
     * Issue 315 (HTTP server for unit tests runs on 0.0.0.0)
+    * Issue 321 (malformed URIs can cause ValueError to be thrown)
     * Issue 325 (map `description_detail` to `summary_detail`)
 
 5.1 - December 2, 2011
diff --git a/feedparser/feedparser.py b/feedparser/feedparser.py
index 01f2887..229ea18 100644
--- a/feedparser/feedparser.py
+++ b/feedparser/feedparser.py
@@ -2542,15 +2542,24 @@ def _resolveRelativeURIs(htmlSource, baseURI, encoding, _type):
 def _makeSafeAbsoluteURI(base, rel=None):
     # bail if ACCEPTABLE_URI_SCHEMES is empty
     if not ACCEPTABLE_URI_SCHEMES:
-        return _urljoin(base, rel or u'')
+        try:
+            return _urljoin(base, rel or u'')
+        except ValueError:
+            return u''
     if not base:
         return rel or u''
     if not rel:
-        scheme = urlparse.urlparse(base)[0]
+        try:
+            scheme = urlparse.urlparse(base)[0]
+        except ValueError:
+            return u''
         if not scheme or scheme in ACCEPTABLE_URI_SCHEMES:
             return base
         return u''
-    uri = _urljoin(base, rel)
+    try:
+        uri = _urljoin(base, rel)
+    except ValueError:
+        return u''
     if uri.strip().split(':', 1)[0] not in ACCEPTABLE_URI_SCHEMES:
         return u''
     return uri
diff --git a/feedparser/feedparsertest.py b/feedparser/feedparsertest.py
index ff30edb..de77357 100644
--- a/feedparser/feedparsertest.py
+++ b/feedparser/feedparsertest.py
@@ -319,6 +319,20 @@ def fn(self):
     test_rel = _mktest(u'/new', u'http://d.test/new', 'relative uri')
     test_bad = _mktest(u'x://bad.test/', u'', 'unacceptable uri protocol')
 
+    def test_catch_ValueError(self):
+        'catch ValueError in Python 2.7 and up'
+        uri = u'http://bad]test/'
+        value1 = feedparser._makeSafeAbsoluteURI(uri)
+        value2 = feedparser._makeSafeAbsoluteURI(self.base, uri)
+        swap = feedparser.ACCEPTABLE_URI_SCHEMES
+        feedparser.ACCEPTABLE_URI_SCHEMES = ()
+        value3 = feedparser._makeSafeAbsoluteURI(self.base, uri)
+        feedparser.ACCEPTABLE_URI_SCHEMES = swap
+        # Only Python 2.7 and up throw a ValueError, otherwise uri is returned
+        self.assertTrue(value1 in (uri, u''))
+        self.assertTrue(value2 in (uri, u''))
+        self.assertTrue(value3 in (uri, u''))
+
 class TestConvertToIdn(unittest.TestCase):
     "Test IDN support (unavailable in Jython as of Jython 2.5.2)"
     # this is the greek test domain