Skip to content

Commit

Permalink
Fixed non-ascii string issue
Browse files Browse the repository at this point in the history
  • Loading branch information
twolfson committed Apr 20, 2015
1 parent c6c291b commit cd6cefe
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
31 changes: 28 additions & 3 deletions resolve_link/__init__.py
@@ -1,12 +1,24 @@
# Load in our dependencies
import re
import sys
try:
from urlparse import urlsplit, urlunsplit
except ImportError:
from urllib.parse import urlsplit, urlunsplit


# Define our constants
TLD_REGEXP = r'\.([a-zA-Z]*?)$'
# Via https://hg.python.org/cpython/file/2.7/Lib/urlparse.py#l317
PY3 = sys.version_info[0] == 3
try:
unicode
except NameError:
def _is_unicode(x):
return 0
else:
def _is_unicode(x):
return isinstance(x, unicode)


# Define our library
Expand All @@ -17,6 +29,12 @@ def resolve_link(src_url, target_url):
:param str target_url: Canonical URL to try to match if on the same domain
:returns str ret_val: Completed URL formatted via `urllib.parse`
"""
# If `src_url` is a unicode string, encode it to a byte string
encoding = None
if _is_unicode(src_url):
encoding = 'latin-1'
src_url = src_url.encode(encoding)

# Parse the src URL
src_url_parts = urlsplit(src_url)

Expand Down Expand Up @@ -60,7 +78,14 @@ def resolve_link(src_url, target_url):
src_url_dict['scheme'] = target_url_parts.scheme
src_url_dict['netloc'] = target_url_parts.netloc

# Return the completed src URL
# Construct the completed src URL
# https://docs.python.org/2/library/urlparse.html#urlparse.urlsplit
return urlunsplit((src_url_dict['scheme'], src_url_dict['netloc'], src_url_dict['path'],
src_url_dict['query'], src_url_dict['fragment']))
ret_url = urlunsplit((src_url_dict['scheme'], src_url_dict['netloc'], src_url_dict['path'],
src_url_dict['query'], src_url_dict['fragment']))

# If we originally received a unicode string, decode the result back to unicode
if encoding:
ret_url = ret_url.decode(encoding)

# Return our URL
return ret_url
11 changes: 11 additions & 0 deletions resolve_link/test/test.py
Expand Up @@ -70,3 +70,14 @@ def test_username(self):
"""
result = resolve_link('underdogio', 'https://github.com/')
self.assertEqual(result, 'https://github.com/underdogio')

def test_unicode_username(self):
    """
    A unicode username to our target site when resolved
        has no errors
        points to the username on the target site

    This is a regression test for https://github.com/underdogio/python-resolve-link/issues/2
    """
    # Non-ASCII (but latin-1 representable) username as a unicode literal
    unicode_username = u'underdogi\xf5'
    expected_url = u'https://github.com/underdogi\xf5'

    # Resolving must not raise and must yield the profile URL on the target site
    resolved_url = resolve_link(unicode_username, 'https://github.com/')
    self.assertEqual(resolved_url, expected_url)

0 comments on commit cd6cefe

Please sign in to comment.