From 054260b40dda4f9f902d9269969d6484a47b19a1 Mon Sep 17 00:00:00 2001 From: Val Markovic Date: Tue, 9 Feb 2016 13:58:17 -0800 Subject: [PATCH] Tests for ToUnicode and ToBytes + bugfixes --- CONTRIBUTING.md | 4 + ycmd/completers/cs/solutiondetection.py | 4 +- ycmd/completers/general/filename_completer.py | 6 +- ycmd/completers/go/gocode_completer.py | 6 +- ycmd/hmac_plugin.py | 4 +- ycmd/request_wrap.py | 4 +- ycmd/tests/utils_test.py | 131 ++++++++++++++++++ ycmd/utils.py | 29 +++- 8 files changed, 171 insertions(+), 17 deletions(-) create mode 100644 ycmd/tests/utils_test.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index afd840af15..36a4b2a8fa 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -132,6 +132,10 @@ Here's what you should watch out for: which will convert `bytes` to a real `str` (again, only on py2). Heed this advice for your own sanity; behind it are 40 hours of debugging and an instance of tears-down-the-cheek crying at 2 am. +- **Use the `ToBytes()` and `ToUnicode()` helper functions from + `ycmd/utils.py`.** They work around weirdness, complexity and bugs in + `python-future` and behave as you would expect. They're also extensively + covered with tests. - Use `from future.utils import iteritems` then `for key, value in iteritems( dict_obj )` to efficiently iterate dicts on py2 & py3 diff --git a/ycmd/completers/cs/solutiondetection.py b/ycmd/completers/cs/solutiondetection.py index 4c8c802010..b8d7d7e543 100644 --- a/ycmd/completers/cs/solutiondetection.py +++ b/ycmd/completers/cs/solutiondetection.py @@ -29,7 +29,7 @@ import logging from inspect import getfile from ycmd import extra_conf_store -from ycmd.utils import ToUnicodeIfNeeded +from ycmd.utils import ToUnicode __logger = logging.getLogger( __name__ ) @@ -58,7 +58,7 @@ def PollModule( module, filepath ): try: module_hint = module.CSharpSolutionFile( filepath ) __logger.info( u'extra_conf_store suggests {0} as solution file'.format( - ToUnicodeIfNeeded( module_hint ) ) ) + ToUnicode( module_hint ) ) ) if module_hint: # received a full path or one relative to the config's location? candidates = [ module_hint, diff --git a/ycmd/completers/general/filename_completer.py b/ycmd/completers/general/filename_completer.py index 6d55b64deb..e4986e07f1 100644 --- a/ycmd/completers/general/filename_completer.py +++ b/ycmd/completers/general/filename_completer.py @@ -33,7 +33,7 @@ GetIncludeStatementValue ) from ycmd.completers.cpp.clang_completer import InCFamilyFile from ycmd.completers.cpp.flags import Flags -from ycmd.utils import ToUnicodeIfNeeded, OnWindows +from ycmd.utils import ToUnicode, OnWindows from ycmd import responses EXTRA_INFO_MAP = { 1 : '[File]', 2 : '[Dir]', 3 : '[File&Dir]' } @@ -145,7 +145,7 @@ def GetPathsIncludeCase( self, path_dir, quoted_include, filepath, include_paths.extend( quoted_include_paths ) for include_path in include_paths: - unicode_path = ToUnicodeIfNeeded( os.path.join( include_path, path_dir ) ) + unicode_path = ToUnicode( os.path.join( include_path, path_dir ) ) try: # We need to pass a unicode string to get unicode strings out of # listdir. @@ -194,7 +194,7 @@ def _GetPathsStandardCase( path_dir, use_working_dir, filepath, working_dir ): try: # We need to pass a unicode string to get unicode strings out of # listdir. - relative_paths = os.listdir( ToUnicodeIfNeeded( absolute_path_dir ) ) + relative_paths = os.listdir( ToUnicode( absolute_path_dir ) ) except: relative_paths = [] diff --git a/ycmd/completers/go/gocode_completer.py b/ycmd/completers/go/gocode_completer.py index 1e72e55b60..df9fe3c1a0 100755 --- a/ycmd/completers/go/gocode_completer.py +++ b/ycmd/completers/go/gocode_completer.py @@ -30,7 +30,7 @@ from ycmd import responses from ycmd import utils -from ycmd.utils import ToBytes, ToUnicodeIfNeeded +from ycmd.utils import ToBytes, ToUnicode from ycmd.completers.completer import Completer GO_FILETYPES = set( [ 'go' ] ) @@ -99,7 +99,7 @@ def ComputeCandidatesInner( self, request_data ): contents = contents ) try: - resultdata = json.loads( ToUnicodeIfNeeded( stdoutdata ) ) + resultdata = json.loads( ToUnicode( stdoutdata ) ) except ValueError: _logger.error( PARSE_ERROR_MESSAGE ) raise RuntimeError( PARSE_ERROR_MESSAGE ) @@ -217,7 +217,7 @@ def _GoToDefinition( self, request_data ): def _ConstructGoToFromResponse( self, response_str ): - parsed = json.loads( ToUnicodeIfNeeded( response_str ) ) + parsed = json.loads( ToUnicode( response_str ) ) if 'filename' in parsed and 'column' in parsed: return responses.BuildGoToResponse( parsed[ 'filename' ], int( parsed[ 'line' ] ), diff --git a/ycmd/hmac_plugin.py b/ycmd/hmac_plugin.py index 218dbf9d72..431bb5aa52 100644 --- a/ycmd/hmac_plugin.py +++ b/ycmd/hmac_plugin.py @@ -29,7 +29,7 @@ from base64 import b64decode, b64encode from bottle import request, response, abort from ycmd import hmac_utils -from ycmd.utils import ToBytes, ToUnicodeIfNeeded +from ycmd.utils import ToBytes, ToUnicode _HMAC_HEADER = 'x-ycm-hmac' _HOST_HEADER = 'host' @@ -91,5 +91,5 @@ def RequestAuthenticated( method, path, body, hmac_secret ): def SetHmacHeader( body, hmac_secret ): - response.headers[ _HMAC_HEADER ] = ToUnicodeIfNeeded( b64encode( + response.headers[ _HMAC_HEADER ] = ToUnicode( b64encode( hmac_utils.CreateHmac( ToBytes( body ), ToBytes( hmac_secret ) ) ) ) diff --git a/ycmd/request_wrap.py b/ycmd/request_wrap.py index 55c5136a1e..1fdfb92fbd 100644 --- a/ycmd/request_wrap.py +++ b/ycmd/request_wrap.py @@ -23,7 +23,7 @@ standard_library.install_aliases() from builtins import * # noqa -from ycmd.utils import ToUnicodeIfNeeded, ToUtf8IfNeeded +from ycmd.utils import ToUnicode, ToUtf8IfNeeded from ycmd.identifier_utils import StartOfLongestIdentifierEndingAtIndex from ycmd.request_validation import EnsureRequestValid @@ -104,7 +104,7 @@ def CompletionStartColumn( line_value, column_num, filetype ): # to walk codepoints for identifier checks. utf8_line_value = ToUtf8IfNeeded( line_value ) - unicode_line_value = ToUnicodeIfNeeded( line_value ) + unicode_line_value = ToUnicode( line_value ) codepoint_column_num = len( str( utf8_line_value[ : column_num -1 ], 'utf8' ) ) + 1 diff --git a/ycmd/tests/utils_test.py b/ycmd/tests/utils_test.py new file mode 100644 index 0000000000..f7e659268d --- /dev/null +++ b/ycmd/tests/utils_test.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# +# Copyright (C) 2016 ycmd contributors. +# +# This file is part of YouCompleteMe. +# +# YouCompleteMe is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# YouCompleteMe is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with YouCompleteMe. If not, see . + +# Intentionally not importing unicode_literals! +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() +from builtins import * # noqa +from future.utils import PY2 + +from nose.tools import eq_, ok_ +from ycmd import utils + +# NOTE: isinstance() vs type() is carefully used in this test file. Before +# changing things here, read the comments in utils.ToBytes. + + +if PY2: + def ToBytes_Py2Bytes_test(): + value = utils.ToBytes( bytes( 'abc' ) ) + eq_( value, bytes( 'abc' ) ) + eq_( type( value ), bytes ) + + + def ToBytes_Py2Str_test(): + value = utils.ToBytes( 'abc' ) + eq_( value, bytes( 'abc' ) ) + eq_( type( value ), bytes ) + + + def ToBytes_Py2FutureStr_test(): + value = utils.ToBytes( str( 'abc' ) ) + eq_( value, bytes( 'abc' ) ) + eq_( type( value ), bytes ) + + + def ToBytes_Py2Unicode_test(): + value = utils.ToBytes( u'abc' ) + eq_( value, bytes( 'abc' ) ) + eq_( type( value ), bytes ) + + + def ToBytes_Py2Int_test(): + value = utils.ToBytes( 123 ) + eq_( value, bytes( '123' ) ) + eq_( type( value ), bytes ) + + +def ToBytes_Bytes_test(): + value = utils.ToBytes( bytes( b'abc' ) ) + eq_( value, bytes( b'abc' ) ) + eq_( type( value ), bytes ) + + +def ToBytes_Str_test(): + value = utils.ToBytes( u'abc' ) + eq_( value, bytes( b'abc' ) ) + eq_( type( value ), bytes ) + + +def ToBytes_Int_test(): + value = utils.ToBytes( 123 ) + eq_( value, bytes( b'123' ) ) + eq_( type( value ), bytes ) + + +if PY2: + def ToUnicode_Py2Bytes_test(): + value = utils.ToUnicode( bytes( 'abc' ) ) + eq_( value, u'abc' ) + ok_( isinstance( value, str ) ) + + + def ToUnicode_Py2Str_test(): + value = utils.ToUnicode( 'abc' ) + eq_( value, u'abc' ) + ok_( isinstance( value, str ) ) + + + def ToUnicode_Py2FutureStr_test(): + value = utils.ToUnicode( str( 'abc' ) ) + eq_( value, u'abc' ) + ok_( isinstance( value, str ) ) + + + def ToUnicode_Py2Unicode_test(): + value = utils.ToUnicode( u'abc' ) + eq_( value, u'abc' ) + ok_( isinstance( value, str ) ) + + + def ToUnicode_Py2Int_test(): + value = utils.ToUnicode( 123 ) + eq_( value, u'123' ) + ok_( isinstance( value, str ) ) + + +def ToUnicode_Bytes_test(): + value = utils.ToUnicode( bytes( b'abc' ) ) + eq_( value, u'abc' ) + ok_( isinstance( value, str ) ) + + +def ToUnicode_Str_test(): + value = utils.ToUnicode( u'abc' ) + eq_( value, u'abc' ) + ok_( isinstance( value, str ) ) + + +def ToUnicode_Int_test(): + value = utils.ToUnicode( 123 ) + eq_( value, u'123' ) + ok_( isinstance( value, str ) ) diff --git a/ycmd/utils.py b/ycmd/utils.py index 4df93bd410..91e3c99d1f 100644 --- a/ycmd/utils.py +++ b/ycmd/utils.py @@ -65,7 +65,9 @@ def ToUtf8IfNeeded( value ): return bytes( str( value ) ) -def ToUnicodeIfNeeded( value ): +# Returns a unicode type; either the new python-future str type or the real +# unicode type. The difference shouldn't matter. +def ToUnicode( value ): if isinstance( value, str ): return value if isinstance( value, bytes ): @@ -74,6 +76,8 @@ def ToUnicodeIfNeeded( value ): return str( value ) +# Consistently returns the new bytes() type from python-future. Assumes incoming +# strings are either UTF-8 or unicode (which is converted to UTF-8). def ToBytes( value ): # This is tricky. On py2, the bytes type from builtins (from python-future) is # a subclass of str. So all of the following are true: @@ -81,17 +85,32 @@ def ToBytes( value ): # isinstance(bytes(), str) # But they don't behave the same in one important aspect: iterating over a # bytes instance yields ints, while iterating over a (raw, py2) str yields - # chars. + # chars. We want consistent behavior so we force the use of bytes(). if type( value ) == bytes: return value # This is meant to catch Python 2's str type and the str on Python 3, which is # unicode. - if isinstance( value, bytes ) or isinstance( value, str ): - return bytes( value, encoding = 'utf-8' ) + if isinstance( value, bytes ): + return bytes( value, encoding = 'utf8' ) + + if isinstance( value, str ): + # On py2, with `from builtins import *` imported, the following is true: + # + # bytes(str(u'abc'), 'utf8') == b"b'abc'" + # + # Obviously this is a bug in python-future. So we work around it. Also filed + # upstream at: https://github.com/PythonCharmers/python-future/issues/193 + # We can't just return value.encode( 'utf8' ) on both py2 & py3 because on + # py2 that returns the built-in str type instead of the newbytes type from + # python-future. + if PY2: + return bytes( value.encode( 'utf8' ), encoding = 'utf8' ) + else: + return bytes( value, encoding = 'utf8' ) # This is meant to catch `int` and similar non-string/bytes types. - return bytes( str( value ), encoding = 'utf-8' ) + return ToBytes( str( value ) ) def PathToTempDir():