Skip to content

Commit

Permalink
Tests for ToUnicode and ToBytes + bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Valloric committed Feb 12, 2016
1 parent c8580d4 commit 054260b
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 17 deletions.
4 changes: 4 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ Here's what you should watch out for:
which will convert `bytes` to a real `str` (again, only on py2). Heed this
advice for your own sanity; behind it are 40 hours of debugging and an
instance of tears-down-the-cheek crying at 2 am.
- **Use the `ToBytes()` and `ToUnicode()` helper functions from
`ycmd/utils.py`.** They work around weirdness, complexity and bugs in
`python-future` and behave as you would expect. They're also extensively
covered with tests.
- Use `from future.utils import iteritems`
then `for key, value in iteritems( dict_obj )` to efficiently iterate dicts on
py2 & py3
Expand Down
4 changes: 2 additions & 2 deletions ycmd/completers/cs/solutiondetection.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import logging
from inspect import getfile
from ycmd import extra_conf_store
from ycmd.utils import ToUnicodeIfNeeded
from ycmd.utils import ToUnicode


__logger = logging.getLogger( __name__ )
Expand Down Expand Up @@ -58,7 +58,7 @@ def PollModule( module, filepath ):
try:
module_hint = module.CSharpSolutionFile( filepath )
__logger.info( u'extra_conf_store suggests {0} as solution file'.format(
ToUnicodeIfNeeded( module_hint ) ) )
ToUnicode( module_hint ) ) )
if module_hint:
# received a full path or one relative to the config's location?
candidates = [ module_hint,
Expand Down
6 changes: 3 additions & 3 deletions ycmd/completers/general/filename_completer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
GetIncludeStatementValue )
from ycmd.completers.cpp.clang_completer import InCFamilyFile
from ycmd.completers.cpp.flags import Flags
from ycmd.utils import ToUnicodeIfNeeded, OnWindows
from ycmd.utils import ToUnicode, OnWindows
from ycmd import responses

EXTRA_INFO_MAP = { 1 : '[File]', 2 : '[Dir]', 3 : '[File&Dir]' }
Expand Down Expand Up @@ -145,7 +145,7 @@ def GetPathsIncludeCase( self, path_dir, quoted_include, filepath,
include_paths.extend( quoted_include_paths )

for include_path in include_paths:
unicode_path = ToUnicodeIfNeeded( os.path.join( include_path, path_dir ) )
unicode_path = ToUnicode( os.path.join( include_path, path_dir ) )
try:
# We need to pass a unicode string to get unicode strings out of
# listdir.
Expand Down Expand Up @@ -194,7 +194,7 @@ def _GetPathsStandardCase( path_dir, use_working_dir, filepath, working_dir ):
try:
# We need to pass a unicode string to get unicode strings out of
# listdir.
relative_paths = os.listdir( ToUnicodeIfNeeded( absolute_path_dir ) )
relative_paths = os.listdir( ToUnicode( absolute_path_dir ) )
except:
relative_paths = []

Expand Down
6 changes: 3 additions & 3 deletions ycmd/completers/go/gocode_completer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

from ycmd import responses
from ycmd import utils
from ycmd.utils import ToBytes, ToUnicodeIfNeeded
from ycmd.utils import ToBytes, ToUnicode
from ycmd.completers.completer import Completer

GO_FILETYPES = set( [ 'go' ] )
Expand Down Expand Up @@ -99,7 +99,7 @@ def ComputeCandidatesInner( self, request_data ):
contents = contents )

try:
resultdata = json.loads( ToUnicodeIfNeeded( stdoutdata ) )
resultdata = json.loads( ToUnicode( stdoutdata ) )
except ValueError:
_logger.error( PARSE_ERROR_MESSAGE )
raise RuntimeError( PARSE_ERROR_MESSAGE )
Expand Down Expand Up @@ -217,7 +217,7 @@ def _GoToDefinition( self, request_data ):


def _ConstructGoToFromResponse( self, response_str ):
parsed = json.loads( ToUnicodeIfNeeded( response_str ) )
parsed = json.loads( ToUnicode( response_str ) )
if 'filename' in parsed and 'column' in parsed:
return responses.BuildGoToResponse( parsed[ 'filename' ],
int( parsed[ 'line' ] ),
Expand Down
4 changes: 2 additions & 2 deletions ycmd/hmac_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from base64 import b64decode, b64encode
from bottle import request, response, abort
from ycmd import hmac_utils
from ycmd.utils import ToBytes, ToUnicodeIfNeeded
from ycmd.utils import ToBytes, ToUnicode

_HMAC_HEADER = 'x-ycm-hmac'
_HOST_HEADER = 'host'
Expand Down Expand Up @@ -91,5 +91,5 @@ def RequestAuthenticated( method, path, body, hmac_secret ):


def SetHmacHeader( body, hmac_secret ):
response.headers[ _HMAC_HEADER ] = ToUnicodeIfNeeded( b64encode(
response.headers[ _HMAC_HEADER ] = ToUnicode( b64encode(
hmac_utils.CreateHmac( ToBytes( body ), ToBytes( hmac_secret ) ) ) )
4 changes: 2 additions & 2 deletions ycmd/request_wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
standard_library.install_aliases()
from builtins import * # noqa

from ycmd.utils import ToUnicodeIfNeeded, ToUtf8IfNeeded
from ycmd.utils import ToUnicode, ToUtf8IfNeeded
from ycmd.identifier_utils import StartOfLongestIdentifierEndingAtIndex
from ycmd.request_validation import EnsureRequestValid

Expand Down Expand Up @@ -104,7 +104,7 @@ def CompletionStartColumn( line_value, column_num, filetype ):
# to walk codepoints for identifier checks.

utf8_line_value = ToUtf8IfNeeded( line_value )
unicode_line_value = ToUnicodeIfNeeded( line_value )
unicode_line_value = ToUnicode( line_value )
codepoint_column_num = len(
str( utf8_line_value[ : column_num -1 ], 'utf8' ) ) + 1

Expand Down
131 changes: 131 additions & 0 deletions ycmd/tests/utils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#!/usr/bin/env python
#
# Copyright (C) 2016 ycmd contributors.
#
# This file is part of YouCompleteMe.
#
# YouCompleteMe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# YouCompleteMe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.

# Intentionally not importing unicode_literals!
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()
from builtins import * # noqa
from future.utils import PY2

from nose.tools import eq_, ok_
from ycmd import utils

# NOTE: isinstance() vs type() is carefully used in this test file. Before
# changing things here, read the comments in utils.ToBytes.


if PY2:
def ToBytes_Py2Bytes_test():
value = utils.ToBytes( bytes( 'abc' ) )
eq_( value, bytes( 'abc' ) )
eq_( type( value ), bytes )


def ToBytes_Py2Str_test():
value = utils.ToBytes( 'abc' )
eq_( value, bytes( 'abc' ) )
eq_( type( value ), bytes )


def ToBytes_Py2FutureStr_test():
value = utils.ToBytes( str( 'abc' ) )
eq_( value, bytes( 'abc' ) )
eq_( type( value ), bytes )


def ToBytes_Py2Unicode_test():
value = utils.ToBytes( u'abc' )
eq_( value, bytes( 'abc' ) )
eq_( type( value ), bytes )


def ToBytes_Py2Int_test():
value = utils.ToBytes( 123 )
eq_( value, bytes( '123' ) )
eq_( type( value ), bytes )


def ToBytes_Bytes_test():
value = utils.ToBytes( bytes( b'abc' ) )
eq_( value, bytes( b'abc' ) )
eq_( type( value ), bytes )


def ToBytes_Str_test():
value = utils.ToBytes( u'abc' )
eq_( value, bytes( b'abc' ) )
eq_( type( value ), bytes )


def ToBytes_Int_test():
value = utils.ToBytes( 123 )
eq_( value, bytes( b'123' ) )
eq_( type( value ), bytes )


if PY2:
def ToUnicode_Py2Bytes_test():
value = utils.ToUnicode( bytes( 'abc' ) )
eq_( value, u'abc' )
ok_( isinstance( value, str ) )


def ToUnicode_Py2Str_test():
value = utils.ToUnicode( 'abc' )
eq_( value, u'abc' )
ok_( isinstance( value, str ) )


def ToUnicode_Py2FutureStr_test():
value = utils.ToUnicode( str( 'abc' ) )
eq_( value, u'abc' )
ok_( isinstance( value, str ) )


def ToUnicode_Py2Unicode_test():
value = utils.ToUnicode( u'abc' )
eq_( value, u'abc' )
ok_( isinstance( value, str ) )


def ToUnicode_Py2Int_test():
value = utils.ToUnicode( 123 )
eq_( value, u'123' )
ok_( isinstance( value, str ) )


def ToUnicode_Bytes_test():
value = utils.ToUnicode( bytes( b'abc' ) )
eq_( value, u'abc' )
ok_( isinstance( value, str ) )


def ToUnicode_Str_test():
value = utils.ToUnicode( u'abc' )
eq_( value, u'abc' )
ok_( isinstance( value, str ) )


def ToUnicode_Int_test():
value = utils.ToUnicode( 123 )
eq_( value, u'123' )
ok_( isinstance( value, str ) )
29 changes: 24 additions & 5 deletions ycmd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def ToUtf8IfNeeded( value ):
return bytes( str( value ) )


def ToUnicodeIfNeeded( value ):
# Returns a unicode type; either the new python-future str type or the real
# unicode type. The difference shouldn't matter.
def ToUnicode( value ):
if isinstance( value, str ):
return value
if isinstance( value, bytes ):
Expand All @@ -74,24 +76,41 @@ def ToUnicodeIfNeeded( value ):
return str( value )


# Consistently returns the new bytes() type from python-future. Assumes incoming
# strings are either UTF-8 or unicode (which is converted to UTF-8).
def ToBytes( value ):
# This is tricky. On py2, the bytes type from builtins (from python-future) is
# a subclass of str. So all of the following are true:
# isinstance(str(), bytes)
# isinstance(bytes(), str)
# But they don't behave the same in one important aspect: iterating over a
# bytes instance yields ints, while iterating over a (raw, py2) str yields
# chars.
# chars. We want consistent behavior so we force the use of bytes().
if type( value ) == bytes:
return value

# This is meant to catch Python 2's str type and the str on Python 3, which is
# unicode.
if isinstance( value, bytes ) or isinstance( value, str ):
return bytes( value, encoding = 'utf-8' )
if isinstance( value, bytes ):
return bytes( value, encoding = 'utf8' )

if isinstance( value, str ):
# On py2, with `from builtins import *` imported, the following is true:
#
# bytes(str(u'abc'), 'utf8') == b"b'abc'"
#
# Obviously this is a bug in python-future. So we work around it. Also filed
# upstream at: https://github.com/PythonCharmers/python-future/issues/193
# We can't just return value.encode( 'utf8' ) on both py2 & py3 because on
# py2 that returns the built-in str type instead of the newbytes type from
# python-future.
if PY2:
return bytes( value.encode( 'utf8' ), encoding = 'utf8' )
else:
return bytes( value, encoding = 'utf8' )

# This is meant to catch `int` and similar non-string/bytes types.
return bytes( str( value ), encoding = 'utf-8' )
return ToBytes( str( value ) )


def PathToTempDir():
Expand Down

0 comments on commit 054260b

Please sign in to comment.