Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
patch 8.0.1755: MS-Windows: high unicode char received as two utf-16 words

Problem:    MS-Windows GUI: high unicode char received as two utf-16 words.
Solution:   Keep the first word until the second word is received. (Chris
            Morgan, closes #2800)
  • Loading branch information
brammool committed Apr 24, 2018
1 parent 2225ebb commit f1f2f83
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
36 changes: 31 additions & 5 deletions src/gui_w32.c
Expand Up @@ -494,9 +494,11 @@ static int s_getting_focus = FALSE;
static int s_x_pending;
static int s_y_pending;
static UINT s_kFlags_pending;
static UINT s_wait_timer = 0; /* Timer for get char from user */
static UINT s_wait_timer = 0; // Timer for get char from user
static int s_timed_out = FALSE;
static int dead_key = 0; /* 0: no dead key, 1: dead key pressed */
static int dead_key = 0; // 0: no dead key, 1: dead key pressed
static UINT surrogate_pending_ch = 0; // 0: no surrogate pending,
// else a high surrogate

#ifdef FEAT_BEVAL_GUI
/* balloon-eval WM_NOTIFY_HANDLER */
Expand Down Expand Up @@ -708,6 +710,12 @@ _OnDeadChar(
* Convert Unicode character "ch" to bytes in "string[slen]".
* When "had_alt" is TRUE the ALT key was included in "ch".
* Return the length.
* Because the Windows API uses UTF-16, we have to deal with surrogate
* pairs; this is where we choose to deal with them: if "ch" is a high
* surrogate, it will be stored, and the length returned will be zero; the next
* char_to_string call will then include the high surrogate, decoding the pair
* of UTF-16 code units to a single Unicode code point, presuming it is the
* matching low surrogate.
*/
static int
char_to_string(int ch, char_u *string, int slen, int had_alt)
Expand All @@ -718,8 +726,27 @@ char_to_string(int ch, char_u *string, int slen, int had_alt)
WCHAR wstring[2];
char_u *ws = NULL;

wstring[0] = ch;
len = 1;
if (surrogate_pending_ch != 0)
{
/* We don't guarantee ch is a low surrogate to match the high surrogate
* we already have; it should be, but if it isn't, tough luck. */
wstring[0] = surrogate_pending_ch;
wstring[1] = ch;
surrogate_pending_ch = 0;
len = 2;
}
else if (ch >= 0xD800 && ch <= 0xDBFF) /* high surrogate */
{
/* We don't have the entire code point yet, only the first UTF-16 code
* unit; so just remember it and use it in the next call. */
surrogate_pending_ch = ch;
return 0;
}
else
{
wstring[0] = ch;
len = 1;
}

/* "ch" is a UTF-16 character. Convert it to a string of bytes. When
* "enc_codepage" is non-zero use the standard Win32 function,
Expand All @@ -743,7 +770,6 @@ char_to_string(int ch, char_u *string, int slen, int had_alt)
}
else
{
len = 1;
ws = utf16_to_enc(wstring, &len);
if (ws == NULL)
len = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/version.c
Expand Up @@ -761,6 +761,8 @@ static char *(features[]) =

static int included_patches[] =
{ /* Add new patch number below this line */
/**/
1755,
/**/
1754,
/**/
Expand Down

0 comments on commit f1f2f83

Please sign in to comment.