Permalink
Browse files

patch 8.0.0982: cannot use a terminal when 'encoding' is non-utf8 multi-byte

Problem:    When 'encoding' is set to a multi-byte encoding other than utf-8
            the characters from the terminal are messed up.
Solution:   Convert displayed text from utf-8 to 'encoding' for MS-Windows.
            (Yasuhiro Matsumoto, close #2000)
  • Loading branch information...
brammool committed Aug 21, 2017
1 parent 0cbba82 commit 740c433c5909e3118dc4a7c42028f8a8b78a353b
Showing with 68 additions and 15 deletions.
  1. +66 −15 src/terminal.c
  2. +2 −0 src/version.c
View
@@ -49,8 +49,8 @@
"err_io", "err_name", "err_buf", "err_modifiable", "err_msg"
* Check that something is connected to the terminal.
* Test: "cat" reading from a file or buffer
* "ls" writing stdout to a file or buffer
* shell writing stderr to a file or buffer
* "ls" writing stdout to a file or buffer
* shell writing stderr to a file or buffer
* - For the GUI fill termios with default values, perhaps like pangoterm:
* http://bazaar.launchpad.net/~leonerd/pangoterm/trunk/view/head:/main.c#L134
* - support ":term NONE" to open a terminal with a pty but not running a job
@@ -845,7 +845,26 @@ add_scrollback_line_to_buffer(term_T *term, char_u *text, int len)
int empty = (buf->b_ml.ml_flags & ML_EMPTY);
linenr_T lnum = buf->b_ml.ml_line_count;
ml_append_buf(term->tl_buffer, lnum, text, len + 1, FALSE);
#ifdef _WIN32
if (!enc_utf8 && enc_codepage > 0)
{
WCHAR *ret = NULL;
int length = 0;
MultiByteToWideChar_alloc(CP_UTF8, 0, (char*)text, len + 1,
&ret, &length);
if (ret != NULL)
{
WideCharToMultiByte_alloc(enc_codepage, 0,
ret, length, (char **)&text, &len, 0, 0);
vim_free(ret);
ml_append_buf(term->tl_buffer, lnum, text, len, FALSE);
vim_free(text);
}
}
else
#endif
ml_append_buf(term->tl_buffer, lnum, text, len + 1, FALSE);
if (empty)
{
/* Delete the empty line that was in the empty buffer. */
@@ -936,7 +955,7 @@ move_terminal_to_buffer(term_T *term)
int c;
for (i = 0; (c = cell.chars[i]) > 0 || i == 0; ++i)
ga.ga_len += mb_char2bytes(c == NUL ? ' ' : c,
ga.ga_len += utf_char2bytes(c == NUL ? ' ' : c,
(char_u *)ga.ga_data + ga.ga_len);
}
}
@@ -1468,6 +1487,18 @@ terminal_loop(void)
goto theend;
}
}
# ifdef _WIN32
if (!enc_utf8 && has_mbyte && c >= 0x80)
{
WCHAR wc;
char_u mb[3];
mb[0] = (unsigned)c >> 8;
mb[1] = c;
if (MultiByteToWideChar(GetACP(), 0, (char*)mb, 2, &wc, 1) > 0)
c = wc;
}
# endif
if (send_keys_to_term(curbuf->b_term, c, TRUE) != OK)
{
ret = OK;
@@ -1627,7 +1658,7 @@ color2index(VTermColor *color, int fg, int *boldp)
/* 216-color cube */
return 17 + ((red + 25) / 0x33) * 36
+ ((green + 25) / 0x33) * 6
+ ((green + 25) / 0x33) * 6
+ (blue + 25) / 0x33;
}
return 0;
@@ -2076,32 +2107,52 @@ term_update_window(win_T *wp)
else
{
#if defined(FEAT_MBYTE)
if (enc_utf8 && c >= 0x80)
if (enc_utf8)
{
ScreenLines[off] = ' ';
ScreenLinesUC[off] = c;
if (c >= 0x80)
{
ScreenLines[off] = ' ';
ScreenLinesUC[off] = c;
}
else
{
ScreenLines[off] = c;
ScreenLinesUC[off] = NUL;
}
}
else
# ifdef _WIN32
else if (has_mbyte && c >= 0x80)
{
ScreenLines[off] = c;
if (enc_utf8)
ScreenLinesUC[off] = NUL;
char_u mb[MB_MAXBYTES+1];
WCHAR wc = c;
if (WideCharToMultiByte(GetACP(), 0, &wc, 1,
(char*)mb, 2, 0, 0) > 1)
{
ScreenLines[off] = mb[0];
ScreenLines[off+1] = mb[1];
cell.width = mb_ptr2cells(mb);
}
else
ScreenLines[off] = c;
}
#else
ScreenLines[off] = c;
# endif
else
#endif
ScreenLines[off] = c;
}
ScreenAttrs[off] = cell2attr(cell.attrs, cell.fg, cell.bg);
++pos.col;
++off;
if (cell.width == 2)
{
ScreenLines[off] = NUL;
#if defined(FEAT_MBYTE)
if (enc_utf8)
ScreenLinesUC[off] = NUL;
else if (!has_mbyte)
#endif
ScreenLines[off] = NUL;
++pos.col;
++off;
}
View
@@ -769,6 +769,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
982,
/**/
981,
/**/

4 comments on commit 740c433

@mattn

This comment has been minimized.

Show comment
Hide comment
@mattn

mattn Aug 22, 2017

Sorry, this breaks utf-8 strings.

--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2145,7 +2145,10 @@ term_update_window(win_T *wp)
 		{
 #if defined(FEAT_MBYTE)
 		    if (enc_utf8)
+		    {
 			ScreenLinesUC[off] = NUL;
+			ScreenLines[off] = NUL;
+		    }
 		    else if (!has_mbyte)
 #endif
 			ScreenLines[off] = NUL;

thanks @k-takata

mattn replied Aug 22, 2017

Sorry, this breaks utf-8 strings.

--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2145,7 +2145,10 @@ term_update_window(win_T *wp)
 		{
 #if defined(FEAT_MBYTE)
 		    if (enc_utf8)
+		    {
 			ScreenLinesUC[off] = NUL;
+			ScreenLines[off] = NUL;
+		    }
 		    else if (!has_mbyte)
 #endif
 			ScreenLines[off] = NUL;

thanks @k-takata

@brammool

This comment has been minimized.

Show comment
Hide comment
@brammool

brammool Aug 22, 2017

Contributor
Contributor

brammool replied Aug 22, 2017

@mattn

This comment has been minimized.

Show comment
Hide comment
@mattn

mattn Aug 22, 2017

I confirmed first patch works fine.

diff --git a/src/terminal.c b/src/terminal.c
index 2cf4525ae..75f38b3c4 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2150,7 +2150,7 @@ term_update_window(win_T *wp)
 #if defined(FEAT_MBYTE)
 		    if (enc_utf8)
 			ScreenLinesUC[off] = NUL;
-		    else if (!has_mbyte)
+		    if (enc_utf8 || !has_mbyte)
 #endif
 			ScreenLines[off] = NUL;
 		    ++pos.col;

byte_two is used only for DBCS. Handling it here would make the ifdef/endif more confusing, so I prefer to add a comment like the one below.

diff --git a/src/terminal.c b/src/terminal.c
index 2cf4525ae..895641d24 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2150,7 +2150,8 @@ term_update_window(win_T *wp)
 #if defined(FEAT_MBYTE)
 		    if (enc_utf8)
 			ScreenLinesUC[off] = NUL;
-		    else if (!has_mbyte)
+		    /* On DBCS encodings, second byte is set on above */
+		    if (enc_utf8 || !has_mbyte)
 #endif
 			ScreenLines[off] = NUL;
 		    ++pos.col;

mattn replied Aug 22, 2017

I confirmed first patch works fine.

diff --git a/src/terminal.c b/src/terminal.c
index 2cf4525ae..75f38b3c4 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2150,7 +2150,7 @@ term_update_window(win_T *wp)
 #if defined(FEAT_MBYTE)
 		    if (enc_utf8)
 			ScreenLinesUC[off] = NUL;
-		    else if (!has_mbyte)
+		    if (enc_utf8 || !has_mbyte)
 #endif
 			ScreenLines[off] = NUL;
 		    ++pos.col;

byte_two is used only for DBCS. Handling it here would make the ifdef/endif more confusing, so I prefer to add a comment like the one below.

diff --git a/src/terminal.c b/src/terminal.c
index 2cf4525ae..895641d24 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2150,7 +2150,8 @@ term_update_window(win_T *wp)
 #if defined(FEAT_MBYTE)
 		    if (enc_utf8)
 			ScreenLinesUC[off] = NUL;
-		    else if (!has_mbyte)
+		    /* On DBCS encodings, second byte is set on above */
+		    if (enc_utf8 || !has_mbyte)
 #endif
 			ScreenLines[off] = NUL;
 		    ++pos.col;
@brammool

This comment has been minimized.

Show comment
Hide comment
@brammool

brammool Aug 22, 2017

Contributor
Contributor

brammool replied Aug 22, 2017

Please sign in to comment.