To: vim_dev@googlegroups.com Subject: Patch 8.0.1755 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.0.1755 Problem: MS-Windows GUI: high unicode char received as two utf-16 words. Solution: Keep the first word until the second word is received. (Chris Morgan, closes #2800) Files: src/gui_w32.c *** ../vim-8.0.1754/src/gui_w32.c 2018-02-10 18:45:21.060822215 +0100 --- src/gui_w32.c 2018-04-24 16:04:45.154854711 +0200 *************** *** 494,502 **** static int s_x_pending; static int s_y_pending; static UINT s_kFlags_pending; ! static UINT s_wait_timer = 0; /* Timer for get char from user */ static int s_timed_out = FALSE; ! static int dead_key = 0; /* 0: no dead key, 1: dead key pressed */ #ifdef FEAT_BEVAL_GUI /* balloon-eval WM_NOTIFY_HANDLER */ --- 494,504 ---- static int s_x_pending; static int s_y_pending; static UINT s_kFlags_pending; ! static UINT s_wait_timer = 0; // Timer for get char from user static int s_timed_out = FALSE; ! static int dead_key = 0; // 0: no dead key, 1: dead key pressed ! static UINT surrogate_pending_ch = 0; // 0: no surrogate pending, ! // else a high surrogate #ifdef FEAT_BEVAL_GUI /* balloon-eval WM_NOTIFY_HANDLER */ *************** *** 708,713 **** --- 710,721 ---- * Convert Unicode character "ch" to bytes in "string[slen]". * When "had_alt" is TRUE the ALT key was included in "ch". * Return the length. + * Because the Windows API uses UTF-16, we have to deal with surrogate + * pairs; this is where we choose to deal with them: if "ch" is a high + * surrogate, it will be stored, and the length returned will be zero; the next + * char_to_string call will then include the high surrogate, decoding the pair + * of UTF-16 code units to a single Unicode code point, presuming it is the + * matching low surrogate. */ static int char_to_string(int ch, char_u *string, int slen, int had_alt) *************** *** 718,725 **** WCHAR wstring[2]; char_u *ws = NULL; ! wstring[0] = ch; ! len = 1; /* "ch" is a UTF-16 character. Convert it to a string of bytes. When * "enc_codepage" is non-zero use the standard Win32 function, --- 726,752 ---- WCHAR wstring[2]; char_u *ws = NULL; ! if (surrogate_pending_ch != 0) ! { ! /* We don't guarantee ch is a low surrogate to match the high surrogate ! * we already have; it should be, but if it isn't, tough luck. */ ! wstring[0] = surrogate_pending_ch; ! wstring[1] = ch; ! surrogate_pending_ch = 0; ! len = 2; ! } ! else if (ch >= 0xD800 && ch <= 0xDBFF) /* high surrogate */ ! { ! /* We don't have the entire code point yet, only the first UTF-16 code ! * unit; so just remember it and use it in the next call. */ ! surrogate_pending_ch = ch; ! return 0; ! } ! else ! { ! wstring[0] = ch; ! len = 1; ! } /* "ch" is a UTF-16 character. Convert it to a string of bytes. When * "enc_codepage" is non-zero use the standard Win32 function, *************** *** 743,749 **** } else { - len = 1; ws = utf16_to_enc(wstring, &len); if (ws == NULL) len = 0; --- 770,775 ---- *** ../vim-8.0.1754/src/version.c 2018-04-24 15:48:05.780784369 +0200 --- src/version.c 2018-04-24 16:02:04.439808511 +0200 *************** *** 763,764 **** --- 763,766 ---- { /* Add new patch number below this line */ + /**/ + 1755, /**/ -- Two fish in a tank. One says to the other: "Do you know how to drive this thing?" /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///