To: vim-dev@vim.org Subject: Patch 6.1.156 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 8bit ------------ Patch 6.1.156 Problem: Conversion between DBCS and UCS-2 isn't implemented cleanly. Solution: Clean up a few things. Files: src/mbyte.c, src/structs.h *** ../vim61.155/src/mbyte.c Sun Jun 9 17:49:17 2002 --- src/mbyte.c Sat Aug 3 21:12:53 2002 *************** *** 277,282 **** --- 277,286 ---- {NULL, 0} }; + #ifndef CP_UTF8 + # define CP_UTF8 65001 /* magic number from winnls.h */ + #endif + /* * Find encoding "name" in the list of canonical encoding names. * Returns -1 if not found. *************** *** 578,584 **** ime_conv_cp.vc_fd = (iconv_t)-1; # endif convert_setup(&ime_conv, "ucs-2", p_enc); ! ime_conv_cp.vc_type = CONV_CODEPAGE; ime_conv_cp.vc_factor = 2; /* we don't really know anything about the codepage */ #endif --- 582,589 ---- ime_conv_cp.vc_fd = (iconv_t)-1; # endif convert_setup(&ime_conv, "ucs-2", p_enc); ! ime_conv_cp.vc_type = CONV_DBCS_TO_UCS2; ! ime_conv_cp.vc_dbcs = GetACP(); ime_conv_cp.vc_factor = 2; /* we don't really know anything about the codepage */ #endif *************** *** 1191,1198 **** } /* ! * Get the length of a UTF-8 byte sequence. Ignores any following composing ! * characters. * Returns 0 for "". * Returns 1 for an illegal byte sequence. */ --- 1196,1203 ---- } /* ! * Get the length of a UTF-8 byte sequence, not including any following ! * composing characters. * Returns 0 for "". * Returns 1 for an illegal byte sequence. */ *************** *** 1224,1231 **** } /* ! * Get the length of UTF-8 byte sequence "p[size]". Ignores any following ! * composing characters. * Returns 1 for "". * Returns 1 for an illegal byte sequence. * Returns number > "size" for an incomplete byte sequence. --- 1229,1236 ---- } /* ! * Get the length of UTF-8 byte sequence "p[size]". Does not include any ! * following composing characters. * Returns 1 for "". * Returns 1 for an illegal byte sequence. * Returns number > "size" for an incomplete byte sequence. *************** *** 1345,1351 **** } /* ! * Convert UTF-8 character "c" to string of bytes in "buf[]". * Returns the number of bytes. * This does not include composing characters. */ --- 1350,1356 ---- } /* ! * Convert Unicode character "c" to UTF-8 string in "buf[]". * Returns the number of bytes. * This does not include composing characters. */ *************** *** 4045,4069 **** vcp->vc_type = CONV_TO_LATIN1; } #ifdef WIN32 ! /* Win32-specific UTF-16 -> DBCS conversion, for the IME, * so we don't need iconv ... */ else if ((from_prop & ENC_UNICODE) && (from_prop & ENC_2BYTE) && (to_prop & ENC_DBCS)) { ! vcp->vc_type = CONV_DBCS; vcp->vc_factor = 2; /* up to twice as long */ vcp->vc_dbcs = atoi(to + 2); } else if ((from_prop & ENC_UNICODE) && (from_prop & ENC_2BYTE) && (to_prop & ENC_UNICODE)) { ! vcp->vc_type = CONV_DBCS; vcp->vc_factor = 2; /* up to twice as long */ - #ifdef CP_UTF8 vcp->vc_dbcs = CP_UTF8; - #else - vcp->vc_dbcs = 65001; /* magic number from winnls.h */ - #endif } #endif # ifdef USE_ICONV --- 4050,4070 ---- vcp->vc_type = CONV_TO_LATIN1; } #ifdef WIN32 ! /* Win32-specific UCS-2 <-> DBCS conversion, for the IME, * so we don't need iconv ... */ else if ((from_prop & ENC_UNICODE) && (from_prop & ENC_2BYTE) && (to_prop & ENC_DBCS)) { ! vcp->vc_type = CONV_UCS2_TO_DBCS; vcp->vc_factor = 2; /* up to twice as long */ vcp->vc_dbcs = atoi(to + 2); } else if ((from_prop & ENC_UNICODE) && (from_prop & ENC_2BYTE) && (to_prop & ENC_UNICODE)) { ! vcp->vc_type = CONV_UCS2_TO_DBCS; vcp->vc_factor = 2; /* up to twice as long */ vcp->vc_dbcs = CP_UTF8; } #endif # ifdef USE_ICONV *************** *** 4195,4201 **** break; # endif # ifdef WIN32 ! case CONV_DBCS: /* UTF-16 -> dbcs or UTF8 */ { int retlen; --- 4196,4207 ---- break; # endif # ifdef WIN32 ! /* ! * Note: Using these functions for UTF-8 (CP_UTF8) is NT-specific. ! * Don't put too much faith in its UTF-8 parsing; it's not ! * too good at handling invalid and overlong sequences. ! */ ! case CONV_UCS2_TO_DBCS: /* UCS-2 -> DBCS or UTF8 */ { int retlen; *************** *** 4213,4223 **** *lenp = retlen; break; } ! case CONV_CODEPAGE: /* current codepage -> ucs-2 */ { int retlen; ! retlen = MultiByteToWideChar(GetACP(), 0, ptr, len, 0, 0); retval = alloc(sizeof(unsigned short) * retlen); if (retval == NULL) break; --- 4219,4229 ---- *lenp = retlen; break; } ! case CONV_DBCS_TO_UCS2: /* UTF-8 or DBCS -> UCS-2 */ { int retlen; ! retlen = MultiByteToWideChar(vcp->vc_dbcs, 0, ptr, len, 0, 0); retval = alloc(sizeof(unsigned short) * retlen); if (retval == NULL) break; *** ../vim61.155/src/structs.h Sat Jun 29 16:35:43 2002 --- src/structs.h Sat Aug 3 21:13:33 2002 *************** *** 691,703 **** #endif } vir_T; ! #define CONV_NONE 0 ! #define CONV_TO_UTF8 1 ! #define CONV_TO_LATIN1 2 ! #define CONV_ICONV 3 #ifdef FEAT_WINDOWS ! # define CONV_DBCS 4 /* ucs-2 -> dbcs */ ! # define CONV_CODEPAGE 5 /* current codepage -> ucs-2 */ #endif /* --- 695,707 ---- #endif } vir_T; ! #define CONV_NONE 0 ! #define CONV_TO_UTF8 1 ! #define CONV_TO_LATIN1 2 ! #define CONV_ICONV 3 #ifdef FEAT_WINDOWS ! # define CONV_UCS2_TO_DBCS 4 /* ucs-2 -> dbcs */ ! # define CONV_DBCS_TO_UCS2 5 /* current codepage -> ucs-2 */ #endif /* *** ../vim61.155/src/version.c Sun Aug 18 15:56:14 2002 --- src/version.c Sun Aug 18 16:02:03 2002 *************** *** 608,609 **** --- 608,611 ---- { /* Add new patch number below this line */ + /**/ + 156, /**/ -- Get a life? What is the URL where it can be downloaded? /// Bram Moolenaar -- Bram@moolenaar.net -- http://www.moolenaar.net \\\ /// Creator of Vim -- http://vim.sf.net -- ftp://ftp.vim.org/pub/vim \\\ \\\ Project leader for A-A-P -- http://www.a-a-p.org /// \\\ Lord Of The Rings helps Uganda - http://iccf-holland.org/lotr.html ///