To: vim-dev@vim.org Subject: Patch 6.2.355 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 8bit ------------ Patch 6.2.355 (after 6.2.303) Problem: When 'encoding' is a double-byte encoding different from the current locale, the width of characters is not correct. Possible failure and memory leak when using iconv and Unicode digraphs while 'encoding' is not "utf-8". Solution: Use iconv() to discover the actual width of characters. Add the "vc_fail" field to vimconv_T. When converting a digraph, init the conversion type to NONE and clean up afterwards. Files: src/digraph.c, src/mbyte.c, src/structs.h *** ../vim-6.2.354/src/digraph.c Mon Mar 1 17:01:39 2004 --- src/digraph.c Sun Mar 14 16:16:25 2004 *************** *** 2116,2145 **** { char_u buf[6], *to; vimconv_T vc; - int utflen; /* * Convert the Unicode digraph to 'encoding'. */ i = utf_char2bytes(retval, buf); if (convert_setup(&vc, (char_u *)"utf-8", p_enc) == OK) { ! utflen = i; to = string_convert(&vc, buf, &i); if (to != NULL) { ! /* Checking for invalid values isn't very easy. Internal ! * latin1 conversion will return char 0xbf in case it can't be ! * converted */ ! if ((i > 1 && !has_mbyte) ! || (vc.vc_type == CONV_TO_LATIN1 && utflen != 1 ! && to[0] == 0xbf)) ! /* assume invalid value */ ! retval = 0; ! else ! retval = (*mb_ptr2char)(to); vim_free(to); } } } # endif --- 2116,2138 ---- { char_u buf[6], *to; vimconv_T vc; /* * Convert the Unicode digraph to 'encoding'. */ i = utf_char2bytes(retval, buf); + retval = 0; + vc.vc_type = CONV_NONE; if (convert_setup(&vc, (char_u *)"utf-8", p_enc) == OK) { ! vc.vc_fail = TRUE; to = string_convert(&vc, buf, &i); if (to != NULL) { ! 
retval = (*mb_ptr2char)(to); vim_free(to); } + (void)convert_setup(&vc, NULL, NULL); } } # endif *** ../vim-6.2.354/src/mbyte.c Mon Mar 8 15:12:09 2004 --- src/mbyte.c Sun Mar 14 20:00:01 2004 *************** *** 408,413 **** --- 408,419 ---- int idx; int n; int enc_dbcs_new = 0; + #if defined(USE_ICONV) && !defined(WIN3264) && !defined(WIN32UNIX) \ + && !defined(MACOS) + # define LEN_FROM_CONV + vimconv_T vimconv; + char_u *p; + #endif if (p_enc == NULL) { *************** *** 555,560 **** --- 561,582 ---- /* * Fill the mb_bytelen_tab[] for MB_BYTE2LEN(). */ + #ifdef LEN_FROM_CONV + /* When 'encoding' is different from the current locale mblen() won't + * work. Use conversion to "utf-8" instead. */ + vimconv.vc_type = CONV_NONE; + if (enc_dbcs) + { + p = enc_locale(); + if (p == NULL || STRCMP(p, p_enc) != 0) + { + convert_setup(&vimconv, p_enc, (char_u *)"utf-8"); + vimconv.vc_fail = TRUE; + } + vim_free(p); + } + #endif + for (i = 0; i < 256; ++i) { /* Our own function to reliably check the length of UTF-8 characters, *************** *** 589,610 **** n = 1; else { - /* - * mblen() should return -1 for invalid (means the leading - * multibyte) character. However there are some platform - * where mblen() returns 0 for invalid character. Therefore, - * following condition includes 0. - */ buf[0] = i; buf[1] = 0; ! #if 0 ! if (i >= 0x80)/* TESTING DBCS: 'encoding' != current locale */ ! #else ! if (mblen(buf, (size_t)1) <= 0) ! #endif ! n = 2; else ! n = 1; } # endif #endif --- 611,648 ---- n = 1; else { buf[0] = i; buf[1] = 0; ! #ifdef LEN_FROM_CONV ! if (vimconv.vc_type != CONV_NONE) ! { ! /* ! * string_convert() should fail when converting the first ! * byte of a double-byte character. ! */ ! p = string_convert(&vimconv, (char_u *)buf, NULL); ! if (p != NULL) ! { ! vim_free(p); ! n = 1; ! } ! else ! n = 2; ! } else ! #endif ! { ! /* ! * mblen() should return -1 for invalid (means the leading ! * multibyte) character. However there are some platforms ! 
* where mblen() returns 0 for invalid character. ! * Therefore, following condition includes 0. ! */ ! if (mblen(buf, (size_t)1) <= 0) ! n = 2; ! else ! n = 1; ! } } # endif #endif *************** *** 613,618 **** --- 651,660 ---- mb_bytelen_tab[i] = n; } + #ifdef LEN_FROM_CONV + convert_setup(&vimconv, NULL, NULL); + #endif + /* The cell width depends on the type of multi-byte characters. */ (void)init_chartab(); *************** *** 2824,2830 **** # if defined(USE_ICONV) || defined(PROTO) ! static char_u *iconv_string __ARGS((iconv_t fd, char_u *str, int slen)); /* * Call iconv_open() with a check if iconv() works properly (there are broken --- 2866,2872 ---- # if defined(USE_ICONV) || defined(PROTO) ! static char_u *iconv_string __ARGS((vimconv_T *vcp, char_u *str, int slen)); /* * Call iconv_open() with a check if iconv() works properly (there are broken *************** *** 2885,2892 **** * Returns the converted string in allocated memory. NULL for an error. */ static char_u * ! iconv_string(fd, str, slen) ! iconv_t fd; char_u *str; int slen; { --- 2927,2934 ---- * Returns the converted string in allocated memory. NULL for an error. */ static char_u * ! iconv_string(vcp, str, slen) ! vimconv_T *vcp; char_u *str; int slen; { *************** *** 2922,2928 **** tolen = len - done - 2; /* Avoid a warning for systems with a wrong iconv() prototype by * casting the second argument to void *. */ ! if (iconv(fd, (void *)&from, &fromlen, &to, &tolen) != (size_t)-1) { /* Finished, append a NUL. */ *to = NUL; --- 2964,2971 ---- tolen = len - done - 2; /* Avoid a warning for systems with a wrong iconv() prototype by * casting the second argument to void *. */ ! if (iconv(vcp->vc_fd, (void *)&from, &fromlen, &to, &tolen) ! != (size_t)-1) { /* Finished, append a NUL. */ *to = NUL; *************** *** 2930,2936 **** } /* Check both ICONV_EILSEQ and EILSEQ, because the dynamically loaded * iconv library may use one of them. */ ! 
if (ICONV_ERRNO == ICONV_EILSEQ || ICONV_ERRNO == EILSEQ) { /* Can't convert: insert a '?' and skip a character. This assumes * conversion from 'encoding' to something else. In other --- 2973,2980 ---- } /* Check both ICONV_EILSEQ and EILSEQ, because the dynamically loaded * iconv library may use one of them. */ ! if (!vcp->vc_fail && (ICONV_ERRNO == ICONV_EILSEQ ! || ICONV_ERRNO == EILSEQ)) { /* Can't convert: insert a '?' and skip a character. This assumes * conversion from 'encoding' to something else. In other *************** *** 5209,5214 **** --- 5253,5259 ---- # endif vcp->vc_type = CONV_NONE; vcp->vc_factor = 1; + vcp->vc_fail = FALSE; /* No conversion when one of the names is empty or they are equal. */ if (from == NULL || *from == NUL || to == NULL || *to == NUL *************** *** 5304,5319 **** } #if defined(MACOS_X) ! static char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, CFStringEncoding from, CFStringEncoding to)); /* * A Mac version of string_convert() for special cases. */ static char_u * ! mac_string_convert(ptr, len, lenp, from, to) char_u *ptr; int len; int *lenp; CFStringEncoding from; CFStringEncoding to; { --- 5349,5365 ---- } #if defined(MACOS_X) ! static char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, CFStringEncoding from, CFStringEncoding to)); /* * A Mac version of string_convert() for special cases. */ static char_u * ! 
mac_string_convert(ptr, len, lenp, fail_on_error, from, to) char_u *ptr; int len; int *lenp; + int fail_on_error; CFStringEncoding from; CFStringEncoding to; { *************** *** 5337,5342 **** --- 5383,5394 ---- if (!CFStringGetCString(cfstr, retval, buflen, to)) { CFRelease(cfstr); + if (fail_on_error) + { + vim_free(retval); + return NULL; + } + /* conversion failed for the whole string, but maybe it will work * for each character */ for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) *************** *** 5384,5389 **** --- 5436,5442 ---- * Convert text "ptr[*lenp]" according to "vcp". * Returns the result in allocated memory and sets "*lenp". * When "lenp" is NULL, use NUL terminated strings. + * Illegal chars are often changed to "?", unless vcp->vc_fail is set. * When something goes wrong, NULL is returned and "*lenp" is unchanged. */ char_u * *************** *** 5445,5450 **** --- 5498,5508 ---- { if (c < 0x100) *d++ = c; + else if (vcp->vc_fail) + { + vim_free(retval); + return NULL; + } else { *d++ = 0xbf; *************** *** 5462,5486 **** # ifdef MACOS_X case CONV_MAC_LATIN1: ! retval = mac_string_convert(ptr, len, lenp, kCFStringEncodingMacRoman, kCFStringEncodingISOLatin1); break; case CONV_LATIN1_MAC: ! retval = mac_string_convert(ptr, len, lenp, kCFStringEncodingISOLatin1, kCFStringEncodingMacRoman); break; case CONV_MAC_UTF8: ! retval = mac_string_convert(ptr, len, lenp, kCFStringEncodingMacRoman, kCFStringEncodingUTF8); break; case CONV_UTF8_MAC: ! retval = mac_string_convert(ptr, len, lenp, kCFStringEncodingUTF8, kCFStringEncodingMacRoman); break; --- 5520,5544 ---- # ifdef MACOS_X case CONV_MAC_LATIN1: ! retval = mac_string_convert(ptr, len, lenp, vcp->vc_fail, kCFStringEncodingMacRoman, kCFStringEncodingISOLatin1); break; case CONV_LATIN1_MAC: ! retval = mac_string_convert(ptr, len, lenp, vcp->vc_fail, kCFStringEncodingISOLatin1, kCFStringEncodingMacRoman); break; case CONV_MAC_UTF8: ! 
retval = mac_string_convert(ptr, len, lenp, vcp->vc_fail, kCFStringEncodingMacRoman, kCFStringEncodingUTF8); break; case CONV_UTF8_MAC: ! retval = mac_string_convert(ptr, len, lenp, vcp->vc_fail, kCFStringEncodingUTF8, kCFStringEncodingMacRoman); break; *************** *** 5488,5494 **** # ifdef USE_ICONV case CONV_ICONV: /* conversion with output_conv.vc_fd */ ! retval = iconv_string(vcp->vc_fd, ptr, len); if (retval != NULL && lenp != NULL) *lenp = (int)STRLEN(retval); break; --- 5546,5552 ---- # ifdef USE_ICONV case CONV_ICONV: /* conversion with output_conv.vc_fd */ ! retval = iconv_string(vcp, ptr, len); if (retval != NULL && lenp != NULL) *lenp = (int)STRLEN(retval); break; *** ../vim-6.2.354/src/structs.h Tue Mar 9 12:41:56 2004 --- src/structs.h Sun Mar 14 15:46:06 2004 *************** *** 817,822 **** --- 817,823 ---- # ifdef USE_ICONV iconv_t vc_fd; /* for CONV_ICONV */ # endif + int vc_fail; /* fail for invalid char, don't use '?' */ } vimconv_T; /* *** ../vim-6.2.354/src/version.c Sun Mar 14 14:37:09 2004 --- src/version.c Sun Mar 14 20:06:12 2004 *************** *** 639,640 **** --- 639,642 ---- { /* Add new patch number below this line */ + /**/ + 355, /**/ -- How To Keep A Healthy Level Of Insanity: 8. Don't use any punctuation marks. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// Sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ Project leader for A-A-P -- http://www.A-A-P.org /// \\\ Buy at Amazon and help AIDS victims -- http://ICCF.nl/click1.html ///