To: vim-dev@vim.org
Subject: Patch 6.1.156
Fcc: outbox
From: Bram Moolenaar <Bram@moolenaar.net>
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 8bit
------------

Patch 6.1.156
Problem:    Conversion between DBCS and UCS-2 isn't implemented cleanly.
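Solution:   Rename the conversion types to CONV_UCS2_TO_DBCS and
	    CONV_DBCS_TO_UCS2, store the codepage used for the DBCS -> UCS-2
	    conversion in vc_dbcs, and define CP_UTF8 once when it is missing.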
Files:	    src/mbyte.c, src/structs.h


*** ../vim61.155/src/mbyte.c	Sun Jun  9 17:49:17 2002
--- src/mbyte.c	Sat Aug  3 21:12:53 2002
***************
*** 277,282 ****
--- 277,286 ----
      {NULL,		0}
  };
  
+ #ifndef CP_UTF8
+ # define CP_UTF8 65001	/* magic number from winnls.h */
+ #endif
+ 
  /*
   * Find encoding "name" in the list of canonical encoding names.
   * Returns -1 if not found.
***************
*** 578,584 ****
      ime_conv_cp.vc_fd = (iconv_t)-1;
  # endif
      convert_setup(&ime_conv, "ucs-2", p_enc);
!     ime_conv_cp.vc_type = CONV_CODEPAGE;
      ime_conv_cp.vc_factor = 2; /* we don't really know anything about the codepage */
  #endif
  
--- 582,589 ----
      ime_conv_cp.vc_fd = (iconv_t)-1;
  # endif
      convert_setup(&ime_conv, "ucs-2", p_enc);
!     ime_conv_cp.vc_type = CONV_DBCS_TO_UCS2;
!     ime_conv_cp.vc_dbcs = GetACP();
      ime_conv_cp.vc_factor = 2; /* we don't really know anything about the codepage */
  #endif
  
***************
*** 1191,1198 ****
  }
  
  /*
!  * Get the length of a UTF-8 byte sequence.  Ignores any following composing
!  * characters.
   * Returns 0 for "".
   * Returns 1 for an illegal byte sequence.
   */
--- 1196,1203 ----
  }
  
  /*
!  * Get the length of a UTF-8 byte sequence, not including any following
!  * composing characters.
   * Returns 0 for "".
   * Returns 1 for an illegal byte sequence.
   */
***************
*** 1224,1231 ****
  }
  
  /*
!  * Get the length of UTF-8 byte sequence "p[size]".  Ignores any following
!  * composing characters.
   * Returns 1 for "".
   * Returns 1 for an illegal byte sequence.
   * Returns number > "size" for an incomplete byte sequence.
--- 1229,1236 ----
  }
  
  /*
!  * Get the length of UTF-8 byte sequence "p[size]".  Does not include any
!  * following composing characters.
   * Returns 1 for "".
   * Returns 1 for an illegal byte sequence.
   * Returns number > "size" for an incomplete byte sequence.
***************
*** 1345,1351 ****
  }
  
  /*
!  * Convert UTF-8 character "c" to string of bytes in "buf[]".
   * Returns the number of bytes.
   * This does not include composing characters.
   */
--- 1350,1356 ----
  }
  
  /*
!  * Convert Unicode character "c" to UTF-8 string in "buf[]".
   * Returns the number of bytes.
   * This does not include composing characters.
   */
***************
*** 4045,4069 ****
  	vcp->vc_type = CONV_TO_LATIN1;
      }
  #ifdef WIN32
!     /* Win32-specific UTF-16 -> DBCS conversion, for the IME,
       * so we don't need iconv ... */
      else if ((from_prop & ENC_UNICODE)
  			   && (from_prop & ENC_2BYTE) && (to_prop & ENC_DBCS))
      {
! 	vcp->vc_type = CONV_DBCS;
  	vcp->vc_factor = 2;	/* up to twice as long */
  	vcp->vc_dbcs = atoi(to + 2);
      }
      else if ((from_prop & ENC_UNICODE)
  			&& (from_prop & ENC_2BYTE) && (to_prop & ENC_UNICODE))
      {
! 	vcp->vc_type = CONV_DBCS;
  	vcp->vc_factor = 2;	/* up to twice as long */
- #ifdef CP_UTF8
  	vcp->vc_dbcs = CP_UTF8;
- #else
- 	vcp->vc_dbcs = 65001;	/* magic number from winnls.h */
- #endif
      }
  #endif
  # ifdef USE_ICONV
--- 4050,4070 ----
  	vcp->vc_type = CONV_TO_LATIN1;
      }
  #ifdef WIN32
!     /* Win32-specific UCS-2 <-> DBCS conversion, for the IME,
       * so we don't need iconv ... */
      else if ((from_prop & ENC_UNICODE)
  			   && (from_prop & ENC_2BYTE) && (to_prop & ENC_DBCS))
      {
! 	vcp->vc_type = CONV_UCS2_TO_DBCS;
  	vcp->vc_factor = 2;	/* up to twice as long */
  	vcp->vc_dbcs = atoi(to + 2);
      }
      else if ((from_prop & ENC_UNICODE)
  			&& (from_prop & ENC_2BYTE) && (to_prop & ENC_UNICODE))
      {
! 	vcp->vc_type = CONV_UCS2_TO_DBCS;
  	vcp->vc_factor = 2;	/* up to twice as long */
  	vcp->vc_dbcs = CP_UTF8;
      }
  #endif
  # ifdef USE_ICONV
***************
*** 4195,4201 ****
  	    break;
  # endif
  # ifdef WIN32
! 	case CONV_DBCS:		/* UTF-16 -> dbcs or UTF8 */
  	{
  	    int retlen;
  
--- 4196,4207 ----
  	    break;
  # endif
  # ifdef WIN32
! 	/*
! 	 * Note: Using these functions for UTF-8 (CP_UTF8) is NT-specific.
! 	 * Don't put too much faith in its UTF-8 parsing; it's not
! 	 * too good at handling invalid and overlong sequences.
! 	 */
! 	case CONV_UCS2_TO_DBCS:		/* UCS-2 -> DBCS or UTF8 */
  	{
  	    int retlen;
  
***************
*** 4213,4223 ****
  		*lenp = retlen;
  	    break;
  	}
! 	case CONV_CODEPAGE:	/* current codepage -> ucs-2 */
  	{
  	    int retlen;
  
! 	    retlen = MultiByteToWideChar(GetACP(), 0, ptr, len, 0, 0);
  	    retval = alloc(sizeof(unsigned short) * retlen);
  	    if (retval == NULL)
  		break;
--- 4219,4229 ----
  		*lenp = retlen;
  	    break;
  	}
! 	case CONV_DBCS_TO_UCS2:		/* UTF-8 or DBCS -> UCS-2 */
  	{
  	    int retlen;
  
! 	    retlen = MultiByteToWideChar(vcp->vc_dbcs, 0, ptr, len, 0, 0);
  	    retval = alloc(sizeof(unsigned short) * retlen);
  	    if (retval == NULL)
  		break;
*** ../vim61.155/src/structs.h	Sat Jun 29 16:35:43 2002
--- src/structs.h	Sat Aug  3 21:13:33 2002
***************
*** 691,703 ****
  #endif
  } vir_T;
  
! #define CONV_NONE	0
! #define CONV_TO_UTF8	1
! #define CONV_TO_LATIN1	2
! #define CONV_ICONV	3
  #ifdef FEAT_WINDOWS
! # define CONV_DBCS	4	/* ucs-2 -> dbcs */
! # define CONV_CODEPAGE	5	/* current codepage -> ucs-2 */
  #endif
  
  /*
--- 695,707 ----
  #endif
  } vir_T;
  
! #define CONV_NONE		0
! #define CONV_TO_UTF8		1
! #define CONV_TO_LATIN1		2
! #define CONV_ICONV		3
  #ifdef FEAT_WINDOWS
! # define CONV_UCS2_TO_DBCS	4	/* ucs-2 -> dbcs or utf-8 */
! # define CONV_DBCS_TO_UCS2	5	/* codepage in vc_dbcs -> ucs-2 */
  #endif
  
  /*
*** ../vim61.155/src/version.c	Sun Aug 18 15:56:14 2002
--- src/version.c	Sun Aug 18 16:02:03 2002
***************
*** 608,609 ****
--- 608,611 ----
  {   /* Add new patch number below this line */
+ /**/
+     156,
  /**/

-- 
Get a life?  What is the URL where it can be downloaded?

 ///  Bram Moolenaar -- Bram@moolenaar.net -- http://www.moolenaar.net  \\\
///   Creator of Vim -- http://vim.sf.net -- ftp://ftp.vim.org/pub/vim   \\\
\\\           Project leader for A-A-P -- http://www.a-a-p.org           ///
 \\\ Lord Of The Rings helps Uganda - http://iccf-holland.org/lotr.html ///