To: vim_dev@googlegroups.com
Subject: Patch 8.2.2233
Fcc: outbox
From: Bram Moolenaar
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 8.2.2233
Problem:    Cannot convert a byte index into a character index.
Solution:   Add charidx(). (Yegappan Lakshmanan, closes #7561)
Files:      runtime/doc/eval.txt, runtime/doc/usr_41.txt, src/evalfunc.c,
            src/testdir/test_functions.vim


*** ../vim-8.2.2232/runtime/doc/eval.txt	2020-11-30 17:40:41.299714396 +0100
--- runtime/doc/eval.txt	2020-12-28 12:55:01.155827080 +0100
***************
*** 2441,2446 ****
--- 2475,2482 ----
  changenr()			Number	current change number
  char2nr({expr} [, {utf8}])	Number	ASCII/UTF8 value of first char in {expr}
  charclass({string})		Number	character class of {string}
+ charidx({string}, {idx} [, {countcc}])
+ 				Number  char index of byte {idx} in {string}
  chdir({dir})			String	change current working directory
  cindent({lnum})			Number	C indent for line {lnum}
  clearmatches([{win}])		none	clear all matches
***************
*** 3550,3555 ****
--- 3590,3620 ----
  			other	specific Unicode class
  		The class is used in patterns and word motions.
  
+ 							*charidx()*
+ charidx({string}, {idx} [, {countcc}])
+ 		Return the character index of the byte at {idx} in {string}.
+ 		The index of the first character is zero.
+ 		If there are no multibyte characters the returned value is
+ 		equal to {idx}.
+ 		When {countcc} is omitted or zero, then composing characters
+ 		are not counted separately, their byte length is added to the
+ 		preceding base character.
+ 		When {countcc} is set to 1, then composing characters are
+ 		counted as separate characters.
+ 		Returns -1 if the arguments are invalid or if {idx} is greater
+ 		than the index of the last byte in {string}.  An error is
+ 		given if the first argument is not a string, the second
+ 		argument is not a number or when the third argument is present
+ 		and is not zero or one.
+ 		See |byteidx()| and |byteidxcomp()| for getting the byte index
+ 		from the character index.
+ 		Examples: >
+ 			echo charidx('áb́ć', 3)		returns 1
+ 			echo charidx('áb́ć', 6, 1)	returns 4
+ 			echo charidx('áb́ć', 16)		returns -1
+ <
+ 		Can also be used as a |method|: >
+ 			GetName()->charidx(idx)
  
  chdir({dir})						*chdir()*
  		Change the current working directory to {dir}.  The scope of
*** ../vim-8.2.2232/runtime/doc/usr_41.txt	2020-11-09 18:31:30.544791868 +0100
--- runtime/doc/usr_41.txt	2020-12-28 12:47:17.340500809 +0100
***************
*** 617,622 ****
--- 625,631 ----
  	iconv()			convert text from one encoding to another
  	byteidx()		byte index of a character in a string
  	byteidxcomp()		like byteidx() but count composing characters
+ 	charidx()		character index of a byte in a string
  	repeat()		repeat a string multiple times
  	eval()			evaluate a string expression
  	execute()		execute an Ex command and get the output
*** ../vim-8.2.2232/src/evalfunc.c	2020-12-21 21:58:42.607687803 +0100
--- src/evalfunc.c	2020-12-28 12:51:40.224101632 +0100
***************
*** 47,52 ****
--- 47,53 ----
  #endif
  static void f_changenr(typval_T *argvars, typval_T *rettv);
  static void f_char2nr(typval_T *argvars, typval_T *rettv);
+ static void f_charidx(typval_T *argvars, typval_T *rettv);
  static void f_col(typval_T *argvars, typval_T *rettv);
  static void f_confirm(typval_T *argvars, typval_T *rettv);
  static void f_copy(typval_T *argvars, typval_T *rettv);
***************
*** 789,794 ****
--- 790,797 ----
  			ret_number,	    f_char2nr},
      {"charclass",	1, 1, FEARG_1,	    NULL,
  			ret_number,	    f_charclass},
+     {"charidx",		2, 3, FEARG_1,	    NULL,
+ 			ret_number,	    f_charidx},
      {"chdir",		1, 1, FEARG_1,	    NULL,
  			ret_string,	    f_chdir},
      {"cindent",		1, 1, FEARG_1,	    NULL,
***************
*** 2420,2425 ****
--- 2423,2487 ----
      rettv->vval.v_number = tv_get_string(&argvars[0])[0];
  }
  
+ /*
+  * "charidx()" function
+  *
+  * Store in "rettv" the character index of the byte at index argvars[1] in
+  * the string argvars[0].  When the optional argvars[2] is 1, composing
+  * characters are counted as separate characters.  The result is -1 when an
+  * argument is invalid or the byte index is past the end of the string.
+  */
+     static void
+ f_charidx(typval_T *argvars, typval_T *rettv)
+ {
+     char_u	*str;
+     varnumber_T	idx;
+     int		countcc = FALSE;
+     char_u	*p;
+     int		len;
+     int		(*ptr2len)(char_u *);
+ 
+     rettv->vval.v_number = -1;
+ 
+     if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
+ 	    || (argvars[2].v_type != VAR_UNKNOWN
+ 		&& argvars[2].v_type != VAR_NUMBER))
+     {
+ 	emsg(_(e_invarg));
+ 	return;
+     }
+ 
+     str = tv_get_string_chk(&argvars[0]);
+     idx = tv_get_number_chk(&argvars[1], NULL);
+     if (str == NULL || idx < 0)
+ 	return;
+ 
+     if (argvars[2].v_type != VAR_UNKNOWN)
+ 	countcc = (int)tv_get_bool(&argvars[2]);
+     if (countcc < 0 || countcc > 1)
+     {
+ 	semsg(_(e_using_number_as_bool_nr), countcc);
+ 	return;
+     }
+ 
+     if (enc_utf8 && countcc)
+ 	ptr2len = utf_ptr2len;
+     else
+ 	ptr2len = mb_ptr2len;
+ 
+     // Compare byte offsets instead of computing "str + idx": "idx" comes from
+     // the caller and may be far past the end of the string, in which case
+     // forming that pointer is undefined behavior and may wrap or truncate.
+     for (p = str, len = 0; (varnumber_T)(p - str) <= idx; len++)
+     {
+ 	if (*p == NUL)
+ 	    return;
+ 	p += ptr2len(p);
+     }
+ 
+     rettv->vval.v_number = len > 0 ? len - 1 : 0;
+ }
+ 
      win_T *
  get_optional_window(typval_T *argvars, int idx)
  {
*** ../vim-8.2.2232/src/testdir/test_functions.vim	2020-12-06 15:03:14.821379868 +0100
--- src/testdir/test_functions.vim	2020-12-28 12:47:17.340500809 +0100
***************
*** 1132,1137 ****
--- 1132,1162 ----
    call assert_fails("call byteidxcomp([], 0)", 'E730:')
  endfunc
  
+ " Test for charidx()
+ func Test_charidx()
+   let a = 'xáb́y'
+   call assert_equal(0, charidx(a, 0))
+   call assert_equal(1, charidx(a, 3))
+   call assert_equal(2, charidx(a, 4))
+   call assert_equal(3, charidx(a, 7))
+   call assert_equal(-1, charidx(a, 8))
+   call assert_equal(-1, charidx('', 0))
+ 
+   " count composing characters
+   call assert_equal(0, charidx(a, 0, 1))
+   call assert_equal(2, charidx(a, 2, 1))
+   call assert_equal(3, charidx(a, 4, 1))
+   call assert_equal(5, charidx(a, 7, 1))
+   call assert_equal(-1, charidx(a, 8, 1))
+   call assert_equal(-1, charidx('', 0, 1))
+ 
+   call assert_fails('let x = charidx([], 1)', 'E474:')
+   call assert_fails('let x = charidx("abc", [])', 'E474:')
+   call assert_fails('let x = charidx("abc", 1, [])', 'E474:')
+   call assert_fails('let x = charidx("abc", 1, -1)', 'E1023:')
+   call assert_fails('let x = charidx("abc", 1, 2)', 'E1023:')
+ endfunc
+ 
  func Test_count()
    let l = ['a', 'a', 'A', 'b']
    call assert_equal(2, count(l, 'a'))
*** ../vim-8.2.2232/src/version.c	2020-12-27 19:17:52.507402031 +0100
--- src/version.c	2020-12-28 12:55:51.975736909 +0100
***************
*** 752,753 ****
--- 752,755 ----
  {   /* Add new patch number below this line */
+ /**/
+     2233,
  /**/

-- 
To define recursion, we must first define recursion.

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///