update uc functions

This commit is contained in:
Kyryl Melekhin
2021-11-26 11:45:06 +00:00
parent f783b12300
commit 14fd849706

22
pike.c
View File

@@ -6,8 +6,8 @@
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
const unsigned char utf8_length[256] = { unsigned char utf8_length[256] = {
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
/* 0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 1 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 1 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 2 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 2 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -20,25 +20,23 @@ const unsigned char utf8_length[256] = {
/* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* A */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* B */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* C */ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* D */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* E */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
/* F */ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* F */ 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1
}; };
/* return the length of a utf-8 character */ /* return the length of a utf-8 character */
#define uc_len(dst, s) \ #define uc_len(dst, s) dst = utf8_length[(unsigned char)s[0]];
dst = utf8_length[(unsigned char)s[0]]; \
/* the unicode codepoint of the given utf-8 character */ /* the unicode codepoint of the given utf-8 character */
#define uc_code(dst, s) \ #define uc_code(dst, s) \
dst = (unsigned char) s[0]; \ dst = (unsigned char)s[0]; \
if (~dst & 0xc0); \ if (dst < 192){} \
else if (~dst & 0x20) \ else if (dst < 224) \
dst = ((dst & 0x1f) << 6) | (s[1] & 0x3f); \ dst = ((dst & 0x1f) << 6) | (s[1] & 0x3f); \
else if (~dst & 0x10) \ else if (dst < 240) \
dst = ((dst & 0x0f) << 12) | ((s[1] & 0x3f) << 6) | (s[2] & 0x3f); \ dst = ((dst & 0x0f) << 12) | ((s[1] & 0x3f) << 6) | (s[2] & 0x3f); \
else if (~dst & 0x08) \ else if (dst < 248) \
dst = ((dst & 0x07) << 18) | ((s[1] & 0x3f) << 12) | \ dst = ((dst & 0x07) << 18) | ((s[1] & 0x3f) << 12) | \
((s[2] & 0x3f) << 6) | (s[3] & 0x3f); \ ((s[2] & 0x3f) << 6) | (s[3] & 0x3f); \
else \ else \