update uc functions
This commit is contained in:
18
pike.c
18
pike.c
@@ -6,7 +6,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
const unsigned char utf8_length[256] = {
|
unsigned char utf8_length[256] = {
|
||||||
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
||||||
/* 0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* 0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
/* 1 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* 1 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
@@ -20,25 +20,23 @@ const unsigned char utf8_length[256] = {
|
|||||||
/* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
/* A */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* A */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
/* B */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* B */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
/* C */ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
/* C */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
/* D */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
/* D */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
/* E */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
/* E */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
/* F */ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
/* F */ 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1
|
||||||
};
|
};
|
||||||
|
|
||||||
/* return the length of a utf-8 character */
|
/* return the length of a utf-8 character */
|
||||||
#define uc_len(dst, s) \
|
#define uc_len(dst, s) dst = utf8_length[(unsigned char)s[0]];
|
||||||
dst = utf8_length[(unsigned char)s[0]]; \
|
|
||||||
|
|
||||||
/* the unicode codepoint of the given utf-8 character */
|
/* the unicode codepoint of the given utf-8 character */
|
||||||
#define uc_code(dst, s) \
|
#define uc_code(dst, s) \
|
||||||
dst = (unsigned char)s[0]; \
|
dst = (unsigned char)s[0]; \
|
||||||
if (~dst & 0xc0); \
|
if (dst < 192){} \
|
||||||
else if (~dst & 0x20) \
|
else if (dst < 224) \
|
||||||
dst = ((dst & 0x1f) << 6) | (s[1] & 0x3f); \
|
dst = ((dst & 0x1f) << 6) | (s[1] & 0x3f); \
|
||||||
else if (~dst & 0x10) \
|
else if (dst < 240) \
|
||||||
dst = ((dst & 0x0f) << 12) | ((s[1] & 0x3f) << 6) | (s[2] & 0x3f); \
|
dst = ((dst & 0x0f) << 12) | ((s[1] & 0x3f) << 6) | (s[2] & 0x3f); \
|
||||||
else if (~dst & 0x08) \
|
else if (dst < 248) \
|
||||||
dst = ((dst & 0x07) << 18) | ((s[1] & 0x3f) << 12) | \
|
dst = ((dst & 0x07) << 18) | ((s[1] & 0x3f) << 12) | \
|
||||||
((s[2] & 0x3f) << 6) | (s[3] & 0x3f); \
|
((s[2] & 0x3f) << 6) | (s[3] & 0x3f); \
|
||||||
else \
|
else \
|
||||||
|
|||||||
Reference in New Issue
Block a user