123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215 |
- #include <cutils/jstring.h>
- #include <assert.h>
- #include <limits.h>
- #include <stdlib.h>
/* Code unit emitted in place of input that cannot be decoded or encoded. */
#define UTF16_REPLACEMENT_CHAR 0xfffd

/*
 * Length in bytes (1..4) of the UTF-8 sequence introduced by byte `ch`.
 *
 * The top four bits of `ch` pick a 2-bit field out of the packed constant
 * 0xe5000000; that field plus one is the length:
 *   0x00..0xbf -> 1   (ASCII, and continuation bytes seen out of place)
 *   0xc0..0xdf -> 2
 *   0xe0..0xef -> 3
 *   0xf0..0xff -> 4
 *
 * `ch` is evaluated exactly once.
 */
#define UTF8_SEQ_LENGTH(ch) (((0xe5000000 >> (((ch) >> 3) & 0x1e)) & 3) + 1)

/*
 * Append the low six bits of continuation byte `byte` to the accumulator
 * `unicode` (which must be a modifiable lvalue; it is evaluated twice).
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * e.g. as the unbraced body of an if/else. Call it with a trailing ';'.
 */
#define UTF8_SHIFT_AND_MASK(unicode, byte) \
    do { (unicode) <<= 6; (unicode) |= (0x3f & (byte)); } while (0)

/*
 * Largest decoded value that the copy routines encode as a surrogate pair;
 * anything above it is replaced with UTF16_REPLACEMENT_CHAR.
 * NOTE(review): this is 0x10fffd rather than 0x10ffff, so U+10FFFE and
 * U+10FFFF are replaced as well -- presumably deliberate; confirm.
 */
#define UNICODE_UPPER_LIMIT 0x10fffd
/**
 * Convert the NUL-terminated UTF-8 string `s` into a newly malloc()ed
 * UTF-16 buffer.
 *
 * The buffer is sized with strlen8to16(s) and filled by strcpy8to16();
 * nothing here appends a terminating code unit, so use *out_len for the
 * length of the result.
 *
 * @param s        UTF-8 input; NULL yields NULL and leaves *out_len untouched.
 * @param out_len  receives the number of UTF-16 code units written; must not
 *                 be NULL whenever `s` is non-NULL.
 * @return the buffer (release it with free()), or NULL if `s` is NULL, the
 *         byte size would overflow size_t, or the allocation fails. On
 *         allocation failure *out_len is set to 0.
 */
extern char16_t * strdup8to16 (const char* s, size_t *out_len)
{
    char16_t *ret;
    size_t len;

    if (s == NULL) return NULL;

    len = strlen8to16(s);

    /* Refuse sizes where sizeof(char16_t) * len would wrap around. */
    if (len && SIZE_MAX/len < sizeof(char16_t))
        return NULL;

    ret = (char16_t *) malloc (sizeof(char16_t) * len);

    /*
     * malloc() may fail (and may legitimately return NULL for a zero-size
     * request); never hand a NULL destination to strcpy8to16().
     */
    if (ret == NULL) {
        *out_len = 0;
        return NULL;
    }

    return strcpy8to16 (ret, s, out_len);
}
/**
 * Count the UTF-16 code units to reserve for the NUL-terminated UTF-8
 * string `utf8Str`.
 *
 * Every non-continuation byte counts as one unit, or two when it announces
 * a four-byte sequence (which needs a surrogate pair). Continuation bytes
 * are free while they belong to the sequence announced by the preceding
 * lead byte; each surplus continuation byte counts as one unit.
 *
 * @param utf8Str  NUL-terminated input; must not be NULL.
 * @return the number of UTF-16 code units counted.
 */
extern size_t strlen8to16 (const char* utf8Str)
{
    size_t units = 0;
    int pending = 0;    /* continuation bytes still owed to the current lead */
    const char *cursor;

    for (cursor = utf8Str; *cursor != '\0'; cursor++) {
        const int byte = *cursor;

        if ((byte & 0xc0) != 0x80) {
            /* Lead (or ASCII) byte: starts a new character. */
            pending = UTF8_SEQ_LENGTH(byte) - 1;
            units += (pending == 3) ? 2 : 1;
            continue;
        }

        /* Continuation byte: only counted once the lead's quota is spent. */
        if (--pending < 0) {
            units++;
        }
    }

    return units;
}
- static inline uint32_t getUtf32FromUtf8(const char** pUtf8Ptr)
- {
- uint32_t ret;
- int seq_len;
- int i;
-
- static const unsigned char leaderMask[4] = {0xff, 0x1f, 0x0f, 0x07};
-
- if (((**pUtf8Ptr) & 0xc0) == 0x80) {
- (*pUtf8Ptr)++;
- return UTF16_REPLACEMENT_CHAR;
- }
-
- seq_len = UTF8_SEQ_LENGTH(**pUtf8Ptr);
- ret = (**pUtf8Ptr) & leaderMask [seq_len - 1];
- if (**pUtf8Ptr == '\0') return ret;
- (*pUtf8Ptr)++;
- for (i = 1; i < seq_len ; i++, (*pUtf8Ptr)++) {
- if ((**pUtf8Ptr) == '\0') return UTF16_REPLACEMENT_CHAR;
- if (((**pUtf8Ptr) & 0xc0) != 0x80) return UTF16_REPLACEMENT_CHAR;
- UTF8_SHIFT_AND_MASK(ret, **pUtf8Ptr);
- }
- return ret;
- }
- extern char16_t * strcpy8to16 (char16_t *utf16Str, const char*utf8Str,
- size_t *out_len)
- {
- char16_t *dest = utf16Str;
- while (*utf8Str != '\0') {
- uint32_t ret;
- ret = getUtf32FromUtf8(&utf8Str);
- if (ret <= 0xffff) {
- *dest++ = (char16_t) ret;
- } else if (ret <= UNICODE_UPPER_LIMIT) {
-
-
- *dest++ = 0xd800 | ((ret - 0x10000) >> 10);
- *dest++ = 0xdc00 | ((ret - 0x10000) & 0x3ff);
- } else {
- *dest++ = UTF16_REPLACEMENT_CHAR;
- }
- }
- *out_len = dest - utf16Str;
- return utf16Str;
- }
- extern char16_t * strcpylen8to16 (char16_t *utf16Str, const char*utf8Str,
- int length, size_t *out_len)
- {
-
-
- char16_t *dest = utf16Str;
- const char *end = utf8Str + length;
- while (utf8Str < end) {
- uint32_t ret;
- ret = getUtf32FromUtf8(&utf8Str);
- if (ret <= 0xffff) {
- *dest++ = (char16_t) ret;
- } else if (ret <= UNICODE_UPPER_LIMIT) {
-
-
- *dest++ = 0xd800 | ((ret - 0x10000) >> 10);
- *dest++ = 0xdc00 | ((ret - 0x10000) & 0x3ff);
- } else {
- *dest++ = UTF16_REPLACEMENT_CHAR;
- }
- }
- *out_len = dest - utf16Str;
- return utf16Str;
- }
|