strdup16to8.cpp 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. /* libs/cutils/strdup16to8.c
  2. **
  3. ** Copyright 2006, The Android Open Source Project
  4. **
  5. ** Licensed under the Apache License, Version 2.0 (the "License");
  6. ** you may not use this file except in compliance with the License.
  7. ** You may obtain a copy of the License at
  8. **
  9. ** http://www.apache.org/licenses/LICENSE-2.0
  10. **
  11. ** Unless required by applicable law or agreed to in writing, software
  12. ** distributed under the License is distributed on an "AS IS" BASIS,
  13. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. ** See the License for the specific language governing permissions and
  15. ** limitations under the License.
  16. */
  17. #include <cutils/jstring.h>
  18. #include <assert.h>
  19. #include <limits.h> /* for SIZE_MAX */
  20. #include <stdlib.h>
  /**
   * Compute how many bytes the JNI-style UTF-8 encoding of a UTF-16 string
   * occupies, not counting any terminating '\0'.
   *
   * Each of the "len" code units in "utf16Str" contributes:
   *   - 1 byte  for 0x0001..0x007f
   *   - 2 bytes for 0x0000 and for 0x0080..0x07ff
   *   - 3 bytes for everything above 0x07ff
   *
   * The result is never SIZE_MAX: if the running total overflows, or lands
   * exactly on SIZE_MAX, SIZE_MAX-1 is returned instead. That way a caller
   * doing malloc(strnlen16to8(s, len) + 1) without an overflow check asks for
   * SIZE_MAX bytes (which fails cleanly) rather than wrapping around to 0.
   */
  extern size_t strnlen16to8(const char16_t* utf16Str, size_t len)
  {
      size_t total = 0;

      /* Common case: even at 3 bytes per unit the sum stays below SIZE_MAX-1,
       * so no per-iteration overflow check is needed.
       */
      if (len < (SIZE_MAX - 1) / 3) {
          for (size_t i = 0; i < len; i++) {
              const unsigned int unit = utf16Str[i];
              if (unit != 0 && unit <= 0x7f) {
                  total += 1;
              } else if (unit <= 0x07ff) {
                  total += 2;
              } else {
                  total += 3;
              }
          }
          return total;
      }

      /* Paranoid case: the sum may wrap, so check before every addition. */
      for (size_t i = 0; i < len; i++) {
          const unsigned int unit = utf16Str[i];
          size_t width;
          if (unit != 0 && unit <= 0x7f) {
              width = 1;
          } else if (unit <= 0x07ff) {
              width = 2;
          } else {
              width = 3;
          }
          /* Unsigned addition wraps; a smaller sum means we overflowed. */
          if (total + width < total) {
              return SIZE_MAX - 1;
          }
          total += width;
      }

      /* Never hand back SIZE_MAX itself, so that "+ 1" cannot wrap to 0. */
      return (total == SIZE_MAX) ? SIZE_MAX - 1 : total;
  }
  /**
   * Convert a Java-style UTF-16 string + length to a JNI-style UTF-8 string.
   *
   * Each UTF-16 code unit is encoded on its own:
   *   - 0x0001..0x007f            -> one byte
   *   - 0x0000 and 0x0080..0x07ff -> two bytes, so an embedded \0 is written
   *                                  as "0xc0 0x80"
   *   - 0x0800..0xffff            -> three bytes
   *
   * Exactly "len" code units are consumed; an embedded \0 in the input does
   * not end the copy. A terminating '\0' is always appended afterwards.
   *
   * No bounds checking is done on "utf8Str": allocate it with the result of
   * strnlen16to8(utf16Str, len) + 1, not just "len", and check that result
   * (or the allocation) for overflow before calling.
   *
   * Returns "utf8Str".
   */
  extern char* strncpy16to8(char* utf8Str, const char16_t* utf16Str, size_t len)
  {
      char* out = utf8Str;

      for (size_t i = 0; i < len; i++) {
          const unsigned int uic = utf16Str[i];

          if (uic > 0x07ff) {
              /* 1110xxxx 10xxxxxx 10xxxxxx */
              *out++ = (char)((uic >> 12) | 0xe0);
              *out++ = (char)(((uic >> 6) & 0x3f) | 0x80);
              *out++ = (char)((uic & 0x3f) | 0x80);
          } else if (uic > 0x7f || uic == 0) {
              /* 110xxxxx 10xxxxxx; also used for \0 so that the output never
               * contains a zero byte before the terminator.
               */
              *out++ = (char)((uic >> 6) | 0xc0);
              *out++ = (char)((uic & 0x3f) | 0x80);
          } else {
              /* Plain 7-bit value; uic cannot be 0 here because that case is
               * taken by the two-byte branch above.
               */
              *out++ = (char)uic;
          }
      }

      *out = '\0';
      return utf8Str;
  }
  122. /**
  123. * Convert a UTF-16 string to UTF-8.
  124. *
  125. */
  126. char * strndup16to8 (const char16_t* s, size_t n)
  127. {
  128. if (s == NULL) {
  129. return NULL;
  130. }
  131. size_t len = strnlen16to8(s, n);
  132. /* We are paranoid, and we check for SIZE_MAX-1
  133. * too since it is an overflow value for our
  134. * strnlen16to8 implementation.
  135. */
  136. if (len >= SIZE_MAX-1)
  137. return NULL;
  138. char* ret = static_cast<char*>(malloc(len + 1));
  139. if (ret == NULL)
  140. return NULL;
  141. strncpy16to8 (ret, s, n);
  142. return ret;
  143. }