/*
 * Copyright (C) 2005 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_UNICODE_H
#define ANDROID_UNICODE_H

#include <sys/types.h>
#include <stdint.h>

/*
 * All declarations below have C linkage. The linkage block is opened only
 * when compiling as C++, because `extern "C"` is not valid C syntax and the
 * typedefs below are explicitly meant to be usable from plain C as well.
 */
#ifdef __cplusplus
extern "C" {
#endif

#if !defined(__cplusplus) || __cplusplus == 199711L // C or C++98
typedef uint32_t char32_t;
typedef uint16_t char16_t;
#endif

// Standard string functions on char16_t strings.
int strcmp16(const char16_t *s1, const char16_t *s2);
int strncmp16(const char16_t *s1, const char16_t *s2, size_t n);
size_t strlen16(const char16_t *s);
size_t strnlen16(const char16_t *s, size_t maxlen);
char16_t *strcpy16(char16_t *dst, const char16_t *src);
char16_t *strncpy16(char16_t *dst, const char16_t *src, size_t n);

// Version of comparison that supports embedded nulls.
// This is different than strncmp() because we don't stop
// at a nul character and consider the strings to be different
// if the lengths are different (thus we need to supply the
// lengths of both strings). This can also be used when
// your string is not nul-terminated as it will have the
// equivalent result as strcmp16 (unlike strncmp16).
int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2);

// Version of strzcmp16 for comparing strings in different endianness.
// NOTE(review): the parameter names suggest "s1H" is in host byte order and
// "s2N" in network byte order -- confirm against the implementation.
int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2);

// Standard string functions on char32_t strings.
size_t strlen32(const char32_t *s);
size_t strnlen32(const char32_t *s, size_t maxlen);

/**
 * Measure the length of a UTF-32 string in UTF-8. If the string is invalid
 * such as containing a surrogate character, -1 will be returned.
 */
ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len);

/**
 * Stores a UTF-8 string converted from "src" in "dst", if "dst_length" is not
 * large enough to store the string, the part of the "src" string is stored
 * into "dst" as much as possible. See the examples for more detail.
 * Returns the size actually used for storing the string.
 * "dst" is not null-terminated when dst_len is fully used (like strncpy).
 *
 * Example 1
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" >= 7
 * ->
 * Returned value == 6
 * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0
 * (note that "dst" is null-terminated)
 *
 * Example 2
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" == 5
 * ->
 * Returned value == 3
 * "dst" becomes \xE3\x81\x82\0
 * (note that "dst" is null-terminated, but \u3044 is not stored in "dst"
 * since "dst" does not have enough size to store the character)
 *
 * Example 3
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" == 6
 * ->
 * Returned value == 6
 * "dst" becomes \xE3\x81\x82\xE3\x81\x84
 * (note that "dst" is NOT null-terminated, like strncpy)
 *
 * NOTE(review): the description and examples above refer to a "dst_len"
 * argument and a returned size, but this declaration takes no "dst_len" and
 * returns void, so neither the truncation nor the returned size is
 * observable through this interface. Presumably "dst" must be large enough
 * for the whole result as measured by utf32_to_utf8_length plus a
 * terminator -- confirm against the implementation.
 */
void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst);

/**
 * Returns the unicode value at "index".
 * Returns -1 when the index is invalid (equal to or more than "src_len").
 * If the returned value is positive, it can be converted to char32_t, which
 * is unsigned. Then, if "next_index" is not NULL, the next index to be used is
 * stored in "next_index". "next_index" can be NULL.
 */
int32_t utf32_from_utf8_at(const char *src, size_t src_len, size_t index, size_t *next_index);


/**
 * Returns the UTF-8 length of UTF-16 string "src".
 */
ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len);

/**
 * Converts a UTF-16 string to UTF-8. The destination buffer must be large
 * enough to fit the UTF-16 as measured by utf16_to_utf8_length with an added
 * NULL terminator.
 */
void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst);

/**
 * Returns the length of "src" when "src" is a valid UTF-8 string.
 * Returns 0 if src is NULL or a 0-length string. Returns -1 when the source
 * is an invalid string.
 *
 * This function should be used to determine whether "src" is valid UTF-8
 * characters with valid unicode codepoints. "src" must be null-terminated.
 *
 * If you are going to use other utf8_to_... functions defined in this header
 * with a string which may not be valid UTF-8 with valid codepoints (from 0 to
 * 0x10FFFF), you should use this function before calling others, since the
 * other functions do not check whether the string is valid UTF-8 or not.
 *
 * If you do not care whether "src" is valid UTF-8 or not, you should use
 * strlen() as usual, which should be much faster.
 */
ssize_t utf8_length(const char *src);

/**
 * Measure the length that the UTF-8 string "src" will have once converted
 * to UTF-32 (this is the measurement utf8_to_utf32 relies on for sizing its
 * destination buffer).
 */
size_t utf8_to_utf32_length(const char *src, size_t src_len);

/**
 * Stores a UTF-32 string converted from "src" in "dst". "dst" must be large
 * enough to store the entire converted string as measured by
 * utf8_to_utf32_length plus space for a NULL terminator.
 */
void utf8_to_utf32(const char* src, size_t src_len, char32_t* dst);

/**
 * Returns the UTF-16 length of UTF-8 string "src".
 */
ssize_t utf8_to_utf16_length(const uint8_t* src, size_t srcLen);

/**
 * Convert UTF-8 to UTF-16 including surrogate pairs.
 * Returns a pointer to the end of the string (where a null terminator might go
 * if you wanted to add one).
 */
char16_t* utf8_to_utf16_no_null_terminator(const uint8_t* src, size_t srcLen, char16_t* dst);

/**
 * Convert UTF-8 to UTF-16 including surrogate pairs. The destination buffer
 * must be large enough to hold the result as measured by utf8_to_utf16_length
 * plus an added NULL terminator.
 */
void utf8_to_utf16(const uint8_t* src, size_t srcLen, char16_t* dst);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // ANDROID_UNICODE_H