1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/string/public/nsReadableUtils.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,407 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 +// IWYU pragma: private, include "nsString.h" 1.9 + 1.10 +#ifndef nsReadableUtils_h___ 1.11 +#define nsReadableUtils_h___ 1.12 + 1.13 + /** 1.14 + * I guess all the routines in this file are all mis-named. 1.15 + * According to our conventions, they should be |NS_xxx|. 1.16 + */ 1.17 + 1.18 +#include "nsAString.h" 1.19 + 1.20 +#include "nsTArrayForwardDeclare.h" 1.21 + 1.22 +inline size_t Distance( const nsReadingIterator<char16_t>& start, const nsReadingIterator<char16_t>& end ) 1.23 + { 1.24 + return end.get() - start.get(); 1.25 + } 1.26 +inline size_t Distance( const nsReadingIterator<char>& start, const nsReadingIterator<char>& end ) 1.27 + { 1.28 + return end.get() - start.get(); 1.29 + } 1.30 + 1.31 +void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ); 1.32 +void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); 1.33 + 1.34 +void LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ); 1.35 +void CopyASCIItoUTF16( const char* aSource, nsAString& aDest ); 1.36 + 1.37 +void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); 1.38 +void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest ); 1.39 + 1.40 +void CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); 1.41 +void CopyUTF8toUTF16( const char* aSource, nsAString& aDest ); 1.42 + 1.43 +void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ); 1.44 +void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); 1.45 +bool AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest, 1.46 + const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; 1.47 + 1.48 +void LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ); 1.49 +void AppendASCIItoUTF16( const char* aSource, nsAString& aDest ); 1.50 + 1.51 +void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); 1.52 +bool AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest, 1.53 + const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; 1.54 +void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ); 1.55 +bool AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest, 1.56 + const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; 1.57 + 1.58 +void AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); 1.59 +void AppendUTF8toUTF16( const char* aSource, nsAString& aDest ); 1.60 + 1.61 +#ifdef MOZ_USE_CHAR16_WRAPPER 1.62 +inline void AppendUTF16toUTF8( char16ptr_t aSource, nsACString& aDest ) 1.63 + { 1.64 + return AppendUTF16toUTF8(static_cast<const char16_t*>(aSource), aDest); 1.65 + } 1.66 +#endif 1.67 + 1.68 + /** 1.69 + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. 1.70 + * 1.71 + * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|. 1.72 + * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer. 1.73 + * This conversion is not well defined; but it reproduces legacy string behavior. 1.74 + * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. 1.75 + * 1.76 + * @param aSource a 16-bit wide string 1.77 + * @return a new |char| buffer you must free with |nsMemory::Free|. 1.78 + */ 1.79 +char* ToNewCString( const nsAString& aSource ); 1.80 + 1.81 + 1.82 + /** 1.83 + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. 1.84 + * 1.85 + * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|. 1.86 + * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. 1.87 + * 1.88 + * @param aSource an 8-bit wide string 1.89 + * @return a new |char| buffer you must free with |nsMemory::Free|. 1.90 + */ 1.91 +char* ToNewCString( const nsACString& aSource ); 1.92 + 1.93 + /** 1.94 + * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. 1.95 + * 1.96 + * Allocates and returns a new |char| buffer which you must free with 1.97 + * |nsMemory::Free|. 1.98 + * Performs an encoding conversion from a UTF-16 string to a UTF-8 string 1.99 + * copying |aSource| to your new buffer. 1.100 + * The new buffer is zero-terminated, but that may not help you if |aSource| 1.101 + * contains embedded nulls. 1.102 + * 1.103 + * @param aSource a UTF-16 string (made of char16_t's) 1.104 + * @param aUTF8Count the number of 8-bit units that was returned 1.105 + * @return a new |char| buffer you must free with |nsMemory::Free|. 1.106 + */ 1.107 + 1.108 +char* ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count = nullptr ); 1.109 + 1.110 + 1.111 + /** 1.112 + * Returns a new |char16_t| buffer containing a zero-terminated copy of 1.113 + * |aSource|. 1.114 + * 1.115 + * Allocates and returns a new |char16_t| buffer which you must free with 1.116 + * |nsMemory::Free|. 1.117 + * The new buffer is zero-terminated, but that may not help you if |aSource| 1.118 + * contains embedded nulls. 1.119 + * 1.120 + * @param aSource a UTF-16 string 1.121 + * @return a new |char16_t| buffer you must free with |nsMemory::Free|. 1.122 + */ 1.123 +char16_t* ToNewUnicode( const nsAString& aSource ); 1.124 + 1.125 + 1.126 + /** 1.127 + * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|. 1.128 + * 1.129 + * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|. 1.130 + * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer. 1.131 + * This conversion is not well defined; but it reproduces legacy string behavior. 1.132 + * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls. 1.133 + * 1.134 + * @param aSource an 8-bit wide string (a C-string, NOT UTF-8) 1.135 + * @return a new |char16_t| buffer you must free with |nsMemory::Free|. 1.136 + */ 1.137 +char16_t* ToNewUnicode( const nsACString& aSource ); 1.138 + 1.139 + /** 1.140 + * Returns the required length for a char16_t buffer holding 1.141 + * a copy of aSource, using UTF-8 to UTF-16 conversion. 1.142 + * The length does NOT include any space for zero-termination. 1.143 + * 1.144 + * @param aSource an 8-bit wide string, UTF-8 encoded 1.145 + * @return length of UTF-16 encoded string copy, not zero-terminated 1.146 + */ 1.147 +uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource ); 1.148 + 1.149 + /** 1.150 + * Copies the source string into the specified buffer, converting UTF-8 to 1.151 + * UTF-16 in the process. The conversion is well defined for valid UTF-8 1.152 + * strings. 1.153 + * The copied string will be zero-terminated! Any embedded nulls will be 1.154 + * copied nonetheless. It is the caller's responsiblity to ensure the buffer 1.155 + * is large enough to hold the string copy plus one char16_t for 1.156 + * zero-termination! 1.157 + * 1.158 + * @see CalcUTF8ToUnicodeLength( const nsACString& ) 1.159 + * @see UTF8ToNewUnicode( const nsACString&, uint32_t* ) 1.160 + * 1.161 + * @param aSource an 8-bit wide string, UTF-8 encoded 1.162 + * @param aBuffer the buffer holding the converted string copy 1.163 + * @param aUTF16Count receiving optionally the number of 16-bit units that 1.164 + * were copied 1.165 + * @return aBuffer pointer, for convenience 1.166 + */ 1.167 +char16_t* UTF8ToUnicodeBuffer( const nsACString& aSource, 1.168 + char16_t *aBuffer, 1.169 + uint32_t *aUTF16Count = nullptr ); 1.170 + 1.171 + /** 1.172 + * Returns a new |char16_t| buffer containing a zero-terminated copy 1.173 + * of |aSource|. 1.174 + * 1.175 + * Allocates and returns a new |char| buffer which you must free with 1.176 + * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16 1.177 + * while copying |aSource| to your new buffer. This conversion is well defined 1.178 + * for a valid UTF-8 string. The new buffer is zero-terminated, but that 1.179 + * may not help you if |aSource| contains embedded nulls. 1.180 + * 1.181 + * @param aSource an 8-bit wide string, UTF-8 encoded 1.182 + * @param aUTF16Count the number of 16-bit units that was returned 1.183 + * @return a new |char16_t| buffer you must free with |nsMemory::Free|. 1.184 + * (UTF-16 encoded) 1.185 + */ 1.186 +char16_t* UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count = nullptr ); 1.187 + 1.188 + /** 1.189 + * Copies |aLength| 16-bit code units from the start of |aSource| to the 1.190 + * |char16_t| buffer |aDest|. 1.191 + * 1.192 + * After this operation |aDest| is not null terminated. 1.193 + * 1.194 + * @param aSource a UTF-16 string 1.195 + * @param aSrcOffset start offset in the source string 1.196 + * @param aDest a |char16_t| buffer 1.197 + * @param aLength the number of 16-bit code units to copy 1.198 + * @return pointer to destination buffer - identical to |aDest| 1.199 + */ 1.200 +char16_t* CopyUnicodeTo( const nsAString& aSource, 1.201 + uint32_t aSrcOffset, 1.202 + char16_t* aDest, 1.203 + uint32_t aLength ); 1.204 + 1.205 + 1.206 + /** 1.207 + * Copies 16-bit characters between iterators |aSrcStart| and 1.208 + * |aSrcEnd| to the writable string |aDest|. Similar to the 1.209 + * |nsString::Mid| method. 1.210 + * 1.211 + * After this operation |aDest| is not null terminated. 1.212 + * 1.213 + * @param aSrcStart start source iterator 1.214 + * @param aSrcEnd end source iterator 1.215 + * @param aDest destination for the copy 1.216 + */ 1.217 +void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, 1.218 + const nsAString::const_iterator& aSrcEnd, 1.219 + nsAString& aDest ); 1.220 + 1.221 + /** 1.222 + * Appends 16-bit characters between iterators |aSrcStart| and 1.223 + * |aSrcEnd| to the writable string |aDest|. 1.224 + * 1.225 + * After this operation |aDest| is not null terminated. 1.226 + * 1.227 + * @param aSrcStart start source iterator 1.228 + * @param aSrcEnd end source iterator 1.229 + * @param aDest destination for the copy 1.230 + */ 1.231 +void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, 1.232 + const nsAString::const_iterator& aSrcEnd, 1.233 + nsAString& aDest ); 1.234 + 1.235 + /** 1.236 + * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). 1.237 + * 1.238 + * @param aString a 16-bit wide string to scan 1.239 + */ 1.240 +bool IsASCII( const nsAString& aString ); 1.241 + 1.242 + /** 1.243 + * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F). 1.244 + * 1.245 + * @param aString a 8-bit wide string to scan 1.246 + */ 1.247 +bool IsASCII( const nsACString& aString ); 1.248 + 1.249 + /** 1.250 + * Returns |true| if |aString| is a valid UTF-8 string. 1.251 + * XXX This is not bullet-proof and nor an all-purpose UTF-8 validator. 1.252 + * It is mainly written to replace and roughly equivalent to 1.253 + * 1.254 + * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str))) 1.255 + * 1.256 + * (see bug 191541) 1.257 + * As such, it does not check for non-UTF-8 7bit encodings such as 1.258 + * ISO-2022-JP and HZ. 1.259 + * 1.260 + * It rejects sequences with the following errors: 1.261 + * 1.262 + * byte sequences that cannot be decoded into characters according to 1.263 + * UTF-8's rules (including cases where the input is part of a valid 1.264 + * UTF-8 sequence but starts or ends mid-character) 1.265 + * overlong sequences (i.e., cases where a character was encoded 1.266 + * non-canonically by using more bytes than necessary) 1.267 + * surrogate codepoints (i.e., the codepoints reserved for 1.268 + representing astral characters in UTF-16) 1.269 + * codepoints above the unicode range (i.e., outside the first 17 1.270 + * planes; higher than U+10FFFF), in accordance with 1.271 + * http://tools.ietf.org/html/rfc3629 1.272 + * when aRejectNonChar is true (the default), any codepoint whose low 1.273 + * 16 bits are 0xFFFE or 0xFFFF 1.274 + 1.275 + * 1.276 + * @param aString an 8-bit wide string to scan 1.277 + * @param aRejectNonChar a boolean to control the rejection of utf-8 1.278 + * non characters 1.279 + */ 1.280 +bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true ); 1.281 + 1.282 +bool ParseString(const nsACString& aAstring, char aDelimiter, 1.283 + nsTArray<nsCString>& aArray); 1.284 + 1.285 + /** 1.286 + * Converts case in place in the argument string. 1.287 + */ 1.288 +void ToUpperCase( nsACString& ); 1.289 + 1.290 +void ToLowerCase( nsACString& ); 1.291 + 1.292 +void ToUpperCase( nsCSubstring& ); 1.293 + 1.294 +void ToLowerCase( nsCSubstring& ); 1.295 + 1.296 + /** 1.297 + * Converts case from string aSource to aDest. 1.298 + */ 1.299 +void ToUpperCase( const nsACString& aSource, nsACString& aDest ); 1.300 + 1.301 +void ToLowerCase( const nsACString& aSource, nsACString& aDest ); 1.302 + 1.303 + /** 1.304 + * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|. 1.305 + * 1.306 + * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to 1.307 + * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. 1.308 + * 1.309 + * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|. 1.310 + * If we need something faster, then we can implement that later. 1.311 + */ 1.312 + 1.313 +bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() ); 1.314 +bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() ); 1.315 + 1.316 +/* sometimes we don't care about where the string was, just that we 1.317 + * found it or not */ 1.318 +inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() ) 1.319 +{ 1.320 + nsAString::const_iterator start, end; 1.321 + aSource.BeginReading(start); 1.322 + aSource.EndReading(end); 1.323 + return FindInReadable(aPattern, start, end, compare); 1.324 +} 1.325 + 1.326 +inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() ) 1.327 +{ 1.328 + nsACString::const_iterator start, end; 1.329 + aSource.BeginReading(start); 1.330 + aSource.EndReading(end); 1.331 + return FindInReadable(aPattern, start, end, compare); 1.332 +} 1.333 + 1.334 + 1.335 +bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& ); 1.336 + 1.337 + /** 1.338 + * Finds the rightmost occurrence of |aPattern| 1.339 + * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to 1.340 + * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|. 1.341 + * 1.342 + */ 1.343 +bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() ); 1.344 +bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() ); 1.345 + 1.346 + /** 1.347 + * Finds the leftmost occurrence of |aChar|, if any in the range 1.348 + * |aSearchStart|..|aSearchEnd|. 1.349 + * 1.350 + * Returns |true| if a match was found, and adjusts |aSearchStart| to 1.351 + * point to the match. If no match was found, returns |false| and 1.352 + * makes |aSearchStart == aSearchEnd|. 1.353 + */ 1.354 +bool FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ); 1.355 +bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ); 1.356 + 1.357 + /** 1.358 + * Finds the number of occurences of |aChar| in the string |aStr| 1.359 + */ 1.360 +uint32_t CountCharInReadable( const nsAString& aStr, 1.361 + char16_t aChar ); 1.362 +uint32_t CountCharInReadable( const nsACString& aStr, 1.363 + char aChar ); 1.364 + 1.365 +bool 1.366 +StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, 1.367 + const nsStringComparator& aComparator = 1.368 + nsDefaultStringComparator() ); 1.369 +bool 1.370 +StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, 1.371 + const nsCStringComparator& aComparator = 1.372 + nsDefaultCStringComparator() ); 1.373 +bool 1.374 +StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, 1.375 + const nsStringComparator& aComparator = 1.376 + nsDefaultStringComparator() ); 1.377 +bool 1.378 +StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, 1.379 + const nsCStringComparator& aComparator = 1.380 + nsDefaultCStringComparator() ); 1.381 + 1.382 +const nsAFlatString& EmptyString(); 1.383 +const nsAFlatCString& EmptyCString(); 1.384 + 1.385 +const nsAFlatString& NullString(); 1.386 +const nsAFlatCString& NullCString(); 1.387 + 1.388 + /** 1.389 + * Compare a UTF-8 string to an UTF-16 string. 1.390 + * 1.391 + * Returns 0 if the strings are equal, -1 if aUTF8String is less 1.392 + * than aUTF16Count, and 1 in the reverse case. In case of fatal 1.393 + * error (eg the strings are not valid UTF8 and UTF16 respectively), 1.394 + * this method will return INT32_MIN. 1.395 + */ 1.396 +int32_t 1.397 +CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, 1.398 + const nsASingleFragmentString& aUTF16String); 1.399 + 1.400 +void 1.401 +AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest); 1.402 + 1.403 +template<class T> 1.404 +inline bool EnsureStringLength(T& aStr, uint32_t aLen) 1.405 +{ 1.406 + aStr.SetLength(aLen); 1.407 + return (aStr.Length() == aLen); 1.408 +} 1.409 + 1.410 +#endif // !defined(nsReadableUtils_h___)