xpcom/string/public/nsReadableUtils.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 // IWYU pragma: private, include "nsString.h"
michael@0 6
michael@0 7 #ifndef nsReadableUtils_h___
michael@0 8 #define nsReadableUtils_h___
michael@0 9
michael@0 10 /**
michael@0 11 * I guess the routines in this file are all mis-named.
michael@0 12 * According to our conventions, they should be |NS_xxx|.
michael@0 13 */
michael@0 14
michael@0 15 #include "nsAString.h"
michael@0 16
michael@0 17 #include "nsTArrayForwardDeclare.h"
michael@0 18
michael@0 19 inline size_t Distance( const nsReadingIterator<char16_t>& start, const nsReadingIterator<char16_t>& end )
michael@0 20 { // how far |end|'s get() position lies past |start|'s (16-bit iterator overload)
michael@0 21 return static_cast<size_t>(end.get() - start.get()); // same conversion to size_t the return statement performed implicitly
michael@0 22 }
michael@0 23 inline size_t Distance( const nsReadingIterator<char>& start, const nsReadingIterator<char>& end )
michael@0 24 { // how far |end|'s get() position lies past |start|'s (8-bit iterator overload)
michael@0 25 return static_cast<size_t>(end.get() - start.get()); // same conversion to size_t the return statement performed implicitly
michael@0 26 }
michael@0 27
michael@0 28 void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest ); // 16-bit nsAString -> 8-bit nsACString; NOTE(review): Copy* presumably overwrites aDest (Append* below adds to it) - confirm in the .cpp
michael@0 29 void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); // opposite direction: 8-bit source, 16-bit destination
michael@0 30
michael@0 31 void LossyCopyUTF16toASCII( const char16_t* aSource, nsACString& aDest ); // raw-pointer source; no length is passed, so presumably zero-terminated input is expected - confirm
michael@0 32 void CopyASCIItoUTF16( const char* aSource, nsAString& aDest );
michael@0 33
michael@0 34 void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); // UTF-16 <-> UTF-8 pair, string-object sources
michael@0 35 void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
michael@0 36
michael@0 37 void CopyUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); // same pair, raw-pointer sources
michael@0 38 void CopyUTF8toUTF16( const char* aSource, nsAString& aDest );
michael@0 39
michael@0 40 void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ); // Append* family; NOTE(review): presumably extends aDest instead of replacing it - confirm
michael@0 41 void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest );
michael@0 42 bool AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest,
michael@0 43 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; // overload taking mozilla::fallible_t returns bool; NOTE(review): presumably false on allocation failure - confirm
michael@0 44
michael@0 45 void LossyAppendUTF16toASCII( const char16_t* aSource, nsACString& aDest ); // raw-pointer source variants of the two appends above
michael@0 46 void AppendASCIItoUTF16( const char* aSource, nsAString& aDest );
michael@0 47
michael@0 48 void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest );
michael@0 49 bool AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest,
michael@0 50 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; // fallible_t overload with a bool result, as for AppendASCIItoUTF16
michael@0 51 void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest );
michael@0 52 bool AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest,
michael@0 53 const mozilla::fallible_t& ) NS_WARN_UNUSED_RESULT; // fallible_t overload with a bool result, as for AppendASCIItoUTF16
michael@0 54
michael@0 55 void AppendUTF16toUTF8( const char16_t* aSource, nsACString& aDest ); // raw-pointer source variants; no fallible_t overloads are declared for these
michael@0 56 void AppendUTF8toUTF16( const char* aSource, nsAString& aDest );
michael@0 57
michael@0 58 #ifdef MOZ_USE_CHAR16_WRAPPER
michael@0 59 inline void AppendUTF16toUTF8( char16ptr_t aSource, nsACString& aDest )
michael@0 60 { // char16ptr_t convenience overload; only compiled when MOZ_USE_CHAR16_WRAPPER is defined
michael@0 61 AppendUTF16toUTF8(static_cast<const char16_t*>(aSource), aDest); // forwards to the const char16_t* overload, which returns void
michael@0 62 }
michael@0 63 #endif
michael@0 64
michael@0 65 /**
michael@0 66 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
michael@0 67 *
michael@0 68 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
michael@0 69 * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
michael@0 70 * This conversion is not well defined; but it reproduces legacy string behavior.
michael@0 71 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
michael@0 72 *
michael@0 73 * @param aSource a 16-bit wide string
michael@0 74 * @return a new |char| buffer you must free with |nsMemory::Free|.
michael@0 75 */
michael@0 76 char* ToNewCString( const nsAString& aSource );
michael@0 77
michael@0 78
michael@0 79 /**
michael@0 80 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
michael@0 81 *
michael@0 82 * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
michael@0 83 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
michael@0 84 *
michael@0 85 * @param aSource an 8-bit wide string
michael@0 86 * @return a new |char| buffer you must free with |nsMemory::Free|.
michael@0 87 */
michael@0 88 char* ToNewCString( const nsACString& aSource );
michael@0 89
michael@0 90 /**
michael@0 91 * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
michael@0 92 *
michael@0 93 * Allocates and returns a new |char| buffer which you must free with
michael@0 94 * |nsMemory::Free|.
michael@0 95 * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
michael@0 96 * copying |aSource| to your new buffer.
michael@0 97 * The new buffer is zero-terminated, but that may not help you if |aSource|
michael@0 98 * contains embedded nulls.
michael@0 99 *
michael@0 100 * @param aSource a UTF-16 string (made of char16_t's)
michael@0 101 * @param aUTF8Count the number of 8-bit units that was returned
michael@0 102 * @return a new |char| buffer you must free with |nsMemory::Free|.
michael@0 103 */
michael@0 104
michael@0 105 char* ToNewUTF8String( const nsAString& aSource, uint32_t *aUTF8Count = nullptr );
michael@0 106
michael@0 107
michael@0 108 /**
michael@0 109 * Returns a new |char16_t| buffer containing a zero-terminated copy of
michael@0 110 * |aSource|.
michael@0 111 *
michael@0 112 * Allocates and returns a new |char16_t| buffer which you must free with
michael@0 113 * |nsMemory::Free|.
michael@0 114 * The new buffer is zero-terminated, but that may not help you if |aSource|
michael@0 115 * contains embedded nulls.
michael@0 116 *
michael@0 117 * @param aSource a UTF-16 string
michael@0 118 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
michael@0 119 */
michael@0 120 char16_t* ToNewUnicode( const nsAString& aSource );
michael@0 121
michael@0 122
michael@0 123 /**
michael@0 124 * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
michael@0 125 *
michael@0 126 * Allocates and returns a new |char16_t| buffer which you must free with |nsMemory::Free|.
michael@0 127 * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
michael@0 128 * This conversion is not well defined; but it reproduces legacy string behavior.
michael@0 129 * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
michael@0 130 *
michael@0 131 * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
michael@0 132 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
michael@0 133 */
michael@0 134 char16_t* ToNewUnicode( const nsACString& aSource );
michael@0 135
michael@0 136 /**
michael@0 137 * Returns the required length for a char16_t buffer holding
michael@0 138 * a copy of aSource, using UTF-8 to UTF-16 conversion.
michael@0 139 * The length does NOT include any space for zero-termination.
michael@0 140 *
michael@0 141 * @param aSource an 8-bit wide string, UTF-8 encoded
michael@0 142 * @return length of UTF-16 encoded string copy, not zero-terminated
michael@0 143 */
michael@0 144 uint32_t CalcUTF8ToUnicodeLength( const nsACString& aSource );
michael@0 145
michael@0 146 /**
michael@0 147 * Copies the source string into the specified buffer, converting UTF-8 to
michael@0 148 * UTF-16 in the process. The conversion is well defined for valid UTF-8
michael@0 149 * strings.
michael@0 150 * The copied string will be zero-terminated! Any embedded nulls will be
michael@0 151 * copied nonetheless. It is the caller's responsibility to ensure the buffer
michael@0 152 * is large enough to hold the string copy plus one char16_t for
michael@0 153 * zero-termination!
michael@0 154 *
michael@0 155 * @see CalcUTF8ToUnicodeLength( const nsACString& )
michael@0 156 * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
michael@0 157 *
michael@0 158 * @param aSource an 8-bit wide string, UTF-8 encoded
michael@0 159 * @param aBuffer the buffer holding the converted string copy
michael@0 160 * @param aUTF16Count receiving optionally the number of 16-bit units that
michael@0 161 * were copied
michael@0 162 * @return aBuffer pointer, for convenience
michael@0 163 */
michael@0 164 char16_t* UTF8ToUnicodeBuffer( const nsACString& aSource,
michael@0 165 char16_t *aBuffer,
michael@0 166 uint32_t *aUTF16Count = nullptr );
michael@0 167
michael@0 168 /**
michael@0 169 * Returns a new |char16_t| buffer containing a zero-terminated copy
michael@0 170 * of |aSource|.
michael@0 171 *
michael@0 172 * Allocates and returns a new |char16_t| buffer which you must free with
michael@0 173 * |nsMemory::Free|. Performs an encoding conversion from UTF-8 to UTF-16
michael@0 174 * while copying |aSource| to your new buffer. This conversion is well defined
michael@0 175 * for a valid UTF-8 string. The new buffer is zero-terminated, but that
michael@0 176 * may not help you if |aSource| contains embedded nulls.
michael@0 177 *
michael@0 178 * @param aSource an 8-bit wide string, UTF-8 encoded
michael@0 179 * @param aUTF16Count the number of 16-bit units that was returned
michael@0 180 * @return a new |char16_t| buffer you must free with |nsMemory::Free|.
michael@0 181 * (UTF-16 encoded)
michael@0 182 */
michael@0 183 char16_t* UTF8ToNewUnicode( const nsACString& aSource, uint32_t *aUTF16Count = nullptr );
michael@0 184
michael@0 185 /**
michael@0 186 * Copies |aLength| 16-bit code units from the start of |aSource| to the
michael@0 187 * |char16_t| buffer |aDest|.
michael@0 188 *
michael@0 189 * After this operation |aDest| is not null terminated.
michael@0 190 *
michael@0 191 * @param aSource a UTF-16 string
michael@0 192 * @param aSrcOffset start offset in the source string
michael@0 193 * @param aDest a |char16_t| buffer
michael@0 194 * @param aLength the number of 16-bit code units to copy
michael@0 195 * @return pointer to destination buffer - identical to |aDest|
michael@0 196 */
michael@0 197 char16_t* CopyUnicodeTo( const nsAString& aSource,
michael@0 198 uint32_t aSrcOffset,
michael@0 199 char16_t* aDest,
michael@0 200 uint32_t aLength );
michael@0 201
michael@0 202
michael@0 203 /**
michael@0 204 * Copies 16-bit characters between iterators |aSrcStart| and
michael@0 205 * |aSrcEnd| to the writable string |aDest|. Similar to the
michael@0 206 * |nsString::Mid| method.
michael@0 207 *
michael@0 208 * After this operation |aDest| is not null terminated.
michael@0 209 *
michael@0 210 * @param aSrcStart start source iterator
michael@0 211 * @param aSrcEnd end source iterator
michael@0 212 * @param aDest destination for the copy
michael@0 213 */
michael@0 214 void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
michael@0 215 const nsAString::const_iterator& aSrcEnd,
michael@0 216 nsAString& aDest );
michael@0 217
michael@0 218 /**
michael@0 219 * Appends 16-bit characters between iterators |aSrcStart| and
michael@0 220 * |aSrcEnd| to the writable string |aDest|.
michael@0 221 *
michael@0 222 * After this operation |aDest| is not null terminated.
michael@0 223 *
michael@0 224 * @param aSrcStart start source iterator
michael@0 225 * @param aSrcEnd end source iterator
michael@0 226 * @param aDest destination for the copy
michael@0 227 */
michael@0 228 void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
michael@0 229 const nsAString::const_iterator& aSrcEnd,
michael@0 230 nsAString& aDest );
michael@0 231
michael@0 232 /**
michael@0 233 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range [0x00, 0x7F].
michael@0 234 *
michael@0 235 * @param aString a 16-bit wide string to scan
michael@0 236 */
michael@0 237 bool IsASCII( const nsAString& aString );
michael@0 238
michael@0 239 /**
michael@0 240 * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range [0x00, 0x7F].
michael@0 241 *
michael@0 242 * @param aString an 8-bit wide string to scan
michael@0 243 */
michael@0 244 bool IsASCII( const nsACString& aString );
michael@0 245
michael@0 246 /**
michael@0 247 * Returns |true| if |aString| is a valid UTF-8 string.
michael@0 248 * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
michael@0 249 * It is mainly written to replace, and is roughly equivalent to,
michael@0 250 *
michael@0 251 * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
michael@0 252 *
michael@0 253 * (see bug 191541)
michael@0 254 * As such, it does not check for non-UTF-8 7bit encodings such as
michael@0 255 * ISO-2022-JP and HZ.
michael@0 256 *
michael@0 257 * It rejects sequences with the following errors:
michael@0 258 *
michael@0 259 * byte sequences that cannot be decoded into characters according to
michael@0 260 * UTF-8's rules (including cases where the input is part of a valid
michael@0 261 * UTF-8 sequence but starts or ends mid-character)
michael@0 262 * overlong sequences (i.e., cases where a character was encoded
michael@0 263 * non-canonically by using more bytes than necessary)
michael@0 264 * surrogate codepoints (i.e., the codepoints reserved for
michael@0 265 * representing astral characters in UTF-16)
michael@0 266 * codepoints above the unicode range (i.e., outside the first 17
michael@0 267 * planes; higher than U+10FFFF), in accordance with
michael@0 268 * http://tools.ietf.org/html/rfc3629
michael@0 269 * when aRejectNonChar is true (the default), any codepoint whose low
michael@0 270 * 16 bits are 0xFFFE or 0xFFFF
michael@0 271
michael@0 272 *
michael@0 273 * @param aString an 8-bit wide string to scan
michael@0 274 * @param aRejectNonChar a boolean to control the rejection of utf-8
michael@0 275 * non characters
michael@0 276 */
michael@0 277 bool IsUTF8( const nsACString& aString, bool aRejectNonChar = true );
michael@0 278
michael@0 279 bool ParseString(const nsACString& aAstring, char aDelimiter, // NOTE(review): presumably splits aAstring at each aDelimiter - confirm in the .cpp
michael@0 280 nsTArray<nsCString>& aArray); // presumably the pieces go into aArray; the meaning of the bool result is not visible here - TODO confirm
michael@0 281
michael@0 282 /**
michael@0 283 * Converts case in place in the argument string.
michael@0 284 */
michael@0 285 void ToUpperCase( nsACString& );
michael@0 286
michael@0 287 void ToLowerCase( nsACString& );
michael@0 288
michael@0 289 void ToUpperCase( nsCSubstring& );
michael@0 290
michael@0 291 void ToLowerCase( nsCSubstring& );
michael@0 292
michael@0 293 /**
michael@0 294 * Converts case from string aSource to aDest.
michael@0 295 */
michael@0 296 void ToUpperCase( const nsACString& aSource, nsACString& aDest );
michael@0 297
michael@0 298 void ToLowerCase( const nsACString& aSource, nsACString& aDest );
michael@0 299
michael@0 300 /**
michael@0 301 * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
michael@0 302 *
michael@0 303 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
michael@0 304 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
michael@0 305 *
michael@0 306 * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
michael@0 307 * If we need something faster, then we can implement that later.
michael@0 308 */
michael@0 309
michael@0 310 bool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
michael@0 311 bool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
michael@0 312
michael@0 313 /* sometimes we don't care about where the string was, just that we
michael@0 314 * found it or not */
michael@0 315 inline bool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() )
michael@0 316 { // found / not-found convenience overload for 16-bit strings
michael@0 317 nsAString::const_iterator rangeBegin, rangeEnd; // locals only, so whatever position the search reports is dropped
michael@0 318 aSource.BeginReading(rangeBegin);
michael@0 319 aSource.EndReading(rangeEnd);
michael@0 320 return FindInReadable(aPattern, rangeBegin, rangeEnd, compare); // delegate to the iterator-based overload
michael@0 321 }
michael@0 322
michael@0 323 inline bool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() )
michael@0 324 { // found / not-found convenience overload for 8-bit strings
michael@0 325 nsACString::const_iterator rangeBegin, rangeEnd; // locals only, so whatever position the search reports is dropped
michael@0 326 aSource.BeginReading(rangeBegin);
michael@0 327 aSource.EndReading(rangeEnd);
michael@0 328 return FindInReadable(aPattern, rangeBegin, rangeEnd, compare); // delegate to the iterator-based overload
michael@0 329 }
michael@0 330
michael@0 331
michael@0 332 bool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& ); // NOTE(review): presumably the 8-bit FindInReadable with case-insensitive matching; the two iterators look like the in/out search range - confirm
michael@0 333
michael@0 334 /**
michael@0 335 * Finds the rightmost occurrence of |aPattern|
michael@0 336 * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
michael@0 337 * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
michael@0 338 *
michael@0 339 */
michael@0 340 bool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
michael@0 341 bool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );
michael@0 342
michael@0 343 /**
michael@0 344 * Finds the leftmost occurrence of |aChar|, if any in the range
michael@0 345 * |aSearchStart|..|aSearchEnd|.
michael@0 346 *
michael@0 347 * Returns |true| if a match was found, and adjusts |aSearchStart| to
michael@0 348 * point to the match. If no match was found, returns |false| and
michael@0 349 * makes |aSearchStart == aSearchEnd|.
michael@0 350 */
michael@0 351 bool FindCharInReadable( char16_t aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd );
michael@0 352 bool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd );
michael@0 353
michael@0 354 /**
michael@0 355 * Finds the number of occurrences of |aChar| in the string |aStr|
michael@0 356 */
michael@0 357 uint32_t CountCharInReadable( const nsAString& aStr,
michael@0 358 char16_t aChar );
michael@0 359 uint32_t CountCharInReadable( const nsACString& aStr,
michael@0 360 char aChar );
michael@0 361
michael@0 362 bool // NOTE(review): presumably true when aSource starts with aSubstring as judged by aComparator - confirm (16-bit overload)
michael@0 363 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
michael@0 364 const nsStringComparator& aComparator =
michael@0 365 nsDefaultStringComparator() );
michael@0 366 bool // 8-bit counterpart of the overload above
michael@0 367 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
michael@0 368 const nsCStringComparator& aComparator =
michael@0 369 nsDefaultCStringComparator() );
michael@0 370 bool // NOTE(review): presumably true when aSource ends with aSubstring as judged by aComparator - confirm (16-bit overload)
michael@0 371 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
michael@0 372 const nsStringComparator& aComparator =
michael@0 373 nsDefaultStringComparator() );
michael@0 374 bool // 8-bit counterpart of the overload above
michael@0 375 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
michael@0 376 const nsCStringComparator& aComparator =
michael@0 377 nsDefaultCStringComparator() );
michael@0 378
michael@0 379 const nsAFlatString& EmptyString();
michael@0 380 const nsAFlatCString& EmptyCString();
michael@0 381
michael@0 382 const nsAFlatString& NullString();
michael@0 383 const nsAFlatCString& NullCString();
michael@0 384
michael@0 385 /**
michael@0 386 * Compare a UTF-8 string to an UTF-16 string.
michael@0 387 *
michael@0 388 * Returns 0 if the strings are equal, -1 if aUTF8String is less
michael@0 389 * than aUTF16String, and 1 in the reverse case. In case of fatal
michael@0 390 * error (eg the strings are not valid UTF8 and UTF16 respectively),
michael@0 391 * this method will return INT32_MIN.
michael@0 392 */
michael@0 393 int32_t
michael@0 394 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
michael@0 395 const nsASingleFragmentString& aUTF16String);
michael@0 396
michael@0 397 void // NOTE(review): presumably appends the single code point aSource to aDest encoded as UTF-16 - confirm in the .cpp
michael@0 398 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
michael@0 399
michael@0 400 template<typename StringT>
michael@0 401 inline bool EnsureStringLength(StringT& aStr, uint32_t aLen)
michael@0 402 { // asks aStr for a length of aLen and reports whether it ended up with exactly that length
michael@0 403 aStr.SetLength(aLen); // the request itself yields no status that is used here
michael@0 404 return aStr.Length() == aLen; // success is judged purely by re-reading Length()
michael@0 405 }
michael@0 406
michael@0 407 #endif // !defined(nsReadableUtils_h___)

mercurial