xpcom/io/nsLinebreakConverter.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsLinebreakConverter.h"
michael@0 7
michael@0 8 #include "nsMemory.h"
michael@0 9 #include "nsCRT.h"
michael@0 10
michael@0 11
michael@0 12 /*----------------------------------------------------------------------------
michael@0 13 GetLinebreakString
michael@0 14
michael@0 15 Could make this inline
michael@0 16 ----------------------------------------------------------------------------*/
michael@0 17 static const char* GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType)
michael@0 18 {
michael@0 19 static const char* const sLinebreaks[] = {
michael@0 20 "", // any
michael@0 21 NS_LINEBREAK, // platform
michael@0 22 LFSTR, // content
michael@0 23 CRLF, // net
michael@0 24 CRSTR, // Mac
michael@0 25 LFSTR, // Unix
michael@0 26 CRLF, // Windows
michael@0 27 " ", // space
michael@0 28 nullptr
michael@0 29 };
michael@0 30
michael@0 31 return sLinebreaks[aBreakType];
michael@0 32 }
michael@0 33
michael@0 34
/*----------------------------------------------------------------------------
  AppendLinebreak

  Wee inline method to append a line break. Modifies ioDest.

  Copies the (at most two character) break string lineBreakStr to *ioDest
  and advances ioDest past what was written. An empty break string writes
  nothing, so the number of elements written always equals
  strlen(lineBreakStr) for break strings of length 0, 1 or 2. Callers that
  size their buffer from strlen() therefore cannot be overrun.
----------------------------------------------------------------------------*/
template<class T>
void AppendLinebreak(T*& ioDest, const char* lineBreakStr)
{
  // Empty break string: nothing to append (previously this stored the
  // terminating '\0' and advanced ioDest by one).
  if (!lineBreakStr[0])
    return;

  *ioDest++ = lineBreakStr[0];

  if (lineBreakStr[1])
    *ioDest++ = lineBreakStr[1];
}
michael@0 48
/*----------------------------------------------------------------------------
  CountLinebreaks

  Counts occurrences of breakStr in the first inLen elements of aSrc.

  breakStr is treated as a one or two character sequence. For a two
  character sequence only a complete pair is counted; a first character
  that is not followed by the second one is skipped without counting.
----------------------------------------------------------------------------*/
template<class T>
int32_t CountLinebreaks(const T* aSrc, int32_t inLen, const char* breakStr)
{
  const T* const limit = aSrc + inLen;
  int32_t matches = 0;

  for (const T* cursor = aSrc; cursor < limit; )
  {
    // Consume the current element; only a match on the first break
    // character needs any further work.
    if (*cursor++ != breakStr[0])
      continue;

    if (!breakStr[1])
    {
      // single character break
      ++matches;
      continue;
    }

    // two character break: count it only if the second character follows
    if (cursor < limit && *cursor == breakStr[1])
    {
      ++cursor;
      ++matches;
    }
  }

  return matches;
}
michael@0 88
michael@0 89
michael@0 90 /*----------------------------------------------------------------------------
michael@0 91 ConvertBreaks
michael@0 92
michael@0 93 ioLen *includes* a terminating null, if any
michael@0 94 ----------------------------------------------------------------------------*/
michael@0 95 template<class T>
michael@0 96 static T* ConvertBreaks(const T* inSrc, int32_t& ioLen, const char* srcBreak, const char* destBreak)
michael@0 97 {
michael@0 98 NS_ASSERTION(inSrc && srcBreak && destBreak, "Got a null string");
michael@0 99
michael@0 100 T* resultString = nullptr;
michael@0 101
michael@0 102 // handle the no conversion case
michael@0 103 if (nsCRT::strcmp(srcBreak, destBreak) == 0)
michael@0 104 {
michael@0 105 resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
michael@0 106 if (!resultString) return nullptr;
michael@0 107 memcpy(resultString, inSrc, sizeof(T) * ioLen); // includes the null, if any
michael@0 108 return resultString;
michael@0 109 }
michael@0 110
michael@0 111 int32_t srcBreakLen = strlen(srcBreak);
michael@0 112 int32_t destBreakLen = strlen(destBreak);
michael@0 113
michael@0 114 // handle the easy case, where the string length does not change, and the
michael@0 115 // breaks are only 1 char long, i.e. CR <-> LF
michael@0 116 if (srcBreakLen == destBreakLen && srcBreakLen == 1)
michael@0 117 {
michael@0 118 resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
michael@0 119 if (!resultString) return nullptr;
michael@0 120
michael@0 121 const T* src = inSrc;
michael@0 122 const T* srcEnd = inSrc + ioLen; // includes null, if any
michael@0 123 T* dst = resultString;
michael@0 124
michael@0 125 char srcBreakChar = *srcBreak; // we know it's one char long already
michael@0 126 char dstBreakChar = *destBreak;
michael@0 127
michael@0 128 while (src < srcEnd)
michael@0 129 {
michael@0 130 if (*src == srcBreakChar)
michael@0 131 {
michael@0 132 *dst++ = dstBreakChar;
michael@0 133 src++;
michael@0 134 }
michael@0 135 else
michael@0 136 {
michael@0 137 *dst++ = *src++;
michael@0 138 }
michael@0 139 }
michael@0 140
michael@0 141 // ioLen does not change
michael@0 142 }
michael@0 143 else
michael@0 144 {
michael@0 145 // src and dest termination is different length. Do it a slower way.
michael@0 146
michael@0 147 // count linebreaks in src. Assumes that chars in 2-char linebreaks are unique.
michael@0 148 int32_t numLinebreaks = CountLinebreaks(inSrc, ioLen, srcBreak);
michael@0 149
michael@0 150 int32_t newBufLen = ioLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen);
michael@0 151 resultString = (T *)nsMemory::Alloc(sizeof(T) * newBufLen);
michael@0 152 if (!resultString) return nullptr;
michael@0 153
michael@0 154 const T* src = inSrc;
michael@0 155 const T* srcEnd = inSrc + ioLen; // includes null, if any
michael@0 156 T* dst = resultString;
michael@0 157
michael@0 158 while (src < srcEnd)
michael@0 159 {
michael@0 160 if (*src == *srcBreak)
michael@0 161 {
michael@0 162 *dst++ = *destBreak;
michael@0 163 if (destBreak[1])
michael@0 164 *dst++ = destBreak[1];
michael@0 165
michael@0 166 src++;
michael@0 167 if (src < srcEnd && srcBreak[1] && *src == srcBreak[1])
michael@0 168 src++;
michael@0 169 }
michael@0 170 else
michael@0 171 {
michael@0 172 *dst++ = *src++;
michael@0 173 }
michael@0 174 }
michael@0 175
michael@0 176 ioLen = newBufLen;
michael@0 177 }
michael@0 178
michael@0 179 return resultString;
michael@0 180 }
michael@0 181
michael@0 182
/*----------------------------------------------------------------------------
  ConvertBreaksInSitu

  Rewrites the buffer in place: every element among the first inLen
  elements of inSrc that equals srcBreak is overwritten with destBreak.
  Because it is a one-for-one substitution it can only be used when the
  linebreak length does not change.
----------------------------------------------------------------------------*/
template<class T>
static void ConvertBreaksInSitu(T* inSrc, int32_t inLen, char srcBreak, char destBreak)
{
  for (int32_t index = 0; index < inLen; ++index)
  {
    T& current = inSrc[index];
    if (current == srcBreak)
      current = destBreak;
  }
}
michael@0 203
michael@0 204
michael@0 205 /*----------------------------------------------------------------------------
michael@0 206 ConvertUnknownBreaks
michael@0 207
michael@0 208 Convert unknown line breaks to the specified break.
michael@0 209
michael@0 210 This will convert CRLF pairs to one break, and single CR or LF to a break.
michael@0 211 ----------------------------------------------------------------------------*/
michael@0 212 template<class T>
michael@0 213 static T* ConvertUnknownBreaks(const T* inSrc, int32_t& ioLen, const char* destBreak)
michael@0 214 {
michael@0 215 const T* src = inSrc;
michael@0 216 const T* srcEnd = inSrc + ioLen; // includes null, if any
michael@0 217
michael@0 218 int32_t destBreakLen = strlen(destBreak);
michael@0 219 int32_t finalLen = 0;
michael@0 220
michael@0 221 while (src < srcEnd)
michael@0 222 {
michael@0 223 if (*src == nsCRT::CR)
michael@0 224 {
michael@0 225 if (src < srcEnd && src[1] == nsCRT::LF)
michael@0 226 {
michael@0 227 // CRLF
michael@0 228 finalLen += destBreakLen;
michael@0 229 src++;
michael@0 230 }
michael@0 231 else
michael@0 232 {
michael@0 233 // Lone CR
michael@0 234 finalLen += destBreakLen;
michael@0 235 }
michael@0 236 }
michael@0 237 else if (*src == nsCRT::LF)
michael@0 238 {
michael@0 239 // Lone LF
michael@0 240 finalLen += destBreakLen;
michael@0 241 }
michael@0 242 else
michael@0 243 {
michael@0 244 finalLen++;
michael@0 245 }
michael@0 246 src++;
michael@0 247 }
michael@0 248
michael@0 249 T* resultString = (T *)nsMemory::Alloc(sizeof(T) * finalLen);
michael@0 250 if (!resultString) return nullptr;
michael@0 251
michael@0 252 src = inSrc;
michael@0 253 srcEnd = inSrc + ioLen; // includes null, if any
michael@0 254
michael@0 255 T* dst = resultString;
michael@0 256
michael@0 257 while (src < srcEnd)
michael@0 258 {
michael@0 259 if (*src == nsCRT::CR)
michael@0 260 {
michael@0 261 if (src < srcEnd && src[1] == nsCRT::LF)
michael@0 262 {
michael@0 263 // CRLF
michael@0 264 AppendLinebreak(dst, destBreak);
michael@0 265 src++;
michael@0 266 }
michael@0 267 else
michael@0 268 {
michael@0 269 // Lone CR
michael@0 270 AppendLinebreak(dst, destBreak);
michael@0 271 }
michael@0 272 }
michael@0 273 else if (*src == nsCRT::LF)
michael@0 274 {
michael@0 275 // Lone LF
michael@0 276 AppendLinebreak(dst, destBreak);
michael@0 277 }
michael@0 278 else
michael@0 279 {
michael@0 280 *dst++ = *src;
michael@0 281 }
michael@0 282 src++;
michael@0 283 }
michael@0 284
michael@0 285 ioLen = finalLen;
michael@0 286 return resultString;
michael@0 287 }
michael@0 288
michael@0 289
michael@0 290 /*----------------------------------------------------------------------------
michael@0 291 ConvertLineBreaks
michael@0 292
michael@0 293 ----------------------------------------------------------------------------*/
michael@0 294 char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc,
michael@0 295 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
michael@0 296 {
michael@0 297 NS_ASSERTION(aDestBreaks != eLinebreakAny &&
michael@0 298 aSrcBreaks != eLinebreakSpace, "Invalid parameter");
michael@0 299 if (!aSrc) return nullptr;
michael@0 300
michael@0 301 int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen;
michael@0 302
michael@0 303 char* resultString;
michael@0 304 if (aSrcBreaks == eLinebreakAny)
michael@0 305 resultString = ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks));
michael@0 306 else
michael@0 307 resultString = ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
michael@0 308
michael@0 309 if (outLen)
michael@0 310 *outLen = sourceLen;
michael@0 311 return resultString;
michael@0 312 }
michael@0 313
michael@0 314
michael@0 315 /*----------------------------------------------------------------------------
michael@0 316 ConvertLineBreaksInSitu
michael@0 317
michael@0 318 ----------------------------------------------------------------------------*/
michael@0 319 nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(char **ioBuffer, ELinebreakType aSrcBreaks,
michael@0 320 ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
michael@0 321 {
michael@0 322 NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
michael@0 323 if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
michael@0 324
michael@0 325 NS_ASSERTION(aDestBreaks != eLinebreakAny &&
michael@0 326 aSrcBreaks != eLinebreakSpace, "Invalid parameter");
michael@0 327
michael@0 328 int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(*ioBuffer) + 1 : aSrcLen;
michael@0 329
michael@0 330 // can we convert in-place?
michael@0 331 const char* srcBreaks = GetLinebreakString(aSrcBreaks);
michael@0 332 const char* dstBreaks = GetLinebreakString(aDestBreaks);
michael@0 333
michael@0 334 if ( (aSrcBreaks != eLinebreakAny) &&
michael@0 335 (strlen(srcBreaks) == 1) &&
michael@0 336 (strlen(dstBreaks) == 1) )
michael@0 337 {
michael@0 338 ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
michael@0 339 if (outLen)
michael@0 340 *outLen = sourceLen;
michael@0 341 }
michael@0 342 else
michael@0 343 {
michael@0 344 char* destBuffer;
michael@0 345
michael@0 346 if (aSrcBreaks == eLinebreakAny)
michael@0 347 destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
michael@0 348 else
michael@0 349 destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
michael@0 350
michael@0 351 if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
michael@0 352 *ioBuffer = destBuffer;
michael@0 353 if (outLen)
michael@0 354 *outLen = sourceLen;
michael@0 355 }
michael@0 356
michael@0 357 return NS_OK;
michael@0 358 }
michael@0 359
michael@0 360
michael@0 361 /*----------------------------------------------------------------------------
michael@0 362 ConvertUnicharLineBreaks
michael@0 363
michael@0 364 ----------------------------------------------------------------------------*/
michael@0 365 char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks(const char16_t* aSrc,
michael@0 366 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
michael@0 367 {
michael@0 368 NS_ASSERTION(aDestBreaks != eLinebreakAny &&
michael@0 369 aSrcBreaks != eLinebreakSpace, "Invalid parameter");
michael@0 370 if (!aSrc) return nullptr;
michael@0 371
michael@0 372 int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen;
michael@0 373
michael@0 374 char16_t* resultString;
michael@0 375 if (aSrcBreaks == eLinebreakAny)
michael@0 376 resultString = ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks));
michael@0 377 else
michael@0 378 resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
michael@0 379
michael@0 380 if (outLen)
michael@0 381 *outLen = bufLen;
michael@0 382 return resultString;
michael@0 383 }
michael@0 384
michael@0 385
michael@0 386 /*----------------------------------------------------------------------------
michael@0 387 ConvertStringLineBreaks
michael@0 388
michael@0 389 ----------------------------------------------------------------------------*/
michael@0 390 nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(char16_t **ioBuffer,
michael@0 391 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
michael@0 392 {
michael@0 393 NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
michael@0 394 if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
michael@0 395 NS_ASSERTION(aDestBreaks != eLinebreakAny &&
michael@0 396 aSrcBreaks != eLinebreakSpace, "Invalid parameter");
michael@0 397
michael@0 398 int32_t sourceLen = (aSrcLen == kIgnoreLen) ? NS_strlen(*ioBuffer) + 1 : aSrcLen;
michael@0 399
michael@0 400 // can we convert in-place?
michael@0 401 const char* srcBreaks = GetLinebreakString(aSrcBreaks);
michael@0 402 const char* dstBreaks = GetLinebreakString(aDestBreaks);
michael@0 403
michael@0 404 if ( (aSrcBreaks != eLinebreakAny) &&
michael@0 405 (strlen(srcBreaks) == 1) &&
michael@0 406 (strlen(dstBreaks) == 1) )
michael@0 407 {
michael@0 408 ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
michael@0 409 if (outLen)
michael@0 410 *outLen = sourceLen;
michael@0 411 }
michael@0 412 else
michael@0 413 {
michael@0 414 char16_t* destBuffer;
michael@0 415
michael@0 416 if (aSrcBreaks == eLinebreakAny)
michael@0 417 destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
michael@0 418 else
michael@0 419 destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
michael@0 420
michael@0 421 if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
michael@0 422 *ioBuffer = destBuffer;
michael@0 423 if (outLen)
michael@0 424 *outLen = sourceLen;
michael@0 425 }
michael@0 426
michael@0 427 return NS_OK;
michael@0 428 }
michael@0 429
michael@0 430 /*----------------------------------------------------------------------------
michael@0 431 ConvertStringLineBreaks
michael@0 432
michael@0 433 ----------------------------------------------------------------------------*/
michael@0 434 nsresult nsLinebreakConverter::ConvertStringLineBreaks(nsString& ioString,
michael@0 435 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks)
michael@0 436 {
michael@0 437
michael@0 438 NS_ASSERTION(aDestBreaks != eLinebreakAny &&
michael@0 439 aSrcBreaks != eLinebreakSpace, "Invalid parameter");
michael@0 440
michael@0 441 // nothing to do
michael@0 442 if (ioString.IsEmpty()) return NS_OK;
michael@0 443
michael@0 444 nsresult rv;
michael@0 445
michael@0 446 // remember the old buffer in case
michael@0 447 // we blow it away later
michael@0 448 nsString::char_iterator stringBuf;
michael@0 449 ioString.BeginWriting(stringBuf);
michael@0 450
michael@0 451 int32_t newLen;
michael@0 452
michael@0 453 rv = ConvertUnicharLineBreaksInSitu(&stringBuf,
michael@0 454 aSrcBreaks, aDestBreaks,
michael@0 455 ioString.Length() + 1, &newLen);
michael@0 456 if (NS_FAILED(rv)) return rv;
michael@0 457
michael@0 458 if (stringBuf != ioString.get())
michael@0 459 ioString.Adopt(stringBuf);
michael@0 460
michael@0 461 return NS_OK;
michael@0 462 }
michael@0 463
michael@0 464
michael@0 465

mercurial