1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/io/nsLinebreakConverter.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,465 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsLinebreakConverter.h" 1.10 + 1.11 +#include "nsMemory.h" 1.12 +#include "nsCRT.h" 1.13 + 1.14 + 1.15 +/*---------------------------------------------------------------------------- 1.16 + GetLinebreakString 1.17 + 1.18 + Could make this inline 1.19 +----------------------------------------------------------------------------*/ 1.20 +static const char* GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType) 1.21 +{ 1.22 + static const char* const sLinebreaks[] = { 1.23 + "", // any 1.24 + NS_LINEBREAK, // platform 1.25 + LFSTR, // content 1.26 + CRLF, // net 1.27 + CRSTR, // Mac 1.28 + LFSTR, // Unix 1.29 + CRLF, // Windows 1.30 + " ", // space 1.31 + nullptr 1.32 + }; 1.33 + 1.34 + return sLinebreaks[aBreakType]; 1.35 +} 1.36 + 1.37 + 1.38 +/*---------------------------------------------------------------------------- 1.39 + AppendLinebreak 1.40 + 1.41 + Wee inline method to append a line break. Modifies ioDest. 1.42 +----------------------------------------------------------------------------*/ 1.43 +template<class T> 1.44 +void AppendLinebreak(T*& ioDest, const char* lineBreakStr) 1.45 +{ 1.46 + *ioDest++ = *lineBreakStr; 1.47 + 1.48 + if (lineBreakStr[1]) 1.49 + *ioDest++ = lineBreakStr[1]; 1.50 +} 1.51 + 1.52 +/*---------------------------------------------------------------------------- 1.53 + CountChars 1.54 + 1.55 + Counts occurrences of breakStr in aSrc 1.56 +----------------------------------------------------------------------------*/ 1.57 +template<class T> 1.58 +int32_t CountLinebreaks(const T* aSrc, int32_t inLen, const char* breakStr) 1.59 +{ 1.60 + const T* src = aSrc; 1.61 + const T* srcEnd = aSrc + inLen; 1.62 + int32_t theCount = 0; 1.63 + 1.64 + while (src < srcEnd) 1.65 + { 1.66 + if (*src == *breakStr) 1.67 + { 1.68 + src++; 1.69 + 1.70 + if (breakStr[1]) 1.71 + { 1.72 + if (src < srcEnd && *src == breakStr[1]) 1.73 + { 1.74 + src++; 1.75 + theCount++; 1.76 + } 1.77 + } 1.78 + else 1.79 + { 1.80 + theCount++; 1.81 + } 1.82 + } 1.83 + else 1.84 + { 1.85 + src++; 1.86 + } 1.87 + } 1.88 + 1.89 + return theCount; 1.90 +} 1.91 + 1.92 + 1.93 +/*---------------------------------------------------------------------------- 1.94 + ConvertBreaks 1.95 + 1.96 + ioLen *includes* a terminating null, if any 1.97 +----------------------------------------------------------------------------*/ 1.98 +template<class T> 1.99 +static T* ConvertBreaks(const T* inSrc, int32_t& ioLen, const char* srcBreak, const char* destBreak) 1.100 +{ 1.101 + NS_ASSERTION(inSrc && srcBreak && destBreak, "Got a null string"); 1.102 + 1.103 + T* resultString = nullptr; 1.104 + 1.105 + // handle the no conversion case 1.106 + if (nsCRT::strcmp(srcBreak, destBreak) == 0) 1.107 + { 1.108 + resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen); 1.109 + if (!resultString) return nullptr; 1.110 + memcpy(resultString, inSrc, sizeof(T) * ioLen); // includes the null, if any 1.111 + return resultString; 1.112 + } 1.113 + 1.114 + int32_t srcBreakLen = strlen(srcBreak); 1.115 + int32_t destBreakLen = strlen(destBreak); 1.116 + 1.117 + // handle the easy case, where the string length does not change, and the 1.118 + // breaks are only 1 char long, i.e. CR <-> LF 1.119 + if (srcBreakLen == destBreakLen && srcBreakLen == 1) 1.120 + { 1.121 + resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen); 1.122 + if (!resultString) return nullptr; 1.123 + 1.124 + const T* src = inSrc; 1.125 + const T* srcEnd = inSrc + ioLen; // includes null, if any 1.126 + T* dst = resultString; 1.127 + 1.128 + char srcBreakChar = *srcBreak; // we know it's one char long already 1.129 + char dstBreakChar = *destBreak; 1.130 + 1.131 + while (src < srcEnd) 1.132 + { 1.133 + if (*src == srcBreakChar) 1.134 + { 1.135 + *dst++ = dstBreakChar; 1.136 + src++; 1.137 + } 1.138 + else 1.139 + { 1.140 + *dst++ = *src++; 1.141 + } 1.142 + } 1.143 + 1.144 + // ioLen does not change 1.145 + } 1.146 + else 1.147 + { 1.148 + // src and dest termination is different length. Do it a slower way. 1.149 + 1.150 + // count linebreaks in src. Assumes that chars in 2-char linebreaks are unique. 1.151 + int32_t numLinebreaks = CountLinebreaks(inSrc, ioLen, srcBreak); 1.152 + 1.153 + int32_t newBufLen = ioLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen); 1.154 + resultString = (T *)nsMemory::Alloc(sizeof(T) * newBufLen); 1.155 + if (!resultString) return nullptr; 1.156 + 1.157 + const T* src = inSrc; 1.158 + const T* srcEnd = inSrc + ioLen; // includes null, if any 1.159 + T* dst = resultString; 1.160 + 1.161 + while (src < srcEnd) 1.162 + { 1.163 + if (*src == *srcBreak) 1.164 + { 1.165 + *dst++ = *destBreak; 1.166 + if (destBreak[1]) 1.167 + *dst++ = destBreak[1]; 1.168 + 1.169 + src++; 1.170 + if (src < srcEnd && srcBreak[1] && *src == srcBreak[1]) 1.171 + src++; 1.172 + } 1.173 + else 1.174 + { 1.175 + *dst++ = *src++; 1.176 + } 1.177 + } 1.178 + 1.179 + ioLen = newBufLen; 1.180 + } 1.181 + 1.182 + return resultString; 1.183 +} 1.184 + 1.185 + 1.186 +/*---------------------------------------------------------------------------- 1.187 + ConvertBreaksInSitu 1.188 + 1.189 + Convert breaks in situ. Can only do this if the linebreak length 1.190 + does not change. 1.191 +----------------------------------------------------------------------------*/ 1.192 +template<class T> 1.193 +static void ConvertBreaksInSitu(T* inSrc, int32_t inLen, char srcBreak, char destBreak) 1.194 +{ 1.195 + T* src = inSrc; 1.196 + T* srcEnd = inSrc + inLen; 1.197 + 1.198 + while (src < srcEnd) 1.199 + { 1.200 + if (*src == srcBreak) 1.201 + *src = destBreak; 1.202 + 1.203 + src++; 1.204 + } 1.205 +} 1.206 + 1.207 + 1.208 +/*---------------------------------------------------------------------------- 1.209 + ConvertUnknownBreaks 1.210 + 1.211 + Convert unknown line breaks to the specified break. 1.212 + 1.213 + This will convert CRLF pairs to one break, and single CR or LF to a break. 1.214 +----------------------------------------------------------------------------*/ 1.215 +template<class T> 1.216 +static T* ConvertUnknownBreaks(const T* inSrc, int32_t& ioLen, const char* destBreak) 1.217 +{ 1.218 + const T* src = inSrc; 1.219 + const T* srcEnd = inSrc + ioLen; // includes null, if any 1.220 + 1.221 + int32_t destBreakLen = strlen(destBreak); 1.222 + int32_t finalLen = 0; 1.223 + 1.224 + while (src < srcEnd) 1.225 + { 1.226 + if (*src == nsCRT::CR) 1.227 + { 1.228 + if (src < srcEnd && src[1] == nsCRT::LF) 1.229 + { 1.230 + // CRLF 1.231 + finalLen += destBreakLen; 1.232 + src++; 1.233 + } 1.234 + else 1.235 + { 1.236 + // Lone CR 1.237 + finalLen += destBreakLen; 1.238 + } 1.239 + } 1.240 + else if (*src == nsCRT::LF) 1.241 + { 1.242 + // Lone LF 1.243 + finalLen += destBreakLen; 1.244 + } 1.245 + else 1.246 + { 1.247 + finalLen++; 1.248 + } 1.249 + src++; 1.250 + } 1.251 + 1.252 + T* resultString = (T *)nsMemory::Alloc(sizeof(T) * finalLen); 1.253 + if (!resultString) return nullptr; 1.254 + 1.255 + src = inSrc; 1.256 + srcEnd = inSrc + ioLen; // includes null, if any 1.257 + 1.258 + T* dst = resultString; 1.259 + 1.260 + while (src < srcEnd) 1.261 + { 1.262 + if (*src == nsCRT::CR) 1.263 + { 1.264 + if (src < srcEnd && src[1] == nsCRT::LF) 1.265 + { 1.266 + // CRLF 1.267 + AppendLinebreak(dst, destBreak); 1.268 + src++; 1.269 + } 1.270 + else 1.271 + { 1.272 + // Lone CR 1.273 + AppendLinebreak(dst, destBreak); 1.274 + } 1.275 + } 1.276 + else if (*src == nsCRT::LF) 1.277 + { 1.278 + // Lone LF 1.279 + AppendLinebreak(dst, destBreak); 1.280 + } 1.281 + else 1.282 + { 1.283 + *dst++ = *src; 1.284 + } 1.285 + src++; 1.286 + } 1.287 + 1.288 + ioLen = finalLen; 1.289 + return resultString; 1.290 +} 1.291 + 1.292 + 1.293 +/*---------------------------------------------------------------------------- 1.294 + ConvertLineBreaks 1.295 + 1.296 +----------------------------------------------------------------------------*/ 1.297 +char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc, 1.298 + ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) 1.299 +{ 1.300 + NS_ASSERTION(aDestBreaks != eLinebreakAny && 1.301 + aSrcBreaks != eLinebreakSpace, "Invalid parameter"); 1.302 + if (!aSrc) return nullptr; 1.303 + 1.304 + int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen; 1.305 + 1.306 + char* resultString; 1.307 + if (aSrcBreaks == eLinebreakAny) 1.308 + resultString = ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks)); 1.309 + else 1.310 + resultString = ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks)); 1.311 + 1.312 + if (outLen) 1.313 + *outLen = sourceLen; 1.314 + return resultString; 1.315 +} 1.316 + 1.317 + 1.318 +/*---------------------------------------------------------------------------- 1.319 + ConvertLineBreaksInSitu 1.320 + 1.321 +----------------------------------------------------------------------------*/ 1.322 +nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(char **ioBuffer, ELinebreakType aSrcBreaks, 1.323 + ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) 1.324 +{ 1.325 + NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed"); 1.326 + if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER; 1.327 + 1.328 + NS_ASSERTION(aDestBreaks != eLinebreakAny && 1.329 + aSrcBreaks != eLinebreakSpace, "Invalid parameter"); 1.330 + 1.331 + int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(*ioBuffer) + 1 : aSrcLen; 1.332 + 1.333 + // can we convert in-place? 1.334 + const char* srcBreaks = GetLinebreakString(aSrcBreaks); 1.335 + const char* dstBreaks = GetLinebreakString(aDestBreaks); 1.336 + 1.337 + if ( (aSrcBreaks != eLinebreakAny) && 1.338 + (strlen(srcBreaks) == 1) && 1.339 + (strlen(dstBreaks) == 1) ) 1.340 + { 1.341 + ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks); 1.342 + if (outLen) 1.343 + *outLen = sourceLen; 1.344 + } 1.345 + else 1.346 + { 1.347 + char* destBuffer; 1.348 + 1.349 + if (aSrcBreaks == eLinebreakAny) 1.350 + destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks); 1.351 + else 1.352 + destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks); 1.353 + 1.354 + if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY; 1.355 + *ioBuffer = destBuffer; 1.356 + if (outLen) 1.357 + *outLen = sourceLen; 1.358 + } 1.359 + 1.360 + return NS_OK; 1.361 +} 1.362 + 1.363 + 1.364 +/*---------------------------------------------------------------------------- 1.365 + ConvertUnicharLineBreaks 1.366 + 1.367 +----------------------------------------------------------------------------*/ 1.368 +char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks(const char16_t* aSrc, 1.369 + ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) 1.370 +{ 1.371 + NS_ASSERTION(aDestBreaks != eLinebreakAny && 1.372 + aSrcBreaks != eLinebreakSpace, "Invalid parameter"); 1.373 + if (!aSrc) return nullptr; 1.374 + 1.375 + int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen; 1.376 + 1.377 + char16_t* resultString; 1.378 + if (aSrcBreaks == eLinebreakAny) 1.379 + resultString = ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks)); 1.380 + else 1.381 + resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks)); 1.382 + 1.383 + if (outLen) 1.384 + *outLen = bufLen; 1.385 + return resultString; 1.386 +} 1.387 + 1.388 + 1.389 +/*---------------------------------------------------------------------------- 1.390 + ConvertStringLineBreaks 1.391 + 1.392 +----------------------------------------------------------------------------*/ 1.393 +nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(char16_t **ioBuffer, 1.394 + ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) 1.395 +{ 1.396 + NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed"); 1.397 + if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER; 1.398 + NS_ASSERTION(aDestBreaks != eLinebreakAny && 1.399 + aSrcBreaks != eLinebreakSpace, "Invalid parameter"); 1.400 + 1.401 + int32_t sourceLen = (aSrcLen == kIgnoreLen) ? NS_strlen(*ioBuffer) + 1 : aSrcLen; 1.402 + 1.403 + // can we convert in-place? 1.404 + const char* srcBreaks = GetLinebreakString(aSrcBreaks); 1.405 + const char* dstBreaks = GetLinebreakString(aDestBreaks); 1.406 + 1.407 + if ( (aSrcBreaks != eLinebreakAny) && 1.408 + (strlen(srcBreaks) == 1) && 1.409 + (strlen(dstBreaks) == 1) ) 1.410 + { 1.411 + ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks); 1.412 + if (outLen) 1.413 + *outLen = sourceLen; 1.414 + } 1.415 + else 1.416 + { 1.417 + char16_t* destBuffer; 1.418 + 1.419 + if (aSrcBreaks == eLinebreakAny) 1.420 + destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks); 1.421 + else 1.422 + destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks); 1.423 + 1.424 + if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY; 1.425 + *ioBuffer = destBuffer; 1.426 + if (outLen) 1.427 + *outLen = sourceLen; 1.428 + } 1.429 + 1.430 + return NS_OK; 1.431 +} 1.432 + 1.433 +/*---------------------------------------------------------------------------- 1.434 + ConvertStringLineBreaks 1.435 + 1.436 +----------------------------------------------------------------------------*/ 1.437 +nsresult nsLinebreakConverter::ConvertStringLineBreaks(nsString& ioString, 1.438 + ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks) 1.439 +{ 1.440 + 1.441 + NS_ASSERTION(aDestBreaks != eLinebreakAny && 1.442 + aSrcBreaks != eLinebreakSpace, "Invalid parameter"); 1.443 + 1.444 + // nothing to do 1.445 + if (ioString.IsEmpty()) return NS_OK; 1.446 + 1.447 + nsresult rv; 1.448 + 1.449 + // remember the old buffer in case 1.450 + // we blow it away later 1.451 + nsString::char_iterator stringBuf; 1.452 + ioString.BeginWriting(stringBuf); 1.453 + 1.454 + int32_t newLen; 1.455 + 1.456 + rv = ConvertUnicharLineBreaksInSitu(&stringBuf, 1.457 + aSrcBreaks, aDestBreaks, 1.458 + ioString.Length() + 1, &newLen); 1.459 + if (NS_FAILED(rv)) return rv; 1.460 + 1.461 + if (stringBuf != ioString.get()) 1.462 + ioString.Adopt(stringBuf); 1.463 + 1.464 + return NS_OK; 1.465 +} 1.466 + 1.467 + 1.468 +