xpcom/io/nsLinebreakConverter.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "nsLinebreakConverter.h"
     8 #include "nsMemory.h"
     9 #include "nsCRT.h"
    12 /*----------------------------------------------------------------------------
    13 	GetLinebreakString 
    15 	Could make this inline
    16 ----------------------------------------------------------------------------*/
    17 static const char* GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType)
    18 {
    19   static const char* const sLinebreaks[] = {
    20     "",             // any
    21     NS_LINEBREAK,   // platform
    22     LFSTR,          // content
    23     CRLF,           // net
    24     CRSTR,          // Mac
    25     LFSTR,          // Unix
    26     CRLF,           // Windows
    27     " ",            // space
    28     nullptr  
    29   };
    31   return sLinebreaks[aBreakType];
    32 }
    35 /*----------------------------------------------------------------------------
    36 	AppendLinebreak 
    38 	Wee inline method to append a line break. Modifies ioDest.
    39 ----------------------------------------------------------------------------*/
    40 template<class T>
    41 void AppendLinebreak(T*& ioDest, const char* lineBreakStr)
    42 {
    43   *ioDest++ = *lineBreakStr;
    45   if (lineBreakStr[1])
    46     *ioDest++ = lineBreakStr[1];
    47 }
    49 /*----------------------------------------------------------------------------
    50 	CountChars 
    52 	Counts occurrences of breakStr in aSrc
    53 ----------------------------------------------------------------------------*/
    54 template<class T>
    55 int32_t CountLinebreaks(const T* aSrc, int32_t inLen, const char* breakStr)
    56 {
    57   const T* src = aSrc;
    58   const T* srcEnd = aSrc + inLen;
    59   int32_t theCount = 0;
    61   while (src < srcEnd)
    62   {
    63     if (*src == *breakStr)
    64     {
    65       src++;
    67       if (breakStr[1])
    68       {
    69         if (src < srcEnd && *src == breakStr[1])
    70         {
    71           src++;
    72           theCount++;
    73         }
    74       }
    75       else
    76       {
    77         theCount++;
    78       }
    79     }
    80     else
    81     {
    82       src++;
    83     }
    84   }
    86   return theCount;
    87 }
    90 /*----------------------------------------------------------------------------
    91 	ConvertBreaks 
    93 	ioLen *includes* a terminating null, if any
    94 ----------------------------------------------------------------------------*/
    95 template<class T>
    96 static T* ConvertBreaks(const T* inSrc, int32_t& ioLen, const char* srcBreak, const char* destBreak)
    97 {
    98   NS_ASSERTION(inSrc && srcBreak && destBreak, "Got a null string");
   100   T* resultString = nullptr;
   102   // handle the no conversion case
   103   if (nsCRT::strcmp(srcBreak, destBreak) == 0)
   104   {
   105     resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
   106     if (!resultString) return nullptr;
   107     memcpy(resultString, inSrc, sizeof(T) * ioLen); // includes the null, if any
   108     return resultString;
   109   }
   111   int32_t srcBreakLen = strlen(srcBreak);
   112   int32_t destBreakLen = strlen(destBreak);
   114   // handle the easy case, where the string length does not change, and the
   115   // breaks are only 1 char long, i.e. CR <-> LF
   116   if (srcBreakLen == destBreakLen && srcBreakLen == 1)
   117   {
   118     resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen);
   119     if (!resultString) return nullptr;
   121     const T* src = inSrc;
   122     const T* srcEnd = inSrc + ioLen;		// includes null, if any
   123     T*       dst = resultString;
   125     char srcBreakChar = *srcBreak;        // we know it's one char long already
   126     char dstBreakChar = *destBreak;
   128     while (src < srcEnd)
   129     {
   130       if (*src == srcBreakChar)
   131       {
   132         *dst++ = dstBreakChar;
   133         src++;
   134       }
   135       else
   136       {
   137         *dst++ = *src++;
   138       }
   139     }
   141     // ioLen does not change
   142   }
   143   else
   144   {
   145     // src and dest termination is different length. Do it a slower way.
   147     // count linebreaks in src. Assumes that chars in 2-char linebreaks are unique.
   148     int32_t numLinebreaks = CountLinebreaks(inSrc, ioLen, srcBreak);
   150     int32_t newBufLen = ioLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen);
   151     resultString = (T *)nsMemory::Alloc(sizeof(T) * newBufLen);
   152     if (!resultString) return nullptr;
   154     const T* src = inSrc;
   155     const T* srcEnd = inSrc + ioLen;		// includes null, if any
   156     T*       dst = resultString;
   158     while (src < srcEnd)
   159     {
   160       if (*src == *srcBreak)
   161       {
   162         *dst++ = *destBreak;
   163         if (destBreak[1])
   164           *dst++ = destBreak[1];
   166         src++;
   167         if (src < srcEnd && srcBreak[1] && *src == srcBreak[1])
   168           src++;
   169       }
   170       else
   171       {
   172         *dst++ = *src++;
   173       }
   174     }
   176     ioLen = newBufLen;
   177   }
   179   return resultString;
   180 }
   183 /*----------------------------------------------------------------------------
   184   ConvertBreaksInSitu 
   186   Convert breaks in situ. Can only do this if the linebreak length
   187   does not change.
   188 ----------------------------------------------------------------------------*/
   189 template<class T>
   190 static void ConvertBreaksInSitu(T* inSrc, int32_t inLen, char srcBreak, char destBreak)
   191 {
   192   T* src = inSrc;
   193   T* srcEnd = inSrc + inLen;
   195   while (src < srcEnd)
   196   {
   197     if (*src == srcBreak)
   198       *src = destBreak;
   200     src++;
   201   }
   202 }
   205 /*----------------------------------------------------------------------------
   206   ConvertUnknownBreaks 
   208   Convert unknown line breaks to the specified break.
   210   This will convert CRLF pairs to one break, and single CR or LF to a break.
   211 ----------------------------------------------------------------------------*/
   212 template<class T>
   213 static T* ConvertUnknownBreaks(const T* inSrc, int32_t& ioLen, const char* destBreak)
   214 {
   215   const T* src = inSrc;
   216   const T* srcEnd = inSrc + ioLen;		// includes null, if any
   218   int32_t destBreakLen = strlen(destBreak);
   219   int32_t finalLen = 0;
   221   while (src < srcEnd)
   222   {
   223     if (*src == nsCRT::CR)
   224     {
   225       if (src < srcEnd && src[1] == nsCRT::LF)
   226       {
   227         // CRLF
   228         finalLen += destBreakLen;
   229         src++;
   230       }
   231       else
   232       {
   233         // Lone CR
   234         finalLen += destBreakLen;
   235       }
   236     }
   237     else if (*src == nsCRT::LF)
   238     {
   239       // Lone LF
   240       finalLen += destBreakLen;
   241     }
   242     else
   243     {
   244       finalLen++;
   245     }
   246     src++;
   247   }
   249   T* resultString = (T *)nsMemory::Alloc(sizeof(T) * finalLen);
   250   if (!resultString) return nullptr;
   252   src = inSrc;
   253   srcEnd = inSrc + ioLen;		// includes null, if any
   255   T* dst = resultString;
   257   while (src < srcEnd)
   258   {
   259     if (*src == nsCRT::CR)
   260     {
   261       if (src < srcEnd && src[1] == nsCRT::LF)
   262       {
   263         // CRLF
   264         AppendLinebreak(dst, destBreak);
   265         src++;
   266       }
   267       else
   268       {
   269         // Lone CR
   270         AppendLinebreak(dst, destBreak);
   271       }
   272     }
   273     else if (*src == nsCRT::LF)
   274     {
   275       // Lone LF
   276       AppendLinebreak(dst, destBreak);
   277     }
   278     else
   279     {
   280       *dst++ = *src;
   281     }
   282     src++;
   283   }
   285   ioLen = finalLen;
   286   return resultString;
   287 }
   290 /*----------------------------------------------------------------------------
   291 	ConvertLineBreaks 
   293 ----------------------------------------------------------------------------*/
   294 char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc,
   295             ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
   296 {
   297   NS_ASSERTION(aDestBreaks != eLinebreakAny &&
   298                aSrcBreaks != eLinebreakSpace, "Invalid parameter");
   299   if (!aSrc) return nullptr;
   301   int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen;
   303   char* resultString;
   304   if (aSrcBreaks == eLinebreakAny)
   305     resultString = ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks));
   306   else
   307     resultString = ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
   309   if (outLen)
   310     *outLen = sourceLen;
   311   return resultString;
   312 }
   315 /*----------------------------------------------------------------------------
   316 	ConvertLineBreaksInSitu 
   318 ----------------------------------------------------------------------------*/
   319 nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(char **ioBuffer, ELinebreakType aSrcBreaks,
   320             ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
   321 {
   322   NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
   323   if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
   325   NS_ASSERTION(aDestBreaks != eLinebreakAny &&
   326                aSrcBreaks != eLinebreakSpace, "Invalid parameter");
   328   int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(*ioBuffer) + 1 : aSrcLen;
   330   // can we convert in-place?
   331   const char* srcBreaks = GetLinebreakString(aSrcBreaks);
   332   const char* dstBreaks = GetLinebreakString(aDestBreaks);
   334   if ( (aSrcBreaks != eLinebreakAny) &&
   335        (strlen(srcBreaks) == 1) &&
   336        (strlen(dstBreaks) == 1) )
   337   {
   338     ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
   339     if (outLen)
   340       *outLen = sourceLen;
   341   }
   342   else
   343   {
   344     char* destBuffer;
   346     if (aSrcBreaks == eLinebreakAny)
   347       destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
   348     else
   349       destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
   351     if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
   352     *ioBuffer = destBuffer;
   353     if (outLen)
   354       *outLen = sourceLen;
   355   }
   357   return NS_OK;
   358 }
   361 /*----------------------------------------------------------------------------
   362 	ConvertUnicharLineBreaks 
   364 ----------------------------------------------------------------------------*/
   365 char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks(const char16_t* aSrc,
   366             ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
   367 {
   368   NS_ASSERTION(aDestBreaks != eLinebreakAny &&
   369                aSrcBreaks != eLinebreakSpace, "Invalid parameter");
   370   if (!aSrc) return nullptr;
   372   int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen;
   374   char16_t* resultString;
   375   if (aSrcBreaks == eLinebreakAny)
   376     resultString = ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks));
   377   else
   378     resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks));
   380   if (outLen)
   381     *outLen = bufLen;
   382   return resultString;
   383 }
   386 /*----------------------------------------------------------------------------
   387 	ConvertStringLineBreaks 
   389 ----------------------------------------------------------------------------*/
   390 nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(char16_t **ioBuffer,
   391             ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen)
   392 {
   393   NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed");
   394   if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER;
   395   NS_ASSERTION(aDestBreaks != eLinebreakAny &&
   396                aSrcBreaks != eLinebreakSpace, "Invalid parameter");
   398   int32_t sourceLen = (aSrcLen == kIgnoreLen) ? NS_strlen(*ioBuffer) + 1 : aSrcLen;
   400   // can we convert in-place?
   401   const char* srcBreaks = GetLinebreakString(aSrcBreaks);
   402   const char* dstBreaks = GetLinebreakString(aDestBreaks);
   404   if ( (aSrcBreaks != eLinebreakAny) &&
   405        (strlen(srcBreaks) == 1) &&
   406        (strlen(dstBreaks) == 1) )
   407   {
   408     ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks);
   409     if (outLen)
   410       *outLen = sourceLen;
   411   }
   412   else
   413   {
   414     char16_t* destBuffer;
   416     if (aSrcBreaks == eLinebreakAny)
   417       destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks);
   418     else
   419       destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks);
   421     if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY;
   422     *ioBuffer = destBuffer;
   423     if (outLen)
   424       *outLen = sourceLen;
   425   }
   427   return NS_OK;
   428 }
   430 /*----------------------------------------------------------------------------
   431 	ConvertStringLineBreaks 
   433 ----------------------------------------------------------------------------*/
   434 nsresult nsLinebreakConverter::ConvertStringLineBreaks(nsString& ioString,
   435           ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks)
   436 {
   438   NS_ASSERTION(aDestBreaks != eLinebreakAny &&
   439                aSrcBreaks != eLinebreakSpace, "Invalid parameter");
   441   // nothing to do
   442   if (ioString.IsEmpty()) return NS_OK;
   444   nsresult rv;
   446   // remember the old buffer in case
   447   // we blow it away later
   448   nsString::char_iterator stringBuf;
   449   ioString.BeginWriting(stringBuf);
   451   int32_t    newLen;
   453   rv = ConvertUnicharLineBreaksInSitu(&stringBuf,
   454                                       aSrcBreaks, aDestBreaks,
   455                                       ioString.Length() + 1, &newLen);
   456   if (NS_FAILED(rv)) return rv;
   458   if (stringBuf != ioString.get())
   459     ioString.Adopt(stringBuf);
   461   return NS_OK;
   462 }

mercurial