|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsLinebreakConverter.h" |
|
7 |
|
8 #include "nsMemory.h" |
|
9 #include "nsCRT.h" |
|
10 |
|
11 |
|
12 /*---------------------------------------------------------------------------- |
|
13 GetLinebreakString |
|
14 |
|
15 Could make this inline |
|
16 ----------------------------------------------------------------------------*/ |
|
17 static const char* GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType) |
|
18 { |
|
19 static const char* const sLinebreaks[] = { |
|
20 "", // any |
|
21 NS_LINEBREAK, // platform |
|
22 LFSTR, // content |
|
23 CRLF, // net |
|
24 CRSTR, // Mac |
|
25 LFSTR, // Unix |
|
26 CRLF, // Windows |
|
27 " ", // space |
|
28 nullptr |
|
29 }; |
|
30 |
|
31 return sLinebreaks[aBreakType]; |
|
32 } |
|
33 |
|
34 |
|
/*----------------------------------------------------------------------------
  AppendLinebreak

  Small helper that writes a line break at ioDest and advances ioDest past
  the characters written.

  The first character of lineBreakStr is always written; the second is
  written only if it is not the terminating null. Break strings longer than
  two characters are therefore truncated to two.
----------------------------------------------------------------------------*/
template<class T>
void AppendLinebreak(T*& ioDest, const char* lineBreakStr)
{
  // First character is emitted unconditionally.
  *ioDest = lineBreakStr[0];
  ++ioDest;

  // Second character only exists for two-character breaks such as CRLF.
  const char trailing = lineBreakStr[1];
  if (trailing != '\0')
  {
    *ioDest = trailing;
    ++ioDest;
  }
}
|
48 |
|
/*----------------------------------------------------------------------------
  CountLinebreaks

  Counts occurrences of breakStr in the first inLen characters of aSrc.

  Only the first two characters of breakStr are examined. For a
  two-character break, a first character that is not followed by the second
  one is skipped without being counted.
----------------------------------------------------------------------------*/
template<class T>
int32_t CountLinebreaks(const T* aSrc, int32_t inLen, const char* breakStr)
{
  int32_t matches = 0;
  int32_t pos = 0;

  while (pos < inLen)
  {
    const bool startsBreak = (aSrc[pos] == breakStr[0]);
    ++pos;                      // the current character is consumed either way

    if (!startsBreak)
      continue;

    if (!breakStr[1])
    {
      // one-character break: the match is already complete
      ++matches;
      continue;
    }

    // two-character break: the next character must complete the pair
    if (pos < inLen && aSrc[pos] == breakStr[1])
    {
      ++pos;
      ++matches;
    }
  }

  return matches;
}
|
88 |
|
89 |
|
90 /*---------------------------------------------------------------------------- |
|
91 ConvertBreaks |
|
92 |
|
93 ioLen *includes* a terminating null, if any |
|
94 ----------------------------------------------------------------------------*/ |
|
95 template<class T> |
|
96 static T* ConvertBreaks(const T* inSrc, int32_t& ioLen, const char* srcBreak, const char* destBreak) |
|
97 { |
|
98 NS_ASSERTION(inSrc && srcBreak && destBreak, "Got a null string"); |
|
99 |
|
100 T* resultString = nullptr; |
|
101 |
|
102 // handle the no conversion case |
|
103 if (nsCRT::strcmp(srcBreak, destBreak) == 0) |
|
104 { |
|
105 resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen); |
|
106 if (!resultString) return nullptr; |
|
107 memcpy(resultString, inSrc, sizeof(T) * ioLen); // includes the null, if any |
|
108 return resultString; |
|
109 } |
|
110 |
|
111 int32_t srcBreakLen = strlen(srcBreak); |
|
112 int32_t destBreakLen = strlen(destBreak); |
|
113 |
|
114 // handle the easy case, where the string length does not change, and the |
|
115 // breaks are only 1 char long, i.e. CR <-> LF |
|
116 if (srcBreakLen == destBreakLen && srcBreakLen == 1) |
|
117 { |
|
118 resultString = (T *)nsMemory::Alloc(sizeof(T) * ioLen); |
|
119 if (!resultString) return nullptr; |
|
120 |
|
121 const T* src = inSrc; |
|
122 const T* srcEnd = inSrc + ioLen; // includes null, if any |
|
123 T* dst = resultString; |
|
124 |
|
125 char srcBreakChar = *srcBreak; // we know it's one char long already |
|
126 char dstBreakChar = *destBreak; |
|
127 |
|
128 while (src < srcEnd) |
|
129 { |
|
130 if (*src == srcBreakChar) |
|
131 { |
|
132 *dst++ = dstBreakChar; |
|
133 src++; |
|
134 } |
|
135 else |
|
136 { |
|
137 *dst++ = *src++; |
|
138 } |
|
139 } |
|
140 |
|
141 // ioLen does not change |
|
142 } |
|
143 else |
|
144 { |
|
145 // src and dest termination is different length. Do it a slower way. |
|
146 |
|
147 // count linebreaks in src. Assumes that chars in 2-char linebreaks are unique. |
|
148 int32_t numLinebreaks = CountLinebreaks(inSrc, ioLen, srcBreak); |
|
149 |
|
150 int32_t newBufLen = ioLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen); |
|
151 resultString = (T *)nsMemory::Alloc(sizeof(T) * newBufLen); |
|
152 if (!resultString) return nullptr; |
|
153 |
|
154 const T* src = inSrc; |
|
155 const T* srcEnd = inSrc + ioLen; // includes null, if any |
|
156 T* dst = resultString; |
|
157 |
|
158 while (src < srcEnd) |
|
159 { |
|
160 if (*src == *srcBreak) |
|
161 { |
|
162 *dst++ = *destBreak; |
|
163 if (destBreak[1]) |
|
164 *dst++ = destBreak[1]; |
|
165 |
|
166 src++; |
|
167 if (src < srcEnd && srcBreak[1] && *src == srcBreak[1]) |
|
168 src++; |
|
169 } |
|
170 else |
|
171 { |
|
172 *dst++ = *src++; |
|
173 } |
|
174 } |
|
175 |
|
176 ioLen = newBufLen; |
|
177 } |
|
178 |
|
179 return resultString; |
|
180 } |
|
181 |
|
182 |
|
/*----------------------------------------------------------------------------
  ConvertBreaksInSitu

  Replaces every occurrence of srcBreak with destBreak directly inside the
  first inLen characters of inSrc. This is only usable when both breaks are
  a single character, because the buffer length cannot change.
----------------------------------------------------------------------------*/
template<class T>
static void ConvertBreaksInSitu(T* inSrc, int32_t inLen, char srcBreak, char destBreak)
{
  for (int32_t i = 0; i < inLen; ++i)
  {
    if (inSrc[i] == srcBreak)
      inSrc[i] = destBreak;
  }
}
|
203 |
|
204 |
|
205 /*---------------------------------------------------------------------------- |
|
206 ConvertUnknownBreaks |
|
207 |
|
208 Convert unknown line breaks to the specified break. |
|
209 |
|
210 This will convert CRLF pairs to one break, and single CR or LF to a break. |
|
211 ----------------------------------------------------------------------------*/ |
|
212 template<class T> |
|
213 static T* ConvertUnknownBreaks(const T* inSrc, int32_t& ioLen, const char* destBreak) |
|
214 { |
|
215 const T* src = inSrc; |
|
216 const T* srcEnd = inSrc + ioLen; // includes null, if any |
|
217 |
|
218 int32_t destBreakLen = strlen(destBreak); |
|
219 int32_t finalLen = 0; |
|
220 |
|
221 while (src < srcEnd) |
|
222 { |
|
223 if (*src == nsCRT::CR) |
|
224 { |
|
225 if (src < srcEnd && src[1] == nsCRT::LF) |
|
226 { |
|
227 // CRLF |
|
228 finalLen += destBreakLen; |
|
229 src++; |
|
230 } |
|
231 else |
|
232 { |
|
233 // Lone CR |
|
234 finalLen += destBreakLen; |
|
235 } |
|
236 } |
|
237 else if (*src == nsCRT::LF) |
|
238 { |
|
239 // Lone LF |
|
240 finalLen += destBreakLen; |
|
241 } |
|
242 else |
|
243 { |
|
244 finalLen++; |
|
245 } |
|
246 src++; |
|
247 } |
|
248 |
|
249 T* resultString = (T *)nsMemory::Alloc(sizeof(T) * finalLen); |
|
250 if (!resultString) return nullptr; |
|
251 |
|
252 src = inSrc; |
|
253 srcEnd = inSrc + ioLen; // includes null, if any |
|
254 |
|
255 T* dst = resultString; |
|
256 |
|
257 while (src < srcEnd) |
|
258 { |
|
259 if (*src == nsCRT::CR) |
|
260 { |
|
261 if (src < srcEnd && src[1] == nsCRT::LF) |
|
262 { |
|
263 // CRLF |
|
264 AppendLinebreak(dst, destBreak); |
|
265 src++; |
|
266 } |
|
267 else |
|
268 { |
|
269 // Lone CR |
|
270 AppendLinebreak(dst, destBreak); |
|
271 } |
|
272 } |
|
273 else if (*src == nsCRT::LF) |
|
274 { |
|
275 // Lone LF |
|
276 AppendLinebreak(dst, destBreak); |
|
277 } |
|
278 else |
|
279 { |
|
280 *dst++ = *src; |
|
281 } |
|
282 src++; |
|
283 } |
|
284 |
|
285 ioLen = finalLen; |
|
286 return resultString; |
|
287 } |
|
288 |
|
289 |
|
290 /*---------------------------------------------------------------------------- |
|
291 ConvertLineBreaks |
|
292 |
|
293 ----------------------------------------------------------------------------*/ |
|
294 char* nsLinebreakConverter::ConvertLineBreaks(const char* aSrc, |
|
295 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) |
|
296 { |
|
297 NS_ASSERTION(aDestBreaks != eLinebreakAny && |
|
298 aSrcBreaks != eLinebreakSpace, "Invalid parameter"); |
|
299 if (!aSrc) return nullptr; |
|
300 |
|
301 int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen; |
|
302 |
|
303 char* resultString; |
|
304 if (aSrcBreaks == eLinebreakAny) |
|
305 resultString = ConvertUnknownBreaks(aSrc, sourceLen, GetLinebreakString(aDestBreaks)); |
|
306 else |
|
307 resultString = ConvertBreaks(aSrc, sourceLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks)); |
|
308 |
|
309 if (outLen) |
|
310 *outLen = sourceLen; |
|
311 return resultString; |
|
312 } |
|
313 |
|
314 |
|
315 /*---------------------------------------------------------------------------- |
|
316 ConvertLineBreaksInSitu |
|
317 |
|
318 ----------------------------------------------------------------------------*/ |
|
319 nsresult nsLinebreakConverter::ConvertLineBreaksInSitu(char **ioBuffer, ELinebreakType aSrcBreaks, |
|
320 ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) |
|
321 { |
|
322 NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed"); |
|
323 if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER; |
|
324 |
|
325 NS_ASSERTION(aDestBreaks != eLinebreakAny && |
|
326 aSrcBreaks != eLinebreakSpace, "Invalid parameter"); |
|
327 |
|
328 int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(*ioBuffer) + 1 : aSrcLen; |
|
329 |
|
330 // can we convert in-place? |
|
331 const char* srcBreaks = GetLinebreakString(aSrcBreaks); |
|
332 const char* dstBreaks = GetLinebreakString(aDestBreaks); |
|
333 |
|
334 if ( (aSrcBreaks != eLinebreakAny) && |
|
335 (strlen(srcBreaks) == 1) && |
|
336 (strlen(dstBreaks) == 1) ) |
|
337 { |
|
338 ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks); |
|
339 if (outLen) |
|
340 *outLen = sourceLen; |
|
341 } |
|
342 else |
|
343 { |
|
344 char* destBuffer; |
|
345 |
|
346 if (aSrcBreaks == eLinebreakAny) |
|
347 destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks); |
|
348 else |
|
349 destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks); |
|
350 |
|
351 if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY; |
|
352 *ioBuffer = destBuffer; |
|
353 if (outLen) |
|
354 *outLen = sourceLen; |
|
355 } |
|
356 |
|
357 return NS_OK; |
|
358 } |
|
359 |
|
360 |
|
361 /*---------------------------------------------------------------------------- |
|
362 ConvertUnicharLineBreaks |
|
363 |
|
364 ----------------------------------------------------------------------------*/ |
|
365 char16_t* nsLinebreakConverter::ConvertUnicharLineBreaks(const char16_t* aSrc, |
|
366 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) |
|
367 { |
|
368 NS_ASSERTION(aDestBreaks != eLinebreakAny && |
|
369 aSrcBreaks != eLinebreakSpace, "Invalid parameter"); |
|
370 if (!aSrc) return nullptr; |
|
371 |
|
372 int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen; |
|
373 |
|
374 char16_t* resultString; |
|
375 if (aSrcBreaks == eLinebreakAny) |
|
376 resultString = ConvertUnknownBreaks(aSrc, bufLen, GetLinebreakString(aDestBreaks)); |
|
377 else |
|
378 resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks), GetLinebreakString(aDestBreaks)); |
|
379 |
|
380 if (outLen) |
|
381 *outLen = bufLen; |
|
382 return resultString; |
|
383 } |
|
384 |
|
385 |
|
386 /*---------------------------------------------------------------------------- |
|
387 ConvertStringLineBreaks |
|
388 |
|
389 ----------------------------------------------------------------------------*/ |
|
390 nsresult nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(char16_t **ioBuffer, |
|
391 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks, int32_t aSrcLen, int32_t* outLen) |
|
392 { |
|
393 NS_ASSERTION(ioBuffer && *ioBuffer, "Null pointer passed"); |
|
394 if (!ioBuffer || !*ioBuffer) return NS_ERROR_NULL_POINTER; |
|
395 NS_ASSERTION(aDestBreaks != eLinebreakAny && |
|
396 aSrcBreaks != eLinebreakSpace, "Invalid parameter"); |
|
397 |
|
398 int32_t sourceLen = (aSrcLen == kIgnoreLen) ? NS_strlen(*ioBuffer) + 1 : aSrcLen; |
|
399 |
|
400 // can we convert in-place? |
|
401 const char* srcBreaks = GetLinebreakString(aSrcBreaks); |
|
402 const char* dstBreaks = GetLinebreakString(aDestBreaks); |
|
403 |
|
404 if ( (aSrcBreaks != eLinebreakAny) && |
|
405 (strlen(srcBreaks) == 1) && |
|
406 (strlen(dstBreaks) == 1) ) |
|
407 { |
|
408 ConvertBreaksInSitu(*ioBuffer, sourceLen, *srcBreaks, *dstBreaks); |
|
409 if (outLen) |
|
410 *outLen = sourceLen; |
|
411 } |
|
412 else |
|
413 { |
|
414 char16_t* destBuffer; |
|
415 |
|
416 if (aSrcBreaks == eLinebreakAny) |
|
417 destBuffer = ConvertUnknownBreaks(*ioBuffer, sourceLen, dstBreaks); |
|
418 else |
|
419 destBuffer = ConvertBreaks(*ioBuffer, sourceLen, srcBreaks, dstBreaks); |
|
420 |
|
421 if (!destBuffer) return NS_ERROR_OUT_OF_MEMORY; |
|
422 *ioBuffer = destBuffer; |
|
423 if (outLen) |
|
424 *outLen = sourceLen; |
|
425 } |
|
426 |
|
427 return NS_OK; |
|
428 } |
|
429 |
|
430 /*---------------------------------------------------------------------------- |
|
431 ConvertStringLineBreaks |
|
432 |
|
433 ----------------------------------------------------------------------------*/ |
|
434 nsresult nsLinebreakConverter::ConvertStringLineBreaks(nsString& ioString, |
|
435 ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks) |
|
436 { |
|
437 |
|
438 NS_ASSERTION(aDestBreaks != eLinebreakAny && |
|
439 aSrcBreaks != eLinebreakSpace, "Invalid parameter"); |
|
440 |
|
441 // nothing to do |
|
442 if (ioString.IsEmpty()) return NS_OK; |
|
443 |
|
444 nsresult rv; |
|
445 |
|
446 // remember the old buffer in case |
|
447 // we blow it away later |
|
448 nsString::char_iterator stringBuf; |
|
449 ioString.BeginWriting(stringBuf); |
|
450 |
|
451 int32_t newLen; |
|
452 |
|
453 rv = ConvertUnicharLineBreaksInSitu(&stringBuf, |
|
454 aSrcBreaks, aDestBreaks, |
|
455 ioString.Length() + 1, &newLen); |
|
456 if (NS_FAILED(rv)) return rv; |
|
457 |
|
458 if (stringBuf != ioString.get()) |
|
459 ioString.Adopt(stringBuf); |
|
460 |
|
461 return NS_OK; |
|
462 } |
|
463 |
|
464 |
|
465 |