Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirement documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
8 #include "nsEscape.h"
9 #include "nsMemory.h"
10 #include "nsCRT.h"
11 #include "nsReadableUtils.h"
/*
 * Per-character permission table consulted through IS_OK(): a byte is copied
 * unescaped when (netCharType[byte] & flags) is non-zero.
 *
 *   value 1 (bit 0)  xalpha  -- the alphas
 *   value 2 (bit 1)  xpalpha -- as xalpha, but converts spaces to plus and
 *                               plus to %2B
 *   value 4 (bit 2)  path    -- as xalpha, but doesn't escape '/'
 *
 * The entry for '@' was changed from 7 to 0 so that '@' can be escaped in
 * usernames and passwords in publishing.
 *
 * Only 0x00-0x7F are spelled out; the remaining entries (0x80-0xFF) are
 * zero-initialised by the language.
 */
const int netCharType[256] = {
/*  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F                        */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x                  */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 1x                  */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 0, 7, 7, 4,   /* 2x  !"#$%&'()*+,-./ */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0,   /* 3x 0123456789:;<=>? */
    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* 4x @ABCDEFGHIJKLMNO */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 7,   /* 5x PQRSTUVWXYZ[\]^_ */
    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   /* 6x `abcdefghijklmno */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0    /* 7x pqrstuvwxyz{|}~ DEL */
};
/*
 * UNHEX(C) decodes one hexadecimal digit character ('0'-'9', 'A'-'F',
 * 'a'-'f') into its numeric value 0..15; any other character yields 0.
 *
 * Every use of the argument is parenthesised so that compound expressions
 * (for example a conditional expression) are decoded as a whole.  The
 * argument is still evaluated more than once, so it must not have side
 * effects.
 */
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
    (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
    (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))

/*
 * IS_OK(C) is non-zero when netCharType allows character C to be copied
 * unescaped for the mask held in a variable named `flags`, which must be in
 * scope wherever the macro is expanded.
 */
#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags))

/* The character that introduces a %XX escape sequence. */
#define HEX_ESCAPE '%'
43 //----------------------------------------------------------------------------------------
44 static char* nsEscapeCount(
45 const char * str,
46 nsEscapeMask flags,
47 size_t* out_len)
48 //----------------------------------------------------------------------------------------
49 {
50 if (!str)
51 return 0;
53 size_t i, len = 0, charsToEscape = 0;
54 static const char hexChars[] = "0123456789ABCDEF";
56 const unsigned char* src = (const unsigned char *) str;
57 while (*src)
58 {
59 len++;
60 if (!IS_OK(*src++))
61 charsToEscape++;
62 }
64 // calculate how much memory should be allocated
65 // original length + 2 bytes for each escaped character + terminating '\0'
66 // do the sum in steps to check for overflow
67 size_t dstSize = len + 1 + charsToEscape;
68 if (dstSize <= len)
69 return 0;
70 dstSize += charsToEscape;
71 if (dstSize < len)
72 return 0;
74 // fail if we need more than 4GB
75 // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
76 // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is
77 // no chance to allocate more than 4GB using nsMemory::Alloc()
78 if (dstSize > UINT32_MAX)
79 return 0;
81 char* result = (char *)nsMemory::Alloc(dstSize);
82 if (!result)
83 return 0;
85 unsigned char* dst = (unsigned char *) result;
86 src = (const unsigned char *) str;
87 if (flags == url_XPAlphas)
88 {
89 for (i = 0; i < len; i++)
90 {
91 unsigned char c = *src++;
92 if (IS_OK(c))
93 *dst++ = c;
94 else if (c == ' ')
95 *dst++ = '+'; /* convert spaces to pluses */
96 else
97 {
98 *dst++ = HEX_ESCAPE;
99 *dst++ = hexChars[c >> 4]; /* high nibble */
100 *dst++ = hexChars[c & 0x0f]; /* low nibble */
101 }
102 }
103 }
104 else
105 {
106 for (i = 0; i < len; i++)
107 {
108 unsigned char c = *src++;
109 if (IS_OK(c))
110 *dst++ = c;
111 else
112 {
113 *dst++ = HEX_ESCAPE;
114 *dst++ = hexChars[c >> 4]; /* high nibble */
115 *dst++ = hexChars[c & 0x0f]; /* low nibble */
116 }
117 }
118 }
120 *dst = '\0'; /* tack on eos */
121 if(out_len)
122 *out_len = dst - (unsigned char *) result;
123 return result;
124 }
126 //----------------------------------------------------------------------------------------
127 char* nsEscape(const char * str, nsEscapeMask flags)
128 //----------------------------------------------------------------------------------------
129 {
130 if(!str)
131 return nullptr;
132 return nsEscapeCount(str, flags, nullptr);
133 }
135 //----------------------------------------------------------------------------------------
136 char* nsUnescape(char * str)
137 //----------------------------------------------------------------------------------------
138 {
139 nsUnescapeCount(str);
140 return str;
141 }
143 //----------------------------------------------------------------------------------------
144 int32_t nsUnescapeCount(char * str)
145 //----------------------------------------------------------------------------------------
146 {
147 char *src = str;
148 char *dst = str;
149 static const char hexChars[] = "0123456789ABCDEFabcdef";
151 char c1[] = " ";
152 char c2[] = " ";
153 char* const pc1 = c1;
154 char* const pc2 = c2;
156 if (!*src) {
157 // A null string was passed in. Nothing to escape.
158 // Returns early as the string might not actually be mutable with
159 // length 0.
160 return 0;
161 }
163 while (*src)
164 {
165 c1[0] = *(src+1);
166 if (*(src+1) == '\0')
167 c2[0] = '\0';
168 else
169 c2[0] = *(src+2);
171 if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
172 PL_strpbrk(pc2, hexChars) == 0 )
173 *dst++ = *src++;
174 else
175 {
176 src++; /* walk over escape */
177 if (*src)
178 {
179 *dst = UNHEX(*src) << 4;
180 src++;
181 }
182 if (*src)
183 {
184 *dst = (*dst + UNHEX(*src));
185 src++;
186 }
187 dst++;
188 }
189 }
191 *dst = 0;
192 return (int)(dst - str);
194 } /* NET_UnEscapeCnt */
197 char *
198 nsEscapeHTML(const char * string)
199 {
200 char *rv = nullptr;
201 /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
202 uint32_t len = strlen(string);
203 if (len >= (UINT32_MAX / 6))
204 return nullptr;
206 rv = (char *)NS_Alloc( (6 * len) + 1 );
207 char *ptr = rv;
209 if(rv)
210 {
211 for(; *string != '\0'; string++)
212 {
213 if(*string == '<')
214 {
215 *ptr++ = '&';
216 *ptr++ = 'l';
217 *ptr++ = 't';
218 *ptr++ = ';';
219 }
220 else if(*string == '>')
221 {
222 *ptr++ = '&';
223 *ptr++ = 'g';
224 *ptr++ = 't';
225 *ptr++ = ';';
226 }
227 else if(*string == '&')
228 {
229 *ptr++ = '&';
230 *ptr++ = 'a';
231 *ptr++ = 'm';
232 *ptr++ = 'p';
233 *ptr++ = ';';
234 }
235 else if (*string == '"')
236 {
237 *ptr++ = '&';
238 *ptr++ = 'q';
239 *ptr++ = 'u';
240 *ptr++ = 'o';
241 *ptr++ = 't';
242 *ptr++ = ';';
243 }
244 else if (*string == '\'')
245 {
246 *ptr++ = '&';
247 *ptr++ = '#';
248 *ptr++ = '3';
249 *ptr++ = '9';
250 *ptr++ = ';';
251 }
252 else
253 {
254 *ptr++ = *string;
255 }
256 }
257 *ptr = '\0';
258 }
260 return(rv);
261 }
263 char16_t *
264 nsEscapeHTML2(const char16_t *aSourceBuffer, int32_t aSourceBufferLen)
265 {
266 // Calculate the length, if the caller didn't.
267 if (aSourceBufferLen < 0) {
268 aSourceBufferLen = NS_strlen(aSourceBuffer);
269 }
271 /* XXX Hardcoded max entity len. */
272 if (uint32_t(aSourceBufferLen) >=
273 ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t))) )
274 return nullptr;
276 char16_t *resultBuffer = (char16_t *)nsMemory::Alloc(aSourceBufferLen *
277 6 * sizeof(char16_t) + sizeof(char16_t('\0')));
278 char16_t *ptr = resultBuffer;
280 if (resultBuffer) {
281 int32_t i;
283 for(i = 0; i < aSourceBufferLen; i++) {
284 if(aSourceBuffer[i] == '<') {
285 *ptr++ = '&';
286 *ptr++ = 'l';
287 *ptr++ = 't';
288 *ptr++ = ';';
289 } else if(aSourceBuffer[i] == '>') {
290 *ptr++ = '&';
291 *ptr++ = 'g';
292 *ptr++ = 't';
293 *ptr++ = ';';
294 } else if(aSourceBuffer[i] == '&') {
295 *ptr++ = '&';
296 *ptr++ = 'a';
297 *ptr++ = 'm';
298 *ptr++ = 'p';
299 *ptr++ = ';';
300 } else if (aSourceBuffer[i] == '"') {
301 *ptr++ = '&';
302 *ptr++ = 'q';
303 *ptr++ = 'u';
304 *ptr++ = 'o';
305 *ptr++ = 't';
306 *ptr++ = ';';
307 } else if (aSourceBuffer[i] == '\'') {
308 *ptr++ = '&';
309 *ptr++ = '#';
310 *ptr++ = '3';
311 *ptr++ = '9';
312 *ptr++ = ';';
313 } else {
314 *ptr++ = aSourceBuffer[i];
315 }
316 }
317 *ptr = 0;
318 }
320 return resultBuffer;
321 }
323 //----------------------------------------------------------------------------------------
/*
 * Per-character bitmask table used by NO_NEED_ESC(): a character does not
 * need escaping when (EscapeChars[c] & flags) is non-zero.  1023 has all of
 * the ten low bits set; 0 has none of them set.
 *
 * Only 0x00-0x7F are spelled out; the remaining entries (0x80-0xFF) are
 * zero-initialised by the language.
 */
const int EscapeChars[256] = {
/*     0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F */
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  /* 0x */
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  /* 1x */
       0, 1023,    0,  512, 1023,    0, 1023,    0, 1023, 1023, 1023, 1023, 1023, 1023,  953,  784,  /* 2x  !"#$%&'()*+,-./ */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1008, 1008,    0, 1008,    0,  768,  /* 3x 0123456789:;<=>? */
    1008, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  /* 4x @ABCDEFGHIJKLMNO */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  896,  896,  896,  896, 1023,  /* 5x PQRSTUVWXYZ[\]^_ */
       0, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  /* 6x `abcdefghijklmno */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  896, 1012,  896, 1023,    0   /* 7x pqrstuvwxyz{|}~ DEL */
};

/*
 * Non-zero when character C may be copied unescaped for the mask held in a
 * variable named `flags`, which must be in scope at the point of expansion.
 */
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags))
341 //----------------------------------------------------------------------------------------
343 /* returns an escaped string */
345 /* use the following flags to specify which
346 part of an URL you want to escape:
348 esc_Scheme = 1
349 esc_Username = 2
350 esc_Password = 4
351 esc_Host = 8
352 esc_Directory = 16
353 esc_FileBaseName = 32
354 esc_FileExtension = 64
355 esc_Param = 128
356 esc_Query = 256
357 esc_Ref = 512
358 */
360 /* by default this function will not escape parts of a string
361 that already look escaped, which means it already includes
362 a valid hexcode. This is done to avoid multiple escapes of
363 a string. Use the following flags to force escaping of a
364 string:
366 esc_Forced = 1024
367 */
369 bool NS_EscapeURL(const char *part,
370 int32_t partLen,
371 uint32_t flags,
372 nsACString &result)
373 {
374 if (!part) {
375 NS_NOTREACHED("null pointer");
376 return false;
377 }
379 int i = 0;
380 static const char hexChars[] = "0123456789ABCDEF";
381 if (partLen < 0)
382 partLen = strlen(part);
383 bool forced = !!(flags & esc_Forced);
384 bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
385 bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
386 bool writing = !!(flags & esc_AlwaysCopy);
387 bool colon = !!(flags & esc_Colon);
389 const unsigned char* src = (const unsigned char *) part;
391 char tempBuffer[100];
392 unsigned int tempBufferPos = 0;
394 bool previousIsNonASCII = false;
395 for (i = 0; i < partLen; i++)
396 {
397 unsigned char c = *src++;
399 // if the char has not to be escaped or whatever follows % is
400 // a valid escaped string, just copy the char.
401 //
402 // Also the % will not be escaped until forced
403 // See bugzilla bug 61269 for details why we changed this
404 //
405 // And, we will not escape non-ascii characters if requested.
406 // On special request we will also escape the colon even when
407 // not covered by the matrix.
408 // ignoreAscii is not honored for control characters (C0 and DEL)
409 //
410 // And, we should escape the '|' character when it occurs after any
411 // non-ASCII character as it may be part of a multi-byte character.
412 //
413 // 0x20..0x7e are the valid ASCII characters. We also escape spaces
414 // (0x20) since they are not legal in URLs.
415 if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
416 || (c > 0x7f && ignoreNonAscii)
417 || (c > 0x20 && c < 0x7f && ignoreAscii))
418 && !(c == ':' && colon)
419 && !(previousIsNonASCII && c == '|' && !ignoreNonAscii))
420 {
421 if (writing)
422 tempBuffer[tempBufferPos++] = c;
423 }
424 else /* do the escape magic */
425 {
426 if (!writing)
427 {
428 result.Append(part, i);
429 writing = true;
430 }
431 tempBuffer[tempBufferPos++] = HEX_ESCAPE;
432 tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
433 tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
434 }
436 if (tempBufferPos >= sizeof(tempBuffer) - 4)
437 {
438 NS_ASSERTION(writing, "should be writing");
439 tempBuffer[tempBufferPos] = '\0';
440 result += tempBuffer;
441 tempBufferPos = 0;
442 }
444 previousIsNonASCII = (c > 0x7f);
445 }
446 if (writing) {
447 tempBuffer[tempBufferPos] = '\0';
448 result += tempBuffer;
449 }
450 return writing;
451 }
453 #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
455 bool NS_UnescapeURL(const char *str, int32_t len, uint32_t flags, nsACString &result)
456 {
457 if (!str) {
458 NS_NOTREACHED("null pointer");
459 return false;
460 }
462 if (len < 0)
463 len = strlen(str);
465 bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
466 bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
467 bool writing = !!(flags & esc_AlwaysCopy);
468 bool skipControl = !!(flags & esc_SkipControl);
470 static const char hexChars[] = "0123456789ABCDEFabcdef";
472 const char *last = str;
473 const char *p = str;
475 for (int i=0; i<len; ++i, ++p) {
476 //printf("%c [i=%d of len=%d]\n", *p, i, len);
477 if (*p == HEX_ESCAPE && i < len-2) {
478 unsigned char *p1 = ((unsigned char *) p) + 1;
479 unsigned char *p2 = ((unsigned char *) p) + 2;
480 if (ISHEX(*p1) && ISHEX(*p2) &&
481 ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
482 !(skipControl &&
483 (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
484 //printf("- p1=%c p2=%c\n", *p1, *p2);
485 writing = true;
486 if (p > last) {
487 //printf("- p=%p, last=%p\n", p, last);
488 result.Append(last, p - last);
489 last = p;
490 }
491 char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
492 //printf("- u=%c\n", u);
493 result.Append(u);
494 i += 2;
495 p += 2;
496 last += 3;
497 }
498 }
499 }
500 if (writing && last < str + len)
501 result.Append(last, str + len - last);
503 return writing;
504 }