xpcom/io/nsEscape.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
michael@0 7
michael@0 8 #include "nsEscape.h"
michael@0 9 #include "nsMemory.h"
michael@0 10 #include "nsCRT.h"
michael@0 11 #include "nsReadableUtils.h"
michael@0 12
/* Per-byte classification table consulted through IS_OK(). Each entry is
** a bit mask telling in which escaping modes the byte may be copied
** through without being percent-escaped:
**
**   Bit 0 (value 1)  xalpha  -- the alphas
**   Bit 1 (value 2)  xpalpha -- as xalpha but
**                               converts spaces to plus and plus to %2B
**   Bit 2 (value 4)  path    -- as xalphas but doesn't escape '/'
**
** Only entries 0x00..0x80 are written out; the rest of the 256 entries
** are zero-initialized, i.e. those bytes are never "OK" in any mode.
*/
const int netCharType[256] =
/*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,   /* 0x                   */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,   /* 1x                   */
    0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,   /* 2x   !"#$%&'()*+,-./ */
    7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,   /* 3x  0123456789:;<=>? */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,   /* 4x  @ABCDEFGHIJKLMNO */
    /* bits for '@' changed from 7 to 0 so '@' can be escaped */
    /* in usernames and passwords in publishing.              */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,   /* 5x  PQRSTUVWXYZ[\]^_ */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,   /* 6x  `abcdefghijklmno */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,   /* 7x  pqrstuvwxyz{|}~ DEL */
    0,                                 /* 0x80; 0x81..0xFF default to 0 */
};
michael@0 31
/* Decode one hexadecimal digit character into its numeric value (0..15).
** Any character that is not a hex digit decodes to 0.
** The argument is fully parenthesized so that expressions built from
** low-precedence operators (e.g. a conditional) expand correctly.
** NOTE: C is evaluated more than once -- do not pass an expression with
** side effects.
*/
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
     (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
      (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))

/* Non-zero when byte C may be copied unescaped under the mask held in a
** variable named |flags| that must be in scope at the point of use.
** The index goes through unsigned char so that a plain (possibly signed)
** char with the high bit set cannot index outside the 256-entry table.
*/
#define IS_OK(C) (netCharType[((unsigned char) (C))] & (flags))

/* Character that introduces a %XX escape sequence. */
#define HEX_ESCAPE '%'
michael@0 42
michael@0 43 //----------------------------------------------------------------------------------------
michael@0 44 static char* nsEscapeCount(
michael@0 45 const char * str,
michael@0 46 nsEscapeMask flags,
michael@0 47 size_t* out_len)
michael@0 48 //----------------------------------------------------------------------------------------
michael@0 49 {
michael@0 50 if (!str)
michael@0 51 return 0;
michael@0 52
michael@0 53 size_t i, len = 0, charsToEscape = 0;
michael@0 54 static const char hexChars[] = "0123456789ABCDEF";
michael@0 55
michael@0 56 const unsigned char* src = (const unsigned char *) str;
michael@0 57 while (*src)
michael@0 58 {
michael@0 59 len++;
michael@0 60 if (!IS_OK(*src++))
michael@0 61 charsToEscape++;
michael@0 62 }
michael@0 63
michael@0 64 // calculate how much memory should be allocated
michael@0 65 // original length + 2 bytes for each escaped character + terminating '\0'
michael@0 66 // do the sum in steps to check for overflow
michael@0 67 size_t dstSize = len + 1 + charsToEscape;
michael@0 68 if (dstSize <= len)
michael@0 69 return 0;
michael@0 70 dstSize += charsToEscape;
michael@0 71 if (dstSize < len)
michael@0 72 return 0;
michael@0 73
michael@0 74 // fail if we need more than 4GB
michael@0 75 // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
michael@0 76 // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is
michael@0 77 // no chance to allocate more than 4GB using nsMemory::Alloc()
michael@0 78 if (dstSize > UINT32_MAX)
michael@0 79 return 0;
michael@0 80
michael@0 81 char* result = (char *)nsMemory::Alloc(dstSize);
michael@0 82 if (!result)
michael@0 83 return 0;
michael@0 84
michael@0 85 unsigned char* dst = (unsigned char *) result;
michael@0 86 src = (const unsigned char *) str;
michael@0 87 if (flags == url_XPAlphas)
michael@0 88 {
michael@0 89 for (i = 0; i < len; i++)
michael@0 90 {
michael@0 91 unsigned char c = *src++;
michael@0 92 if (IS_OK(c))
michael@0 93 *dst++ = c;
michael@0 94 else if (c == ' ')
michael@0 95 *dst++ = '+'; /* convert spaces to pluses */
michael@0 96 else
michael@0 97 {
michael@0 98 *dst++ = HEX_ESCAPE;
michael@0 99 *dst++ = hexChars[c >> 4]; /* high nibble */
michael@0 100 *dst++ = hexChars[c & 0x0f]; /* low nibble */
michael@0 101 }
michael@0 102 }
michael@0 103 }
michael@0 104 else
michael@0 105 {
michael@0 106 for (i = 0; i < len; i++)
michael@0 107 {
michael@0 108 unsigned char c = *src++;
michael@0 109 if (IS_OK(c))
michael@0 110 *dst++ = c;
michael@0 111 else
michael@0 112 {
michael@0 113 *dst++ = HEX_ESCAPE;
michael@0 114 *dst++ = hexChars[c >> 4]; /* high nibble */
michael@0 115 *dst++ = hexChars[c & 0x0f]; /* low nibble */
michael@0 116 }
michael@0 117 }
michael@0 118 }
michael@0 119
michael@0 120 *dst = '\0'; /* tack on eos */
michael@0 121 if(out_len)
michael@0 122 *out_len = dst - (unsigned char *) result;
michael@0 123 return result;
michael@0 124 }
michael@0 125
michael@0 126 //----------------------------------------------------------------------------------------
michael@0 127 char* nsEscape(const char * str, nsEscapeMask flags)
michael@0 128 //----------------------------------------------------------------------------------------
michael@0 129 {
michael@0 130 if(!str)
michael@0 131 return nullptr;
michael@0 132 return nsEscapeCount(str, flags, nullptr);
michael@0 133 }
michael@0 134
michael@0 135 //----------------------------------------------------------------------------------------
michael@0 136 char* nsUnescape(char * str)
michael@0 137 //----------------------------------------------------------------------------------------
michael@0 138 {
michael@0 139 nsUnescapeCount(str);
michael@0 140 return str;
michael@0 141 }
michael@0 142
michael@0 143 //----------------------------------------------------------------------------------------
michael@0 144 int32_t nsUnescapeCount(char * str)
michael@0 145 //----------------------------------------------------------------------------------------
michael@0 146 {
michael@0 147 char *src = str;
michael@0 148 char *dst = str;
michael@0 149 static const char hexChars[] = "0123456789ABCDEFabcdef";
michael@0 150
michael@0 151 char c1[] = " ";
michael@0 152 char c2[] = " ";
michael@0 153 char* const pc1 = c1;
michael@0 154 char* const pc2 = c2;
michael@0 155
michael@0 156 if (!*src) {
michael@0 157 // A null string was passed in. Nothing to escape.
michael@0 158 // Returns early as the string might not actually be mutable with
michael@0 159 // length 0.
michael@0 160 return 0;
michael@0 161 }
michael@0 162
michael@0 163 while (*src)
michael@0 164 {
michael@0 165 c1[0] = *(src+1);
michael@0 166 if (*(src+1) == '\0')
michael@0 167 c2[0] = '\0';
michael@0 168 else
michael@0 169 c2[0] = *(src+2);
michael@0 170
michael@0 171 if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
michael@0 172 PL_strpbrk(pc2, hexChars) == 0 )
michael@0 173 *dst++ = *src++;
michael@0 174 else
michael@0 175 {
michael@0 176 src++; /* walk over escape */
michael@0 177 if (*src)
michael@0 178 {
michael@0 179 *dst = UNHEX(*src) << 4;
michael@0 180 src++;
michael@0 181 }
michael@0 182 if (*src)
michael@0 183 {
michael@0 184 *dst = (*dst + UNHEX(*src));
michael@0 185 src++;
michael@0 186 }
michael@0 187 dst++;
michael@0 188 }
michael@0 189 }
michael@0 190
michael@0 191 *dst = 0;
michael@0 192 return (int)(dst - str);
michael@0 193
michael@0 194 } /* NET_UnEscapeCnt */
michael@0 195
michael@0 196
michael@0 197 char *
michael@0 198 nsEscapeHTML(const char * string)
michael@0 199 {
michael@0 200 char *rv = nullptr;
michael@0 201 /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
michael@0 202 uint32_t len = strlen(string);
michael@0 203 if (len >= (UINT32_MAX / 6))
michael@0 204 return nullptr;
michael@0 205
michael@0 206 rv = (char *)NS_Alloc( (6 * len) + 1 );
michael@0 207 char *ptr = rv;
michael@0 208
michael@0 209 if(rv)
michael@0 210 {
michael@0 211 for(; *string != '\0'; string++)
michael@0 212 {
michael@0 213 if(*string == '<')
michael@0 214 {
michael@0 215 *ptr++ = '&';
michael@0 216 *ptr++ = 'l';
michael@0 217 *ptr++ = 't';
michael@0 218 *ptr++ = ';';
michael@0 219 }
michael@0 220 else if(*string == '>')
michael@0 221 {
michael@0 222 *ptr++ = '&';
michael@0 223 *ptr++ = 'g';
michael@0 224 *ptr++ = 't';
michael@0 225 *ptr++ = ';';
michael@0 226 }
michael@0 227 else if(*string == '&')
michael@0 228 {
michael@0 229 *ptr++ = '&';
michael@0 230 *ptr++ = 'a';
michael@0 231 *ptr++ = 'm';
michael@0 232 *ptr++ = 'p';
michael@0 233 *ptr++ = ';';
michael@0 234 }
michael@0 235 else if (*string == '"')
michael@0 236 {
michael@0 237 *ptr++ = '&';
michael@0 238 *ptr++ = 'q';
michael@0 239 *ptr++ = 'u';
michael@0 240 *ptr++ = 'o';
michael@0 241 *ptr++ = 't';
michael@0 242 *ptr++ = ';';
michael@0 243 }
michael@0 244 else if (*string == '\'')
michael@0 245 {
michael@0 246 *ptr++ = '&';
michael@0 247 *ptr++ = '#';
michael@0 248 *ptr++ = '3';
michael@0 249 *ptr++ = '9';
michael@0 250 *ptr++ = ';';
michael@0 251 }
michael@0 252 else
michael@0 253 {
michael@0 254 *ptr++ = *string;
michael@0 255 }
michael@0 256 }
michael@0 257 *ptr = '\0';
michael@0 258 }
michael@0 259
michael@0 260 return(rv);
michael@0 261 }
michael@0 262
michael@0 263 char16_t *
michael@0 264 nsEscapeHTML2(const char16_t *aSourceBuffer, int32_t aSourceBufferLen)
michael@0 265 {
michael@0 266 // Calculate the length, if the caller didn't.
michael@0 267 if (aSourceBufferLen < 0) {
michael@0 268 aSourceBufferLen = NS_strlen(aSourceBuffer);
michael@0 269 }
michael@0 270
michael@0 271 /* XXX Hardcoded max entity len. */
michael@0 272 if (uint32_t(aSourceBufferLen) >=
michael@0 273 ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t))) )
michael@0 274 return nullptr;
michael@0 275
michael@0 276 char16_t *resultBuffer = (char16_t *)nsMemory::Alloc(aSourceBufferLen *
michael@0 277 6 * sizeof(char16_t) + sizeof(char16_t('\0')));
michael@0 278 char16_t *ptr = resultBuffer;
michael@0 279
michael@0 280 if (resultBuffer) {
michael@0 281 int32_t i;
michael@0 282
michael@0 283 for(i = 0; i < aSourceBufferLen; i++) {
michael@0 284 if(aSourceBuffer[i] == '<') {
michael@0 285 *ptr++ = '&';
michael@0 286 *ptr++ = 'l';
michael@0 287 *ptr++ = 't';
michael@0 288 *ptr++ = ';';
michael@0 289 } else if(aSourceBuffer[i] == '>') {
michael@0 290 *ptr++ = '&';
michael@0 291 *ptr++ = 'g';
michael@0 292 *ptr++ = 't';
michael@0 293 *ptr++ = ';';
michael@0 294 } else if(aSourceBuffer[i] == '&') {
michael@0 295 *ptr++ = '&';
michael@0 296 *ptr++ = 'a';
michael@0 297 *ptr++ = 'm';
michael@0 298 *ptr++ = 'p';
michael@0 299 *ptr++ = ';';
michael@0 300 } else if (aSourceBuffer[i] == '"') {
michael@0 301 *ptr++ = '&';
michael@0 302 *ptr++ = 'q';
michael@0 303 *ptr++ = 'u';
michael@0 304 *ptr++ = 'o';
michael@0 305 *ptr++ = 't';
michael@0 306 *ptr++ = ';';
michael@0 307 } else if (aSourceBuffer[i] == '\'') {
michael@0 308 *ptr++ = '&';
michael@0 309 *ptr++ = '#';
michael@0 310 *ptr++ = '3';
michael@0 311 *ptr++ = '9';
michael@0 312 *ptr++ = ';';
michael@0 313 } else {
michael@0 314 *ptr++ = aSourceBuffer[i];
michael@0 315 }
michael@0 316 }
michael@0 317 *ptr = 0;
michael@0 318 }
michael@0 319
michael@0 320 return resultBuffer;
michael@0 321 }
michael@0 322
michael@0 323 //----------------------------------------------------------------------------------------
michael@0 324
/* Per-byte table consulted through NO_NEED_ESC(). Each entry is a bit
 * mask over the URL-part flags (values 1..512): a set bit means the byte
 * may appear unescaped in that part of a URL.
 * Only entries 0x00..0x80 are written out; the rest of the 256 entries
 * are zero-initialized.
 */
const int EscapeChars[256] =
/*      0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F */
{
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   /* 0x */
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   /* 1x */
        0,1023,   0, 512,1023,   0,1023,   0,1023,1023,1023,1023,1023,1023, 953, 784,   /* 2x   !"#$%&'()*+,-./ */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008,   0,1008,   0, 768,   /* 3x  0123456789:;<=>? */
     1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,   /* 4x  @ABCDEFGHIJKLMNO */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,   /* 5x  PQRSTUVWXYZ[\]^_ */
        0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,   /* 6x  `abcdefghijklmno */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,   /* 7x  pqrstuvwxyz{|}~ DEL */
        0                                                                               /* 0x80; 0x81..0xFF default to 0 */
};

/* Non-zero when byte C may stay unescaped under the mask held in a
 * variable named |flags| that must be in scope at the point of use.
 * The index goes through unsigned char so that a plain (possibly signed)
 * char with the high bit set cannot index outside the table.
 */
#define NO_NEED_ESC(C) (EscapeChars[((unsigned char) (C))] & (flags))
michael@0 340
michael@0 341 //----------------------------------------------------------------------------------------
michael@0 342
michael@0 343 /* returns an escaped string */
michael@0 344
michael@0 345 /* use the following flags to specify which
michael@0 346 part of an URL you want to escape:
michael@0 347
michael@0 348 esc_Scheme = 1
michael@0 349 esc_Username = 2
michael@0 350 esc_Password = 4
michael@0 351 esc_Host = 8
michael@0 352 esc_Directory = 16
michael@0 353 esc_FileBaseName = 32
michael@0 354 esc_FileExtension = 64
michael@0 355 esc_Param = 128
michael@0 356 esc_Query = 256
michael@0 357 esc_Ref = 512
michael@0 358 */
michael@0 359
michael@0 360 /* by default this function will not escape parts of a string
michael@0 361 that already look escaped, which means it already includes
michael@0 362 a valid hexcode. This is done to avoid multiple escapes of
michael@0 363 a string. Use the following flags to force escaping of a
michael@0 364 string:
michael@0 365
michael@0 366 esc_Forced = 1024
michael@0 367 */
michael@0 368
michael@0 369 bool NS_EscapeURL(const char *part,
michael@0 370 int32_t partLen,
michael@0 371 uint32_t flags,
michael@0 372 nsACString &result)
michael@0 373 {
michael@0 374 if (!part) {
michael@0 375 NS_NOTREACHED("null pointer");
michael@0 376 return false;
michael@0 377 }
michael@0 378
michael@0 379 int i = 0;
michael@0 380 static const char hexChars[] = "0123456789ABCDEF";
michael@0 381 if (partLen < 0)
michael@0 382 partLen = strlen(part);
michael@0 383 bool forced = !!(flags & esc_Forced);
michael@0 384 bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
michael@0 385 bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
michael@0 386 bool writing = !!(flags & esc_AlwaysCopy);
michael@0 387 bool colon = !!(flags & esc_Colon);
michael@0 388
michael@0 389 const unsigned char* src = (const unsigned char *) part;
michael@0 390
michael@0 391 char tempBuffer[100];
michael@0 392 unsigned int tempBufferPos = 0;
michael@0 393
michael@0 394 bool previousIsNonASCII = false;
michael@0 395 for (i = 0; i < partLen; i++)
michael@0 396 {
michael@0 397 unsigned char c = *src++;
michael@0 398
michael@0 399 // if the char has not to be escaped or whatever follows % is
michael@0 400 // a valid escaped string, just copy the char.
michael@0 401 //
michael@0 402 // Also the % will not be escaped until forced
michael@0 403 // See bugzilla bug 61269 for details why we changed this
michael@0 404 //
michael@0 405 // And, we will not escape non-ascii characters if requested.
michael@0 406 // On special request we will also escape the colon even when
michael@0 407 // not covered by the matrix.
michael@0 408 // ignoreAscii is not honored for control characters (C0 and DEL)
michael@0 409 //
michael@0 410 // And, we should escape the '|' character when it occurs after any
michael@0 411 // non-ASCII character as it may be part of a multi-byte character.
michael@0 412 //
michael@0 413 // 0x20..0x7e are the valid ASCII characters. We also escape spaces
michael@0 414 // (0x20) since they are not legal in URLs.
michael@0 415 if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
michael@0 416 || (c > 0x7f && ignoreNonAscii)
michael@0 417 || (c > 0x20 && c < 0x7f && ignoreAscii))
michael@0 418 && !(c == ':' && colon)
michael@0 419 && !(previousIsNonASCII && c == '|' && !ignoreNonAscii))
michael@0 420 {
michael@0 421 if (writing)
michael@0 422 tempBuffer[tempBufferPos++] = c;
michael@0 423 }
michael@0 424 else /* do the escape magic */
michael@0 425 {
michael@0 426 if (!writing)
michael@0 427 {
michael@0 428 result.Append(part, i);
michael@0 429 writing = true;
michael@0 430 }
michael@0 431 tempBuffer[tempBufferPos++] = HEX_ESCAPE;
michael@0 432 tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
michael@0 433 tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
michael@0 434 }
michael@0 435
michael@0 436 if (tempBufferPos >= sizeof(tempBuffer) - 4)
michael@0 437 {
michael@0 438 NS_ASSERTION(writing, "should be writing");
michael@0 439 tempBuffer[tempBufferPos] = '\0';
michael@0 440 result += tempBuffer;
michael@0 441 tempBufferPos = 0;
michael@0 442 }
michael@0 443
michael@0 444 previousIsNonASCII = (c > 0x7f);
michael@0 445 }
michael@0 446 if (writing) {
michael@0 447 tempBuffer[tempBufferPos] = '\0';
michael@0 448 result += tempBuffer;
michael@0 449 }
michael@0 450 return writing;
michael@0 451 }
michael@0 452
michael@0 453 #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
michael@0 454
michael@0 455 bool NS_UnescapeURL(const char *str, int32_t len, uint32_t flags, nsACString &result)
michael@0 456 {
michael@0 457 if (!str) {
michael@0 458 NS_NOTREACHED("null pointer");
michael@0 459 return false;
michael@0 460 }
michael@0 461
michael@0 462 if (len < 0)
michael@0 463 len = strlen(str);
michael@0 464
michael@0 465 bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
michael@0 466 bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
michael@0 467 bool writing = !!(flags & esc_AlwaysCopy);
michael@0 468 bool skipControl = !!(flags & esc_SkipControl);
michael@0 469
michael@0 470 static const char hexChars[] = "0123456789ABCDEFabcdef";
michael@0 471
michael@0 472 const char *last = str;
michael@0 473 const char *p = str;
michael@0 474
michael@0 475 for (int i=0; i<len; ++i, ++p) {
michael@0 476 //printf("%c [i=%d of len=%d]\n", *p, i, len);
michael@0 477 if (*p == HEX_ESCAPE && i < len-2) {
michael@0 478 unsigned char *p1 = ((unsigned char *) p) + 1;
michael@0 479 unsigned char *p2 = ((unsigned char *) p) + 2;
michael@0 480 if (ISHEX(*p1) && ISHEX(*p2) &&
michael@0 481 ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
michael@0 482 !(skipControl &&
michael@0 483 (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
michael@0 484 //printf("- p1=%c p2=%c\n", *p1, *p2);
michael@0 485 writing = true;
michael@0 486 if (p > last) {
michael@0 487 //printf("- p=%p, last=%p\n", p, last);
michael@0 488 result.Append(last, p - last);
michael@0 489 last = p;
michael@0 490 }
michael@0 491 char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
michael@0 492 //printf("- u=%c\n", u);
michael@0 493 result.Append(u);
michael@0 494 i += 2;
michael@0 495 p += 2;
michael@0 496 last += 3;
michael@0 497 }
michael@0 498 }
michael@0 499 }
michael@0 500 if (writing && last < str + len)
michael@0 501 result.Append(last, str + len - last);
michael@0 502
michael@0 503 return writing;
michael@0 504 }

mercurial