Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c. |
michael@0 | 7 | |
michael@0 | 8 | #include "nsEscape.h" |
michael@0 | 9 | #include "nsMemory.h" |
michael@0 | 10 | #include "nsCRT.h" |
michael@0 | 11 | #include "nsReadableUtils.h" |
michael@0 | 12 | |
/*
** Per-byte classification table consulted by IS_OK(): a byte is copied
** unescaped when (netCharType[byte] & flags) is non-zero.
**
**   value 1 (bit 0)  xalpha  -- the alphas
**   value 2 (bit 1)  xpalpha -- as xalpha, but converts spaces to plus
**                               and plus to %2B
**   value 4 (bit 2)  path    -- as xalpha, but doesn't escape '/'
**                               (older comments call this "Bit 3")
**
** Only indices 0x00..0x80 are listed; the remaining entries of the
** 256-element array are zero-initialized, i.e. always escaped.
*/
const int netCharType[256] =
{
/*  0 1 2 3 4 5 6 7 8 9 A B C D E F                                  */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 0x  control characters    */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    /* 1x  control characters    */
    0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,    /* 2x   !"#$%&'()*+,-./      */
    7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,    /* 3x  0123456789:;<=>?      */
    /* '@' is 0 (not 7) so that it can be escaped in usernames and   */
    /* passwords in publishing.                                      */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,    /* 4x  @ABCDEFGHIJKLMNO      */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,    /* 5x  PQRSTUVWXYZ[\]^_      */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,    /* 6x  `abcdefghijklmno      */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,    /* 7x  pqrstuvwxyz{|}~ DEL   */
    0,                                  /* 0x80                      */
};
michael@0 | 31 | |
/* UNHEX(C): numeric value (0..15) of the hex digit C; evaluates to 0 when
** C is not a hex digit.  Used to decode %XX escapes into character values.
** Every use of the argument is parenthesized so that expressions containing
** lower-precedence operators (e.g. a ?: expression) expand correctly.
** Note that C is evaluated several times, so it must be free of side
** effects.
*/
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
    (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
    (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))


/* IS_OK(C): non-zero when byte C may be emitted unescaped.  Relies on a
** variable named |flags| being in scope at the point of use, and on C being
** a non-negative value (callers pass unsigned char). */
#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags))

/* The character that introduces a two-digit hex escape. */
#define HEX_ESCAPE '%'
michael@0 | 42 | |
michael@0 | 43 | //---------------------------------------------------------------------------------------- |
michael@0 | 44 | static char* nsEscapeCount( |
michael@0 | 45 | const char * str, |
michael@0 | 46 | nsEscapeMask flags, |
michael@0 | 47 | size_t* out_len) |
michael@0 | 48 | //---------------------------------------------------------------------------------------- |
michael@0 | 49 | { |
michael@0 | 50 | if (!str) |
michael@0 | 51 | return 0; |
michael@0 | 52 | |
michael@0 | 53 | size_t i, len = 0, charsToEscape = 0; |
michael@0 | 54 | static const char hexChars[] = "0123456789ABCDEF"; |
michael@0 | 55 | |
michael@0 | 56 | const unsigned char* src = (const unsigned char *) str; |
michael@0 | 57 | while (*src) |
michael@0 | 58 | { |
michael@0 | 59 | len++; |
michael@0 | 60 | if (!IS_OK(*src++)) |
michael@0 | 61 | charsToEscape++; |
michael@0 | 62 | } |
michael@0 | 63 | |
michael@0 | 64 | // calculate how much memory should be allocated |
michael@0 | 65 | // original length + 2 bytes for each escaped character + terminating '\0' |
michael@0 | 66 | // do the sum in steps to check for overflow |
michael@0 | 67 | size_t dstSize = len + 1 + charsToEscape; |
michael@0 | 68 | if (dstSize <= len) |
michael@0 | 69 | return 0; |
michael@0 | 70 | dstSize += charsToEscape; |
michael@0 | 71 | if (dstSize < len) |
michael@0 | 72 | return 0; |
michael@0 | 73 | |
michael@0 | 74 | // fail if we need more than 4GB |
michael@0 | 75 | // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t) |
michael@0 | 76 | // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is |
michael@0 | 77 | // no chance to allocate more than 4GB using nsMemory::Alloc() |
michael@0 | 78 | if (dstSize > UINT32_MAX) |
michael@0 | 79 | return 0; |
michael@0 | 80 | |
michael@0 | 81 | char* result = (char *)nsMemory::Alloc(dstSize); |
michael@0 | 82 | if (!result) |
michael@0 | 83 | return 0; |
michael@0 | 84 | |
michael@0 | 85 | unsigned char* dst = (unsigned char *) result; |
michael@0 | 86 | src = (const unsigned char *) str; |
michael@0 | 87 | if (flags == url_XPAlphas) |
michael@0 | 88 | { |
michael@0 | 89 | for (i = 0; i < len; i++) |
michael@0 | 90 | { |
michael@0 | 91 | unsigned char c = *src++; |
michael@0 | 92 | if (IS_OK(c)) |
michael@0 | 93 | *dst++ = c; |
michael@0 | 94 | else if (c == ' ') |
michael@0 | 95 | *dst++ = '+'; /* convert spaces to pluses */ |
michael@0 | 96 | else |
michael@0 | 97 | { |
michael@0 | 98 | *dst++ = HEX_ESCAPE; |
michael@0 | 99 | *dst++ = hexChars[c >> 4]; /* high nibble */ |
michael@0 | 100 | *dst++ = hexChars[c & 0x0f]; /* low nibble */ |
michael@0 | 101 | } |
michael@0 | 102 | } |
michael@0 | 103 | } |
michael@0 | 104 | else |
michael@0 | 105 | { |
michael@0 | 106 | for (i = 0; i < len; i++) |
michael@0 | 107 | { |
michael@0 | 108 | unsigned char c = *src++; |
michael@0 | 109 | if (IS_OK(c)) |
michael@0 | 110 | *dst++ = c; |
michael@0 | 111 | else |
michael@0 | 112 | { |
michael@0 | 113 | *dst++ = HEX_ESCAPE; |
michael@0 | 114 | *dst++ = hexChars[c >> 4]; /* high nibble */ |
michael@0 | 115 | *dst++ = hexChars[c & 0x0f]; /* low nibble */ |
michael@0 | 116 | } |
michael@0 | 117 | } |
michael@0 | 118 | } |
michael@0 | 119 | |
michael@0 | 120 | *dst = '\0'; /* tack on eos */ |
michael@0 | 121 | if(out_len) |
michael@0 | 122 | *out_len = dst - (unsigned char *) result; |
michael@0 | 123 | return result; |
michael@0 | 124 | } |
michael@0 | 125 | |
michael@0 | 126 | //---------------------------------------------------------------------------------------- |
michael@0 | 127 | char* nsEscape(const char * str, nsEscapeMask flags) |
michael@0 | 128 | //---------------------------------------------------------------------------------------- |
michael@0 | 129 | { |
michael@0 | 130 | if(!str) |
michael@0 | 131 | return nullptr; |
michael@0 | 132 | return nsEscapeCount(str, flags, nullptr); |
michael@0 | 133 | } |
michael@0 | 134 | |
michael@0 | 135 | //---------------------------------------------------------------------------------------- |
michael@0 | 136 | char* nsUnescape(char * str) |
michael@0 | 137 | //---------------------------------------------------------------------------------------- |
michael@0 | 138 | { |
michael@0 | 139 | nsUnescapeCount(str); |
michael@0 | 140 | return str; |
michael@0 | 141 | } |
michael@0 | 142 | |
//----------------------------------------------------------------------------------------
// Decodes, in place, every "%XX" sequence of |str| whose two following
// characters are both hex digits (either case) into the single byte it
// denotes.  Anything else -- including a '%' that is not followed by two hex
// digits -- is copied through unchanged.  The buffer is re-terminated and
// the new length (excluding the terminator) is returned.
//
// A null or empty |str| returns 0 without writing to the buffer.
int32_t nsUnescapeCount(char * str)
//----------------------------------------------------------------------------------------
{
    // Robustness: a null pointer has nothing to unescape.
    if (!str)
        return 0;

    if (!*str) {
        // An empty string was passed in.  Nothing to unescape.
        // Returns early as the string might not actually be mutable with
        // length 0.
        return 0;
    }

    // Same character as the file-level HEX_ESCAPE macro.
    const char escapeChar = '%';

    // Value of a hex digit (0..15), or -1 when |c| is not one.  '\0' is not
    // a hex digit, so the look-ahead below can never run past the
    // terminator.
    auto hexValue = [](char c) -> int {
        if (c >= '0' && c <= '9')
            return c - '0';
        if (c >= 'A' && c <= 'F')
            return c - 'A' + 10;
        if (c >= 'a' && c <= 'f')
            return c - 'a' + 10;
        return -1;
    };

    const char *src = str;
    char *dst = str;

    while (*src)
    {
        if (*src == escapeChar)
        {
            const int hi = hexValue(src[1]);
            // src[2] is only examined when src[1] is a hex digit, i.e. when
            // src[1] is known not to be the terminator.
            const int lo = (hi >= 0) ? hexValue(src[2]) : -1;
            if (lo >= 0)
            {
                *dst++ = static_cast<char>((hi << 4) | lo);
                src += 3;   /* walk over escape and both digits */
                continue;
            }
        }
        *dst++ = *src++;
    }

    *dst = 0;
    return static_cast<int32_t>(dst - str);

} /* NET_UnEscapeCnt */
michael@0 | 195 | |
michael@0 | 196 | |
michael@0 | 197 | char * |
michael@0 | 198 | nsEscapeHTML(const char * string) |
michael@0 | 199 | { |
michael@0 | 200 | char *rv = nullptr; |
michael@0 | 201 | /* XXX Hardcoded max entity len. The +1 is for the trailing null. */ |
michael@0 | 202 | uint32_t len = strlen(string); |
michael@0 | 203 | if (len >= (UINT32_MAX / 6)) |
michael@0 | 204 | return nullptr; |
michael@0 | 205 | |
michael@0 | 206 | rv = (char *)NS_Alloc( (6 * len) + 1 ); |
michael@0 | 207 | char *ptr = rv; |
michael@0 | 208 | |
michael@0 | 209 | if(rv) |
michael@0 | 210 | { |
michael@0 | 211 | for(; *string != '\0'; string++) |
michael@0 | 212 | { |
michael@0 | 213 | if(*string == '<') |
michael@0 | 214 | { |
michael@0 | 215 | *ptr++ = '&'; |
michael@0 | 216 | *ptr++ = 'l'; |
michael@0 | 217 | *ptr++ = 't'; |
michael@0 | 218 | *ptr++ = ';'; |
michael@0 | 219 | } |
michael@0 | 220 | else if(*string == '>') |
michael@0 | 221 | { |
michael@0 | 222 | *ptr++ = '&'; |
michael@0 | 223 | *ptr++ = 'g'; |
michael@0 | 224 | *ptr++ = 't'; |
michael@0 | 225 | *ptr++ = ';'; |
michael@0 | 226 | } |
michael@0 | 227 | else if(*string == '&') |
michael@0 | 228 | { |
michael@0 | 229 | *ptr++ = '&'; |
michael@0 | 230 | *ptr++ = 'a'; |
michael@0 | 231 | *ptr++ = 'm'; |
michael@0 | 232 | *ptr++ = 'p'; |
michael@0 | 233 | *ptr++ = ';'; |
michael@0 | 234 | } |
michael@0 | 235 | else if (*string == '"') |
michael@0 | 236 | { |
michael@0 | 237 | *ptr++ = '&'; |
michael@0 | 238 | *ptr++ = 'q'; |
michael@0 | 239 | *ptr++ = 'u'; |
michael@0 | 240 | *ptr++ = 'o'; |
michael@0 | 241 | *ptr++ = 't'; |
michael@0 | 242 | *ptr++ = ';'; |
michael@0 | 243 | } |
michael@0 | 244 | else if (*string == '\'') |
michael@0 | 245 | { |
michael@0 | 246 | *ptr++ = '&'; |
michael@0 | 247 | *ptr++ = '#'; |
michael@0 | 248 | *ptr++ = '3'; |
michael@0 | 249 | *ptr++ = '9'; |
michael@0 | 250 | *ptr++ = ';'; |
michael@0 | 251 | } |
michael@0 | 252 | else |
michael@0 | 253 | { |
michael@0 | 254 | *ptr++ = *string; |
michael@0 | 255 | } |
michael@0 | 256 | } |
michael@0 | 257 | *ptr = '\0'; |
michael@0 | 258 | } |
michael@0 | 259 | |
michael@0 | 260 | return(rv); |
michael@0 | 261 | } |
michael@0 | 262 | |
michael@0 | 263 | char16_t * |
michael@0 | 264 | nsEscapeHTML2(const char16_t *aSourceBuffer, int32_t aSourceBufferLen) |
michael@0 | 265 | { |
michael@0 | 266 | // Calculate the length, if the caller didn't. |
michael@0 | 267 | if (aSourceBufferLen < 0) { |
michael@0 | 268 | aSourceBufferLen = NS_strlen(aSourceBuffer); |
michael@0 | 269 | } |
michael@0 | 270 | |
michael@0 | 271 | /* XXX Hardcoded max entity len. */ |
michael@0 | 272 | if (uint32_t(aSourceBufferLen) >= |
michael@0 | 273 | ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t))) ) |
michael@0 | 274 | return nullptr; |
michael@0 | 275 | |
michael@0 | 276 | char16_t *resultBuffer = (char16_t *)nsMemory::Alloc(aSourceBufferLen * |
michael@0 | 277 | 6 * sizeof(char16_t) + sizeof(char16_t('\0'))); |
michael@0 | 278 | char16_t *ptr = resultBuffer; |
michael@0 | 279 | |
michael@0 | 280 | if (resultBuffer) { |
michael@0 | 281 | int32_t i; |
michael@0 | 282 | |
michael@0 | 283 | for(i = 0; i < aSourceBufferLen; i++) { |
michael@0 | 284 | if(aSourceBuffer[i] == '<') { |
michael@0 | 285 | *ptr++ = '&'; |
michael@0 | 286 | *ptr++ = 'l'; |
michael@0 | 287 | *ptr++ = 't'; |
michael@0 | 288 | *ptr++ = ';'; |
michael@0 | 289 | } else if(aSourceBuffer[i] == '>') { |
michael@0 | 290 | *ptr++ = '&'; |
michael@0 | 291 | *ptr++ = 'g'; |
michael@0 | 292 | *ptr++ = 't'; |
michael@0 | 293 | *ptr++ = ';'; |
michael@0 | 294 | } else if(aSourceBuffer[i] == '&') { |
michael@0 | 295 | *ptr++ = '&'; |
michael@0 | 296 | *ptr++ = 'a'; |
michael@0 | 297 | *ptr++ = 'm'; |
michael@0 | 298 | *ptr++ = 'p'; |
michael@0 | 299 | *ptr++ = ';'; |
michael@0 | 300 | } else if (aSourceBuffer[i] == '"') { |
michael@0 | 301 | *ptr++ = '&'; |
michael@0 | 302 | *ptr++ = 'q'; |
michael@0 | 303 | *ptr++ = 'u'; |
michael@0 | 304 | *ptr++ = 'o'; |
michael@0 | 305 | *ptr++ = 't'; |
michael@0 | 306 | *ptr++ = ';'; |
michael@0 | 307 | } else if (aSourceBuffer[i] == '\'') { |
michael@0 | 308 | *ptr++ = '&'; |
michael@0 | 309 | *ptr++ = '#'; |
michael@0 | 310 | *ptr++ = '3'; |
michael@0 | 311 | *ptr++ = '9'; |
michael@0 | 312 | *ptr++ = ';'; |
michael@0 | 313 | } else { |
michael@0 | 314 | *ptr++ = aSourceBuffer[i]; |
michael@0 | 315 | } |
michael@0 | 316 | } |
michael@0 | 317 | *ptr = 0; |
michael@0 | 318 | } |
michael@0 | 319 | |
michael@0 | 320 | return resultBuffer; |
michael@0 | 321 | } |
michael@0 | 322 | |
michael@0 | 323 | //---------------------------------------------------------------------------------------- |
michael@0 | 324 | |
// Per-byte table consulted by NO_NEED_ESC(): a byte may be left unescaped
// when (EscapeChars[byte] & flags) is non-zero.  Only indices 0x00..0x80 are
// listed; the remaining entries of the 256-element array are
// zero-initialized.
const int EscapeChars[256] =
{
/*     0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F                          */
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  /* 0x                  */
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  /* 1x                  */
       0, 1023,    0,  512, 1023,    0, 1023,    0, 1023, 1023, 1023, 1023, 1023, 1023,  953,  784,  /* 2x  !"#$%&'()*+,-./ */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1008, 1008,    0, 1008,    0,  768,  /* 3x 0123456789:;<=>? */
    1008, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  /* 4x @ABCDEFGHIJKLMNO */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  896,  896,  896,  896, 1023,  /* 5x PQRSTUVWXYZ[\]^_ */
       0, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  /* 6x `abcdefghijklmno */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  896, 1012,  896, 1023,    0,  /* 7x pqrstuvwxyz{|}~ DEL */
       0                                                                                             /* 0x80                */
};

// Non-zero when byte C needs no escaping for the URL part(s) selected by the
// variable |flags|, which must be in scope where the macro is used.
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags))
michael@0 | 340 | |
michael@0 | 341 | //---------------------------------------------------------------------------------------- |
michael@0 | 342 | |
michael@0 | 343 | /* returns an escaped string */ |
michael@0 | 344 | |
michael@0 | 345 | /* use the following flags to specify which |
michael@0 | 346 | part of an URL you want to escape: |
michael@0 | 347 | |
michael@0 | 348 | esc_Scheme = 1 |
michael@0 | 349 | esc_Username = 2 |
michael@0 | 350 | esc_Password = 4 |
michael@0 | 351 | esc_Host = 8 |
michael@0 | 352 | esc_Directory = 16 |
michael@0 | 353 | esc_FileBaseName = 32 |
michael@0 | 354 | esc_FileExtension = 64 |
michael@0 | 355 | esc_Param = 128 |
michael@0 | 356 | esc_Query = 256 |
michael@0 | 357 | esc_Ref = 512 |
michael@0 | 358 | */ |
michael@0 | 359 | |
michael@0 | 360 | /* by default this function will not escape parts of a string |
michael@0 | 361 | that already look escaped, which means it already includes |
michael@0 | 362 | a valid hexcode. This is done to avoid multiple escapes of |
michael@0 | 363 | a string. Use the following flags to force escaping of a |
michael@0 | 364 | string: |
michael@0 | 365 | |
michael@0 | 366 | esc_Forced = 1024 |
michael@0 | 367 | */ |
michael@0 | 368 | |
michael@0 | 369 | bool NS_EscapeURL(const char *part, |
michael@0 | 370 | int32_t partLen, |
michael@0 | 371 | uint32_t flags, |
michael@0 | 372 | nsACString &result) |
michael@0 | 373 | { |
michael@0 | 374 | if (!part) { |
michael@0 | 375 | NS_NOTREACHED("null pointer"); |
michael@0 | 376 | return false; |
michael@0 | 377 | } |
michael@0 | 378 | |
michael@0 | 379 | int i = 0; |
michael@0 | 380 | static const char hexChars[] = "0123456789ABCDEF"; |
michael@0 | 381 | if (partLen < 0) |
michael@0 | 382 | partLen = strlen(part); |
michael@0 | 383 | bool forced = !!(flags & esc_Forced); |
michael@0 | 384 | bool ignoreNonAscii = !!(flags & esc_OnlyASCII); |
michael@0 | 385 | bool ignoreAscii = !!(flags & esc_OnlyNonASCII); |
michael@0 | 386 | bool writing = !!(flags & esc_AlwaysCopy); |
michael@0 | 387 | bool colon = !!(flags & esc_Colon); |
michael@0 | 388 | |
michael@0 | 389 | const unsigned char* src = (const unsigned char *) part; |
michael@0 | 390 | |
michael@0 | 391 | char tempBuffer[100]; |
michael@0 | 392 | unsigned int tempBufferPos = 0; |
michael@0 | 393 | |
michael@0 | 394 | bool previousIsNonASCII = false; |
michael@0 | 395 | for (i = 0; i < partLen; i++) |
michael@0 | 396 | { |
michael@0 | 397 | unsigned char c = *src++; |
michael@0 | 398 | |
michael@0 | 399 | // if the char has not to be escaped or whatever follows % is |
michael@0 | 400 | // a valid escaped string, just copy the char. |
michael@0 | 401 | // |
michael@0 | 402 | // Also the % will not be escaped until forced |
michael@0 | 403 | // See bugzilla bug 61269 for details why we changed this |
michael@0 | 404 | // |
michael@0 | 405 | // And, we will not escape non-ascii characters if requested. |
michael@0 | 406 | // On special request we will also escape the colon even when |
michael@0 | 407 | // not covered by the matrix. |
michael@0 | 408 | // ignoreAscii is not honored for control characters (C0 and DEL) |
michael@0 | 409 | // |
michael@0 | 410 | // And, we should escape the '|' character when it occurs after any |
michael@0 | 411 | // non-ASCII character as it may be part of a multi-byte character. |
michael@0 | 412 | // |
michael@0 | 413 | // 0x20..0x7e are the valid ASCII characters. We also escape spaces |
michael@0 | 414 | // (0x20) since they are not legal in URLs. |
michael@0 | 415 | if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced) |
michael@0 | 416 | || (c > 0x7f && ignoreNonAscii) |
michael@0 | 417 | || (c > 0x20 && c < 0x7f && ignoreAscii)) |
michael@0 | 418 | && !(c == ':' && colon) |
michael@0 | 419 | && !(previousIsNonASCII && c == '|' && !ignoreNonAscii)) |
michael@0 | 420 | { |
michael@0 | 421 | if (writing) |
michael@0 | 422 | tempBuffer[tempBufferPos++] = c; |
michael@0 | 423 | } |
michael@0 | 424 | else /* do the escape magic */ |
michael@0 | 425 | { |
michael@0 | 426 | if (!writing) |
michael@0 | 427 | { |
michael@0 | 428 | result.Append(part, i); |
michael@0 | 429 | writing = true; |
michael@0 | 430 | } |
michael@0 | 431 | tempBuffer[tempBufferPos++] = HEX_ESCAPE; |
michael@0 | 432 | tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */ |
michael@0 | 433 | tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */ |
michael@0 | 434 | } |
michael@0 | 435 | |
michael@0 | 436 | if (tempBufferPos >= sizeof(tempBuffer) - 4) |
michael@0 | 437 | { |
michael@0 | 438 | NS_ASSERTION(writing, "should be writing"); |
michael@0 | 439 | tempBuffer[tempBufferPos] = '\0'; |
michael@0 | 440 | result += tempBuffer; |
michael@0 | 441 | tempBufferPos = 0; |
michael@0 | 442 | } |
michael@0 | 443 | |
michael@0 | 444 | previousIsNonASCII = (c > 0x7f); |
michael@0 | 445 | } |
michael@0 | 446 | if (writing) { |
michael@0 | 447 | tempBuffer[tempBufferPos] = '\0'; |
michael@0 | 448 | result += tempBuffer; |
michael@0 | 449 | } |
michael@0 | 450 | return writing; |
michael@0 | 451 | } |
michael@0 | 452 | |
michael@0 | 453 | #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1) |
michael@0 | 454 | |
michael@0 | 455 | bool NS_UnescapeURL(const char *str, int32_t len, uint32_t flags, nsACString &result) |
michael@0 | 456 | { |
michael@0 | 457 | if (!str) { |
michael@0 | 458 | NS_NOTREACHED("null pointer"); |
michael@0 | 459 | return false; |
michael@0 | 460 | } |
michael@0 | 461 | |
michael@0 | 462 | if (len < 0) |
michael@0 | 463 | len = strlen(str); |
michael@0 | 464 | |
michael@0 | 465 | bool ignoreNonAscii = !!(flags & esc_OnlyASCII); |
michael@0 | 466 | bool ignoreAscii = !!(flags & esc_OnlyNonASCII); |
michael@0 | 467 | bool writing = !!(flags & esc_AlwaysCopy); |
michael@0 | 468 | bool skipControl = !!(flags & esc_SkipControl); |
michael@0 | 469 | |
michael@0 | 470 | static const char hexChars[] = "0123456789ABCDEFabcdef"; |
michael@0 | 471 | |
michael@0 | 472 | const char *last = str; |
michael@0 | 473 | const char *p = str; |
michael@0 | 474 | |
michael@0 | 475 | for (int i=0; i<len; ++i, ++p) { |
michael@0 | 476 | //printf("%c [i=%d of len=%d]\n", *p, i, len); |
michael@0 | 477 | if (*p == HEX_ESCAPE && i < len-2) { |
michael@0 | 478 | unsigned char *p1 = ((unsigned char *) p) + 1; |
michael@0 | 479 | unsigned char *p2 = ((unsigned char *) p) + 2; |
michael@0 | 480 | if (ISHEX(*p1) && ISHEX(*p2) && |
michael@0 | 481 | ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) && |
michael@0 | 482 | !(skipControl && |
michael@0 | 483 | (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) { |
michael@0 | 484 | //printf("- p1=%c p2=%c\n", *p1, *p2); |
michael@0 | 485 | writing = true; |
michael@0 | 486 | if (p > last) { |
michael@0 | 487 | //printf("- p=%p, last=%p\n", p, last); |
michael@0 | 488 | result.Append(last, p - last); |
michael@0 | 489 | last = p; |
michael@0 | 490 | } |
michael@0 | 491 | char u = (UNHEX(*p1) << 4) + UNHEX(*p2); |
michael@0 | 492 | //printf("- u=%c\n", u); |
michael@0 | 493 | result.Append(u); |
michael@0 | 494 | i += 2; |
michael@0 | 495 | p += 2; |
michael@0 | 496 | last += 3; |
michael@0 | 497 | } |
michael@0 | 498 | } |
michael@0 | 499 | } |
michael@0 | 500 | if (writing && last < str + len) |
michael@0 | 501 | result.Append(last, str + len - last); |
michael@0 | 502 | |
michael@0 | 503 | return writing; |
michael@0 | 504 | } |