1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/io/nsEscape.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,504 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +// First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c. 1.10 + 1.11 +#include "nsEscape.h" 1.12 +#include "nsMemory.h" 1.13 +#include "nsCRT.h" 1.14 +#include "nsReadableUtils.h" 1.15 + 1.16 +const int netCharType[256] = 1.17 +/* Bit 0 xalpha -- the alphas 1.18 +** Bit 1 xpalpha -- as xalpha but 1.19 +** converts spaces to plus and plus to %2B 1.20 +** Bit 3 ... path -- as xalphas but doesn't escape '/' 1.21 +*/ 1.22 + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 1.23 + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x */ 1.24 + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1x */ 1.25 + 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4, /* 2x !"#$%&'()*+,-./ */ 1.26 + 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */ 1.27 + 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */ 1.28 + /* bits for '@' changed from 7 to 0 so '@' can be escaped */ 1.29 + /* in usernames and passwords in publishing. */ 1.30 + 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */ 1.31 + 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */ 1.32 + 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0, /* 7X pqrstuvwxyz{\}~ DEL */ 1.33 + 0, }; 1.34 + 1.35 +/* decode % escaped hex codes into character values 1.36 + */ 1.37 +#define UNHEX(C) \ 1.38 + ((C >= '0' && C <= '9') ? C - '0' : \ 1.39 + ((C >= 'A' && C <= 'F') ? C - 'A' + 10 : \ 1.40 + ((C >= 'a' && C <= 'f') ? C - 'a' + 10 : 0))) 1.41 + 1.42 + 1.43 +#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags)) 1.44 +#define HEX_ESCAPE '%' 1.45 + 1.46 +//---------------------------------------------------------------------------------------- 1.47 +static char* nsEscapeCount( 1.48 + const char * str, 1.49 + nsEscapeMask flags, 1.50 + size_t* out_len) 1.51 +//---------------------------------------------------------------------------------------- 1.52 +{ 1.53 + if (!str) 1.54 + return 0; 1.55 + 1.56 + size_t i, len = 0, charsToEscape = 0; 1.57 + static const char hexChars[] = "0123456789ABCDEF"; 1.58 + 1.59 + const unsigned char* src = (const unsigned char *) str; 1.60 + while (*src) 1.61 + { 1.62 + len++; 1.63 + if (!IS_OK(*src++)) 1.64 + charsToEscape++; 1.65 + } 1.66 + 1.67 + // calculate how much memory should be allocated 1.68 + // original length + 2 bytes for each escaped character + terminating '\0' 1.69 + // do the sum in steps to check for overflow 1.70 + size_t dstSize = len + 1 + charsToEscape; 1.71 + if (dstSize <= len) 1.72 + return 0; 1.73 + dstSize += charsToEscape; 1.74 + if (dstSize < len) 1.75 + return 0; 1.76 + 1.77 + // fail if we need more than 4GB 1.78 + // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t) 1.79 + // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is 1.80 + // no chance to allocate more than 4GB using nsMemory::Alloc() 1.81 + if (dstSize > UINT32_MAX) 1.82 + return 0; 1.83 + 1.84 + char* result = (char *)nsMemory::Alloc(dstSize); 1.85 + if (!result) 1.86 + return 0; 1.87 + 1.88 + unsigned char* dst = (unsigned char *) result; 1.89 + src = (const unsigned char *) str; 1.90 + if (flags == url_XPAlphas) 1.91 + { 1.92 + for (i = 0; i < len; i++) 1.93 + { 1.94 + unsigned char c = *src++; 1.95 + if (IS_OK(c)) 1.96 + *dst++ = c; 1.97 + else if (c == ' ') 1.98 + *dst++ = '+'; /* convert spaces to pluses */ 1.99 + else 1.100 + { 1.101 + *dst++ = HEX_ESCAPE; 1.102 + *dst++ = hexChars[c >> 4]; /* high nibble */ 1.103 + *dst++ = hexChars[c & 0x0f]; /* low nibble */ 1.104 + } 1.105 + } 1.106 + } 1.107 + else 1.108 + { 1.109 + for (i = 0; i < len; i++) 1.110 + { 1.111 + unsigned char c = *src++; 1.112 + if (IS_OK(c)) 1.113 + *dst++ = c; 1.114 + else 1.115 + { 1.116 + *dst++ = HEX_ESCAPE; 1.117 + *dst++ = hexChars[c >> 4]; /* high nibble */ 1.118 + *dst++ = hexChars[c & 0x0f]; /* low nibble */ 1.119 + } 1.120 + } 1.121 + } 1.122 + 1.123 + *dst = '\0'; /* tack on eos */ 1.124 + if(out_len) 1.125 + *out_len = dst - (unsigned char *) result; 1.126 + return result; 1.127 +} 1.128 + 1.129 +//---------------------------------------------------------------------------------------- 1.130 +char* nsEscape(const char * str, nsEscapeMask flags) 1.131 +//---------------------------------------------------------------------------------------- 1.132 +{ 1.133 + if(!str) 1.134 + return nullptr; 1.135 + return nsEscapeCount(str, flags, nullptr); 1.136 +} 1.137 + 1.138 +//---------------------------------------------------------------------------------------- 1.139 +char* nsUnescape(char * str) 1.140 +//---------------------------------------------------------------------------------------- 1.141 +{ 1.142 + nsUnescapeCount(str); 1.143 + return str; 1.144 +} 1.145 + 1.146 +//---------------------------------------------------------------------------------------- 1.147 +int32_t nsUnescapeCount(char * str) 1.148 +//---------------------------------------------------------------------------------------- 1.149 +{ 1.150 + char *src = str; 1.151 + char *dst = str; 1.152 + static const char hexChars[] = "0123456789ABCDEFabcdef"; 1.153 + 1.154 + char c1[] = " "; 1.155 + char c2[] = " "; 1.156 + char* const pc1 = c1; 1.157 + char* const pc2 = c2; 1.158 + 1.159 + if (!*src) { 1.160 + // A null string was passed in. Nothing to escape. 1.161 + // Returns early as the string might not actually be mutable with 1.162 + // length 0. 1.163 + return 0; 1.164 + } 1.165 + 1.166 + while (*src) 1.167 + { 1.168 + c1[0] = *(src+1); 1.169 + if (*(src+1) == '\0') 1.170 + c2[0] = '\0'; 1.171 + else 1.172 + c2[0] = *(src+2); 1.173 + 1.174 + if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 || 1.175 + PL_strpbrk(pc2, hexChars) == 0 ) 1.176 + *dst++ = *src++; 1.177 + else 1.178 + { 1.179 + src++; /* walk over escape */ 1.180 + if (*src) 1.181 + { 1.182 + *dst = UNHEX(*src) << 4; 1.183 + src++; 1.184 + } 1.185 + if (*src) 1.186 + { 1.187 + *dst = (*dst + UNHEX(*src)); 1.188 + src++; 1.189 + } 1.190 + dst++; 1.191 + } 1.192 + } 1.193 + 1.194 + *dst = 0; 1.195 + return (int)(dst - str); 1.196 + 1.197 +} /* NET_UnEscapeCnt */ 1.198 + 1.199 + 1.200 +char * 1.201 +nsEscapeHTML(const char * string) 1.202 +{ 1.203 + char *rv = nullptr; 1.204 + /* XXX Hardcoded max entity len. The +1 is for the trailing null. */ 1.205 + uint32_t len = strlen(string); 1.206 + if (len >= (UINT32_MAX / 6)) 1.207 + return nullptr; 1.208 + 1.209 + rv = (char *)NS_Alloc( (6 * len) + 1 ); 1.210 + char *ptr = rv; 1.211 + 1.212 + if(rv) 1.213 + { 1.214 + for(; *string != '\0'; string++) 1.215 + { 1.216 + if(*string == '<') 1.217 + { 1.218 + *ptr++ = '&'; 1.219 + *ptr++ = 'l'; 1.220 + *ptr++ = 't'; 1.221 + *ptr++ = ';'; 1.222 + } 1.223 + else if(*string == '>') 1.224 + { 1.225 + *ptr++ = '&'; 1.226 + *ptr++ = 'g'; 1.227 + *ptr++ = 't'; 1.228 + *ptr++ = ';'; 1.229 + } 1.230 + else if(*string == '&') 1.231 + { 1.232 + *ptr++ = '&'; 1.233 + *ptr++ = 'a'; 1.234 + *ptr++ = 'm'; 1.235 + *ptr++ = 'p'; 1.236 + *ptr++ = ';'; 1.237 + } 1.238 + else if (*string == '"') 1.239 + { 1.240 + *ptr++ = '&'; 1.241 + *ptr++ = 'q'; 1.242 + *ptr++ = 'u'; 1.243 + *ptr++ = 'o'; 1.244 + *ptr++ = 't'; 1.245 + *ptr++ = ';'; 1.246 + } 1.247 + else if (*string == '\'') 1.248 + { 1.249 + *ptr++ = '&'; 1.250 + *ptr++ = '#'; 1.251 + *ptr++ = '3'; 1.252 + *ptr++ = '9'; 1.253 + *ptr++ = ';'; 1.254 + } 1.255 + else 1.256 + { 1.257 + *ptr++ = *string; 1.258 + } 1.259 + } 1.260 + *ptr = '\0'; 1.261 + } 1.262 + 1.263 + return(rv); 1.264 +} 1.265 + 1.266 +char16_t * 1.267 +nsEscapeHTML2(const char16_t *aSourceBuffer, int32_t aSourceBufferLen) 1.268 +{ 1.269 + // Calculate the length, if the caller didn't. 1.270 + if (aSourceBufferLen < 0) { 1.271 + aSourceBufferLen = NS_strlen(aSourceBuffer); 1.272 + } 1.273 + 1.274 + /* XXX Hardcoded max entity len. */ 1.275 + if (uint32_t(aSourceBufferLen) >= 1.276 + ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t))) ) 1.277 + return nullptr; 1.278 + 1.279 + char16_t *resultBuffer = (char16_t *)nsMemory::Alloc(aSourceBufferLen * 1.280 + 6 * sizeof(char16_t) + sizeof(char16_t('\0'))); 1.281 + char16_t *ptr = resultBuffer; 1.282 + 1.283 + if (resultBuffer) { 1.284 + int32_t i; 1.285 + 1.286 + for(i = 0; i < aSourceBufferLen; i++) { 1.287 + if(aSourceBuffer[i] == '<') { 1.288 + *ptr++ = '&'; 1.289 + *ptr++ = 'l'; 1.290 + *ptr++ = 't'; 1.291 + *ptr++ = ';'; 1.292 + } else if(aSourceBuffer[i] == '>') { 1.293 + *ptr++ = '&'; 1.294 + *ptr++ = 'g'; 1.295 + *ptr++ = 't'; 1.296 + *ptr++ = ';'; 1.297 + } else if(aSourceBuffer[i] == '&') { 1.298 + *ptr++ = '&'; 1.299 + *ptr++ = 'a'; 1.300 + *ptr++ = 'm'; 1.301 + *ptr++ = 'p'; 1.302 + *ptr++ = ';'; 1.303 + } else if (aSourceBuffer[i] == '"') { 1.304 + *ptr++ = '&'; 1.305 + *ptr++ = 'q'; 1.306 + *ptr++ = 'u'; 1.307 + *ptr++ = 'o'; 1.308 + *ptr++ = 't'; 1.309 + *ptr++ = ';'; 1.310 + } else if (aSourceBuffer[i] == '\'') { 1.311 + *ptr++ = '&'; 1.312 + *ptr++ = '#'; 1.313 + *ptr++ = '3'; 1.314 + *ptr++ = '9'; 1.315 + *ptr++ = ';'; 1.316 + } else { 1.317 + *ptr++ = aSourceBuffer[i]; 1.318 + } 1.319 + } 1.320 + *ptr = 0; 1.321 + } 1.322 + 1.323 + return resultBuffer; 1.324 +} 1.325 + 1.326 +//---------------------------------------------------------------------------------------- 1.327 + 1.328 +const int EscapeChars[256] = 1.329 +/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 1.330 +{ 1.331 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 1.332 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 1.333 + 0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, /* 2x !"#$%&'()*+,-./ */ 1.334 + 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, /* 3x 0123456789:;<=>? */ 1.335 + 1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 4x @ABCDEFGHIJKLMNO */ 1.336 + 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023, /* 5x PQRSTUVWXYZ[\]^_ */ 1.337 + 0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 6x `abcdefghijklmno */ 1.338 + 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, /* 7x pqrstuvwxyz{|}~ */ 1.339 + 0 /* 8x DEL */ 1.340 +}; 1.341 + 1.342 +#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags)) 1.343 + 1.344 +//---------------------------------------------------------------------------------------- 1.345 + 1.346 +/* returns an escaped string */ 1.347 + 1.348 +/* use the following flags to specify which 1.349 + part of an URL you want to escape: 1.350 + 1.351 + esc_Scheme = 1 1.352 + esc_Username = 2 1.353 + esc_Password = 4 1.354 + esc_Host = 8 1.355 + esc_Directory = 16 1.356 + esc_FileBaseName = 32 1.357 + esc_FileExtension = 64 1.358 + esc_Param = 128 1.359 + esc_Query = 256 1.360 + esc_Ref = 512 1.361 +*/ 1.362 + 1.363 +/* by default this function will not escape parts of a string 1.364 + that already look escaped, which means it already includes 1.365 + a valid hexcode. This is done to avoid multiple escapes of 1.366 + a string. Use the following flags to force escaping of a 1.367 + string: 1.368 + 1.369 + esc_Forced = 1024 1.370 +*/ 1.371 + 1.372 +bool NS_EscapeURL(const char *part, 1.373 + int32_t partLen, 1.374 + uint32_t flags, 1.375 + nsACString &result) 1.376 +{ 1.377 + if (!part) { 1.378 + NS_NOTREACHED("null pointer"); 1.379 + return false; 1.380 + } 1.381 + 1.382 + int i = 0; 1.383 + static const char hexChars[] = "0123456789ABCDEF"; 1.384 + if (partLen < 0) 1.385 + partLen = strlen(part); 1.386 + bool forced = !!(flags & esc_Forced); 1.387 + bool ignoreNonAscii = !!(flags & esc_OnlyASCII); 1.388 + bool ignoreAscii = !!(flags & esc_OnlyNonASCII); 1.389 + bool writing = !!(flags & esc_AlwaysCopy); 1.390 + bool colon = !!(flags & esc_Colon); 1.391 + 1.392 + const unsigned char* src = (const unsigned char *) part; 1.393 + 1.394 + char tempBuffer[100]; 1.395 + unsigned int tempBufferPos = 0; 1.396 + 1.397 + bool previousIsNonASCII = false; 1.398 + for (i = 0; i < partLen; i++) 1.399 + { 1.400 + unsigned char c = *src++; 1.401 + 1.402 + // if the char has not to be escaped or whatever follows % is 1.403 + // a valid escaped string, just copy the char. 1.404 + // 1.405 + // Also the % will not be escaped until forced 1.406 + // See bugzilla bug 61269 for details why we changed this 1.407 + // 1.408 + // And, we will not escape non-ascii characters if requested. 1.409 + // On special request we will also escape the colon even when 1.410 + // not covered by the matrix. 1.411 + // ignoreAscii is not honored for control characters (C0 and DEL) 1.412 + // 1.413 + // And, we should escape the '|' character when it occurs after any 1.414 + // non-ASCII character as it may be part of a multi-byte character. 1.415 + // 1.416 + // 0x20..0x7e are the valid ASCII characters. We also escape spaces 1.417 + // (0x20) since they are not legal in URLs. 1.418 + if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced) 1.419 + || (c > 0x7f && ignoreNonAscii) 1.420 + || (c > 0x20 && c < 0x7f && ignoreAscii)) 1.421 + && !(c == ':' && colon) 1.422 + && !(previousIsNonASCII && c == '|' && !ignoreNonAscii)) 1.423 + { 1.424 + if (writing) 1.425 + tempBuffer[tempBufferPos++] = c; 1.426 + } 1.427 + else /* do the escape magic */ 1.428 + { 1.429 + if (!writing) 1.430 + { 1.431 + result.Append(part, i); 1.432 + writing = true; 1.433 + } 1.434 + tempBuffer[tempBufferPos++] = HEX_ESCAPE; 1.435 + tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */ 1.436 + tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */ 1.437 + } 1.438 + 1.439 + if (tempBufferPos >= sizeof(tempBuffer) - 4) 1.440 + { 1.441 + NS_ASSERTION(writing, "should be writing"); 1.442 + tempBuffer[tempBufferPos] = '\0'; 1.443 + result += tempBuffer; 1.444 + tempBufferPos = 0; 1.445 + } 1.446 + 1.447 + previousIsNonASCII = (c > 0x7f); 1.448 + } 1.449 + if (writing) { 1.450 + tempBuffer[tempBufferPos] = '\0'; 1.451 + result += tempBuffer; 1.452 + } 1.453 + return writing; 1.454 +} 1.455 + 1.456 +#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1) 1.457 + 1.458 +bool NS_UnescapeURL(const char *str, int32_t len, uint32_t flags, nsACString &result) 1.459 +{ 1.460 + if (!str) { 1.461 + NS_NOTREACHED("null pointer"); 1.462 + return false; 1.463 + } 1.464 + 1.465 + if (len < 0) 1.466 + len = strlen(str); 1.467 + 1.468 + bool ignoreNonAscii = !!(flags & esc_OnlyASCII); 1.469 + bool ignoreAscii = !!(flags & esc_OnlyNonASCII); 1.470 + bool writing = !!(flags & esc_AlwaysCopy); 1.471 + bool skipControl = !!(flags & esc_SkipControl); 1.472 + 1.473 + static const char hexChars[] = "0123456789ABCDEFabcdef"; 1.474 + 1.475 + const char *last = str; 1.476 + const char *p = str; 1.477 + 1.478 + for (int i=0; i<len; ++i, ++p) { 1.479 + //printf("%c [i=%d of len=%d]\n", *p, i, len); 1.480 + if (*p == HEX_ESCAPE && i < len-2) { 1.481 + unsigned char *p1 = ((unsigned char *) p) + 1; 1.482 + unsigned char *p2 = ((unsigned char *) p) + 2; 1.483 + if (ISHEX(*p1) && ISHEX(*p2) && 1.484 + ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) && 1.485 + !(skipControl && 1.486 + (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) { 1.487 + //printf("- p1=%c p2=%c\n", *p1, *p2); 1.488 + writing = true; 1.489 + if (p > last) { 1.490 + //printf("- p=%p, last=%p\n", p, last); 1.491 + result.Append(last, p - last); 1.492 + last = p; 1.493 + } 1.494 + char u = (UNHEX(*p1) << 4) + UNHEX(*p2); 1.495 + //printf("- u=%c\n", u); 1.496 + result.Append(u); 1.497 + i += 2; 1.498 + p += 2; 1.499 + last += 3; 1.500 + } 1.501 + } 1.502 + } 1.503 + if (writing && last < str + len) 1.504 + result.Append(last, str + len - last); 1.505 + 1.506 + return writing; 1.507 +}