xpcom/io/nsEscape.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xpcom/io/nsEscape.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,504 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 + 
     1.9 +//	First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
    1.10 +
    1.11 +#include "nsEscape.h"
    1.12 +#include "nsMemory.h"
    1.13 +#include "nsCRT.h"
    1.14 +#include "nsReadableUtils.h"
    1.15 +
const int netCharType[256] =
/*	Per-byte classification table used by IS_OK(): a byte may be copied
**	unescaped when (netCharType[byte] & mask) is non-zero.
**
**	Bit 0 (1)	xalpha		-- the alphas
**	Bit 1 (2)	xpalpha		-- as xalpha but 
**                             converts spaces to plus and plus to %2B
**	Bit 2 (4)	path		-- as xalphas but doesn't escape '/'
**
**	Only the values 0, 4 and 7 occur below: 7 means "safe under every
**	mask", 4 means "safe only under the path mask" ('+' and '/').
**	Only indices 0x00..0x80 are listed; the remaining entries are
**	zero-initialized, so every byte >= 0x80 is escaped under all masks.
*/
    /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
    {    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 0x */
		 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 1x */
		 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,	/* 2x   !"#$%&'()*+,-./	 */
         7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?	 */
	     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 4x  @ABCDEFGHIJKLMNO  */
	     /* (refers to the 4x row above) bits for '@' changed from 7   */
	     /* to 0 so '@' can be escaped in usernames and passwords in   */
	     /* publishing.                                                */
	     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,	/* 5x  PQRSTUVWXYZ[\]^_	 */
	     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 6x  `abcdefghijklmno	 */
	     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,	/* 7x  pqrstuvwxyz{|}~	DEL */
		 0, };	/* 0x80; everything after it defaults to 0 */
    1.34 +
    1.35 +/* decode % escaped hex codes into character values
    1.36 + */
    1.37 +#define UNHEX(C) \
    1.38 +    ((C >= '0' && C <= '9') ? C - '0' : \
    1.39 +     ((C >= 'A' && C <= 'F') ? C - 'A' + 10 : \
    1.40 +     ((C >= 'a' && C <= 'f') ? C - 'a' + 10 : 0)))
    1.41 +
    1.42 +
    1.43 +#define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags))
    1.44 +#define HEX_ESCAPE '%'
    1.45 +
    1.46 +//----------------------------------------------------------------------------------------
    1.47 +static char* nsEscapeCount(
    1.48 +    const char * str,
    1.49 +    nsEscapeMask flags,
    1.50 +    size_t* out_len)
    1.51 +//----------------------------------------------------------------------------------------
    1.52 +{
    1.53 +	if (!str)
    1.54 +		return 0;
    1.55 +
    1.56 +    size_t i, len = 0, charsToEscape = 0;
    1.57 +    static const char hexChars[] = "0123456789ABCDEF";
    1.58 +
    1.59 +	const unsigned char* src = (const unsigned char *) str;
    1.60 +    while (*src)
    1.61 +	{
    1.62 +        len++;
    1.63 +        if (!IS_OK(*src++))
    1.64 +            charsToEscape++;
    1.65 +	}
    1.66 +
    1.67 +    // calculate how much memory should be allocated
    1.68 +    // original length + 2 bytes for each escaped character + terminating '\0'
    1.69 +    // do the sum in steps to check for overflow
    1.70 +    size_t dstSize = len + 1 + charsToEscape;
    1.71 +    if (dstSize <= len)
    1.72 +	return 0;
    1.73 +    dstSize += charsToEscape;
    1.74 +    if (dstSize < len)
    1.75 +	return 0;
    1.76 +
    1.77 +    // fail if we need more than 4GB
    1.78 +    // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
    1.79 +    // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is
    1.80 +    // no chance to allocate more than 4GB using nsMemory::Alloc()
    1.81 +    if (dstSize > UINT32_MAX)
    1.82 +        return 0;
    1.83 +
    1.84 +	char* result = (char *)nsMemory::Alloc(dstSize);
    1.85 +    if (!result)
    1.86 +        return 0;
    1.87 +
    1.88 +    unsigned char* dst = (unsigned char *) result;
    1.89 +	src = (const unsigned char *) str;
    1.90 +	if (flags == url_XPAlphas)
    1.91 +	{
    1.92 +	    for (i = 0; i < len; i++)
    1.93 +		{
    1.94 +			unsigned char c = *src++;
    1.95 +			if (IS_OK(c))
    1.96 +				*dst++ = c;
    1.97 +			else if (c == ' ')
    1.98 +				*dst++ = '+'; /* convert spaces to pluses */
    1.99 +			else 
   1.100 +			{
   1.101 +				*dst++ = HEX_ESCAPE;
   1.102 +				*dst++ = hexChars[c >> 4];	/* high nibble */
   1.103 +				*dst++ = hexChars[c & 0x0f];	/* low nibble */
   1.104 +			}
   1.105 +		}
   1.106 +	}
   1.107 +	else
   1.108 +	{
   1.109 +	    for (i = 0; i < len; i++)
   1.110 +		{
   1.111 +			unsigned char c = *src++;
   1.112 +			if (IS_OK(c))
   1.113 +				*dst++ = c;
   1.114 +			else 
   1.115 +			{
   1.116 +				*dst++ = HEX_ESCAPE;
   1.117 +				*dst++ = hexChars[c >> 4];	/* high nibble */
   1.118 +				*dst++ = hexChars[c & 0x0f];	/* low nibble */
   1.119 +			}
   1.120 +		}
   1.121 +	}
   1.122 +
   1.123 +    *dst = '\0';     /* tack on eos */
   1.124 +	if(out_len)
   1.125 +		*out_len = dst - (unsigned char *) result;
   1.126 +    return result;
   1.127 +}
   1.128 +
   1.129 +//----------------------------------------------------------------------------------------
   1.130 +char* nsEscape(const char * str, nsEscapeMask flags)
   1.131 +//----------------------------------------------------------------------------------------
   1.132 +{
   1.133 +    if(!str)
   1.134 +        return nullptr;
   1.135 +    return nsEscapeCount(str, flags, nullptr);
   1.136 +}
   1.137 +
   1.138 +//----------------------------------------------------------------------------------------
   1.139 +char* nsUnescape(char * str)
   1.140 +//----------------------------------------------------------------------------------------
   1.141 +{
   1.142 +	nsUnescapeCount(str);
   1.143 +	return str;
   1.144 +}
   1.145 +
   1.146 +//----------------------------------------------------------------------------------------
   1.147 +int32_t nsUnescapeCount(char * str)
   1.148 +//----------------------------------------------------------------------------------------
   1.149 +{
   1.150 +    char *src = str;
   1.151 +    char *dst = str;
   1.152 +    static const char hexChars[] = "0123456789ABCDEFabcdef";
   1.153 +
   1.154 +    char c1[] = " ";
   1.155 +    char c2[] = " ";
   1.156 +    char* const pc1 = c1;
   1.157 +    char* const pc2 = c2;
   1.158 +
   1.159 +    if (!*src) {
   1.160 +      // A null string was passed in.  Nothing to escape.
   1.161 +      // Returns early as the string might not actually be mutable with
   1.162 +      // length 0.
   1.163 +      return 0;
   1.164 +    }
   1.165 +
   1.166 +    while (*src)
   1.167 +    {
   1.168 +        c1[0] = *(src+1);
   1.169 +        if (*(src+1) == '\0') 
   1.170 +            c2[0] = '\0';
   1.171 +        else
   1.172 +            c2[0] = *(src+2);
   1.173 +
   1.174 +        if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 || 
   1.175 +                                  PL_strpbrk(pc2, hexChars) == 0 )
   1.176 +        	*dst++ = *src++;
   1.177 +        else 	
   1.178 +		{
   1.179 +        	src++; /* walk over escape */
   1.180 +        	if (*src)
   1.181 +            {
   1.182 +            	*dst = UNHEX(*src) << 4;
   1.183 +            	src++;
   1.184 +            }
   1.185 +        	if (*src)
   1.186 +            {
   1.187 +            	*dst = (*dst + UNHEX(*src));
   1.188 +            	src++;
   1.189 +            }
   1.190 +        	dst++;
   1.191 +        }
   1.192 +    }
   1.193 +
   1.194 +    *dst = 0;
   1.195 +    return (int)(dst - str);
   1.196 +
   1.197 +} /* NET_UnEscapeCnt */
   1.198 +
   1.199 +
   1.200 +char *
   1.201 +nsEscapeHTML(const char * string)
   1.202 +{
   1.203 +    char *rv = nullptr;
   1.204 +    /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
   1.205 +    uint32_t len = strlen(string);
   1.206 +    if (len >= (UINT32_MAX / 6))
   1.207 +      return nullptr;
   1.208 +
   1.209 +    rv = (char *)NS_Alloc( (6 * len) + 1 );
   1.210 +    char *ptr = rv;
   1.211 +
   1.212 +    if(rv)
   1.213 +      {
   1.214 +        for(; *string != '\0'; string++)
   1.215 +          {
   1.216 +            if(*string == '<')
   1.217 +              {
   1.218 +                *ptr++ = '&';
   1.219 +                *ptr++ = 'l';
   1.220 +                *ptr++ = 't';
   1.221 +                *ptr++ = ';';
   1.222 +              }
   1.223 +            else if(*string == '>')
   1.224 +              {
   1.225 +                *ptr++ = '&';
   1.226 +                *ptr++ = 'g';
   1.227 +                *ptr++ = 't';
   1.228 +                *ptr++ = ';';
   1.229 +              }
   1.230 +            else if(*string == '&')
   1.231 +              {
   1.232 +                *ptr++ = '&';
   1.233 +                *ptr++ = 'a';
   1.234 +                *ptr++ = 'm';
   1.235 +                *ptr++ = 'p';
   1.236 +                *ptr++ = ';';
   1.237 +              }
   1.238 +            else if (*string == '"')
   1.239 +              {
   1.240 +                *ptr++ = '&';
   1.241 +                *ptr++ = 'q';
   1.242 +                *ptr++ = 'u';
   1.243 +                *ptr++ = 'o';
   1.244 +                *ptr++ = 't';
   1.245 +                *ptr++ = ';';
   1.246 +              }
   1.247 +            else if (*string == '\'')
   1.248 +              {
   1.249 +                *ptr++ = '&';
   1.250 +                *ptr++ = '#';
   1.251 +                *ptr++ = '3';
   1.252 +                *ptr++ = '9';
   1.253 +                *ptr++ = ';';
   1.254 +              }
   1.255 +            else
   1.256 +              {
   1.257 +                *ptr++ = *string;
   1.258 +              }
   1.259 +          }
   1.260 +        *ptr = '\0';
   1.261 +      }
   1.262 +
   1.263 +    return(rv);
   1.264 +}
   1.265 +
   1.266 +char16_t *
   1.267 +nsEscapeHTML2(const char16_t *aSourceBuffer, int32_t aSourceBufferLen)
   1.268 +{
   1.269 +  // Calculate the length, if the caller didn't.
   1.270 +  if (aSourceBufferLen < 0) {
   1.271 +    aSourceBufferLen = NS_strlen(aSourceBuffer);
   1.272 +  }
   1.273 +
   1.274 +  /* XXX Hardcoded max entity len. */
   1.275 +  if (uint32_t(aSourceBufferLen) >=
   1.276 +      ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t))) )
   1.277 +    return nullptr;
   1.278 +
   1.279 +  char16_t *resultBuffer = (char16_t *)nsMemory::Alloc(aSourceBufferLen *
   1.280 +                            6 * sizeof(char16_t) + sizeof(char16_t('\0')));
   1.281 +  char16_t *ptr = resultBuffer;
   1.282 +
   1.283 +  if (resultBuffer) {
   1.284 +    int32_t i;
   1.285 +
   1.286 +    for(i = 0; i < aSourceBufferLen; i++) {
   1.287 +      if(aSourceBuffer[i] == '<') {
   1.288 +        *ptr++ = '&';
   1.289 +        *ptr++ = 'l';
   1.290 +        *ptr++ = 't';
   1.291 +        *ptr++ = ';';
   1.292 +      } else if(aSourceBuffer[i] == '>') {
   1.293 +        *ptr++ = '&';
   1.294 +        *ptr++ = 'g';
   1.295 +        *ptr++ = 't';
   1.296 +        *ptr++ = ';';
   1.297 +      } else if(aSourceBuffer[i] == '&') {
   1.298 +        *ptr++ = '&';
   1.299 +        *ptr++ = 'a';
   1.300 +        *ptr++ = 'm';
   1.301 +        *ptr++ = 'p';
   1.302 +        *ptr++ = ';';
   1.303 +      } else if (aSourceBuffer[i] == '"') {
   1.304 +        *ptr++ = '&';
   1.305 +        *ptr++ = 'q';
   1.306 +        *ptr++ = 'u';
   1.307 +        *ptr++ = 'o';
   1.308 +        *ptr++ = 't';
   1.309 +        *ptr++ = ';';
   1.310 +      } else if (aSourceBuffer[i] == '\'') {
   1.311 +        *ptr++ = '&';
   1.312 +        *ptr++ = '#';
   1.313 +        *ptr++ = '3';
   1.314 +        *ptr++ = '9';
   1.315 +        *ptr++ = ';';
   1.316 +      } else {
   1.317 +        *ptr++ = aSourceBuffer[i];
   1.318 +      }
   1.319 +    }
   1.320 +    *ptr = 0;
   1.321 +  }
   1.322 +
   1.323 +  return resultBuffer;
   1.324 +}
   1.325 +
   1.326 +//----------------------------------------------------------------------------------------
   1.327 +
// Per-byte table consulted through NO_NEED_ESC(): a byte may be copied
// unescaped when (EscapeChars[byte] & flags) is non-zero.  Each entry is a
// bit set over the ten URL-part flags listed in the comment that follows
// this table (esc_Scheme = 1 ... esc_Ref = 512), so 1023 means "never needs
// escaping in any part" and 0 means "always escape".
// Only indices 0x00..0x80 are listed; the remaining entries are
// zero-initialized.
const int EscapeChars[256] =
/*      0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F */
{
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,       /* 0x */
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  	    /* 1x */
        0,1023,   0, 512,1023,   0,1023,   0,1023,1023,1023,1023,1023,1023, 953, 784,       /* 2x   !"#$%&'()*+,-./	 */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008,   0,1008,   0, 768,       /* 3x  0123456789:;<=>?	 */
     1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,       /* 4x  @ABCDEFGHIJKLMNO  */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,       /* 5x  PQRSTUVWXYZ[\]^_	 */
        0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,       /* 6x  `abcdefghijklmno	 */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,       /* 7x  pqrstuvwxyz{|}~ DEL */
        0    /* 0x80 (DEL, 0x7F, is the last entry of the 7x row above) */
};
   1.341 +
   1.342 +#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags))
   1.343 +
   1.344 +//----------------------------------------------------------------------------------------
   1.345 +
   1.346 +/* returns an escaped string */
   1.347 +
   1.348 +/* use the following flags to specify which 
   1.349 +   part of an URL you want to escape: 
   1.350 +
   1.351 +   esc_Scheme        =     1
   1.352 +   esc_Username      =     2
   1.353 +   esc_Password      =     4
   1.354 +   esc_Host          =     8
   1.355 +   esc_Directory     =    16
   1.356 +   esc_FileBaseName  =    32
   1.357 +   esc_FileExtension =    64
   1.358 +   esc_Param         =   128
   1.359 +   esc_Query         =   256
   1.360 +   esc_Ref           =   512
   1.361 +*/
   1.362 +
   1.363 +/* by default this function will not escape parts of a string
   1.364 +   that already look escaped, which means it already includes 
   1.365 +   a valid hexcode. This is done to avoid multiple escapes of
   1.366 +   a string. Use the following flags to force escaping of a 
   1.367 +   string:
   1.368 + 
   1.369 +   esc_Forced        =  1024
   1.370 +*/
   1.371 +
   1.372 +bool NS_EscapeURL(const char *part,
   1.373 +                           int32_t partLen,
   1.374 +                           uint32_t flags,
   1.375 +                           nsACString &result)
   1.376 +{
   1.377 +    if (!part) {
   1.378 +        NS_NOTREACHED("null pointer");
   1.379 +        return false;
   1.380 +    }
   1.381 +
   1.382 +    int i = 0;
   1.383 +    static const char hexChars[] = "0123456789ABCDEF";
   1.384 +    if (partLen < 0)
   1.385 +        partLen = strlen(part);
   1.386 +    bool forced = !!(flags & esc_Forced);
   1.387 +    bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
   1.388 +    bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
   1.389 +    bool writing = !!(flags & esc_AlwaysCopy);
   1.390 +    bool colon = !!(flags & esc_Colon);
   1.391 +
   1.392 +    const unsigned char* src = (const unsigned char *) part;
   1.393 +
   1.394 +    char tempBuffer[100];
   1.395 +    unsigned int tempBufferPos = 0;
   1.396 +
   1.397 +    bool previousIsNonASCII = false;
   1.398 +    for (i = 0; i < partLen; i++)
   1.399 +    {
   1.400 +      unsigned char c = *src++;
   1.401 +
   1.402 +      // if the char has not to be escaped or whatever follows % is 
   1.403 +      // a valid escaped string, just copy the char.
   1.404 +      //
   1.405 +      // Also the % will not be escaped until forced
   1.406 +      // See bugzilla bug 61269 for details why we changed this
   1.407 +      //
   1.408 +      // And, we will not escape non-ascii characters if requested.
   1.409 +      // On special request we will also escape the colon even when
   1.410 +      // not covered by the matrix.
   1.411 +      // ignoreAscii is not honored for control characters (C0 and DEL)
   1.412 +      //
   1.413 +      // And, we should escape the '|' character when it occurs after any
   1.414 +      // non-ASCII character as it may be part of a multi-byte character.
   1.415 +      //
   1.416 +      // 0x20..0x7e are the valid ASCII characters. We also escape spaces
   1.417 +      // (0x20) since they are not legal in URLs.
   1.418 +      if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
   1.419 +                          || (c > 0x7f && ignoreNonAscii)
   1.420 +                          || (c > 0x20 && c < 0x7f && ignoreAscii))
   1.421 +          && !(c == ':' && colon)
   1.422 +          && !(previousIsNonASCII && c == '|' && !ignoreNonAscii))
   1.423 +      {
   1.424 +        if (writing)
   1.425 +          tempBuffer[tempBufferPos++] = c;
   1.426 +      }
   1.427 +      else /* do the escape magic */
   1.428 +      {
   1.429 +        if (!writing)
   1.430 +        {
   1.431 +          result.Append(part, i);
   1.432 +          writing = true;
   1.433 +        }
   1.434 +        tempBuffer[tempBufferPos++] = HEX_ESCAPE;
   1.435 +        tempBuffer[tempBufferPos++] = hexChars[c >> 4];	/* high nibble */
   1.436 +        tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
   1.437 +      }
   1.438 +
   1.439 +      if (tempBufferPos >= sizeof(tempBuffer) - 4)
   1.440 +      {
   1.441 +        NS_ASSERTION(writing, "should be writing");
   1.442 +        tempBuffer[tempBufferPos] = '\0';
   1.443 +        result += tempBuffer;
   1.444 +        tempBufferPos = 0;
   1.445 +      }
   1.446 +
   1.447 +      previousIsNonASCII = (c > 0x7f);
   1.448 +    }
   1.449 +    if (writing) {
   1.450 +      tempBuffer[tempBufferPos] = '\0';
   1.451 +      result += tempBuffer;
   1.452 +    }
   1.453 +    return writing;
   1.454 +}
   1.455 +
// ISHEX(c): evaluates to a non-null pointer when |c| occurs in the array
// named |hexChars| that is in scope at the expansion site, and to null
// otherwise.  The array's terminating '\0' is excluded from the search
// (sizeof - 1), so a NUL byte is never reported as a hex digit.
#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
   1.457 +
   1.458 +bool NS_UnescapeURL(const char *str, int32_t len, uint32_t flags, nsACString &result)
   1.459 +{
   1.460 +    if (!str) {
   1.461 +        NS_NOTREACHED("null pointer");
   1.462 +        return false;
   1.463 +    }
   1.464 +
   1.465 +    if (len < 0)
   1.466 +        len = strlen(str);
   1.467 +
   1.468 +    bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
   1.469 +    bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
   1.470 +    bool writing = !!(flags & esc_AlwaysCopy);
   1.471 +    bool skipControl = !!(flags & esc_SkipControl); 
   1.472 +
   1.473 +    static const char hexChars[] = "0123456789ABCDEFabcdef";
   1.474 +
   1.475 +    const char *last = str;
   1.476 +    const char *p = str;
   1.477 +
   1.478 +    for (int i=0; i<len; ++i, ++p) {
   1.479 +        //printf("%c [i=%d of len=%d]\n", *p, i, len);
   1.480 +        if (*p == HEX_ESCAPE && i < len-2) {
   1.481 +            unsigned char *p1 = ((unsigned char *) p) + 1;
   1.482 +            unsigned char *p2 = ((unsigned char *) p) + 2;
   1.483 +            if (ISHEX(*p1) && ISHEX(*p2) && 
   1.484 +                ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
   1.485 +                !(skipControl && 
   1.486 +                  (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
   1.487 +                //printf("- p1=%c p2=%c\n", *p1, *p2);
   1.488 +                writing = true;
   1.489 +                if (p > last) {
   1.490 +                    //printf("- p=%p, last=%p\n", p, last);
   1.491 +                    result.Append(last, p - last);
   1.492 +                    last = p;
   1.493 +                }
   1.494 +                char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
   1.495 +                //printf("- u=%c\n", u);
   1.496 +                result.Append(u);
   1.497 +                i += 2;
   1.498 +                p += 2;
   1.499 +                last += 3;
   1.500 +            }
   1.501 +        }
   1.502 +    }
   1.503 +    if (writing && last < str + len)
   1.504 +        result.Append(last, str + len - last);
   1.505 +
   1.506 +    return writing;
   1.507 +}

mercurial