xpcom/io/nsEscape.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701 by complying with the disk-avoidance requirement
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 //	First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
     8 #include "nsEscape.h"
     9 #include "nsMemory.h"
    10 #include "nsCRT.h"
    11 #include "nsReadableUtils.h"
    13 const int netCharType[256] =
    14 /*	Bit 0		xalpha		-- the alphas
    15 **	Bit 1		xpalpha		-- as xalpha but 
    16 **                             converts spaces to plus and plus to %2B
    17 **	Bit 2		path		-- as xalphas but doesn't escape '/'
       **
       **	The table is indexed by byte value and read through IS_OK(): a byte is
       **	copied unescaped when (entry & flags) is nonzero.  An entry of 7 has
       **	all three bits set, 4 has only the path bit (see '+' and '/'), and 0
       **	means the byte is escaped under every mask.
       **	Only indices 0x00-0x80 are listed; the remaining entries (0x81-0xFF)
       **	are zero-initialized.
    18 */
    19     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
    20     {    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 0x */
    21 		 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 1x */
    22 		 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,	/* 2x   !"#$%&'()*+,-./	 */
    23          7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?	 */
    24 	     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 4x  @ABCDEFGHIJKLMNO  */
    25 	     /* bits for '@' changed from 7 to 0 so '@' can be escaped   */
    26 	     /* in usernames and passwords in publishing.                */
    27 	     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,	/* 5x  PQRSTUVWXYZ[\]^_	 */
    28 	     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 6x  `abcdefghijklmno	 */
    29 	     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,	/* 7x  pqrstuvwxyz{|}~	DEL */
    30 		 0, };	/* index 0x80; entries 0x81-0xFF are implicitly zero */
    /* UNHEX(C): numeric value (0-15) of the hexadecimal digit C, used when
     * decoding %XX escapes.  Any character that is not [0-9A-Fa-f] yields 0.
     *
     * The parameter is parenthesized at every use so that an expression
     * argument (for example a ?: expression) is evaluated as a unit.
     * NOTE: C is evaluated more than once; do not pass an argument with
     * side effects such as *p++.
     */
    #define UNHEX(C) \
        (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
         (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
         (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))
    /* IS_OK(C): nonzero when byte C may be copied unescaped under the escape
     * mask held in a variable named `flags` at the point of use.
     *
     * C is narrowed to unsigned char before indexing so that a plain (possibly
     * signed) char with the high bit set selects an entry in 0x80-0xFF instead
     * of reading outside the 256-entry netCharType table.
     */
    #define IS_OK(C) (netCharType[((unsigned char) (C))] & (flags))
    /* Character that introduces a "%XX" escape sequence. */
    #define HEX_ESCAPE '%'
    43 //----------------------------------------------------------------------------------------
    44 static char* nsEscapeCount(
    45     const char * str,
    46     nsEscapeMask flags,
    47     size_t* out_len)
    48 //----------------------------------------------------------------------------------------
    49 {
    50 	if (!str)
    51 		return 0;
    53     size_t i, len = 0, charsToEscape = 0;
    54     static const char hexChars[] = "0123456789ABCDEF";
    56 	const unsigned char* src = (const unsigned char *) str;
    57     while (*src)
    58 	{
    59         len++;
    60         if (!IS_OK(*src++))
    61             charsToEscape++;
    62 	}
    64     // calculate how much memory should be allocated
    65     // original length + 2 bytes for each escaped character + terminating '\0'
    66     // do the sum in steps to check for overflow
    67     size_t dstSize = len + 1 + charsToEscape;
    68     if (dstSize <= len)
    69 	return 0;
    70     dstSize += charsToEscape;
    71     if (dstSize < len)
    72 	return 0;
    74     // fail if we need more than 4GB
    75     // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
    76     // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is
    77     // no chance to allocate more than 4GB using nsMemory::Alloc()
    78     if (dstSize > UINT32_MAX)
    79         return 0;
    81 	char* result = (char *)nsMemory::Alloc(dstSize);
    82     if (!result)
    83         return 0;
    85     unsigned char* dst = (unsigned char *) result;
    86 	src = (const unsigned char *) str;
    87 	if (flags == url_XPAlphas)
    88 	{
    89 	    for (i = 0; i < len; i++)
    90 		{
    91 			unsigned char c = *src++;
    92 			if (IS_OK(c))
    93 				*dst++ = c;
    94 			else if (c == ' ')
    95 				*dst++ = '+'; /* convert spaces to pluses */
    96 			else 
    97 			{
    98 				*dst++ = HEX_ESCAPE;
    99 				*dst++ = hexChars[c >> 4];	/* high nibble */
   100 				*dst++ = hexChars[c & 0x0f];	/* low nibble */
   101 			}
   102 		}
   103 	}
   104 	else
   105 	{
   106 	    for (i = 0; i < len; i++)
   107 		{
   108 			unsigned char c = *src++;
   109 			if (IS_OK(c))
   110 				*dst++ = c;
   111 			else 
   112 			{
   113 				*dst++ = HEX_ESCAPE;
   114 				*dst++ = hexChars[c >> 4];	/* high nibble */
   115 				*dst++ = hexChars[c & 0x0f];	/* low nibble */
   116 			}
   117 		}
   118 	}
   120     *dst = '\0';     /* tack on eos */
   121 	if(out_len)
   122 		*out_len = dst - (unsigned char *) result;
   123     return result;
   124 }
   126 //----------------------------------------------------------------------------------------
   127 char* nsEscape(const char * str, nsEscapeMask flags)
   128 //----------------------------------------------------------------------------------------
   129 {
   130     if(!str)
   131         return nullptr;
   132     return nsEscapeCount(str, flags, nullptr);
   133 }
   135 //----------------------------------------------------------------------------------------
   136 char* nsUnescape(char * str)
   137 //----------------------------------------------------------------------------------------
   138 {
   139 	nsUnescapeCount(str);
   140 	return str;
   141 }
   143 //----------------------------------------------------------------------------------------
   // Returns the numeric value (0-15) of the hexadecimal digit aCh, or -1 when
   // aCh is not one of [0-9A-Fa-f] (this includes the terminating NUL).
   static int nsUnescapeHexValue(char aCh)
   {
       if (aCh >= '0' && aCh <= '9')
           return aCh - '0';
       if (aCh >= 'A' && aCh <= 'F')
           return aCh - 'A' + 10;
       if (aCh >= 'a' && aCh <= 'f')
           return aCh - 'a' + 10;
       return -1;
   }

   /**
    * Decodes %XX escape sequences in |str| in place and NUL-terminates the
    * result.
    *
    * A '%' is decoded only when it is followed by two hexadecimal digits; any
    * other '%' (including one at the end of the string) is copied unchanged.
    * "%00" decodes to an embedded NUL byte, which is counted in the returned
    * length.
    *
    * @param str  NUL-terminated, writable buffer.  A null pointer or an empty
    *             string is left untouched (an empty string may live in
    *             read-only storage, so it must not be written to).
    * @return the number of bytes in the decoded string, excluding the
    *         terminating NUL.
    */
   int32_t nsUnescapeCount(char * str)
   //----------------------------------------------------------------------------------------
   {
       if (!str || !*str) {
           return 0;
       }

       const char kEscape = '%';   // same character as HEX_ESCAPE
       char *src = str;
       char *dst = str;

       while (*src)
       {
           if (*src == kEscape)
           {
               // src[1] is readable because *src is not the terminator; src[2]
               // is only inspected once src[1] is known to be a hex digit (and
               // therefore not the terminator either).
               const int high = nsUnescapeHexValue(src[1]);
               const int low = (high < 0) ? -1 : nsUnescapeHexValue(src[2]);
               if (low >= 0)
               {
                   *dst++ = static_cast<char>(
                       static_cast<unsigned char>((high << 4) | low));
                   src += 3;   /* walk over '%' and both digits */
                   continue;
               }
           }
           *dst++ = *src++;
       }

       *dst = 0;
       return (int32_t)(dst - str);

   } /* NET_UnEscapeCnt */
   197 char *
   198 nsEscapeHTML(const char * string)
   199 {
   200     char *rv = nullptr;
   201     /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
   202     uint32_t len = strlen(string);
   203     if (len >= (UINT32_MAX / 6))
   204       return nullptr;
   206     rv = (char *)NS_Alloc( (6 * len) + 1 );
   207     char *ptr = rv;
   209     if(rv)
   210       {
   211         for(; *string != '\0'; string++)
   212           {
   213             if(*string == '<')
   214               {
   215                 *ptr++ = '&';
   216                 *ptr++ = 'l';
   217                 *ptr++ = 't';
   218                 *ptr++ = ';';
   219               }
   220             else if(*string == '>')
   221               {
   222                 *ptr++ = '&';
   223                 *ptr++ = 'g';
   224                 *ptr++ = 't';
   225                 *ptr++ = ';';
   226               }
   227             else if(*string == '&')
   228               {
   229                 *ptr++ = '&';
   230                 *ptr++ = 'a';
   231                 *ptr++ = 'm';
   232                 *ptr++ = 'p';
   233                 *ptr++ = ';';
   234               }
   235             else if (*string == '"')
   236               {
   237                 *ptr++ = '&';
   238                 *ptr++ = 'q';
   239                 *ptr++ = 'u';
   240                 *ptr++ = 'o';
   241                 *ptr++ = 't';
   242                 *ptr++ = ';';
   243               }
   244             else if (*string == '\'')
   245               {
   246                 *ptr++ = '&';
   247                 *ptr++ = '#';
   248                 *ptr++ = '3';
   249                 *ptr++ = '9';
   250                 *ptr++ = ';';
   251               }
   252             else
   253               {
   254                 *ptr++ = *string;
   255               }
   256           }
   257         *ptr = '\0';
   258       }
   260     return(rv);
   261 }
   263 char16_t *
   264 nsEscapeHTML2(const char16_t *aSourceBuffer, int32_t aSourceBufferLen)
   265 {
   266   // Calculate the length, if the caller didn't.
   267   if (aSourceBufferLen < 0) {
   268     aSourceBufferLen = NS_strlen(aSourceBuffer);
   269   }
   271   /* XXX Hardcoded max entity len. */
   272   if (uint32_t(aSourceBufferLen) >=
   273       ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t))) )
   274     return nullptr;
   276   char16_t *resultBuffer = (char16_t *)nsMemory::Alloc(aSourceBufferLen *
   277                             6 * sizeof(char16_t) + sizeof(char16_t('\0')));
   278   char16_t *ptr = resultBuffer;
   280   if (resultBuffer) {
   281     int32_t i;
   283     for(i = 0; i < aSourceBufferLen; i++) {
   284       if(aSourceBuffer[i] == '<') {
   285         *ptr++ = '&';
   286         *ptr++ = 'l';
   287         *ptr++ = 't';
   288         *ptr++ = ';';
   289       } else if(aSourceBuffer[i] == '>') {
   290         *ptr++ = '&';
   291         *ptr++ = 'g';
   292         *ptr++ = 't';
   293         *ptr++ = ';';
   294       } else if(aSourceBuffer[i] == '&') {
   295         *ptr++ = '&';
   296         *ptr++ = 'a';
   297         *ptr++ = 'm';
   298         *ptr++ = 'p';
   299         *ptr++ = ';';
   300       } else if (aSourceBuffer[i] == '"') {
   301         *ptr++ = '&';
   302         *ptr++ = 'q';
   303         *ptr++ = 'u';
   304         *ptr++ = 'o';
   305         *ptr++ = 't';
   306         *ptr++ = ';';
   307       } else if (aSourceBuffer[i] == '\'') {
   308         *ptr++ = '&';
   309         *ptr++ = '#';
   310         *ptr++ = '3';
   311         *ptr++ = '9';
   312         *ptr++ = ';';
   313       } else {
   314         *ptr++ = aSourceBuffer[i];
   315       }
   316     }
   317     *ptr = 0;
   318   }
   320   return resultBuffer;
   321 }
   323 //----------------------------------------------------------------------------------------
   325 const int EscapeChars[256] =
       /* Indexed by byte value and read through NO_NEED_ESC(): a byte needs no
          escaping when (entry & flags) is nonzero.  Each entry is a sum of the
          esc_* URL-part values listed further below (1 = esc_Scheme ...
          512 = esc_Ref); 1023 covers all ten parts and 0 covers none.
          Only indices 0x00-0x80 are listed; the remaining entries (0x81-0xFF)
          are zero-initialized. */
   326 /*      0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F */
   327 {
   328         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,       /* 0x */
   329         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  	    /* 1x */
   330         0,1023,   0, 512,1023,   0,1023,   0,1023,1023,1023,1023,1023,1023, 953, 784,       /* 2x   !"#$%&'()*+,-./	 */
   331      1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008,   0,1008,   0, 768,       /* 3x  0123456789:;<=>?	 */
   332      1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,       /* 4x  @ABCDEFGHIJKLMNO  */
   333      1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,       /* 5x  PQRSTUVWXYZ[\]^_	 */
   334         0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,       /* 6x  `abcdefghijklmno	 */
   335      1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,       /* 7x  pqrstuvwxyz{|}~ DEL */
   336         0    /* index 0x80 (DEL is the last 7x entry); 0x81-0xFF are implicitly zero */
   337 };
   /* NO_NEED_ESC(C): nonzero when byte C may stay unescaped for the URL parts
    * selected by a variable named `flags` at the point of use.
    *
    * C is narrowed to unsigned char before indexing so that a plain (possibly
    * signed) char with the high bit set selects an entry in 0x80-0xFF instead
    * of reading outside the 256-entry EscapeChars table.
    */
   #define NO_NEED_ESC(C) (EscapeChars[((unsigned char) (C))] & (flags))
   341 //----------------------------------------------------------------------------------------
   343 /* returns an escaped string */
   345 /* use the following flags to specify which 
   346    part of an URL you want to escape: 
   348    esc_Scheme        =     1
   349    esc_Username      =     2
   350    esc_Password      =     4
   351    esc_Host          =     8
   352    esc_Directory     =    16
   353    esc_FileBaseName  =    32
   354    esc_FileExtension =    64
   355    esc_Param         =   128
   356    esc_Query         =   256
   357    esc_Ref           =   512
   358 */
   360 /* by default this function will not escape parts of a string
   361    that already look escaped, which means it already includes 
   362    a valid hexcode. This is done to avoid multiple escapes of
   363    a string. Use the following flags to force escaping of a 
   364    string:
   366    esc_Forced        =  1024
   367 */
   369 bool NS_EscapeURL(const char *part,
   370                            int32_t partLen,
   371                            uint32_t flags,
   372                            nsACString &result)
   373 {
   374     if (!part) {
   375         NS_NOTREACHED("null pointer");
   376         return false;
   377     }
   379     int i = 0;
   380     static const char hexChars[] = "0123456789ABCDEF";
   381     if (partLen < 0)
   382         partLen = strlen(part);
   383     bool forced = !!(flags & esc_Forced);
   384     bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
   385     bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
   386     bool writing = !!(flags & esc_AlwaysCopy);
   387     bool colon = !!(flags & esc_Colon);
   389     const unsigned char* src = (const unsigned char *) part;
   391     char tempBuffer[100];
   392     unsigned int tempBufferPos = 0;
   394     bool previousIsNonASCII = false;
   395     for (i = 0; i < partLen; i++)
   396     {
   397       unsigned char c = *src++;
   399       // if the char has not to be escaped or whatever follows % is 
   400       // a valid escaped string, just copy the char.
   401       //
   402       // Also the % will not be escaped until forced
   403       // See bugzilla bug 61269 for details why we changed this
   404       //
   405       // And, we will not escape non-ascii characters if requested.
   406       // On special request we will also escape the colon even when
   407       // not covered by the matrix.
   408       // ignoreAscii is not honored for control characters (C0 and DEL)
   409       //
   410       // And, we should escape the '|' character when it occurs after any
   411       // non-ASCII character as it may be part of a multi-byte character.
   412       //
   413       // 0x20..0x7e are the valid ASCII characters. We also escape spaces
   414       // (0x20) since they are not legal in URLs.
   415       if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
   416                           || (c > 0x7f && ignoreNonAscii)
   417                           || (c > 0x20 && c < 0x7f && ignoreAscii))
   418           && !(c == ':' && colon)
   419           && !(previousIsNonASCII && c == '|' && !ignoreNonAscii))
   420       {
   421         if (writing)
   422           tempBuffer[tempBufferPos++] = c;
   423       }
   424       else /* do the escape magic */
   425       {
   426         if (!writing)
   427         {
   428           result.Append(part, i);
   429           writing = true;
   430         }
   431         tempBuffer[tempBufferPos++] = HEX_ESCAPE;
   432         tempBuffer[tempBufferPos++] = hexChars[c >> 4];	/* high nibble */
   433         tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
   434       }
   436       if (tempBufferPos >= sizeof(tempBuffer) - 4)
   437       {
   438         NS_ASSERTION(writing, "should be writing");
   439         tempBuffer[tempBufferPos] = '\0';
   440         result += tempBuffer;
   441         tempBufferPos = 0;
   442       }
   444       previousIsNonASCII = (c > 0x7f);
   445     }
   446     if (writing) {
   447       tempBuffer[tempBufferPos] = '\0';
   448       result += tempBuffer;
   449     }
   450     return writing;
   451 }
   /* ISHEX(c): non-null when c is one of the characters in the array named
    * `hexChars` that is in scope at the point of use (the terminating NUL of
    * that array is excluded from the search).  The parameter and the whole
    * replacement list are parenthesized so the macro behaves as a single
    * expression wherever it is expanded.
    */
   #define ISHEX(c) (memchr(hexChars, (c), sizeof(hexChars)-1))
   455 bool NS_UnescapeURL(const char *str, int32_t len, uint32_t flags, nsACString &result)
   456 {
   457     if (!str) {
   458         NS_NOTREACHED("null pointer");
   459         return false;
   460     }
   462     if (len < 0)
   463         len = strlen(str);
   465     bool ignoreNonAscii = !!(flags & esc_OnlyASCII);
   466     bool ignoreAscii = !!(flags & esc_OnlyNonASCII);
   467     bool writing = !!(flags & esc_AlwaysCopy);
   468     bool skipControl = !!(flags & esc_SkipControl); 
   470     static const char hexChars[] = "0123456789ABCDEFabcdef";
   472     const char *last = str;
   473     const char *p = str;
   475     for (int i=0; i<len; ++i, ++p) {
   476         //printf("%c [i=%d of len=%d]\n", *p, i, len);
   477         if (*p == HEX_ESCAPE && i < len-2) {
   478             unsigned char *p1 = ((unsigned char *) p) + 1;
   479             unsigned char *p2 = ((unsigned char *) p) + 2;
   480             if (ISHEX(*p1) && ISHEX(*p2) && 
   481                 ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
   482                 !(skipControl && 
   483                   (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
   484                 //printf("- p1=%c p2=%c\n", *p1, *p2);
   485                 writing = true;
   486                 if (p > last) {
   487                     //printf("- p=%p, last=%p\n", p, last);
   488                     result.Append(last, p - last);
   489                     last = p;
   490                 }
   491                 char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
   492                 //printf("- u=%c\n", u);
   493                 result.Append(u);
   494                 i += 2;
   495                 p += 2;
   496                 last += 3;
   497             }
   498         }
   499     }
   500     if (writing && last < str + len)
   501         result.Append(last, str + len - last);
   503     return writing;
   504 }

mercurial