michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c. michael@0: michael@0: #include "nsEscape.h" michael@0: #include "nsMemory.h" michael@0: #include "nsCRT.h" michael@0: #include "nsReadableUtils.h" michael@0: michael@0: const int netCharType[256] = michael@0: /* Bit 0 xalpha -- the alphas michael@0: ** Bit 1 xpalpha -- as xalpha but michael@0: ** converts spaces to plus and plus to %2B michael@0: ** Bit 3 ... path -- as xalphas but doesn't escape '/' michael@0: */ michael@0: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ michael@0: { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x */ michael@0: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1x */ michael@0: 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4, /* 2x !"#$%&'()*+,-./ */ michael@0: 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */ michael@0: 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */ michael@0: /* bits for '@' changed from 7 to 0 so '@' can be escaped */ michael@0: /* in usernames and passwords in publishing. */ michael@0: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */ michael@0: 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */ michael@0: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0, /* 7X pqrstuvwxyz{\}~ DEL */ michael@0: 0, }; michael@0: michael@0: /* decode % escaped hex codes into character values michael@0: */ michael@0: #define UNHEX(C) \ michael@0: ((C >= '0' && C <= '9') ? C - '0' : \ michael@0: ((C >= 'A' && C <= 'F') ? C - 'A' + 10 : \ michael@0: ((C >= 'a' && C <= 'f') ? C - 'a' + 10 : 0))) michael@0: michael@0: michael@0: #define IS_OK(C) (netCharType[((unsigned int) (C))] & (flags)) michael@0: #define HEX_ESCAPE '%' michael@0: michael@0: //---------------------------------------------------------------------------------------- michael@0: static char* nsEscapeCount( michael@0: const char * str, michael@0: nsEscapeMask flags, michael@0: size_t* out_len) michael@0: //---------------------------------------------------------------------------------------- michael@0: { michael@0: if (!str) michael@0: return 0; michael@0: michael@0: size_t i, len = 0, charsToEscape = 0; michael@0: static const char hexChars[] = "0123456789ABCDEF"; michael@0: michael@0: const unsigned char* src = (const unsigned char *) str; michael@0: while (*src) michael@0: { michael@0: len++; michael@0: if (!IS_OK(*src++)) michael@0: charsToEscape++; michael@0: } michael@0: michael@0: // calculate how much memory should be allocated michael@0: // original length + 2 bytes for each escaped character + terminating '\0' michael@0: // do the sum in steps to check for overflow michael@0: size_t dstSize = len + 1 + charsToEscape; michael@0: if (dstSize <= len) michael@0: return 0; michael@0: dstSize += charsToEscape; michael@0: if (dstSize < len) michael@0: return 0; michael@0: michael@0: // fail if we need more than 4GB michael@0: // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t) michael@0: // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is michael@0: // no chance to allocate more than 4GB using nsMemory::Alloc() michael@0: if (dstSize > UINT32_MAX) michael@0: return 0; michael@0: michael@0: char* result = (char *)nsMemory::Alloc(dstSize); michael@0: if (!result) michael@0: return 0; michael@0: michael@0: unsigned char* dst = (unsigned char *) result; michael@0: src = (const unsigned char *) str; michael@0: if (flags == url_XPAlphas) michael@0: { michael@0: for (i = 0; i < len; i++) michael@0: { michael@0: unsigned char c = *src++; michael@0: if (IS_OK(c)) michael@0: *dst++ = c; michael@0: else if (c == ' ') michael@0: *dst++ = '+'; /* convert spaces to pluses */ michael@0: else michael@0: { michael@0: *dst++ = HEX_ESCAPE; michael@0: *dst++ = hexChars[c >> 4]; /* high nibble */ michael@0: *dst++ = hexChars[c & 0x0f]; /* low nibble */ michael@0: } michael@0: } michael@0: } michael@0: else michael@0: { michael@0: for (i = 0; i < len; i++) michael@0: { michael@0: unsigned char c = *src++; michael@0: if (IS_OK(c)) michael@0: *dst++ = c; michael@0: else michael@0: { michael@0: *dst++ = HEX_ESCAPE; michael@0: *dst++ = hexChars[c >> 4]; /* high nibble */ michael@0: *dst++ = hexChars[c & 0x0f]; /* low nibble */ michael@0: } michael@0: } michael@0: } michael@0: michael@0: *dst = '\0'; /* tack on eos */ michael@0: if(out_len) michael@0: *out_len = dst - (unsigned char *) result; michael@0: return result; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------------------- michael@0: char* nsEscape(const char * str, nsEscapeMask flags) michael@0: //---------------------------------------------------------------------------------------- michael@0: { michael@0: if(!str) michael@0: return nullptr; michael@0: return nsEscapeCount(str, flags, nullptr); michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------------------- michael@0: char* nsUnescape(char * str) michael@0: //---------------------------------------------------------------------------------------- michael@0: { michael@0: nsUnescapeCount(str); michael@0: return str; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------------------- michael@0: int32_t nsUnescapeCount(char * str) michael@0: //---------------------------------------------------------------------------------------- michael@0: { michael@0: char *src = str; michael@0: char *dst = str; michael@0: static const char hexChars[] = "0123456789ABCDEFabcdef"; michael@0: michael@0: char c1[] = " "; michael@0: char c2[] = " "; michael@0: char* const pc1 = c1; michael@0: char* const pc2 = c2; michael@0: michael@0: if (!*src) { michael@0: // A null string was passed in. Nothing to escape. michael@0: // Returns early as the string might not actually be mutable with michael@0: // length 0. michael@0: return 0; michael@0: } michael@0: michael@0: while (*src) michael@0: { michael@0: c1[0] = *(src+1); michael@0: if (*(src+1) == '\0') michael@0: c2[0] = '\0'; michael@0: else michael@0: c2[0] = *(src+2); michael@0: michael@0: if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 || michael@0: PL_strpbrk(pc2, hexChars) == 0 ) michael@0: *dst++ = *src++; michael@0: else michael@0: { michael@0: src++; /* walk over escape */ michael@0: if (*src) michael@0: { michael@0: *dst = UNHEX(*src) << 4; michael@0: src++; michael@0: } michael@0: if (*src) michael@0: { michael@0: *dst = (*dst + UNHEX(*src)); michael@0: src++; michael@0: } michael@0: dst++; michael@0: } michael@0: } michael@0: michael@0: *dst = 0; michael@0: return (int)(dst - str); michael@0: michael@0: } /* NET_UnEscapeCnt */ michael@0: michael@0: michael@0: char * michael@0: nsEscapeHTML(const char * string) michael@0: { michael@0: char *rv = nullptr; michael@0: /* XXX Hardcoded max entity len. The +1 is for the trailing null. */ michael@0: uint32_t len = strlen(string); michael@0: if (len >= (UINT32_MAX / 6)) michael@0: return nullptr; michael@0: michael@0: rv = (char *)NS_Alloc( (6 * len) + 1 ); michael@0: char *ptr = rv; michael@0: michael@0: if(rv) michael@0: { michael@0: for(; *string != '\0'; string++) michael@0: { michael@0: if(*string == '<') michael@0: { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'l'; michael@0: *ptr++ = 't'; michael@0: *ptr++ = ';'; michael@0: } michael@0: else if(*string == '>') michael@0: { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'g'; michael@0: *ptr++ = 't'; michael@0: *ptr++ = ';'; michael@0: } michael@0: else if(*string == '&') michael@0: { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'a'; michael@0: *ptr++ = 'm'; michael@0: *ptr++ = 'p'; michael@0: *ptr++ = ';'; michael@0: } michael@0: else if (*string == '"') michael@0: { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'q'; michael@0: *ptr++ = 'u'; michael@0: *ptr++ = 'o'; michael@0: *ptr++ = 't'; michael@0: *ptr++ = ';'; michael@0: } michael@0: else if (*string == '\'') michael@0: { michael@0: *ptr++ = '&'; michael@0: *ptr++ = '#'; michael@0: *ptr++ = '3'; michael@0: *ptr++ = '9'; michael@0: *ptr++ = ';'; michael@0: } michael@0: else michael@0: { michael@0: *ptr++ = *string; michael@0: } michael@0: } michael@0: *ptr = '\0'; michael@0: } michael@0: michael@0: return(rv); michael@0: } michael@0: michael@0: char16_t * michael@0: nsEscapeHTML2(const char16_t *aSourceBuffer, int32_t aSourceBufferLen) michael@0: { michael@0: // Calculate the length, if the caller didn't. michael@0: if (aSourceBufferLen < 0) { michael@0: aSourceBufferLen = NS_strlen(aSourceBuffer); michael@0: } michael@0: michael@0: /* XXX Hardcoded max entity len. */ michael@0: if (uint32_t(aSourceBufferLen) >= michael@0: ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t))) ) michael@0: return nullptr; michael@0: michael@0: char16_t *resultBuffer = (char16_t *)nsMemory::Alloc(aSourceBufferLen * michael@0: 6 * sizeof(char16_t) + sizeof(char16_t('\0'))); michael@0: char16_t *ptr = resultBuffer; michael@0: michael@0: if (resultBuffer) { michael@0: int32_t i; michael@0: michael@0: for(i = 0; i < aSourceBufferLen; i++) { michael@0: if(aSourceBuffer[i] == '<') { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'l'; michael@0: *ptr++ = 't'; michael@0: *ptr++ = ';'; michael@0: } else if(aSourceBuffer[i] == '>') { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'g'; michael@0: *ptr++ = 't'; michael@0: *ptr++ = ';'; michael@0: } else if(aSourceBuffer[i] == '&') { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'a'; michael@0: *ptr++ = 'm'; michael@0: *ptr++ = 'p'; michael@0: *ptr++ = ';'; michael@0: } else if (aSourceBuffer[i] == '"') { michael@0: *ptr++ = '&'; michael@0: *ptr++ = 'q'; michael@0: *ptr++ = 'u'; michael@0: *ptr++ = 'o'; michael@0: *ptr++ = 't'; michael@0: *ptr++ = ';'; michael@0: } else if (aSourceBuffer[i] == '\'') { michael@0: *ptr++ = '&'; michael@0: *ptr++ = '#'; michael@0: *ptr++ = '3'; michael@0: *ptr++ = '9'; michael@0: *ptr++ = ';'; michael@0: } else { michael@0: *ptr++ = aSourceBuffer[i]; michael@0: } michael@0: } michael@0: *ptr = 0; michael@0: } michael@0: michael@0: return resultBuffer; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------------------- michael@0: michael@0: const int EscapeChars[256] = michael@0: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ michael@0: { michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ michael@0: 0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, /* 2x !"#$%&'()*+,-./ */ michael@0: 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, /* 3x 0123456789:;<=>? */ michael@0: 1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 4x @ABCDEFGHIJKLMNO */ michael@0: 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023, /* 5x PQRSTUVWXYZ[\]^_ */ michael@0: 0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 6x `abcdefghijklmno */ michael@0: 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, /* 7x pqrstuvwxyz{|}~ */ michael@0: 0 /* 8x DEL */ michael@0: }; michael@0: michael@0: #define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (flags)) michael@0: michael@0: //---------------------------------------------------------------------------------------- michael@0: michael@0: /* returns an escaped string */ michael@0: michael@0: /* use the following flags to specify which michael@0: part of an URL you want to escape: michael@0: michael@0: esc_Scheme = 1 michael@0: esc_Username = 2 michael@0: esc_Password = 4 michael@0: esc_Host = 8 michael@0: esc_Directory = 16 michael@0: esc_FileBaseName = 32 michael@0: esc_FileExtension = 64 michael@0: esc_Param = 128 michael@0: esc_Query = 256 michael@0: esc_Ref = 512 michael@0: */ michael@0: michael@0: /* by default this function will not escape parts of a string michael@0: that already look escaped, which means it already includes michael@0: a valid hexcode. This is done to avoid multiple escapes of michael@0: a string. Use the following flags to force escaping of a michael@0: string: michael@0: michael@0: esc_Forced = 1024 michael@0: */ michael@0: michael@0: bool NS_EscapeURL(const char *part, michael@0: int32_t partLen, michael@0: uint32_t flags, michael@0: nsACString &result) michael@0: { michael@0: if (!part) { michael@0: NS_NOTREACHED("null pointer"); michael@0: return false; michael@0: } michael@0: michael@0: int i = 0; michael@0: static const char hexChars[] = "0123456789ABCDEF"; michael@0: if (partLen < 0) michael@0: partLen = strlen(part); michael@0: bool forced = !!(flags & esc_Forced); michael@0: bool ignoreNonAscii = !!(flags & esc_OnlyASCII); michael@0: bool ignoreAscii = !!(flags & esc_OnlyNonASCII); michael@0: bool writing = !!(flags & esc_AlwaysCopy); michael@0: bool colon = !!(flags & esc_Colon); michael@0: michael@0: const unsigned char* src = (const unsigned char *) part; michael@0: michael@0: char tempBuffer[100]; michael@0: unsigned int tempBufferPos = 0; michael@0: michael@0: bool previousIsNonASCII = false; michael@0: for (i = 0; i < partLen; i++) michael@0: { michael@0: unsigned char c = *src++; michael@0: michael@0: // if the char has not to be escaped or whatever follows % is michael@0: // a valid escaped string, just copy the char. michael@0: // michael@0: // Also the % will not be escaped until forced michael@0: // See bugzilla bug 61269 for details why we changed this michael@0: // michael@0: // And, we will not escape non-ascii characters if requested. michael@0: // On special request we will also escape the colon even when michael@0: // not covered by the matrix. michael@0: // ignoreAscii is not honored for control characters (C0 and DEL) michael@0: // michael@0: // And, we should escape the '|' character when it occurs after any michael@0: // non-ASCII character as it may be part of a multi-byte character. michael@0: // michael@0: // 0x20..0x7e are the valid ASCII characters. We also escape spaces michael@0: // (0x20) since they are not legal in URLs. michael@0: if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced) michael@0: || (c > 0x7f && ignoreNonAscii) michael@0: || (c > 0x20 && c < 0x7f && ignoreAscii)) michael@0: && !(c == ':' && colon) michael@0: && !(previousIsNonASCII && c == '|' && !ignoreNonAscii)) michael@0: { michael@0: if (writing) michael@0: tempBuffer[tempBufferPos++] = c; michael@0: } michael@0: else /* do the escape magic */ michael@0: { michael@0: if (!writing) michael@0: { michael@0: result.Append(part, i); michael@0: writing = true; michael@0: } michael@0: tempBuffer[tempBufferPos++] = HEX_ESCAPE; michael@0: tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */ michael@0: tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */ michael@0: } michael@0: michael@0: if (tempBufferPos >= sizeof(tempBuffer) - 4) michael@0: { michael@0: NS_ASSERTION(writing, "should be writing"); michael@0: tempBuffer[tempBufferPos] = '\0'; michael@0: result += tempBuffer; michael@0: tempBufferPos = 0; michael@0: } michael@0: michael@0: previousIsNonASCII = (c > 0x7f); michael@0: } michael@0: if (writing) { michael@0: tempBuffer[tempBufferPos] = '\0'; michael@0: result += tempBuffer; michael@0: } michael@0: return writing; michael@0: } michael@0: michael@0: #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1) michael@0: michael@0: bool NS_UnescapeURL(const char *str, int32_t len, uint32_t flags, nsACString &result) michael@0: { michael@0: if (!str) { michael@0: NS_NOTREACHED("null pointer"); michael@0: return false; michael@0: } michael@0: michael@0: if (len < 0) michael@0: len = strlen(str); michael@0: michael@0: bool ignoreNonAscii = !!(flags & esc_OnlyASCII); michael@0: bool ignoreAscii = !!(flags & esc_OnlyNonASCII); michael@0: bool writing = !!(flags & esc_AlwaysCopy); michael@0: bool skipControl = !!(flags & esc_SkipControl); michael@0: michael@0: static const char hexChars[] = "0123456789ABCDEFabcdef"; michael@0: michael@0: const char *last = str; michael@0: const char *p = str; michael@0: michael@0: for (int i=0; i= '8' && !ignoreNonAscii)) && michael@0: !(skipControl && michael@0: (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) { michael@0: //printf("- p1=%c p2=%c\n", *p1, *p2); michael@0: writing = true; michael@0: if (p > last) { michael@0: //printf("- p=%p, last=%p\n", p, last); michael@0: result.Append(last, p - last); michael@0: last = p; michael@0: } michael@0: char u = (UNHEX(*p1) << 4) + UNHEX(*p2); michael@0: //printf("- u=%c\n", u); michael@0: result.Append(u); michael@0: i += 2; michael@0: p += 2; michael@0: last += 3; michael@0: } michael@0: } michael@0: } michael@0: if (writing && last < str + len) michael@0: result.Append(last, str + len - last); michael@0: michael@0: return writing; michael@0: }