intl/uconv/ucvko/nsISO2022KRToUnicode.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic when either private browsing mode
or the privacy.thirdparty.isolate preference is active, and implement it
in GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie callers to test
the condition and pass a nullptr when double-key logic is disabled.

michael@0 1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include "nsISO2022KRToUnicode.h"
michael@0 6 #include "nsUCSupport.h"
michael@0 7 #include "nsICharsetConverterManager.h"
michael@0 8 #include "nsServiceManagerUtils.h"
michael@0 9
michael@0 10 NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen)
michael@0 11 {
michael@0 12 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
michael@0 13
michael@0 14 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
michael@0 15 const unsigned char* src =(unsigned char*) aSrc;
michael@0 16 char16_t* destEnd = aDest + *aDestLen;
michael@0 17 char16_t* dest = aDest;
michael@0 18 while((src < srcEnd))
michael@0 19 {
michael@0 20 // if LF/CR, return to US-ASCII unconditionally.
michael@0 21 if ( *src == 0x0a || *src == 0x0d )
michael@0 22 mState = mState_Init;
michael@0 23
michael@0 24 switch(mState)
michael@0 25 {
michael@0 26 case mState_Init:
michael@0 27 if(0x1b == *src) {
michael@0 28 mLastLegalState = mState_ASCII;
michael@0 29 mState = mState_ESC;
michael@0 30 break;
michael@0 31 }
michael@0 32 mState = mState_ASCII;
michael@0 33 // fall through
michael@0 34
michael@0 35 case mState_ASCII:
michael@0 36 if(0x0e == *src) { // Shift-Out
michael@0 37 mState = mState_KSX1001_1992;
michael@0 38 mRunLength = 0;
michael@0 39 }
michael@0 40 else if(*src & 0x80) {
michael@0 41 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 42 goto error1;
michael@0 43 *dest++ = 0xFFFD;
michael@0 44 }
michael@0 45 else {
michael@0 46 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 47 goto error1;
michael@0 48 *dest++ = (char16_t) *src;
michael@0 49 }
michael@0 50 break;
michael@0 51
michael@0 52 case mState_ESC:
michael@0 53 if('$' == *src) {
michael@0 54 mState = mState_ESC_24;
michael@0 55 }
michael@0 56 else {
michael@0 57 if (CHECK_OVERRUN(dest, destEnd, 2))
michael@0 58 goto error1;
michael@0 59 *dest++ = (char16_t) 0x1b;
michael@0 60 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
michael@0 61 mState = mLastLegalState;
michael@0 62 }
michael@0 63 break;
michael@0 64
michael@0 65 case mState_ESC_24: // ESC $
michael@0 66 if(')' == *src) {
michael@0 67 mState = mState_ESC_24_29;
michael@0 68 }
michael@0 69 else {
michael@0 70 if (CHECK_OVERRUN(dest, destEnd, 3))
michael@0 71 goto error1;
michael@0 72 *dest++ = (char16_t) 0x1b;
michael@0 73 *dest++ = (char16_t) '$';
michael@0 74 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
michael@0 75 mState = mLastLegalState;
michael@0 76 }
michael@0 77 break;
michael@0 78
michael@0 79 case mState_ESC_24_29: // ESC $ )
michael@0 80 mState = mLastLegalState;
michael@0 81 if('C' == *src) {
michael@0 82 mState = mState_ASCII;
michael@0 83 mRunLength = 0;
michael@0 84 }
michael@0 85 else {
michael@0 86 if (CHECK_OVERRUN(dest, destEnd, 4))
michael@0 87 goto error1;
michael@0 88 *dest++ = (char16_t) 0x1b;
michael@0 89 *dest++ = (char16_t) '$';
michael@0 90 *dest++ = (char16_t) ')';
michael@0 91 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
michael@0 92 mState = mLastLegalState;
michael@0 93 }
michael@0 94 break;
michael@0 95
michael@0 96 case mState_KSX1001_1992:
michael@0 97 if (0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f) {
michael@0 98 mData = (uint8_t) *src;
michael@0 99 mState = mState_KSX1001_1992_2ndbyte;
michael@0 100 }
michael@0 101 else if (0x0f == *src) { // Shift-In (SI)
michael@0 102 mState = mState_ASCII;
michael@0 103 if (mRunLength == 0) {
michael@0 104 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 105 goto error1;
michael@0 106 *dest++ = 0xFFFD;
michael@0 107 }
michael@0 108 mRunLength = 0;
michael@0 109 }
michael@0 110 else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) {
michael@0 111 // Allow space and tab between SO and SI (i.e. in Hangul segment)
michael@0 112 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 113 goto error1;
michael@0 114 mState = mState_KSX1001_1992;
michael@0 115 *dest++ = (char16_t) *src;
michael@0 116 ++mRunLength;
michael@0 117 }
michael@0 118 else { // Everything else is invalid.
michael@0 119 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 120 goto error1;
michael@0 121 *dest++ = 0xFFFD;
michael@0 122 }
michael@0 123 break;
michael@0 124
michael@0 125 case mState_KSX1001_1992_2ndbyte:
michael@0 126 if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f ) {
michael@0 127 if (!mEUCKRDecoder) {
michael@0 128 // creating a delegate converter (EUC-KR)
michael@0 129 nsresult rv;
michael@0 130 nsCOMPtr<nsICharsetConverterManager> ccm =
michael@0 131 do_GetService(kCharsetConverterManagerCID, &rv);
michael@0 132 if (NS_SUCCEEDED(rv)) {
michael@0 133 rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
michael@0 134 }
michael@0 135 }
michael@0 136
michael@0 137 if (!mEUCKRDecoder) {// failed creating a delegate converter
michael@0 138 *dest++ = 0xFFFD;
michael@0 139 }
michael@0 140 else {
michael@0 141 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 142 goto error1;
michael@0 143 unsigned char ksx[2];
michael@0 144 char16_t uni;
michael@0 145 int32_t ksxLen = 2, uniLen = 1;
michael@0 146 // mData is the original 1st byte.
michael@0 147 // *src is the present 2nd byte.
michael@0 148 // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
michael@0 149 ksx[0] = mData | 0x80;
michael@0 150 ksx[1] = *src | 0x80;
michael@0 151 // Convert EUC-KR to unicode.
michael@0 152 mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
michael@0 153 *dest++ = uni;
michael@0 154 ++mRunLength;
michael@0 155 }
michael@0 156 mState = mState_KSX1001_1992;
michael@0 157 }
michael@0 158 else { // Invalid
michael@0 159 if ( 0x0f == *src ) { // Shift-In (SI)
michael@0 160 mState = mState_ASCII;
michael@0 161 }
michael@0 162 else {
michael@0 163 mState = mState_KSX1001_1992;
michael@0 164 }
michael@0 165 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 166 goto error1;
michael@0 167 *dest++ = 0xFFFD;
michael@0 168 }
michael@0 169 break;
michael@0 170
michael@0 171 case mState_ERROR:
michael@0 172 mState = mLastLegalState;
michael@0 173 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 174 goto error1;
michael@0 175 *dest++ = 0xFFFD;
michael@0 176 break;
michael@0 177
michael@0 178 } // switch
michael@0 179 src++;
michael@0 180 }
michael@0 181 *aDestLen = dest - aDest;
michael@0 182 return NS_OK;
michael@0 183
michael@0 184 error1:
michael@0 185 *aDestLen = dest-aDest;
michael@0 186 *aSrcLen = src-(unsigned char*)aSrc;
michael@0 187 return NS_OK_UDEC_MOREOUTPUT;
michael@0 188 }
michael@0 189

mercurial