intl/uconv/ucvko/nsISO2022KRToUnicode.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie callers to test
the condition and pass a nullptr when double-key logic is disabled.

     1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "nsISO2022KRToUnicode.h"
     6 #include "nsUCSupport.h"
     7 #include "nsICharsetConverterManager.h"
     8 #include "nsServiceManagerUtils.h"
    10 NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen)
    11 {
    12   static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
    14   const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
    15   const unsigned char* src =(unsigned char*) aSrc;
    16   char16_t* destEnd = aDest + *aDestLen;
    17   char16_t* dest = aDest;
    18   while((src < srcEnd))
    19   {
    20     // if LF/CR, return to US-ASCII unconditionally.
    21     if ( *src == 0x0a || *src == 0x0d )
    22       mState = mState_Init;
    24     switch(mState)
    25     {
    26       case mState_Init:
    27         if(0x1b == *src) {
    28           mLastLegalState = mState_ASCII;
    29           mState = mState_ESC;
    30           break;
    31         }
    32         mState = mState_ASCII;
    33         // fall through
    35       case mState_ASCII:
    36         if(0x0e == *src) { // Shift-Out 
    37           mState = mState_KSX1001_1992;
    38           mRunLength = 0;
    39         } 
    40         else if(*src & 0x80) {
    41           if (CHECK_OVERRUN(dest, destEnd, 1))
    42             goto error1;
    43           *dest++ = 0xFFFD;
    44         } 
    45         else {
    46           if (CHECK_OVERRUN(dest, destEnd, 1))
    47             goto error1;
    48           *dest++ = (char16_t) *src;
    49         }
    50         break;
    52       case mState_ESC:
    53         if('$' == *src) {
    54           mState = mState_ESC_24;
    55         } 
    56         else  {
    57           if (CHECK_OVERRUN(dest, destEnd, 2))
    58             goto error1;
    59           *dest++ = (char16_t) 0x1b;
    60           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
    61           mState =  mLastLegalState;
    62         }
    63         break;
    65       case mState_ESC_24: // ESC $
    66         if(')' == *src) {
    67           mState = mState_ESC_24_29;
    68         } 
    69         else  {
    70           if (CHECK_OVERRUN(dest, destEnd, 3))
    71             goto error1;
    72           *dest++ = (char16_t) 0x1b;
    73           *dest++ = (char16_t) '$';
    74           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
    75           mState = mLastLegalState;
    76         }
    77         break;
    79       case mState_ESC_24_29: // ESC $ )
    80         mState = mLastLegalState;
    81         if('C' == *src) {
    82           mState = mState_ASCII;
    83           mRunLength = 0;
    84         } 
    85         else  {
    86           if (CHECK_OVERRUN(dest, destEnd, 4))
    87             goto error1;
    88           *dest++ = (char16_t) 0x1b;
    89           *dest++ = (char16_t) '$';
    90           *dest++ = (char16_t) ')';
    91           *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
    92           mState = mLastLegalState;
    93         }
    94         break;
    96       case mState_KSX1001_1992:
    97         if (0x20 < (uint8_t) *src  && (uint8_t) *src < 0x7f) {
    98           mData = (uint8_t) *src;
    99           mState = mState_KSX1001_1992_2ndbyte;
   100         } 
   101         else if (0x0f == *src) { // Shift-In (SI)
   102           mState = mState_ASCII;
   103           if (mRunLength == 0) {
   104             if (CHECK_OVERRUN(dest, destEnd, 1))
   105               goto error1;
   106             *dest++ = 0xFFFD;
   107           }
   108           mRunLength = 0;
   109         } 
   110         else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) {
   111           // Allow space and tab between SO and SI (i.e. in Hangul segment)
   112           if (CHECK_OVERRUN(dest, destEnd, 1))
   113             goto error1;
   114           mState = mState_KSX1001_1992;
   115           *dest++ = (char16_t) *src;
   116           ++mRunLength;
   117         } 
   118         else {         // Everything else is invalid.
   119           if (CHECK_OVERRUN(dest, destEnd, 1))
   120             goto error1;
   121           *dest++ = 0xFFFD;
   122         }
   123         break;
   125       case mState_KSX1001_1992_2ndbyte:
   126         if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f  ) {
   127           if (!mEUCKRDecoder) {
   128             // creating a delegate converter (EUC-KR)
   129             nsresult rv;
   130             nsCOMPtr<nsICharsetConverterManager> ccm = 
   131                   do_GetService(kCharsetConverterManagerCID, &rv);
   132             if (NS_SUCCEEDED(rv)) {
   133               rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
   134             }
   135           }
   137           if (!mEUCKRDecoder) {// failed creating a delegate converter
   138            *dest++ = 0xFFFD;
   139           } 
   140           else {              
   141             if (CHECK_OVERRUN(dest, destEnd, 1))
   142               goto error1;
   143             unsigned char ksx[2];
   144             char16_t uni;
   145             int32_t ksxLen = 2, uniLen = 1;
   146             // mData is the original 1st byte.
   147             // *src is the present 2nd byte.
   148             // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
   149             ksx[0] = mData | 0x80;
   150             ksx[1] = *src | 0x80;
   151             // Convert EUC-KR to unicode.
   152             mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
   153             *dest++ = uni;
   154             ++mRunLength;
   155           }
   156           mState = mState_KSX1001_1992;
   157         } 
   158         else {        // Invalid 
   159           if ( 0x0f == *src ) {   // Shift-In (SI)
   160             mState = mState_ASCII;
   161           } 
   162           else {
   163             mState = mState_KSX1001_1992;
   164           }
   165           if (CHECK_OVERRUN(dest, destEnd, 1))
   166             goto error1;
   167           *dest++ = 0xFFFD;
   168         }
   169         break;
   171       case mState_ERROR:
   172         mState = mLastLegalState;
   173         if (CHECK_OVERRUN(dest, destEnd, 1))
   174           goto error1;
   175         *dest++ = 0xFFFD;
   176         break;
   178     } // switch
   179     src++;
   180   }
   181   *aDestLen = dest - aDest;
   182   return NS_OK;
   184 error1:
   185   *aDestLen = dest-aDest;
   186   *aSrcLen = src-(unsigned char*)aSrc;
   187   return NS_OK_UDEC_MOREOUTPUT;
   188 }

mercurial