Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsISO2022KRToUnicode.h"
6 #include "nsUCSupport.h"
7 #include "nsICharsetConverterManager.h"
8 #include "nsServiceManagerUtils.h"
10 NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen)
11 {
12 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
14 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
15 const unsigned char* src =(unsigned char*) aSrc;
16 char16_t* destEnd = aDest + *aDestLen;
17 char16_t* dest = aDest;
18 while((src < srcEnd))
19 {
20 // if LF/CR, return to US-ASCII unconditionally.
21 if ( *src == 0x0a || *src == 0x0d )
22 mState = mState_Init;
24 switch(mState)
25 {
26 case mState_Init:
27 if(0x1b == *src) {
28 mLastLegalState = mState_ASCII;
29 mState = mState_ESC;
30 break;
31 }
32 mState = mState_ASCII;
33 // fall through
35 case mState_ASCII:
36 if(0x0e == *src) { // Shift-Out
37 mState = mState_KSX1001_1992;
38 mRunLength = 0;
39 }
40 else if(*src & 0x80) {
41 if (CHECK_OVERRUN(dest, destEnd, 1))
42 goto error1;
43 *dest++ = 0xFFFD;
44 }
45 else {
46 if (CHECK_OVERRUN(dest, destEnd, 1))
47 goto error1;
48 *dest++ = (char16_t) *src;
49 }
50 break;
52 case mState_ESC:
53 if('$' == *src) {
54 mState = mState_ESC_24;
55 }
56 else {
57 if (CHECK_OVERRUN(dest, destEnd, 2))
58 goto error1;
59 *dest++ = (char16_t) 0x1b;
60 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
61 mState = mLastLegalState;
62 }
63 break;
65 case mState_ESC_24: // ESC $
66 if(')' == *src) {
67 mState = mState_ESC_24_29;
68 }
69 else {
70 if (CHECK_OVERRUN(dest, destEnd, 3))
71 goto error1;
72 *dest++ = (char16_t) 0x1b;
73 *dest++ = (char16_t) '$';
74 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
75 mState = mLastLegalState;
76 }
77 break;
79 case mState_ESC_24_29: // ESC $ )
80 mState = mLastLegalState;
81 if('C' == *src) {
82 mState = mState_ASCII;
83 mRunLength = 0;
84 }
85 else {
86 if (CHECK_OVERRUN(dest, destEnd, 4))
87 goto error1;
88 *dest++ = (char16_t) 0x1b;
89 *dest++ = (char16_t) '$';
90 *dest++ = (char16_t) ')';
91 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
92 mState = mLastLegalState;
93 }
94 break;
96 case mState_KSX1001_1992:
97 if (0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f) {
98 mData = (uint8_t) *src;
99 mState = mState_KSX1001_1992_2ndbyte;
100 }
101 else if (0x0f == *src) { // Shift-In (SI)
102 mState = mState_ASCII;
103 if (mRunLength == 0) {
104 if (CHECK_OVERRUN(dest, destEnd, 1))
105 goto error1;
106 *dest++ = 0xFFFD;
107 }
108 mRunLength = 0;
109 }
110 else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) {
111 // Allow space and tab between SO and SI (i.e. in Hangul segment)
112 if (CHECK_OVERRUN(dest, destEnd, 1))
113 goto error1;
114 mState = mState_KSX1001_1992;
115 *dest++ = (char16_t) *src;
116 ++mRunLength;
117 }
118 else { // Everything else is invalid.
119 if (CHECK_OVERRUN(dest, destEnd, 1))
120 goto error1;
121 *dest++ = 0xFFFD;
122 }
123 break;
125 case mState_KSX1001_1992_2ndbyte:
126 if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f ) {
127 if (!mEUCKRDecoder) {
128 // creating a delegate converter (EUC-KR)
129 nsresult rv;
130 nsCOMPtr<nsICharsetConverterManager> ccm =
131 do_GetService(kCharsetConverterManagerCID, &rv);
132 if (NS_SUCCEEDED(rv)) {
133 rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
134 }
135 }
137 if (!mEUCKRDecoder) {// failed creating a delegate converter
138 *dest++ = 0xFFFD;
139 }
140 else {
141 if (CHECK_OVERRUN(dest, destEnd, 1))
142 goto error1;
143 unsigned char ksx[2];
144 char16_t uni;
145 int32_t ksxLen = 2, uniLen = 1;
146 // mData is the original 1st byte.
147 // *src is the present 2nd byte.
148 // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
149 ksx[0] = mData | 0x80;
150 ksx[1] = *src | 0x80;
151 // Convert EUC-KR to unicode.
152 mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
153 *dest++ = uni;
154 ++mRunLength;
155 }
156 mState = mState_KSX1001_1992;
157 }
158 else { // Invalid
159 if ( 0x0f == *src ) { // Shift-In (SI)
160 mState = mState_ASCII;
161 }
162 else {
163 mState = mState_KSX1001_1992;
164 }
165 if (CHECK_OVERRUN(dest, destEnd, 1))
166 goto error1;
167 *dest++ = 0xFFFD;
168 }
169 break;
171 case mState_ERROR:
172 mState = mLastLegalState;
173 if (CHECK_OVERRUN(dest, destEnd, 1))
174 goto error1;
175 *dest++ = 0xFFFD;
176 break;
178 } // switch
179 src++;
180 }
181 *aDestLen = dest - aDest;
182 return NS_OK;
184 error1:
185 *aDestLen = dest-aDest;
186 *aSrcLen = src-(unsigned char*)aSrc;
187 return NS_OK_UDEC_MOREOUTPUT;
188 }