|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
|
2 * This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsTextFrameUtils.h" |
|
7 |
|
8 #include "nsUnicharUtils.h" |
|
9 #include "nsBidiUtils.h" |
|
10 #include "nsIContent.h" |
|
11 #include "nsStyleStruct.h" |
|
12 #include "nsTextFragment.h" |
|
13 #include <algorithm> |
|
14 |
|
15 static bool IsDiscardable(char16_t ch, uint32_t* aFlags) |
|
16 { |
|
17 // Unlike IS_DISCARDABLE, we don't discard \r. \r will be ignored by gfxTextRun |
|
18 // and discarding it would force us to copy text in many cases of preformatted |
|
19 // text containing \r\n. |
|
20 if (ch == CH_SHY) { |
|
21 *aFlags |= nsTextFrameUtils::TEXT_HAS_SHY; |
|
22 return true; |
|
23 } |
|
24 return IsBidiControl(ch); |
|
25 } |
|
26 |
|
27 static bool IsDiscardable(uint8_t ch, uint32_t* aFlags) |
|
28 { |
|
29 if (ch == CH_SHY) { |
|
30 *aFlags |= nsTextFrameUtils::TEXT_HAS_SHY; |
|
31 return true; |
|
32 } |
|
33 return false; |
|
34 } |
|
35 |
|
36 char16_t* |
|
37 nsTextFrameUtils::TransformText(const char16_t* aText, uint32_t aLength, |
|
38 char16_t* aOutput, |
|
39 CompressionMode aCompression, |
|
40 uint8_t* aIncomingFlags, |
|
41 gfxSkipChars* aSkipChars, |
|
42 uint32_t* aAnalysisFlags) |
|
43 { |
|
44 uint32_t flags = 0; |
|
45 char16_t* outputStart = aOutput; |
|
46 |
|
47 bool lastCharArabic = false; |
|
48 |
|
49 if (aCompression == COMPRESS_NONE || |
|
50 aCompression == DISCARD_NEWLINE) { |
|
51 // Skip discardables. |
|
52 uint32_t i; |
|
53 for (i = 0; i < aLength; ++i) { |
|
54 char16_t ch = *aText++; |
|
55 if (IsDiscardable(ch, &flags) || |
|
56 (ch == '\n' && aCompression == DISCARD_NEWLINE)) { |
|
57 aSkipChars->SkipChar(); |
|
58 } else { |
|
59 aSkipChars->KeepChar(); |
|
60 if (ch > ' ') { |
|
61 lastCharArabic = IS_ARABIC_CHAR(ch); |
|
62 } else if (ch == '\t') { |
|
63 flags |= TEXT_HAS_TAB; |
|
64 } |
|
65 *aOutput++ = ch; |
|
66 } |
|
67 } |
|
68 if (lastCharArabic) { |
|
69 *aIncomingFlags |= INCOMING_ARABICCHAR; |
|
70 } else { |
|
71 *aIncomingFlags &= ~INCOMING_ARABICCHAR; |
|
72 } |
|
73 *aIncomingFlags &= ~INCOMING_WHITESPACE; |
|
74 } else { |
|
75 bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0; |
|
76 uint32_t i; |
|
77 for (i = 0; i < aLength; ++i) { |
|
78 char16_t ch = *aText++; |
|
79 bool nowInWhitespace; |
|
80 if (ch == ' ' && |
|
81 (i + 1 >= aLength || |
|
82 !IsSpaceCombiningSequenceTail(aText, aLength - (i + 1)))) { |
|
83 nowInWhitespace = true; |
|
84 } else if (ch == '\n' && aCompression == COMPRESS_WHITESPACE_NEWLINE) { |
|
85 if (i > 0 && IS_CJ_CHAR(aText[-1]) && |
|
86 i + 1 < aLength && IS_CJ_CHAR(aText[1])) { |
|
87 // Discard newlines between CJK chars. |
|
88 // XXX this really requires more context to get right! |
|
89 aSkipChars->SkipChar(); |
|
90 continue; |
|
91 } |
|
92 nowInWhitespace = true; |
|
93 } else { |
|
94 nowInWhitespace = ch == '\t'; |
|
95 } |
|
96 |
|
97 if (!nowInWhitespace) { |
|
98 if (IsDiscardable(ch, &flags)) { |
|
99 aSkipChars->SkipChar(); |
|
100 nowInWhitespace = inWhitespace; |
|
101 } else { |
|
102 *aOutput++ = ch; |
|
103 aSkipChars->KeepChar(); |
|
104 lastCharArabic = IS_ARABIC_CHAR(ch); |
|
105 } |
|
106 } else { |
|
107 if (inWhitespace) { |
|
108 aSkipChars->SkipChar(); |
|
109 } else { |
|
110 if (ch != ' ') { |
|
111 flags |= TEXT_WAS_TRANSFORMED; |
|
112 } |
|
113 *aOutput++ = ' '; |
|
114 aSkipChars->KeepChar(); |
|
115 } |
|
116 } |
|
117 inWhitespace = nowInWhitespace; |
|
118 } |
|
119 if (lastCharArabic) { |
|
120 *aIncomingFlags |= INCOMING_ARABICCHAR; |
|
121 } else { |
|
122 *aIncomingFlags &= ~INCOMING_ARABICCHAR; |
|
123 } |
|
124 if (inWhitespace) { |
|
125 *aIncomingFlags |= INCOMING_WHITESPACE; |
|
126 } else { |
|
127 *aIncomingFlags &= ~INCOMING_WHITESPACE; |
|
128 } |
|
129 } |
|
130 |
|
131 if (outputStart + aLength != aOutput) { |
|
132 flags |= TEXT_WAS_TRANSFORMED; |
|
133 } |
|
134 *aAnalysisFlags = flags; |
|
135 return aOutput; |
|
136 } |
|
137 |
|
138 uint8_t* |
|
139 nsTextFrameUtils::TransformText(const uint8_t* aText, uint32_t aLength, |
|
140 uint8_t* aOutput, |
|
141 CompressionMode aCompression, |
|
142 uint8_t* aIncomingFlags, |
|
143 gfxSkipChars* aSkipChars, |
|
144 uint32_t* aAnalysisFlags) |
|
145 { |
|
146 uint32_t flags = 0; |
|
147 uint8_t* outputStart = aOutput; |
|
148 |
|
149 if (aCompression == COMPRESS_NONE || |
|
150 aCompression == DISCARD_NEWLINE) { |
|
151 // Skip discardables. |
|
152 uint32_t i; |
|
153 for (i = 0; i < aLength; ++i) { |
|
154 uint8_t ch = *aText++; |
|
155 if (IsDiscardable(ch, &flags) || |
|
156 (ch == '\n' && aCompression == DISCARD_NEWLINE)) { |
|
157 aSkipChars->SkipChar(); |
|
158 } else { |
|
159 aSkipChars->KeepChar(); |
|
160 if (ch == '\t') { |
|
161 flags |= TEXT_HAS_TAB; |
|
162 } |
|
163 *aOutput++ = ch; |
|
164 } |
|
165 } |
|
166 *aIncomingFlags &= ~(INCOMING_ARABICCHAR | INCOMING_WHITESPACE); |
|
167 } else { |
|
168 bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0; |
|
169 uint32_t i; |
|
170 for (i = 0; i < aLength; ++i) { |
|
171 uint8_t ch = *aText++; |
|
172 bool nowInWhitespace = ch == ' ' || ch == '\t' || |
|
173 (ch == '\n' && aCompression == COMPRESS_WHITESPACE_NEWLINE); |
|
174 if (!nowInWhitespace) { |
|
175 if (IsDiscardable(ch, &flags)) { |
|
176 aSkipChars->SkipChar(); |
|
177 nowInWhitespace = inWhitespace; |
|
178 } else { |
|
179 *aOutput++ = ch; |
|
180 aSkipChars->KeepChar(); |
|
181 } |
|
182 } else { |
|
183 if (inWhitespace) { |
|
184 aSkipChars->SkipChar(); |
|
185 } else { |
|
186 if (ch != ' ') { |
|
187 flags |= TEXT_WAS_TRANSFORMED; |
|
188 } |
|
189 *aOutput++ = ' '; |
|
190 aSkipChars->KeepChar(); |
|
191 } |
|
192 } |
|
193 inWhitespace = nowInWhitespace; |
|
194 } |
|
195 *aIncomingFlags &= ~INCOMING_ARABICCHAR; |
|
196 if (inWhitespace) { |
|
197 *aIncomingFlags |= INCOMING_WHITESPACE; |
|
198 } else { |
|
199 *aIncomingFlags &= ~INCOMING_WHITESPACE; |
|
200 } |
|
201 } |
|
202 |
|
203 if (outputStart + aLength != aOutput) { |
|
204 flags |= TEXT_WAS_TRANSFORMED; |
|
205 } |
|
206 *aAnalysisFlags = flags; |
|
207 return aOutput; |
|
208 } |
|
209 |
|
210 uint32_t |
|
211 nsTextFrameUtils::ComputeApproximateLengthWithWhitespaceCompression( |
|
212 nsIContent *aContent, const nsStyleText *aStyleText) |
|
213 { |
|
214 const nsTextFragment *frag = aContent->GetText(); |
|
215 // This is an approximation so we don't really need anything |
|
216 // too fancy here. |
|
217 uint32_t len; |
|
218 if (aStyleText->WhiteSpaceIsSignificant()) { |
|
219 len = frag->GetLength(); |
|
220 } else { |
|
221 bool is2b = frag->Is2b(); |
|
222 union { |
|
223 const char *s1b; |
|
224 const char16_t *s2b; |
|
225 } u; |
|
226 if (is2b) { |
|
227 u.s2b = frag->Get2b(); |
|
228 } else { |
|
229 u.s1b = frag->Get1b(); |
|
230 } |
|
231 bool prevWS = true; // more important to ignore blocks with |
|
232 // only whitespace than get inline boundaries |
|
233 // exactly right |
|
234 len = 0; |
|
235 for (uint32_t i = 0, i_end = frag->GetLength(); i < i_end; ++i) { |
|
236 char16_t c = is2b ? u.s2b[i] : u.s1b[i]; |
|
237 if (c == ' ' || c == '\n' || c == '\t' || c == '\r') { |
|
238 if (!prevWS) { |
|
239 ++len; |
|
240 } |
|
241 prevWS = true; |
|
242 } else { |
|
243 ++len; |
|
244 prevWS = false; |
|
245 } |
|
246 } |
|
247 } |
|
248 return len; |
|
249 } |
|
250 |
|
251 bool nsSkipCharsRunIterator::NextRun() { |
|
252 do { |
|
253 if (mRunLength) { |
|
254 mIterator.AdvanceOriginal(mRunLength); |
|
255 NS_ASSERTION(mRunLength > 0, "No characters in run (initial length too large?)"); |
|
256 if (!mSkipped || mLengthIncludesSkipped) { |
|
257 mRemainingLength -= mRunLength; |
|
258 } |
|
259 } |
|
260 if (!mRemainingLength) |
|
261 return false; |
|
262 int32_t length; |
|
263 mSkipped = mIterator.IsOriginalCharSkipped(&length); |
|
264 mRunLength = std::min(length, mRemainingLength); |
|
265 } while (!mVisitSkipped && mSkipped); |
|
266 |
|
267 return true; |
|
268 } |