|
/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
5 |
|
/**
  Description: Currently only functions to enhance plain text with HTML tags.
  See mozITXTToHTMLConv. Stream conversion is defunct.
*/
|
9 |
|
10 #ifndef _mozTXTToHTMLConv_h__ |
|
11 #define _mozTXTToHTMLConv_h__ |
|
12 |
|
13 #include "mozITXTToHTMLConv.h" |
|
14 #include "nsString.h" |
|
15 #include "nsCOMPtr.h" |
|
16 |
|
17 class nsIIOService; |
|
18 |
|
19 class mozTXTToHTMLConv : public mozITXTToHTMLConv |
|
20 { |
|
21 |
|
22 |
|
23 ////////////////////////////////////////////////////////// |
|
24 public: |
|
25 ////////////////////////////////////////////////////////// |
|
26 |
|
27 mozTXTToHTMLConv(); |
|
28 virtual ~mozTXTToHTMLConv(); |
|
29 NS_DECL_ISUPPORTS |
|
30 |
|
31 NS_DECL_MOZITXTTOHTMLCONV |
|
32 NS_DECL_NSIREQUESTOBSERVER |
|
33 NS_DECL_NSISTREAMLISTENER |
|
34 NS_DECL_NSISTREAMCONVERTER |
|
35 |
|
36 /** |
|
37 see mozITXTToHTMLConv::ScanTXT |
|
38 */ |
|
39 void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString); |
|
40 |
|
41 /** |
|
42 see mozITXTToHTMLConv::ScanHTML. We will modify aInString potentially... |
|
43 */ |
|
44 void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString); |
|
45 |
|
46 /** |
|
47 see mozITXTToHTMLConv::CiteLevelTXT |
|
48 */ |
|
49 int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart); |
|
50 |
|
51 |
|
52 ////////////////////////////////////////////////////////// |
|
53 protected: |
|
54 ////////////////////////////////////////////////////////// |
|
55 nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service... |
|
56 /** |
|
57 Completes<ul> |
|
58 <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org" |
|
59 <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org" |
|
60 <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://www.mozilla.org" |
|
61 </ul> |
|
62 It does no check, if the resulting URL is valid. |
|
63 @param text (in): abbreviated URL |
|
64 @param pos (in): position of "@" (case 1) or first "." (case 2 and 3) |
|
65 @return Completed URL at success and empty string at failure |
|
66 */ |
|
67 void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, |
|
68 const uint32_t pos, nsString& aOutString); |
|
69 |
|
70 |
|
71 ////////////////////////////////////////////////////////// |
|
72 private: |
|
73 ////////////////////////////////////////////////////////// |
|
74 |
|
75 enum LIMTYPE |
|
76 { |
|
77 LT_IGNORE, // limitation not checked |
|
78 LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok. |
|
79 LT_ALPHA, // alpha char |
|
80 LT_DIGIT |
|
81 }; |
|
82 |
|
83 /** |
|
84 @param text (in): the string to search through.<p> |
|
85 If before = IGNORE,<br> |
|
86 rep is compared starting at 1. char of text (text[0]),<br> |
|
87 else starting at 2. char of text (text[1]). |
|
88 Chars after "after"-delimiter are ignored. |
|
89 @param rep (in): the string to look for |
|
90 @param aRepLen (in): the number of bytes in the string to look for |
|
91 @param before (in): limitation before rep |
|
92 @param after (in): limitation after rep |
|
93 @return true, if rep is found and limitation spec is met or rep is empty |
|
94 */ |
|
95 bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength, |
|
96 const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); |
|
97 |
|
98 /** |
|
99 @param see ItMatchesDelimited |
|
100 @return Number of ItMatchesDelimited in text |
|
101 */ |
|
102 uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, |
|
103 const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); |
|
104 |
|
105 /** |
|
106 Currently only changes "<", ">" and "&". All others stay as they are.<p> |
|
107 "Char" in function name to avoid side effects with nsString(ch) |
|
108 constructors. |
|
109 @param ch (in) |
|
110 @param aStringToAppendto (out) - the string to append the escaped |
|
111 string to. |
|
112 @param inAttribute (in) - will escape quotes, too (which is |
|
113 only needed for attribute values) |
|
114 */ |
|
115 void EscapeChar(const char16_t ch, nsString& aStringToAppendto, |
|
116 bool inAttribute); |
|
117 |
|
118 /** |
|
119 See EscapeChar. Escapes the string in place. |
|
120 */ |
|
121 void EscapeStr(nsString& aInString, bool inAttribute); |
|
122 |
|
123 /** |
|
124 Currently only reverts "<", ">" and "&". All others stay as they are.<p> |
|
125 @param aInString (in) HTML string |
|
126 @param aStartPos (in) start index into the buffer |
|
127 @param aLength (in) length of the buffer |
|
128 @param aOutString (out) unescaped buffer |
|
129 */ |
|
130 void UnescapeStr(const char16_t * aInString, int32_t aStartPos, |
|
131 int32_t aLength, nsString& aOutString); |
|
132 |
|
133 /** |
|
134 <em>Note</em>: I use different strategies to pass context between the |
|
135 functions (full text and pos vs. cutted text and col0, glphyTextLen vs. |
|
136 replaceBefore/-After). It makes some sense, but is hard to understand |
|
137 (maintain) :-(. |
|
138 */ |
|
139 |
|
140 /** |
|
141 <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars |
|
142 in text should be replaced by outputHTML.</p> |
|
143 <p><em>Note:</em> This function should be able to process a URL on multiple |
|
144 lines, but currently, ScanForURLs is called for every line, so it can't.</p> |
|
145 @param text (in): includes possibly a URL |
|
146 @param pos (in): position in text, where either ":", "." or "@" are found |
|
147 @param whathasbeendone (in): What the calling ScanTXT did/has to do with the |
|
148 (not-linkified) text, i.e. usually the "whattodo" parameter. |
|
149 (Needed to calculate replaceBefore.) NOT what will be done with |
|
150 the content of the link. |
|
151 @param outputHTML (out): URL with HTML-a tag |
|
152 @param replaceBefore (out): Number of chars of URL before pos |
|
153 @param replaceAfter (out): Number of chars of URL after pos |
|
154 @return URL found |
|
155 */ |
|
156 bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, |
|
157 const uint32_t whathasbeendone, |
|
158 nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter); |
|
159 |
|
160 enum modetype { |
|
161 unknown, |
|
162 RFC1738, /* Check, if RFC1738, APPENDIX compliant, |
|
163 like "<URL:http://www.mozilla.org>". */ |
|
164 RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like |
|
165 "<http://www.mozilla.org>") (without "URL:") or |
|
166 quotation marks(like ""http://www.mozilla.org""). |
|
167 Also allow email addresses without scheme, |
|
168 e.g. "<mozilla@bucksch.org>" */ |
|
169 freetext, /* assume heading scheme |
|
170 with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:" |
|
171 (see RFC2396, Section 3.1). |
|
172 Certain characters (see code) or any whitespace |
|
173 (including linebreaks) end the URL. |
|
174 Other certain (punctation) characters (see code) |
|
175 at the end are stripped off. */ |
|
176 abbreviated /* Similar to freetext, but without scheme, e.g. |
|
177 "www.mozilla.org", "ftp.mozilla.org" and |
|
178 "mozilla@bucksch.org". */ |
|
179 /* RFC1738 and RFC2396E type URLs may use multiple lines, |
|
180 whitespace is stripped. Special characters like ")" stay intact.*/ |
|
181 }; |
|
182 |
|
183 /** |
|
184 * @param text (in), pos (in): see FindURL |
|
185 * @param check (in): Start must be conform with this mode |
|
186 * @param start (out): Position in text, where URL (including brackets or |
|
187 * similar) starts |
|
188 * @return |check|-conform start has been found |
|
189 */ |
|
190 bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos, |
|
191 const modetype check, uint32_t& start); |
|
192 |
|
193 /** |
|
194 * @param text (in), pos (in): see FindURL |
|
195 * @param check (in): End must be conform with this mode |
|
196 * @param start (in): see FindURLStart |
|
197 * @param end (out): Similar to |start| param of FindURLStart |
|
198 * @return |check|-conform end has been found |
|
199 */ |
|
200 bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, |
|
201 const modetype check, const uint32_t start, uint32_t& end); |
|
202 |
|
203 /** |
|
204 * @param text (in), pos (in), whathasbeendone (in): see FindURL |
|
205 * @param check (in): Current mode |
|
206 * @param start (in), end (in): see FindURLEnd |
|
207 * @param txtURL (out): Guessed (raw) URL. |
|
208 * Without whitespace, but not completed. |
|
209 * @param desc (out): Link as shown to the user, but already escaped. |
|
210 * Should be placed between the <a> and </a> tags. |
|
211 * @param replaceBefore(out), replaceAfter (out): see FindURL |
|
212 */ |
|
213 void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, |
|
214 const uint32_t pos, const uint32_t whathasbeendone, |
|
215 const modetype check, const uint32_t start, const uint32_t end, |
|
216 nsString& txtURL, nsString& desc, |
|
217 int32_t& replaceBefore, int32_t& replaceAfter); |
|
218 |
|
219 /** |
|
220 * @param txtURL (in), desc (in): see CalculateURLBoundaries |
|
221 * @param outputHTML (out): see FindURL |
|
222 * @return A valid URL could be found (and creation of HTML successful) |
|
223 */ |
|
224 bool CheckURLAndCreateHTML( |
|
225 const nsString& txtURL, const nsString& desc, const modetype mode, |
|
226 nsString& outputHTML); |
|
227 |
|
228 /** |
|
229 @param text (in): line of text possibly with tagTXT.<p> |
|
230 if col0 is true, |
|
231 starting with tagTXT<br> |
|
232 else |
|
233 starting one char before tagTXT |
|
234 @param col0 (in): tagTXT is on the beginning of the line (or paragraph). |
|
235 open must be 0 then. |
|
236 @param tagTXT (in): Tag in plaintext to search for, e.g. "*" |
|
237 @param aTagTxtLen (in): length of tagTXT. |
|
238 @param tagHTML (in): HTML-Tag to replace tagTXT with, |
|
239 without "<" and ">", e.g. "strong" |
|
240 @param attributeHTML (in): HTML-attribute to add to opening tagHTML, |
|
241 e.g. "class=txt_star" |
|
242 @param aOutString: string to APPEND the converted html into |
|
243 @param open (in/out): Number of currently open tags of type tagHTML |
|
244 @return Conversion succeeded |
|
245 */ |
|
246 bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, |
|
247 const char16_t* tagTXT, |
|
248 int32_t aTagTxtLen, |
|
249 const char* tagHTML, const char* attributeHTML, |
|
250 nsString& aOutputString, uint32_t& openTags); |
|
251 |
|
252 /** |
|
253 @param text (in), col0 (in): see GlyphHit |
|
254 @param tagTXT (in): Smily, see also StructPhraseHit |
|
255 @param imageName (in): the basename of the file that contains the image for this smilie |
|
256 @param outputHTML (out): new string containing the html for the smily |
|
257 @param glyphTextLen (out): see GlyphHit |
|
258 */ |
|
259 bool |
|
260 SmilyHit(const char16_t * aInString, int32_t aLength, bool col0, |
|
261 const char* tagTXT, const char* imageName, |
|
262 nsString& outputHTML, int32_t& glyphTextLen); |
|
263 |
|
264 /** |
|
265 Checks, if we can replace some chars at the start of line with prettier HTML |
|
266 code.<p> |
|
267 If success is reported, replace the first glyphTextLen chars with outputHTML |
|
268 |
|
269 @param text (in): line of text possibly with Glyph.<p> |
|
270 If col0 is true, |
|
271 starting with Glyph <br><!-- (br not part of text) --> |
|
272 else |
|
273 starting one char before Glyph |
|
274 @param col0 (in): text starts at the beginning of the line (or paragraph) |
|
275 @param aOutString (out): APPENDS html for the glyph to this string |
|
276 @param glyphTextLen (out): Length of original text to replace |
|
277 @return see StructPhraseHit |
|
278 */ |
|
279 bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, |
|
280 nsString& aOutString, int32_t& glyphTextLen); |
|
281 |
|
282 /** |
|
283 Check if a given url should be linkified. |
|
284 @param aURL (in): url to be checked on. |
|
285 */ |
|
286 bool ShouldLinkify(const nsCString& aURL); |
|
287 }; |
|
288 |
|
// Mode-iteration bounds, needed (only) by mozTXTToHTMLConv::FindURL.
// Kept as namespace-scope constants rather than class members:
// it's said that Win32 and Mac don't like static const members.

// Equals the numeric value of the last mozTXTToHTMLConv::modetype
// enumerator (abbreviated == 4).
const int32_t mozTXTToHTMLConv_lastMode = 4;

// Number of modes, ditto; |unknown| is not counted
// (RFC1738, RFC2396E, freetext, abbreviated).
const int32_t mozTXTToHTMLConv_numberOfModes = 4;
|
293 |
|
294 #endif |