netwerk/streamconv/converters/mozTXTToHTMLConv.h

branch
TOR_BUG_9701
changeset 9
a63d609f5ebe
equal deleted inserted replaced
-1:000000000000 0:16abb6e6d28f
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 /**
7 Description: Currently provides only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct.
8 */
9
10 #ifndef _mozTXTToHTMLConv_h__
11 #define _mozTXTToHTMLConv_h__
12
13 #include "mozITXTToHTMLConv.h"
14 #include "nsString.h"
15 #include "nsCOMPtr.h"
16
17 class nsIIOService;
18
19 class mozTXTToHTMLConv : public mozITXTToHTMLConv
20 {
21 // Enhances plain text with HTML markup; see mozITXTToHTMLConv for the interface contract.
22 // Declares the public scanning entry points plus private helpers for URL detection, structured phrases and glyphs/smileys.
23 //////////////////////////////////////////////////////////
24 public:
25 //////////////////////////////////////////////////////////
26
27 mozTXTToHTMLConv();
28 virtual ~mozTXTToHTMLConv();
29 NS_DECL_ISUPPORTS
30 // NOTE(review): the NS_DECL_* macros are defined outside this file; presumably each expands to the method declarations of the interface it names.
31 NS_DECL_MOZITXTTOHTMLCONV
32 NS_DECL_NSIREQUESTOBSERVER
33 NS_DECL_NSISTREAMLISTENER
34 NS_DECL_NSISTREAMCONVERTER
35
36 /**
37 See mozITXTToHTMLConv::ScanTXT. Takes the input as a char16_t buffer (aInString) plus its length (aInStringLength); aOutString is the output string.
38 */
39 void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString);
40
41 /**
42 See mozITXTToHTMLConv::ScanHTML. Note: aInString may be modified by this call.
43 */
44 void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString);
45
46 /**
47 See mozITXTToHTMLConv::CiteLevelTXT.
48 */
49 int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart);
50
51
52 //////////////////////////////////////////////////////////
53 protected:
54 //////////////////////////////////////////////////////////
55 nsCOMPtr<nsIIOService> mIOService; // cached netwerk IO service (cached for performance reasons)
56 /**
57 Completes an abbreviated URL:<ul>
58 <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
59 <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org"
60 <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org"
61 </ul>
62 It does not check whether the resulting URL is valid.
63 @param aInString (in), aInLength (in): abbreviated URL and its length
64 @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
65 @param aOutString (out): completed URL on success, empty string on failure (the function itself returns nothing)
66 */
67 void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength,
68 const uint32_t pos, nsString& aOutString);
69
70
71 //////////////////////////////////////////////////////////
72 private:
73 //////////////////////////////////////////////////////////
74
75 enum LIMTYPE
76 {
77 LT_IGNORE, // limitation not checked
78 LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok.
79 LT_ALPHA, // alpha char
80 LT_DIGIT // NOTE(review): presumably "digit char", by analogy with LT_ALPHA; confirm in the .cpp
81 };
82
83 /**
84 @param aInString (in), aInLength (in): the string to search through and its length.<p>
85 If before = LT_IGNORE,<br>
86 rep is compared starting at the 1st char of the text (aInString[0]),<br>
87 else starting at the 2nd char of the text (aInString[1]).
88 Chars after the "after"-delimiter are ignored.
89 @param rep (in): the string to look for
90 @param aRepLen (in): the length of rep (historically documented as "bytes"; rep is char16_t, so presumably code units - TODO confirm)
91 @param before (in): limitation before rep
92 @param after (in): limitation after rep
93 @return true, if rep is found and the limitation spec is met, or if rep is empty
94 */
95 bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength,
96 const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
97
98 /**
99 Parameters: see ItMatchesDelimited.
100 @return Number of ItMatchesDelimited matches in the text
101 */
102 uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
103 const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
104
105 /**
106 Currently only changes "<", ">" and "&". All others stay as they are.<p>
107 "Char" is in the function name to avoid side effects with nsString(ch)
108 constructors.
109 @param ch (in) - the character to escape
110 @param aStringToAppendto (out) - the string to append the escaped
111 string to.
112 @param inAttribute (in) - will escape quotes, too (which is
113 only needed for attribute values)
114 */
115 void EscapeChar(const char16_t ch, nsString& aStringToAppendto,
116 bool inAttribute);
117
118 /**
119 See EscapeChar. Escapes the string aInString in place.
120 */
121 void EscapeStr(nsString& aInString, bool inAttribute);
122
123 /**
124 Currently only reverts "<", ">" and "&". All others stay as they are.<p>
125 @param aInString (in) HTML string
126 @param aStartPos (in) start index into the buffer
127 @param aLength (in) length of the buffer
128 @param aOutString (out) unescaped buffer
129 */
130 void UnescapeStr(const char16_t * aInString, int32_t aStartPos,
131 int32_t aLength, nsString& aOutString);
132
133 /**
134 <em>Note</em>: Different strategies are used to pass context between the
135 functions (full text and pos vs. cut text and col0, glyphTextLen vs.
136 replaceBefore/-After). It makes some sense, but is hard to understand
137 (and maintain) :-(.
138 */
139
140 /**
141 <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
142 in the text should be replaced by outputHTML.</p>
143 <p><em>Note:</em> This function should be able to process a URL on multiple
144 lines, but currently, ScanForURLs is called for every line, so it can't.</p>
145 @param aInString (in), aInLength (in): text that possibly includes a URL, and its length
146 @param pos (in): position in the text where either ":", "." or "@" is found
147 @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
148 (not-linkified) text, i.e. usually the "whattodo" parameter.
149 (Needed to calculate replaceBefore.) NOT what will be done with
150 the content of the link.
151 @param outputHTML (out): URL with HTML-a tag
152 @param replaceBefore (out): Number of chars of URL before pos
153 @param replaceAfter (out): Number of chars of URL after pos
154 @return true if a URL was found
155 */
156 bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
157 const uint32_t whathasbeendone,
158 nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter);
159
160 enum modetype {
161 unknown,
162 RFC1738, /* Check whether the text is RFC1738, APPENDIX compliant,
163 like "<URL:http://www.mozilla.org>". */
164 RFC2396E, /* RFC2396, APPENDIX E allows angle brackets (like
165 "<http://www.mozilla.org>") (without "URL:") or
166 quotation marks (like ""http://www.mozilla.org"").
167 Also allow email addresses without scheme,
168 e.g. "<mozilla@bucksch.org>" */
169 freetext, /* assume a leading scheme
170 with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
171 (see RFC2396, Section 3.1).
172 Certain characters (see code) or any whitespace
173 (including linebreaks) end the URL.
174 Certain other (punctuation) characters (see code)
175 at the end are stripped off. */
176 abbreviated /* Similar to freetext, but without scheme, e.g.
177 "www.mozilla.org", "ftp.mozilla.org" and
178 "mozilla@bucksch.org". */
179 /* RFC1738 and RFC2396E type URLs may span multiple lines;
180 whitespace is stripped. Special characters like ")" stay intact.*/
181 };
182
183 /**
184 * @param aInString (in), aInLength (in), pos (in): see FindURL
185 * @param check (in): Start must conform to this mode
186 * @param start (out): Position in the text where the URL (including brackets or
187 * similar) starts
188 * @return true if a |check|-conforming start has been found
189 */
190 bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
191 const modetype check, uint32_t& start);
192
193 /**
194 * @param aInString (in), aInStringLength (in), pos (in): see FindURL
195 * @param check (in): End must conform to this mode
196 * @param start (in): see FindURLStart
197 * @param end (out): Similar to |start| param of FindURLStart
198 * @return true if a |check|-conforming end has been found
199 */
200 bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
201 const modetype check, const uint32_t start, uint32_t& end);
202
203 /**
204 * @param aInString (in), aInStringLength (in), pos (in), whathasbeendone (in): see FindURL
205 * @param check (in): Current mode
206 * @param start (in), end (in): see FindURLEnd
207 * @param txtURL (out): Guessed (raw) URL.
208 * Without whitespace, but not completed.
209 * @param desc (out): Link as shown to the user, but already escaped.
210 * Should be placed between the <a> and </a> tags.
211 * @param replaceBefore (out), replaceAfter (out): see FindURL
212 */
213 void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength,
214 const uint32_t pos, const uint32_t whathasbeendone,
215 const modetype check, const uint32_t start, const uint32_t end,
216 nsString& txtURL, nsString& desc,
217 int32_t& replaceBefore, int32_t& replaceAfter);
218
219 /**
220 * @param txtURL (in), desc (in): see CalculateURLBoundaries
221 * @param mode (in): a modetype value (NOTE(review): presumably the mode in which the URL was detected - confirm). @param outputHTML (out): see FindURL
222 * @return true if a valid URL could be found (and creation of HTML was successful)
223 */
224 bool CheckURLAndCreateHTML(
225 const nsString& txtURL, const nsString& desc, const modetype mode,
226 nsString& outputHTML);
227
228 /**
229 @param aInString (in), aInStringLength (in): line of text possibly with tagTXT, and its length.<p>
230 if col0 is true,
231 starting with tagTXT<br>
232 else
233 starting one char before tagTXT
234 @param col0 (in): tagTXT is at the beginning of the line (or paragraph).
235 openTags must be 0 then.
236 @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
237 @param aTagTxtLen (in): length of tagTXT.
238 @param tagHTML (in): HTML-Tag to replace tagTXT with,
239 without "<" and ">", e.g. "strong"
240 @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
241 e.g. "class=txt_star"
242 @param aOutputString: string to APPEND the converted html into
243 @param openTags (in/out): Number of currently open tags of type tagHTML
244 @return Conversion succeeded
245 */
246 bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
247 const char16_t* tagTXT,
248 int32_t aTagTxtLen,
249 const char* tagHTML, const char* attributeHTML,
250 nsString& aOutputString, uint32_t& openTags);
251
252 /**
253 @param aInString (in), aLength (in), col0 (in): see GlyphHit
254 @param tagTXT (in): Smiley text, see also StructPhraseHit
255 @param imageName (in): the basename of the file that contains the image for this smiley
256 @param outputHTML (out): new string containing the html for the smiley
257 @param glyphTextLen (out): see GlyphHit. NOTE(review): the return value is undocumented; presumably as for GlyphHit - confirm.
258 */
259 bool
260 SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
261 const char* tagTXT, const char* imageName,
262 nsString& outputHTML, int32_t& glyphTextLen);
263
264 /**
265 Checks whether some chars at the start of the line can be replaced with prettier HTML
266 code.<p>
267 If success is reported, replace the first glyphTextLen chars with outputHTML
268
269 @param aInString (in), aInLength (in): line of text possibly with Glyph, and its length.<p>
270 If col0 is true,
271 starting with Glyph <br><!-- (br not part of text) -->
272 else
273 starting one char before Glyph
274 @param col0 (in): text starts at the beginning of the line (or paragraph)
275 @param aOutString (out): APPENDS html for the glyph to this string
276 @param glyphTextLen (out): Length of original text to replace
277 @return see StructPhraseHit
278 */
279 bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
280 nsString& aOutString, int32_t& glyphTextLen);
281
282 /**
283 Check if a given url should be linkified.
284 @param aURL (in): url to be checked. @return true if it should be linkified (inferred from the name; confirm in the .cpp)
285 */
286 bool ShouldLinkify(const nsCString& aURL);
287 };
288
289 // It's said that Win32 and Mac don't like static const members, hence these namespace-scope constants.
290 const int32_t mozTXTToHTMLConv_lastMode = 4;
291 // Needed (only) by mozTXTToHTMLConv::FindURL (refers to mozTXTToHTMLConv_lastMode above; 4 is the value of the last modetype enumerator, abbreviated)
292 const int32_t mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted
293
294 #endif

mercurial