netwerk/streamconv/converters/mozTXTToHTMLConv.h

Thu, 15 Jan 2015 15:55:04 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:55:04 +0100
branch
TOR_BUG_9701
changeset 9
a63d609f5ebe
permissions
-rw-r--r--

Back out 97036ab72558 which inappropriately compared turds to third parties.

michael@0 1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 /**
michael@0 7 Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct.
michael@0 8 */
michael@0 9
michael@0 10 #ifndef _mozTXTToHTMLConv_h__
michael@0 11 #define _mozTXTToHTMLConv_h__
michael@0 12
michael@0 13 #include "mozITXTToHTMLConv.h"
michael@0 14 #include "nsString.h"
michael@0 15 #include "nsCOMPtr.h"
michael@0 16
michael@0 17 class nsIIOService;
michael@0 18
michael@0 19 class mozTXTToHTMLConv : public mozITXTToHTMLConv
michael@0 20 {
michael@0 21 // Enhances plain text with HTML tags; see mozITXTToHTMLConv for the interface.
michael@0 22
michael@0 23 //////////////////////////////////////////////////////////
michael@0 24 public:
michael@0 25 //////////////////////////////////////////////////////////
michael@0 26
michael@0 27 mozTXTToHTMLConv();
michael@0 28 virtual ~mozTXTToHTMLConv();
michael@0 29 NS_DECL_ISUPPORTS
michael@0 30
michael@0 31 NS_DECL_MOZITXTTOHTMLCONV
michael@0 32 NS_DECL_NSIREQUESTOBSERVER
michael@0 33 NS_DECL_NSISTREAMLISTENER
michael@0 34 NS_DECL_NSISTREAMCONVERTER
michael@0 35
michael@0 36 /**
michael@0 37 see mozITXTToHTMLConv::ScanTXT
michael@0 38 */
michael@0 39 void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString);
michael@0 40
michael@0 41 /**
michael@0 42 see mozITXTToHTMLConv::ScanHTML. Note: aInString may be modified.
michael@0 43 */
michael@0 44 void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString);
michael@0 45
michael@0 46 /**
michael@0 47 see mozITXTToHTMLConv::CiteLevelTXT
michael@0 48 */
michael@0 49 int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart);
michael@0 50
michael@0 51
michael@0 52 //////////////////////////////////////////////////////////
michael@0 53 protected:
michael@0 54 //////////////////////////////////////////////////////////
michael@0 55 nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service...
michael@0 56 /**
michael@0 57 Completes an abbreviated URL:<ul>
michael@0 58 <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
michael@0 59 <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org"
michael@0 60 <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org"
michael@0 61 </ul>
michael@0 62 It does not check whether the resulting URL is valid.
michael@0 63 @param aInString (in): abbreviated URL
michael@0 64 @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
michael@0 65 @param aOutString (out): Completed URL at success and empty string at failure
michael@0 66 */
michael@0 67 void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength,
michael@0 68 const uint32_t pos, nsString& aOutString);
michael@0 69
michael@0 70
michael@0 71 //////////////////////////////////////////////////////////
michael@0 72 private:
michael@0 73 //////////////////////////////////////////////////////////
michael@0 74
michael@0 75 enum LIMTYPE
michael@0 76 {
michael@0 77 LT_IGNORE, // limitation not checked
michael@0 78 LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok.
michael@0 79 LT_ALPHA, // alpha char
michael@0 80 LT_DIGIT
michael@0 81 };
michael@0 82
michael@0 83 /**
michael@0 84 @param aInString (in): the text to search through.<p>
michael@0 85 If before = LT_IGNORE,<br>
michael@0 86 rep is compared starting at the 1st char of the text (aInString[0]),<br>
michael@0 87 else starting at the 2nd char of the text (aInString[1]).
michael@0 88 Chars after the "after"-delimiter are ignored.
michael@0 89 @param rep (in): the string to look for
michael@0 90 @param aRepLen (in): the length of the string to look for (documented as bytes; presumably char16_t units - TODO confirm)
michael@0 91 @param before (in): limitation before rep
michael@0 92 @param after (in): limitation after rep
michael@0 93 @return true, if rep is found and limitation spec is met or rep is empty
michael@0 94 */
michael@0 95 bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength,
michael@0 96 const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
michael@0 97
michael@0 98 /**
michael@0 99 Parameters: see ItMatchesDelimited
michael@0 100 @return Number of ItMatchesDelimited matches in the text
michael@0 101 */
michael@0 102 uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
michael@0 103 const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
michael@0 104
michael@0 105 /**
michael@0 106 Currently only changes "<", ">" and "&". All others stay as they are.<p>
michael@0 107 "Char" is in the function name to avoid side effects with nsString(ch)
michael@0 108 constructors.
michael@0 109 @param ch (in): the character to escape
michael@0 110 @param aStringToAppendto (out) - the string to append the escaped
michael@0 111 string to.
michael@0 112 @param inAttribute (in) - will escape quotes, too (which is
michael@0 113 only needed for attribute values)
michael@0 114 */
michael@0 115 void EscapeChar(const char16_t ch, nsString& aStringToAppendto,
michael@0 116 bool inAttribute);
michael@0 117
michael@0 118 /**
michael@0 119 See EscapeChar. Escapes the string in place.
michael@0 120 */
michael@0 121 void EscapeStr(nsString& aInString, bool inAttribute);
michael@0 122
michael@0 123 /**
michael@0 124 Currently only reverts "<", ">" and "&". All others stay as they are.<p>
michael@0 125 @param aInString (in) HTML string
michael@0 126 @param aStartPos (in) start index into the buffer
michael@0 127 @param aLength (in) length of the buffer
michael@0 128 @param aOutString (out) unescaped buffer
michael@0 129 */
michael@0 130 void UnescapeStr(const char16_t * aInString, int32_t aStartPos,
michael@0 131 int32_t aLength, nsString& aOutString);
michael@0 132
michael@0 133 /**
michael@0 134 <em>Note</em>: Different strategies are used to pass context between the
michael@0 135 functions (full text and pos vs. cut text and col0, glyphTextLen vs.
michael@0 136 replaceBefore/-After). It makes some sense, but is hard to understand
michael@0 137 (and maintain) :-(.
michael@0 138 */
michael@0 139
michael@0 140 /**
michael@0 141 <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
michael@0 142 in the text should be replaced by outputHTML.</p>
michael@0 143 <p><em>Note:</em> This function should be able to process a URL on multiple
michael@0 144 lines, but currently, ScanForURLs is called for every line, so it can't.</p>
michael@0 145 @param aInString (in): text that possibly includes a URL
michael@0 146 @param pos (in): position in the text, where either ":", "." or "@" are found
michael@0 147 @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
michael@0 148 (not-linkified) text, i.e. usually the "whattodo" parameter.
michael@0 149 (Needed to calculate replaceBefore.) NOT what will be done with
michael@0 150 the content of the link.
michael@0 151 @param outputHTML (out): URL with HTML-a tag
michael@0 152 @param replaceBefore (out): Number of chars of URL before pos
michael@0 153 @param replaceAfter (out): Number of chars of URL after pos
michael@0 154 @return true if a URL was found
michael@0 155 */
michael@0 156 bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
michael@0 157 const uint32_t whathasbeendone,
michael@0 158 nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter);
michael@0 159
michael@0 160 enum modetype {
michael@0 161 unknown,
michael@0 162 RFC1738, /* Check, if RFC1738, APPENDIX compliant,
michael@0 163 like "<URL:http://www.mozilla.org>". */
michael@0 164 RFC2396E, /* RFC2396, APPENDIX E allows angle brackets (like
michael@0 165 "<http://www.mozilla.org>") (without "URL:") or
michael@0 166 quotation marks (like ""http://www.mozilla.org"").
michael@0 167 Also allow email addresses without scheme,
michael@0 168 e.g. "<mozilla@bucksch.org>" */
michael@0 169 freetext, /* assume leading scheme
michael@0 170 with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
michael@0 171 (see RFC2396, Section 3.1).
michael@0 172 Certain characters (see code) or any whitespace
michael@0 173 (including linebreaks) end the URL.
michael@0 174 Certain other (punctuation) characters (see code)
michael@0 175 at the end are stripped off. */
michael@0 176 abbreviated /* Similar to freetext, but without scheme, e.g.
michael@0 177 "www.mozilla.org", "ftp.mozilla.org" and
michael@0 178 "mozilla@bucksch.org". */
michael@0 179 /* RFC1738 and RFC2396E type URLs may use multiple lines,
michael@0 180 whitespace is stripped. Special characters like ")" stay intact.*/
michael@0 181 };
michael@0 182
michael@0 183 /**
michael@0 184 * @param aInString (in), pos (in): see FindURL
michael@0 185 * @param check (in): Start must conform to this mode
michael@0 186 * @param start (out): Position in the text, where URL (including brackets or
michael@0 187 * similar) starts
michael@0 188 * @return true if a |check|-conforming start has been found
michael@0 189 */
michael@0 190 bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
michael@0 191 const modetype check, uint32_t& start);
michael@0 192
michael@0 193 /**
michael@0 194 * @param aInString (in), pos (in): see FindURL
michael@0 195 * @param check (in): End must conform to this mode
michael@0 196 * @param start (in): see FindURLStart
michael@0 197 * @param end (out): Similar to |start| param of FindURLStart
michael@0 198 * @return true if a |check|-conforming end has been found
michael@0 199 */
michael@0 200 bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
michael@0 201 const modetype check, const uint32_t start, uint32_t& end);
michael@0 202
michael@0 203 /**
michael@0 204 * @param aInString (in), pos (in), whathasbeendone (in): see FindURL
michael@0 205 * @param check (in): Current mode
michael@0 206 * @param start (in), end (in): see FindURLEnd
michael@0 207 * @param txtURL (out): Guessed (raw) URL.
michael@0 208 * Without whitespace, but not completed.
michael@0 209 * @param desc (out): Link as shown to the user, but already escaped.
michael@0 210 * Should be placed between the <a> and </a> tags.
michael@0 211 * @param replaceBefore (out), replaceAfter (out): see FindURL
michael@0 212 */
michael@0 213 void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength,
michael@0 214 const uint32_t pos, const uint32_t whathasbeendone,
michael@0 215 const modetype check, const uint32_t start, const uint32_t end,
michael@0 216 nsString& txtURL, nsString& desc,
michael@0 217 int32_t& replaceBefore, int32_t& replaceAfter);
michael@0 218
michael@0 219 /**
michael@0 220 * @param txtURL (in), desc (in): see CalculateURLBoundaries
michael@0 221 * @param outputHTML (out): see FindURL
michael@0 222 * @return true if a valid URL could be found (and creation of HTML was successful)
michael@0 223 */
michael@0 224 bool CheckURLAndCreateHTML(
michael@0 225 const nsString& txtURL, const nsString& desc, const modetype mode,
michael@0 226 nsString& outputHTML);
michael@0 227
michael@0 228 /**
michael@0 229 @param aInString (in): line of text possibly with tagTXT.<p>
michael@0 230 if col0 is true,
michael@0 231 starting with tagTXT<br>
michael@0 232 else
michael@0 233 starting one char before tagTXT
michael@0 234 @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
michael@0 235 openTags must be 0 then.
michael@0 236 @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
michael@0 237 @param aTagTxtLen (in): length of tagTXT.
michael@0 238 @param tagHTML (in): HTML-Tag to replace tagTXT with,
michael@0 239 without "<" and ">", e.g. "strong"
michael@0 240 @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
michael@0 241 e.g. "class=txt_star"
michael@0 242 @param aOutputString: string to APPEND the converted html into
michael@0 243 @param openTags (in/out): Number of currently open tags of type tagHTML
michael@0 244 @return Conversion succeeded
michael@0 245 */
michael@0 246 bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
michael@0 247 const char16_t* tagTXT,
michael@0 248 int32_t aTagTxtLen,
michael@0 249 const char* tagHTML, const char* attributeHTML,
michael@0 250 nsString& aOutputString, uint32_t& openTags);
michael@0 251
michael@0 252 /**
michael@0 253 @param aInString (in), col0 (in): see GlyphHit
michael@0 254 @param tagTXT (in): Smiley text, see also StructPhraseHit
michael@0 255 @param imageName (in): the basename of the file that contains the image for this smiley
michael@0 256 @param outputHTML (out): new string containing the html for the smiley
michael@0 257 @param glyphTextLen (out): see GlyphHit
michael@0 258 */
michael@0 259 bool
michael@0 260 SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
michael@0 261 const char* tagTXT, const char* imageName,
michael@0 262 nsString& outputHTML, int32_t& glyphTextLen);
michael@0 263
michael@0 264 /**
michael@0 265 Checks whether we can replace some chars at the start of line with prettier HTML
michael@0 266 code.<p>
michael@0 267 If success is reported, replace the first glyphTextLen chars with aOutString
michael@0 268
michael@0 269 @param aInString (in): line of text possibly with Glyph.<p>
michael@0 270 If col0 is true,
michael@0 271 starting with Glyph <br><!-- (br not part of text) -->
michael@0 272 else
michael@0 273 starting one char before Glyph
michael@0 274 @param col0 (in): text starts at the beginning of the line (or paragraph)
michael@0 275 @param aOutString (out): APPENDS html for the glyph to this string
michael@0 276 @param glyphTextLen (out): Length of original text to replace
michael@0 277 @return see StructPhraseHit
michael@0 278 */
michael@0 279 bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
michael@0 280 nsString& aOutString, int32_t& glyphTextLen);
michael@0 281
michael@0 282 /**
michael@0 283 Check if a given url should be linkified.
michael@0 284 @param aURL (in): url to be checked.
michael@0 285 */
michael@0 286 bool ShouldLinkify(const nsCString& aURL);
michael@0 287 };
michael@0 288
michael@0 289 // It's said that Win32 and Mac don't like static const members
michael@0 290 const int32_t mozTXTToHTMLConv_lastMode = 4;
michael@0 291 // Needed (only) by mozTXTToHTMLConv::FindURL
michael@0 292 const int32_t mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted
michael@0 293
michael@0 294 #endif

mercurial