1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,294 @@ 1.4 +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/** 1.10 + Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct. 1.11 +*/ 1.12 + 1.13 +#ifndef _mozTXTToHTMLConv_h__ 1.14 +#define _mozTXTToHTMLConv_h__ 1.15 + 1.16 +#include "mozITXTToHTMLConv.h" 1.17 +#include "nsString.h" 1.18 +#include "nsCOMPtr.h" 1.19 + 1.20 +class nsIIOService; 1.21 + 1.22 +class mozTXTToHTMLConv : public mozITXTToHTMLConv 1.23 +{ 1.24 + 1.25 + 1.26 +////////////////////////////////////////////////////////// 1.27 +public: 1.28 +////////////////////////////////////////////////////////// 1.29 + 1.30 + mozTXTToHTMLConv(); 1.31 + virtual ~mozTXTToHTMLConv(); 1.32 + NS_DECL_ISUPPORTS 1.33 + 1.34 + NS_DECL_MOZITXTTOHTMLCONV 1.35 + NS_DECL_NSIREQUESTOBSERVER 1.36 + NS_DECL_NSISTREAMLISTENER 1.37 + NS_DECL_NSISTREAMCONVERTER 1.38 + 1.39 +/** 1.40 + see mozITXTToHTMLConv::ScanTXT 1.41 + */ 1.42 + void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString); 1.43 + 1.44 +/** 1.45 + see mozITXTToHTMLConv::ScanHTML. We will modify aInString potentially... 1.46 + */ 1.47 + void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString); 1.48 + 1.49 +/** 1.50 + see mozITXTToHTMLConv::CiteLevelTXT 1.51 + */ 1.52 + int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart); 1.53 + 1.54 + 1.55 +////////////////////////////////////////////////////////// 1.56 +protected: 1.57 +////////////////////////////////////////////////////////// 1.58 + nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service... 1.59 +/** 1.60 + Completes<ul> 1.61 + <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org" 1.62 + <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org" 1.63 + <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://www.mozilla.org" 1.64 + </ul> 1.65 + It does no check, if the resulting URL is valid. 1.66 + @param text (in): abbreviated URL 1.67 + @param pos (in): position of "@" (case 1) or first "." (case 2 and 3) 1.68 + @return Completed URL at success and empty string at failure 1.69 + */ 1.70 + void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, 1.71 + const uint32_t pos, nsString& aOutString); 1.72 + 1.73 + 1.74 +////////////////////////////////////////////////////////// 1.75 +private: 1.76 +////////////////////////////////////////////////////////// 1.77 + 1.78 + enum LIMTYPE 1.79 + { 1.80 + LT_IGNORE, // limitation not checked 1.81 + LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok. 1.82 + LT_ALPHA, // alpha char 1.83 + LT_DIGIT 1.84 + }; 1.85 + 1.86 +/** 1.87 + @param text (in): the string to search through.<p> 1.88 + If before = IGNORE,<br> 1.89 + rep is compared starting at 1. char of text (text[0]),<br> 1.90 + else starting at 2. char of text (text[1]). 1.91 + Chars after "after"-delimiter are ignored. 1.92 + @param rep (in): the string to look for 1.93 + @param aRepLen (in): the number of bytes in the string to look for 1.94 + @param before (in): limitation before rep 1.95 + @param after (in): limitation after rep 1.96 + @return true, if rep is found and limitation spec is met or rep is empty 1.97 +*/ 1.98 + bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength, 1.99 + const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); 1.100 + 1.101 +/** 1.102 + @param see ItMatchesDelimited 1.103 + @return Number of ItMatchesDelimited in text 1.104 +*/ 1.105 + uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, 1.106 + const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); 1.107 + 1.108 +/** 1.109 + Currently only changes "<", ">" and "&". All others stay as they are.<p> 1.110 + "Char" in function name to avoid side effects with nsString(ch) 1.111 + constructors. 1.112 + @param ch (in) 1.113 + @param aStringToAppendto (out) - the string to append the escaped 1.114 + string to. 1.115 + @param inAttribute (in) - will escape quotes, too (which is 1.116 + only needed for attribute values) 1.117 +*/ 1.118 + void EscapeChar(const char16_t ch, nsString& aStringToAppendto, 1.119 + bool inAttribute); 1.120 + 1.121 +/** 1.122 + See EscapeChar. Escapes the string in place. 1.123 +*/ 1.124 + void EscapeStr(nsString& aInString, bool inAttribute); 1.125 + 1.126 +/** 1.127 + Currently only reverts "<", ">" and "&". All others stay as they are.<p> 1.128 + @param aInString (in) HTML string 1.129 + @param aStartPos (in) start index into the buffer 1.130 + @param aLength (in) length of the buffer 1.131 + @param aOutString (out) unescaped buffer 1.132 +*/ 1.133 + void UnescapeStr(const char16_t * aInString, int32_t aStartPos, 1.134 + int32_t aLength, nsString& aOutString); 1.135 + 1.136 +/** 1.137 + <em>Note</em>: I use different strategies to pass context between the 1.138 + functions (full text and pos vs. cutted text and col0, glphyTextLen vs. 1.139 + replaceBefore/-After). It makes some sense, but is hard to understand 1.140 + (maintain) :-(. 1.141 +*/ 1.142 + 1.143 +/** 1.144 + <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars 1.145 + in text should be replaced by outputHTML.</p> 1.146 + <p><em>Note:</em> This function should be able to process a URL on multiple 1.147 + lines, but currently, ScanForURLs is called for every line, so it can't.</p> 1.148 + @param text (in): includes possibly a URL 1.149 + @param pos (in): position in text, where either ":", "." or "@" are found 1.150 + @param whathasbeendone (in): What the calling ScanTXT did/has to do with the 1.151 + (not-linkified) text, i.e. usually the "whattodo" parameter. 1.152 + (Needed to calculate replaceBefore.) NOT what will be done with 1.153 + the content of the link. 1.154 + @param outputHTML (out): URL with HTML-a tag 1.155 + @param replaceBefore (out): Number of chars of URL before pos 1.156 + @param replaceAfter (out): Number of chars of URL after pos 1.157 + @return URL found 1.158 +*/ 1.159 + bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, 1.160 + const uint32_t whathasbeendone, 1.161 + nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter); 1.162 + 1.163 + enum modetype { 1.164 + unknown, 1.165 + RFC1738, /* Check, if RFC1738, APPENDIX compliant, 1.166 + like "<URL:http://www.mozilla.org>". */ 1.167 + RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like 1.168 + "<http://www.mozilla.org>") (without "URL:") or 1.169 + quotation marks(like ""http://www.mozilla.org""). 1.170 + Also allow email addresses without scheme, 1.171 + e.g. "<mozilla@bucksch.org>" */ 1.172 + freetext, /* assume heading scheme 1.173 + with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:" 1.174 + (see RFC2396, Section 3.1). 1.175 + Certain characters (see code) or any whitespace 1.176 + (including linebreaks) end the URL. 1.177 + Other certain (punctation) characters (see code) 1.178 + at the end are stripped off. */ 1.179 + abbreviated /* Similar to freetext, but without scheme, e.g. 1.180 + "www.mozilla.org", "ftp.mozilla.org" and 1.181 + "mozilla@bucksch.org". */ 1.182 + /* RFC1738 and RFC2396E type URLs may use multiple lines, 1.183 + whitespace is stripped. Special characters like ")" stay intact.*/ 1.184 + }; 1.185 + 1.186 +/** 1.187 + * @param text (in), pos (in): see FindURL 1.188 + * @param check (in): Start must be conform with this mode 1.189 + * @param start (out): Position in text, where URL (including brackets or 1.190 + * similar) starts 1.191 + * @return |check|-conform start has been found 1.192 + */ 1.193 + bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos, 1.194 + const modetype check, uint32_t& start); 1.195 + 1.196 +/** 1.197 + * @param text (in), pos (in): see FindURL 1.198 + * @param check (in): End must be conform with this mode 1.199 + * @param start (in): see FindURLStart 1.200 + * @param end (out): Similar to |start| param of FindURLStart 1.201 + * @return |check|-conform end has been found 1.202 + */ 1.203 + bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, 1.204 + const modetype check, const uint32_t start, uint32_t& end); 1.205 + 1.206 +/** 1.207 + * @param text (in), pos (in), whathasbeendone (in): see FindURL 1.208 + * @param check (in): Current mode 1.209 + * @param start (in), end (in): see FindURLEnd 1.210 + * @param txtURL (out): Guessed (raw) URL. 1.211 + * Without whitespace, but not completed. 1.212 + * @param desc (out): Link as shown to the user, but already escaped. 1.213 + * Should be placed between the <a> and </a> tags. 1.214 + * @param replaceBefore(out), replaceAfter (out): see FindURL 1.215 + */ 1.216 + void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, 1.217 + const uint32_t pos, const uint32_t whathasbeendone, 1.218 + const modetype check, const uint32_t start, const uint32_t end, 1.219 + nsString& txtURL, nsString& desc, 1.220 + int32_t& replaceBefore, int32_t& replaceAfter); 1.221 + 1.222 +/** 1.223 + * @param txtURL (in), desc (in): see CalculateURLBoundaries 1.224 + * @param outputHTML (out): see FindURL 1.225 + * @return A valid URL could be found (and creation of HTML successful) 1.226 + */ 1.227 + bool CheckURLAndCreateHTML( 1.228 + const nsString& txtURL, const nsString& desc, const modetype mode, 1.229 + nsString& outputHTML); 1.230 + 1.231 +/** 1.232 + @param text (in): line of text possibly with tagTXT.<p> 1.233 + if col0 is true, 1.234 + starting with tagTXT<br> 1.235 + else 1.236 + starting one char before tagTXT 1.237 + @param col0 (in): tagTXT is on the beginning of the line (or paragraph). 1.238 + open must be 0 then. 1.239 + @param tagTXT (in): Tag in plaintext to search for, e.g. "*" 1.240 + @param aTagTxtLen (in): length of tagTXT. 1.241 + @param tagHTML (in): HTML-Tag to replace tagTXT with, 1.242 + without "<" and ">", e.g. "strong" 1.243 + @param attributeHTML (in): HTML-attribute to add to opening tagHTML, 1.244 + e.g. "class=txt_star" 1.245 + @param aOutString: string to APPEND the converted html into 1.246 + @param open (in/out): Number of currently open tags of type tagHTML 1.247 + @return Conversion succeeded 1.248 +*/ 1.249 + bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, 1.250 + const char16_t* tagTXT, 1.251 + int32_t aTagTxtLen, 1.252 + const char* tagHTML, const char* attributeHTML, 1.253 + nsString& aOutputString, uint32_t& openTags); 1.254 + 1.255 +/** 1.256 + @param text (in), col0 (in): see GlyphHit 1.257 + @param tagTXT (in): Smily, see also StructPhraseHit 1.258 + @param imageName (in): the basename of the file that contains the image for this smilie 1.259 + @param outputHTML (out): new string containing the html for the smily 1.260 + @param glyphTextLen (out): see GlyphHit 1.261 +*/ 1.262 + bool 1.263 + SmilyHit(const char16_t * aInString, int32_t aLength, bool col0, 1.264 + const char* tagTXT, const char* imageName, 1.265 + nsString& outputHTML, int32_t& glyphTextLen); 1.266 + 1.267 +/** 1.268 + Checks, if we can replace some chars at the start of line with prettier HTML 1.269 + code.<p> 1.270 + If success is reported, replace the first glyphTextLen chars with outputHTML 1.271 + 1.272 + @param text (in): line of text possibly with Glyph.<p> 1.273 + If col0 is true, 1.274 + starting with Glyph <br><!-- (br not part of text) --> 1.275 + else 1.276 + starting one char before Glyph 1.277 + @param col0 (in): text starts at the beginning of the line (or paragraph) 1.278 + @param aOutString (out): APPENDS html for the glyph to this string 1.279 + @param glyphTextLen (out): Length of original text to replace 1.280 + @return see StructPhraseHit 1.281 +*/ 1.282 + bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, 1.283 + nsString& aOutString, int32_t& glyphTextLen); 1.284 + 1.285 +/** 1.286 + Check if a given url should be linkified. 1.287 + @param aURL (in): url to be checked on. 1.288 +*/ 1.289 + bool ShouldLinkify(const nsCString& aURL); 1.290 +}; 1.291 + 1.292 +// It's said, that Win32 and Mac don't like static const members 1.293 +const int32_t mozTXTToHTMLConv_lastMode = 4; 1.294 + // Needed (only) by mozTXTToHTMLConv::FindURL 1.295 +const int32_t mozTXTToHTMLConv_numberOfModes = 4; // dito; unknown not counted 1.296 + 1.297 +#endif