netwerk/streamconv/converters/mozTXTToHTMLConv.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /**
     7   Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct.
     8 */
    10 #ifndef _mozTXTToHTMLConv_h__
    11 #define _mozTXTToHTMLConv_h__
    13 #include "mozITXTToHTMLConv.h"
    14 #include "nsString.h"
    15 #include "nsCOMPtr.h"
    17 class nsIIOService;
    19 class mozTXTToHTMLConv : public mozITXTToHTMLConv
    20 {
       /*
         Implementation of mozITXTToHTMLConv. Declares the public scanning entry
         points (ScanTXT, ScanHTML, CiteLevelTXT) plus the private helpers they
         rely on for URL detection, structured phrases, glyphs and escaping.
         Per the file description, stream conversion is defunct.
        */
    23 //////////////////////////////////////////////////////////
    24 public:
    25 //////////////////////////////////////////////////////////
    27   mozTXTToHTMLConv();
    28   virtual ~mozTXTToHTMLConv();
    29   NS_DECL_ISUPPORTS
    31   NS_DECL_MOZITXTTOHTMLCONV
    32   NS_DECL_NSIREQUESTOBSERVER
    33   NS_DECL_NSISTREAMLISTENER
    34   NS_DECL_NSISTREAMCONVERTER
    36 /**
    37   see mozITXTToHTMLConv::ScanTXT
    38  */
    39   void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString);
    41 /**
    42   see mozITXTToHTMLConv::ScanHTML. We will modify aInString potentially...
    43  */
    44   void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString);
    46 /**
    47   see mozITXTToHTMLConv::CiteLevelTXT
    48  */
    49   int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart);
    52 //////////////////////////////////////////////////////////
    53 protected:
    54 //////////////////////////////////////////////////////////
    55   nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service...
    56 /**
    57   Completes<ul>
    58   <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
    59   <li>Case 2: http:   "www.mozilla.org"     -> "http://www.mozilla.org"
    60   <li>Case 3: ftp:    "ftp.mozilla.org"     -> "ftp://ftp.mozilla.org"
    61   </ul>
    62   It does not check whether the resulting URL is valid.
    63   @param aInString (in): abbreviated URL
         @param aInLength (in): length of aInString
    64   @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
    65   @param aOutString (out): Completed URL at success and empty string at failure
    66  */
    67   void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, 
    68                               const uint32_t pos, nsString& aOutString);
    71 //////////////////////////////////////////////////////////
    72 private:
    73 //////////////////////////////////////////////////////////
    75   enum LIMTYPE
    76   {
    77     LT_IGNORE,     // limitation not checked
    78     LT_DELIMITER,  // not alphanumeric and not rep[0]. End of text is also ok.
    79     LT_ALPHA,      // alpha char
    80     LT_DIGIT
    81   };
    83 /**
    84   @param aInString (in): the string to search through.<p>
    85          If before = LT_IGNORE,<br>
    86            rep is compared starting at the 1st char of aInString (aInString[0]),<br>
    87            else starting at the 2nd char of aInString (aInString[1]).
    88          Chars after "after"-delimiter are ignored.
         @param aInLength (in): length of aInString
    89   @param rep (in): the string to look for
    90   @param aRepLen (in): the length of the string to look for.
                NOTE(review): previously documented as "number of bytes", but rep is
                a char16_t string; presumably counted in char16_t units -- confirm.
    91   @param before (in): limitation before rep
    92   @param after (in): limitation after rep
    93   @return true, if rep is found and limitation spec is met or rep is empty
    94 */
    95   bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength,
    96       const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
    98 /**
    99   @param see ItMatchesDelimited
   100   @return Number of ItMatchesDelimited in aInString
   101 */
   102   uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
   103       const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
   105 /**
   106   Currently only changes "<", ">" and "&". All others stay as they are.<p>
   107   "Char" in function name to avoid side effects with nsString(ch)
   108   constructors.
   109   @param ch (in) - the character to escape
   110   @param aStringToAppendto (out) - the string to append the escaped
   111                                    string to.
   112   @param inAttribute (in) - will escape quotes, too (which is
   113                             only needed for attribute values)
   114 */
   115   void EscapeChar(const char16_t ch, nsString& aStringToAppendto,
   116                   bool inAttribute);
   118 /**
   119   See EscapeChar. Escapes the string in place.
   120 */
   121   void EscapeStr(nsString& aInString, bool inAttribute);
   123 /**
   124   Currently only reverts "<", ">" and "&". All others stay as they are.<p>
   125   @param aInString (in) HTML string
   126   @param aStartPos (in) start index into the buffer
   127   @param aLength (in) length of the buffer
   128   @param aOutString (out) unescaped buffer
   129 */
   130   void UnescapeStr(const char16_t * aInString, int32_t aStartPos,
   131                    int32_t aLength, nsString& aOutString);
   133 /**
   134   <em>Note</em>: I use different strategies to pass context between the
   135   functions (full text and pos vs. cut text and col0, glyphTextLen vs.
   136   replaceBefore/-After). It makes some sense, but is hard to understand
   137   (maintain) :-(.
   138 */
   140 /**
   141   <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
   142   in aInString should be replaced by outputHTML.</p>
   143   <p><em>Note:</em> This function should be able to process a URL on multiple
   144   lines, but currently, ScanForURLs is called for every line, so it can't.</p>
   145   @param aInString (in): text that possibly includes a URL
         @param aInLength (in): length of aInString
   146   @param pos (in): position in aInString, where either ":", "." or "@" are found
   147   @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
   148               (not-linkified) text, i.e. usually the "whattodo" parameter.
   149               (Needed to calculate replaceBefore.) NOT what will be done with
   150               the content of the link.
   151   @param outputHTML (out): URL with HTML-a tag
   152   @param replaceBefore (out): Number of chars of URL before pos
   153   @param replaceAfter (out): Number of chars of URL after pos
   154   @return URL found
   155 */
   156   bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
   157           const uint32_t whathasbeendone,
   158           nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter);
   160   enum modetype {
   161          unknown,
   162          RFC1738,          /* Check, if RFC1738, APPENDIX compliant,
   163                               like "<URL:http://www.mozilla.org>". */
   164          RFC2396E,         /* RFC2396, APPENDIX E allows anglebrackets (like
   165                               "<http://www.mozilla.org>") (without "URL:") or
   166                               quotation marks(like ""http://www.mozilla.org"").
   167                               Also allow email addresses without scheme,
   168                               e.g. "<mozilla@bucksch.org>" */
   169          freetext,         /* assume a leading scheme
   170                               with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
   171                               (see RFC2396, Section 3.1).
   172                               Certain characters (see code) or any whitespace
   173                               (including linebreaks) end the URL.
   174                               Certain other (punctuation) characters (see code)
   175                               at the end are stripped off. */
   176          abbreviated       /* Similar to freetext, but without scheme, e.g.
   177                               "www.mozilla.org", "ftp.mozilla.org" and
   178                               "mozilla@bucksch.org". */
   179       /* RFC1738 and RFC2396E type URLs may use multiple lines,
   180          whitespace is stripped. Special characters like ")" stay intact.*/
   181   };
   183 /**
   184  * @param aInString (in), aInLength (in), pos (in): see FindURL
   185  * @param check (in): Start must conform to this mode
   186  * @param start (out): Position in aInString, where URL (including brackets or
   187  *             similar) starts
   188  * @return A start conforming to |check| has been found
   189  */
   190   bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
   191             	               const modetype check, uint32_t& start);
   193 /**
   194  * @param aInString (in), aInStringLength (in), pos (in): see FindURL
   195  * @param check (in): End must conform to this mode
   196  * @param start (in): see FindURLStart
   197  * @param end (out): Similar to |start| param of FindURLStart
   198  * @return An end conforming to |check| has been found
   199  */
   200   bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
   201            const modetype check, const uint32_t start, uint32_t& end);
   203 /**
   204  * @param aInString (in), aInStringLength (in), pos (in),
        *             whathasbeendone (in): see FindURL
   205  * @param check (in): Current mode
   206  * @param start (in), end (in): see FindURLEnd
   207  * @param txtURL (out): Guessed (raw) URL.
   208  *             Without whitespace, but not completed.
   209  * @param desc (out): Link as shown to the user, but already escaped.
   210  *             Should be placed between the <a> and </a> tags.
   211  * @param replaceBefore(out), replaceAfter (out): see FindURL
   212  */
   213   void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, 
   214      const uint32_t pos, const uint32_t whathasbeendone,
   215      const modetype check, const uint32_t start, const uint32_t end,
   216      nsString& txtURL, nsString& desc,
   217      int32_t& replaceBefore, int32_t& replaceAfter);
   219 /**
   220  * @param txtURL (in), desc (in): see CalculateURLBoundaries
        * @param mode (in): NOTE(review): undocumented in the original; presumably
        *             the mode the URL was detected with (cf. |check| of
        *             CalculateURLBoundaries) -- confirm against the implementation.
   221  * @param outputHTML (out): see FindURL
   222  * @return A valid URL could be found (and creation of HTML successful)
   223  */
   224   bool CheckURLAndCreateHTML(
   225        const nsString& txtURL, const nsString& desc, const modetype mode,
   226        nsString& outputHTML);
   228 /**
   229   @param aInString (in): line of text possibly with tagTXT.<p>
   230               if col0 is true,
   231                 starting with tagTXT<br>
   232               else
   233                 starting one char before tagTXT
         @param aInStringLength (in): length of aInString
   234   @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
   235               openTags must be 0 then.
   236   @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
   237   @param aTagTxtLen (in): length of tagTXT.
   238   @param tagHTML (in): HTML-Tag to replace tagTXT with,
   239               without "<" and ">", e.g. "strong"
   240   @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
   241               e.g. "class=txt_star"
   242   @param aOutputString: string to APPEND the converted html into
   243   @param openTags (in/out): Number of currently open tags of type tagHTML
   244   @return Conversion succeeded
   245 */
   246   bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
   247      const char16_t* tagTXT,
   248      int32_t aTagTxtLen, 
   249      const char* tagHTML, const char* attributeHTML,
   250      nsString& aOutputString, uint32_t& openTags);
   252 /**
   253   @param aInString (in), aLength (in), col0 (in): see GlyphHit
   254   @param tagTXT (in): Smiley text, see also StructPhraseHit
   255   @param imageName (in): the basename of the file that contains the image for this smiley
   256   @param outputHTML (out): new string containing the html for the smiley
   257   @param glyphTextLen (out): see GlyphHit
   258 */
   259   bool
   260          SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
   261          const char* tagTXT, const char* imageName,
   262          nsString& outputHTML, int32_t& glyphTextLen);
   264 /**
   265   Checks, if we can replace some chars at the start of line with prettier HTML
   266   code.<p>
   267   If success is reported, replace the first glyphTextLen chars with the HTML
         appended to aOutString.
   269   @param aInString (in): line of text possibly with Glyph.<p>
   270               If col0 is true,
   271                 starting with Glyph <br><!-- (br not part of text) -->
   272               else
   273                 starting one char before Glyph
         @param aInLength (in): length of aInString
   274   @param col0 (in): aInString starts at the beginning of the line (or paragraph)
   275   @param aOutString (out): APPENDS html for the glyph to this string
   276   @param glyphTextLen (out): Length of original text to replace
   277   @return see StructPhraseHit
   278 */
   279   bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
   280        nsString& aOutString, int32_t& glyphTextLen);
   282 /**
   283   Check if a given url should be linkified.
   284   @param aURL (in): url to be checked on.
   285 */
   286   bool ShouldLinkify(const nsCString& aURL);
   287 };
   289 // Win32 and Mac are said to dislike static const class members, hence
       // (presumably) these constants are declared at file scope instead.
   290 const int32_t mozTXTToHTMLConv_lastMode = 4;
   291 	                        // Needed (only) by mozTXTToHTMLConv::FindURL.
       	                        // 4 equals the value of the last modetype
       	                        // enumerator (abbreviated).
   292 const int32_t mozTXTToHTMLConv_numberOfModes = 4;  // ditto (needed only by FindURL); "unknown" is not counted
   294 #endif

mercurial