netwerk/streamconv/converters/mozTXTToHTMLConv.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,294 @@
     1.4 +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/**
    1.10 +  Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct.
    1.11 +*/
    1.12 +
    1.13 +#ifndef _mozTXTToHTMLConv_h__
    1.14 +#define _mozTXTToHTMLConv_h__
    1.15 +
    1.16 +#include "mozITXTToHTMLConv.h"
    1.17 +#include "nsString.h"
    1.18 +#include "nsCOMPtr.h"
    1.19 +
    1.20 +class nsIIOService;
    1.21 +
    1.22 +class mozTXTToHTMLConv : public mozITXTToHTMLConv
    1.23 +{
    1.24 +// Implementation class for mozITXTToHTMLConv: scans text and adds HTML markup
    1.25 +// for URLs, structured phrases (e.g. "*"), glyphs and smileys.
    1.26 +//////////////////////////////////////////////////////////
    1.27 +public:
    1.28 +//////////////////////////////////////////////////////////
    1.29 +
    1.30 +  mozTXTToHTMLConv();
    1.31 +  virtual ~mozTXTToHTMLConv();
    1.32 +  NS_DECL_ISUPPORTS
    1.33 +
    1.34 +  NS_DECL_MOZITXTTOHTMLCONV
    1.35 +  NS_DECL_NSIREQUESTOBSERVER
    1.36 +  NS_DECL_NSISTREAMLISTENER
    1.37 +  NS_DECL_NSISTREAMCONVERTER
    1.38 +
    1.39 +/**
    1.40 +  see mozITXTToHTMLConv::ScanTXT
    1.41 + */
    1.42 +  void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString);
    1.43 +
    1.44 +/**
    1.45 +  see mozITXTToHTMLConv::ScanHTML. Note: aInString may be modified.
    1.46 + */
    1.47 +  void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString);
    1.48 +
    1.49 +/**
    1.50 +  see mozITXTToHTMLConv::CiteLevelTXT
    1.51 + */
    1.52 +  int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart);
    1.53 +
    1.54 +
    1.55 +//////////////////////////////////////////////////////////
    1.56 +protected:
    1.57 +//////////////////////////////////////////////////////////
    1.58 +  nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service...
    1.59 +/**
    1.60 +  Completes<ul>
    1.61 +  <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
    1.62 +  <li>Case 2: http:   "www.mozilla.org"     -> "http://www.mozilla.org"
    1.63 +  <li>Case 3: ftp:    "ftp.mozilla.org"     -> "ftp://ftp.mozilla.org"
    1.64 +  </ul>
    1.65 +  It does not check whether the resulting URL is valid.
    1.66 +  @param aInString (in): abbreviated URL
    1.67 +  @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
    1.68 +  @param aOutString (out): Completed URL at success and empty string at failure
    1.69 + */
    1.70 +  void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, 
    1.71 +                              const uint32_t pos, nsString& aOutString);
    1.72 +
    1.73 +
    1.74 +//////////////////////////////////////////////////////////
    1.75 +private:
    1.76 +//////////////////////////////////////////////////////////
    1.77 +
    1.78 +  enum LIMTYPE
    1.79 +  {
    1.80 +    LT_IGNORE,     // limitation not checked
    1.81 +    LT_DELIMITER,  // not alphanumeric and not rep[0]. End of text is also ok.
    1.82 +    LT_ALPHA,      // alpha char
    1.83 +    LT_DIGIT
    1.84 +  };
    1.85 +
    1.86 +/**
    1.87 +  @param aInString (in): the string to search through.<p>
    1.88 +         If before = LT_IGNORE,<br>
    1.89 +           rep is compared starting at 1. char of the string (aInString[0]),<br>
    1.90 +           else starting at 2. char of the string (aInString[1]).
    1.91 +         Chars after "after"-delimiter are ignored.
    1.92 +  @param rep (in): the string to look for
    1.93 +  @param aRepLen (in): the length of rep (NOTE(review): was documented as bytes, but rep is char16_t* -- presumably char16_t units; confirm)
    1.94 +  @param before (in): limitation before rep
    1.95 +  @param after (in): limitation after rep
    1.96 +  @return true, if rep is found and limitation spec is met or rep is empty
    1.97 +*/
    1.98 +  bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength,
    1.99 +      const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
   1.100 +
   1.101 +/**
   1.102 +  @param see ItMatchesDelimited
   1.103 +  @return Number of ItMatchesDelimited in text
   1.104 +*/
   1.105 +  uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
   1.106 +      const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);
   1.107 +
   1.108 +/**
   1.109 +  Currently only changes "<", ">" and "&". All others stay as they are.<p>
   1.110 +  "Char" in function name to avoid side effects with nsString(ch)
   1.111 +  constructors.
   1.112 +  @param ch (in) - the character to escape
   1.113 +  @param aStringToAppendto (out) - the string to append the escaped
   1.114 +                                   string to.
   1.115 +  @param inAttribute (in) - will escape quotes, too (which is
   1.116 +                            only needed for attribute values)
   1.117 +*/
   1.118 +  void EscapeChar(const char16_t ch, nsString& aStringToAppendto,
   1.119 +                  bool inAttribute);
   1.120 +
   1.121 +/**
   1.122 +  See EscapeChar. Escapes the string in place.
   1.123 +*/
   1.124 +  void EscapeStr(nsString& aInString, bool inAttribute);
   1.125 +
   1.126 +/**
   1.127 +  Currently only reverts "<", ">" and "&". All others stay as they are.<p>
   1.128 +  @param aInString (in) HTML string
   1.129 +  @param aStartPos (in) start index into the buffer
   1.130 +  @param aLength (in) length of the buffer
   1.131 +  @param aOutString (out) unescaped buffer
   1.132 +*/
   1.133 +  void UnescapeStr(const char16_t * aInString, int32_t aStartPos,
   1.134 +                   int32_t aLength, nsString& aOutString);
   1.135 +
   1.136 +/**
   1.137 +  <em>Note</em>: I use different strategies to pass context between the
   1.138 +  functions (full text and pos vs. cut text and col0, glyphTextLen vs.
   1.139 +  replaceBefore/-After). It makes some sense, but is hard to understand
   1.140 +  (maintain) :-(.
   1.141 +*/
   1.142 +
   1.143 +/**
   1.144 +  <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
   1.145 +  in text should be replaced by outputHTML.</p>
   1.146 +  <p><em>Note:</em> This function should be able to process a URL on multiple
   1.147 +  lines, but currently, ScanForURLs is called for every line, so it can't.</p>
   1.148 +  @param aInString (in): possibly includes a URL
   1.149 +  @param pos (in): position in aInString, where either ":", "." or "@" is found
   1.150 +  @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
   1.151 +              (not-linkified) text, i.e. usually the "whattodo" parameter.
   1.152 +              (Needed to calculate replaceBefore.) NOT what will be done with
   1.153 +              the content of the link.
   1.154 +  @param outputHTML (out): URL with HTML-a tag
   1.155 +  @param replaceBefore (out): Number of chars of URL before pos
   1.156 +  @param replaceAfter (out): Number of chars of URL after pos
   1.157 +  @return URL found
   1.158 +*/
   1.159 +  bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
   1.160 +          const uint32_t whathasbeendone,
   1.161 +          nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter);
   1.162 +
   1.163 +  enum modetype {
   1.164 +         unknown,
   1.165 +         RFC1738,          /* Check, if RFC1738, APPENDIX compliant,
   1.166 +                              like "<URL:http://www.mozilla.org>". */
   1.167 +         RFC2396E,         /* RFC2396, APPENDIX E allows angle brackets (like
   1.168 +                              "<http://www.mozilla.org>") (without "URL:") or
   1.169 +                              quotation marks (like ""http://www.mozilla.org"").
   1.170 +                              Also allow email addresses without scheme,
   1.171 +                              e.g. "<mozilla@bucksch.org>" */
   1.172 +         freetext,         /* assume heading scheme
   1.173 +                              with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
   1.174 +                              (see RFC2396, Section 3.1).
   1.175 +                              Certain characters (see code) or any whitespace
   1.176 +                              (including linebreaks) end the URL.
   1.177 +                              Certain other (punctuation) characters (see code)
   1.178 +                              at the end are stripped off. */
   1.179 +         abbreviated       /* Similar to freetext, but without scheme, e.g.
   1.180 +                              "www.mozilla.org", "ftp.mozilla.org" and
   1.181 +                              "mozilla@bucksch.org". */
   1.182 +      /* RFC1738 and RFC2396E type URLs may use multiple lines,
   1.183 +         whitespace is stripped. Special characters like ")" stay intact.*/
   1.184 +  };
   1.185 +
   1.186 +/**
   1.187 + * @param aInString (in), pos (in): see FindURL
   1.188 + * @param check (in): Start must conform to this mode
   1.189 + * @param start (out): Position in aInString, where URL (including brackets or
   1.190 + *             similar) starts
   1.191 + * @return A start conforming to |check| has been found
   1.192 + */
   1.193 +  bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
   1.194 +            	               const modetype check, uint32_t& start);
   1.195 +
   1.196 +/**
   1.197 + * @param aInString (in), pos (in): see FindURL
   1.198 + * @param check (in): End must conform to this mode
   1.199 + * @param start (in): see FindURLStart
   1.200 + * @param end (out): Similar to |start| param of FindURLStart
   1.201 + * @return An end conforming to |check| has been found
   1.202 + */
   1.203 +  bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
   1.204 +           const modetype check, const uint32_t start, uint32_t& end);
   1.205 +
   1.206 +/**
   1.207 + * @param aInString (in), pos (in), whathasbeendone (in): see FindURL
   1.208 + * @param check (in): Current mode
   1.209 + * @param start (in), end (in): see FindURLEnd
   1.210 + * @param txtURL (out): Guessed (raw) URL.
   1.211 + *             Without whitespace, but not completed.
   1.212 + * @param desc (out): Link as shown to the user, but already escaped.
   1.213 + *             Should be placed between the <a> and </a> tags.
   1.214 + * @param replaceBefore (out), replaceAfter (out): see FindURL
   1.215 + */
   1.216 +  void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, 
   1.217 +     const uint32_t pos, const uint32_t whathasbeendone,
   1.218 +     const modetype check, const uint32_t start, const uint32_t end,
   1.219 +     nsString& txtURL, nsString& desc,
   1.220 +     int32_t& replaceBefore, int32_t& replaceAfter);
   1.221 +
   1.222 +/**
   1.223 + * @param txtURL (in), desc (in): see CalculateURLBoundaries
   1.224 + * @param outputHTML (out): see FindURL
   1.225 + * @return A valid URL could be found (and creation of HTML successful)
   1.226 + */
   1.227 +  bool CheckURLAndCreateHTML(
   1.228 +       const nsString& txtURL, const nsString& desc, const modetype mode,
   1.229 +       nsString& outputHTML);
   1.230 +
   1.231 +/**
   1.232 +  @param aInString (in): line of text possibly with tagTXT.<p>
   1.233 +              if col0 is true,
   1.234 +                starting with tagTXT<br>
   1.235 +              else
   1.236 +                starting one char before tagTXT
   1.237 +  @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
   1.238 +              openTags must be 0 then.
   1.239 +  @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
   1.240 +  @param aTagTxtLen (in): length of tagTXT.
   1.241 +  @param tagHTML (in): HTML-Tag to replace tagTXT with,
   1.242 +              without "<" and ">", e.g. "strong"
   1.243 +  @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
   1.244 +              e.g. "class=txt_star"
   1.245 +  @param aOutputString: string to APPEND the converted html into
   1.246 +  @param openTags (in/out): Number of currently open tags of type tagHTML
   1.247 +  @return Conversion succeeded
   1.248 +*/
   1.249 +  bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
   1.250 +     const char16_t* tagTXT,
   1.251 +     int32_t aTagTxtLen, 
   1.252 +     const char* tagHTML, const char* attributeHTML,
   1.253 +     nsString& aOutputString, uint32_t& openTags);
   1.254 +
   1.255 +/**
   1.256 +  @param aInString (in), col0 (in): see GlyphHit
   1.257 +  @param tagTXT (in): Smiley, see also StructPhraseHit
   1.258 +  @param imageName (in): the basename of the file that contains the image for this smiley
   1.259 +  @param outputHTML (out): new string containing the html for the smiley
   1.260 +  @param glyphTextLen (out): see GlyphHit
   1.261 +*/
   1.262 +  bool
   1.263 +         SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
   1.264 +         const char* tagTXT, const char* imageName,
   1.265 +         nsString& outputHTML, int32_t& glyphTextLen);
   1.266 +
   1.267 +/**
   1.268 +  Checks, if we can replace some chars at the start of line with prettier HTML
   1.269 +  code.<p>
   1.270 +  If success is reported, replace the first glyphTextLen chars with the HTML appended to aOutString
   1.271 +
   1.272 +  @param aInString (in): line of text possibly with Glyph.<p>
   1.273 +              If col0 is true,
   1.274 +                starting with Glyph <br><!-- (br not part of text) -->
   1.275 +              else
   1.276 +                starting one char before Glyph
   1.277 +  @param col0 (in): text starts at the beginning of the line (or paragraph)
   1.278 +  @param aOutString (out): APPENDS html for the glyph to this string
   1.279 +  @param glyphTextLen (out): Length of original text to replace
   1.280 +  @return see StructPhraseHit
   1.281 +*/
   1.282 +  bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
   1.283 +       nsString& aOutString, int32_t& glyphTextLen);
   1.284 +
   1.285 +/**
   1.286 +  Check if a given url should be linkified.
   1.287 +  @param aURL (in): url to be checked.
   1.288 +*/
   1.289 +  bool ShouldLinkify(const nsCString& aURL);
   1.290 +};
   1.291 +
   1.292 +// It's said that Win32 and Mac don't like static const members
   1.293 +const int32_t mozTXTToHTMLConv_lastMode = 4;
   1.294 +	                        // Needed (only) by mozTXTToHTMLConv::FindURL. NOTE(review): 4 equals the implicit value of modetype::abbreviated (the last enumerator) -- keep in sync.
   1.295 +const int32_t mozTXTToHTMLConv_numberOfModes = 4;  // ditto; unknown not counted
   1.296 +
   1.297 +#endif

mercurial