michael@0: /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /** michael@0: Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct. michael@0: */ michael@0: michael@0: #ifndef _mozTXTToHTMLConv_h__ michael@0: #define _mozTXTToHTMLConv_h__ michael@0: michael@0: #include "mozITXTToHTMLConv.h" michael@0: #include "nsString.h" michael@0: #include "nsCOMPtr.h" michael@0: michael@0: class nsIIOService; michael@0: michael@0: class mozTXTToHTMLConv : public mozITXTToHTMLConv michael@0: { michael@0: michael@0: michael@0: ////////////////////////////////////////////////////////// michael@0: public: michael@0: ////////////////////////////////////////////////////////// michael@0: michael@0: mozTXTToHTMLConv(); michael@0: virtual ~mozTXTToHTMLConv(); michael@0: NS_DECL_ISUPPORTS michael@0: michael@0: NS_DECL_MOZITXTTOHTMLCONV michael@0: NS_DECL_NSIREQUESTOBSERVER michael@0: NS_DECL_NSISTREAMLISTENER michael@0: NS_DECL_NSISTREAMCONVERTER michael@0: michael@0: /** michael@0: see mozITXTToHTMLConv::ScanTXT michael@0: */ michael@0: void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString); michael@0: michael@0: /** michael@0: see mozITXTToHTMLConv::ScanHTML. We will modify aInString potentially... michael@0: */ michael@0: void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString); michael@0: michael@0: /** michael@0: see mozITXTToHTMLConv::CiteLevelTXT michael@0: */ michael@0: int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart); michael@0: michael@0: michael@0: ////////////////////////////////////////////////////////// michael@0: protected: michael@0: ////////////////////////////////////////////////////////// michael@0: nsCOMPtr mIOService; // for performance reasons, cache the netwerk service... michael@0: /** michael@0: Completes michael@0: It does no check, if the resulting URL is valid. michael@0: @param text (in): abbreviated URL michael@0: @param pos (in): position of "@" (case 1) or first "." (case 2 and 3) michael@0: @return Completed URL at success and empty string at failure michael@0: */ michael@0: void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, michael@0: const uint32_t pos, nsString& aOutString); michael@0: michael@0: michael@0: ////////////////////////////////////////////////////////// michael@0: private: michael@0: ////////////////////////////////////////////////////////// michael@0: michael@0: enum LIMTYPE michael@0: { michael@0: LT_IGNORE, // limitation not checked michael@0: LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok. michael@0: LT_ALPHA, // alpha char michael@0: LT_DIGIT michael@0: }; michael@0: michael@0: /** michael@0: @param text (in): the string to search through.

michael@0: If before = IGNORE,
michael@0: rep is compared starting at 1. char of text (text[0]),
michael@0: else starting at 2. char of text (text[1]). michael@0: Chars after "after"-delimiter are ignored. michael@0: @param rep (in): the string to look for michael@0: @param aRepLen (in): the number of bytes in the string to look for michael@0: @param before (in): limitation before rep michael@0: @param after (in): limitation after rep michael@0: @return true, if rep is found and limitation spec is met or rep is empty michael@0: */ michael@0: bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength, michael@0: const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); michael@0: michael@0: /** michael@0: @param see ItMatchesDelimited michael@0: @return Number of ItMatchesDelimited in text michael@0: */ michael@0: uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength, michael@0: const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after); michael@0: michael@0: /** michael@0: Currently only changes "<", ">" and "&". All others stay as they are.

michael@0: "Char" in function name to avoid side effects with nsString(ch) michael@0: constructors. michael@0: @param ch (in) michael@0: @param aStringToAppendto (out) - the string to append the escaped michael@0: string to. michael@0: @param inAttribute (in) - will escape quotes, too (which is michael@0: only needed for attribute values) michael@0: */ michael@0: void EscapeChar(const char16_t ch, nsString& aStringToAppendto, michael@0: bool inAttribute); michael@0: michael@0: /** michael@0: See EscapeChar. Escapes the string in place. michael@0: */ michael@0: void EscapeStr(nsString& aInString, bool inAttribute); michael@0: michael@0: /** michael@0: Currently only reverts "<", ">" and "&". All others stay as they are.

michael@0: @param aInString (in) HTML string michael@0: @param aStartPos (in) start index into the buffer michael@0: @param aLength (in) length of the buffer michael@0: @param aOutString (out) unescaped buffer michael@0: */ michael@0: void UnescapeStr(const char16_t * aInString, int32_t aStartPos, michael@0: int32_t aLength, nsString& aOutString); michael@0: michael@0: /** michael@0: Note: I use different strategies to pass context between the michael@0: functions (full text and pos vs. cutted text and col0, glphyTextLen vs. michael@0: replaceBefore/-After). It makes some sense, but is hard to understand michael@0: (maintain) :-(. michael@0: */ michael@0: michael@0: /** michael@0:

Note: replaceBefore + replaceAfter + 1 (for char at pos) chars michael@0: in text should be replaced by outputHTML.

michael@0:

Note: This function should be able to process a URL on multiple michael@0: lines, but currently, ScanForURLs is called for every line, so it can't.

michael@0: @param text (in): includes possibly a URL michael@0: @param pos (in): position in text, where either ":", "." or "@" are found michael@0: @param whathasbeendone (in): What the calling ScanTXT did/has to do with the michael@0: (not-linkified) text, i.e. usually the "whattodo" parameter. michael@0: (Needed to calculate replaceBefore.) NOT what will be done with michael@0: the content of the link. michael@0: @param outputHTML (out): URL with HTML-a tag michael@0: @param replaceBefore (out): Number of chars of URL before pos michael@0: @param replaceAfter (out): Number of chars of URL after pos michael@0: @return URL found michael@0: */ michael@0: bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos, michael@0: const uint32_t whathasbeendone, michael@0: nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter); michael@0: michael@0: enum modetype { michael@0: unknown, michael@0: RFC1738, /* Check, if RFC1738, APPENDIX compliant, michael@0: like "". */ michael@0: RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like michael@0: "") (without "URL:") or michael@0: quotation marks(like ""http://www.mozilla.org""). michael@0: Also allow email addresses without scheme, michael@0: e.g. "" */ michael@0: freetext, /* assume heading scheme michael@0: with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:" michael@0: (see RFC2396, Section 3.1). michael@0: Certain characters (see code) or any whitespace michael@0: (including linebreaks) end the URL. michael@0: Other certain (punctation) characters (see code) michael@0: at the end are stripped off. */ michael@0: abbreviated /* Similar to freetext, but without scheme, e.g. michael@0: "www.mozilla.org", "ftp.mozilla.org" and michael@0: "mozilla@bucksch.org". */ michael@0: /* RFC1738 and RFC2396E type URLs may use multiple lines, michael@0: whitespace is stripped. Special characters like ")" stay intact.*/ michael@0: }; michael@0: michael@0: /** michael@0: * @param text (in), pos (in): see FindURL michael@0: * @param check (in): Start must be conform with this mode michael@0: * @param start (out): Position in text, where URL (including brackets or michael@0: * similar) starts michael@0: * @return |check|-conform start has been found michael@0: */ michael@0: bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos, michael@0: const modetype check, uint32_t& start); michael@0: michael@0: /** michael@0: * @param text (in), pos (in): see FindURL michael@0: * @param check (in): End must be conform with this mode michael@0: * @param start (in): see FindURLStart michael@0: * @param end (out): Similar to |start| param of FindURLStart michael@0: * @return |check|-conform end has been found michael@0: */ michael@0: bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos, michael@0: const modetype check, const uint32_t start, uint32_t& end); michael@0: michael@0: /** michael@0: * @param text (in), pos (in), whathasbeendone (in): see FindURL michael@0: * @param check (in): Current mode michael@0: * @param start (in), end (in): see FindURLEnd michael@0: * @param txtURL (out): Guessed (raw) URL. michael@0: * Without whitespace, but not completed. michael@0: * @param desc (out): Link as shown to the user, but already escaped. michael@0: * Should be placed between the and tags. michael@0: * @param replaceBefore(out), replaceAfter (out): see FindURL michael@0: */ michael@0: void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, michael@0: const uint32_t pos, const uint32_t whathasbeendone, michael@0: const modetype check, const uint32_t start, const uint32_t end, michael@0: nsString& txtURL, nsString& desc, michael@0: int32_t& replaceBefore, int32_t& replaceAfter); michael@0: michael@0: /** michael@0: * @param txtURL (in), desc (in): see CalculateURLBoundaries michael@0: * @param outputHTML (out): see FindURL michael@0: * @return A valid URL could be found (and creation of HTML successful) michael@0: */ michael@0: bool CheckURLAndCreateHTML( michael@0: const nsString& txtURL, const nsString& desc, const modetype mode, michael@0: nsString& outputHTML); michael@0: michael@0: /** michael@0: @param text (in): line of text possibly with tagTXT.

michael@0: if col0 is true, michael@0: starting with tagTXT
michael@0: else michael@0: starting one char before tagTXT michael@0: @param col0 (in): tagTXT is on the beginning of the line (or paragraph). michael@0: open must be 0 then. michael@0: @param tagTXT (in): Tag in plaintext to search for, e.g. "*" michael@0: @param aTagTxtLen (in): length of tagTXT. michael@0: @param tagHTML (in): HTML-Tag to replace tagTXT with, michael@0: without "<" and ">", e.g. "strong" michael@0: @param attributeHTML (in): HTML-attribute to add to opening tagHTML, michael@0: e.g. "class=txt_star" michael@0: @param aOutString: string to APPEND the converted html into michael@0: @param open (in/out): Number of currently open tags of type tagHTML michael@0: @return Conversion succeeded michael@0: */ michael@0: bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0, michael@0: const char16_t* tagTXT, michael@0: int32_t aTagTxtLen, michael@0: const char* tagHTML, const char* attributeHTML, michael@0: nsString& aOutputString, uint32_t& openTags); michael@0: michael@0: /** michael@0: @param text (in), col0 (in): see GlyphHit michael@0: @param tagTXT (in): Smily, see also StructPhraseHit michael@0: @param imageName (in): the basename of the file that contains the image for this smilie michael@0: @param outputHTML (out): new string containing the html for the smily michael@0: @param glyphTextLen (out): see GlyphHit michael@0: */ michael@0: bool michael@0: SmilyHit(const char16_t * aInString, int32_t aLength, bool col0, michael@0: const char* tagTXT, const char* imageName, michael@0: nsString& outputHTML, int32_t& glyphTextLen); michael@0: michael@0: /** michael@0: Checks, if we can replace some chars at the start of line with prettier HTML michael@0: code.

michael@0: If success is reported, replace the first glyphTextLen chars with outputHTML michael@0: michael@0: @param text (in): line of text possibly with Glyph.

michael@0: If col0 is true, michael@0: starting with Glyph
michael@0: else michael@0: starting one char before Glyph michael@0: @param col0 (in): text starts at the beginning of the line (or paragraph) michael@0: @param aOutString (out): APPENDS html for the glyph to this string michael@0: @param glyphTextLen (out): Length of original text to replace michael@0: @return see StructPhraseHit michael@0: */ michael@0: bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0, michael@0: nsString& aOutString, int32_t& glyphTextLen); michael@0: michael@0: /** michael@0: Check if a given url should be linkified. michael@0: @param aURL (in): url to be checked on. michael@0: */ michael@0: bool ShouldLinkify(const nsCString& aURL); michael@0: }; michael@0: michael@0: // It's said, that Win32 and Mac don't like static const members michael@0: const int32_t mozTXTToHTMLConv_lastMode = 4; michael@0: // Needed (only) by mozTXTToHTMLConv::FindURL michael@0: const int32_t mozTXTToHTMLConv_numberOfModes = 4; // dito; unknown not counted michael@0: michael@0: #endif