Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 2003-2007, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | * |
michael@0 | 7 | * File prscmnts.cpp |
michael@0 | 8 | * |
michael@0 | 9 | * Modification History: |
michael@0 | 10 | * |
michael@0 | 11 | * Date Name Description |
michael@0 | 12 | * 08/22/2003 ram Creation. |
michael@0 | 13 | ******************************************************************************* |
michael@0 | 14 | */ |
michael@0 | 15 | |
michael@0 | 16 | #include "unicode/regex.h" |
michael@0 | 17 | #include "unicode/unistr.h" |
michael@0 | 18 | #include "unicode/parseerr.h" |
michael@0 | 19 | #include "prscmnts.h" |
michael@0 | 20 | #include <stdio.h> |
michael@0 | 21 | #include <stdlib.h> |
michael@0 | 22 | |
michael@0 | 23 | U_NAMESPACE_USE |
michael@0 | 24 | |
michael@0 | 25 | #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ |
michael@0 | 26 | |
michael@0 | 27 | #define MAX_SPLIT_STRINGS 20 |
michael@0 | 28 | |
michael@0 | 29 | const char *patternStrings[UPC_LIMIT]={ |
michael@0 | 30 | "^translate\\s*(.*)", |
michael@0 | 31 | "^note\\s*(.*)" |
michael@0 | 32 | }; |
michael@0 | 33 | |
michael@0 | 34 | U_CFUNC int32_t |
michael@0 | 35 | removeText(UChar *source, int32_t srcLen, |
michael@0 | 36 | UnicodeString patString,uint32_t options, |
michael@0 | 37 | UnicodeString replaceText, UErrorCode *status){ |
michael@0 | 38 | |
michael@0 | 39 | if(status == NULL || U_FAILURE(*status)){ |
michael@0 | 40 | return 0; |
michael@0 | 41 | } |
michael@0 | 42 | |
michael@0 | 43 | UnicodeString src(source, srcLen); |
michael@0 | 44 | |
michael@0 | 45 | RegexMatcher myMatcher(patString, src, options, *status); |
michael@0 | 46 | if(U_FAILURE(*status)){ |
michael@0 | 47 | return 0; |
michael@0 | 48 | } |
michael@0 | 49 | UnicodeString dest; |
michael@0 | 50 | |
michael@0 | 51 | |
michael@0 | 52 | dest = myMatcher.replaceAll(replaceText,*status); |
michael@0 | 53 | |
michael@0 | 54 | |
michael@0 | 55 | return dest.extract(source, srcLen, *status); |
michael@0 | 56 | |
michael@0 | 57 | } |
michael@0 | 58 | U_CFUNC int32_t |
michael@0 | 59 | trim(UChar *src, int32_t srcLen, UErrorCode *status){ |
michael@0 | 60 | srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines |
michael@0 | 61 | srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces |
michael@0 | 62 | srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes |
michael@0 | 63 | return srcLen; |
michael@0 | 64 | } |
michael@0 | 65 | |
michael@0 | 66 | U_CFUNC int32_t |
michael@0 | 67 | removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ |
michael@0 | 68 | srcLen = trim(source, srcLen, status); |
michael@0 | 69 | UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line |
michael@0 | 70 | srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status); |
michael@0 | 71 | return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines; |
michael@0 | 72 | } |
michael@0 | 73 | |
michael@0 | 74 | U_CFUNC int32_t |
michael@0 | 75 | getText(const UChar* source, int32_t srcLen, |
michael@0 | 76 | UChar** dest, int32_t destCapacity, |
michael@0 | 77 | UnicodeString patternString, |
michael@0 | 78 | UErrorCode* status){ |
michael@0 | 79 | |
michael@0 | 80 | if(status == NULL || U_FAILURE(*status)){ |
michael@0 | 81 | return 0; |
michael@0 | 82 | } |
michael@0 | 83 | |
michael@0 | 84 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
michael@0 | 85 | RegexPattern *pattern = RegexPattern::compile("@", 0, *status); |
michael@0 | 86 | UnicodeString src (source,srcLen); |
michael@0 | 87 | |
michael@0 | 88 | if (U_FAILURE(*status)) { |
michael@0 | 89 | return 0; |
michael@0 | 90 | } |
michael@0 | 91 | pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
michael@0 | 92 | |
michael@0 | 93 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); |
michael@0 | 94 | if (U_FAILURE(*status)) { |
michael@0 | 95 | return 0; |
michael@0 | 96 | } |
michael@0 | 97 | for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ |
michael@0 | 98 | matcher.reset(stringArray[i]); |
michael@0 | 99 | if(matcher.lookingAt(*status)){ |
michael@0 | 100 | UnicodeString out = matcher.group(1, *status); |
michael@0 | 101 | |
michael@0 | 102 | return out.extract(*dest, destCapacity,*status); |
michael@0 | 103 | } |
michael@0 | 104 | } |
michael@0 | 105 | return 0; |
michael@0 | 106 | } |
michael@0 | 107 | |
michael@0 | 108 | |
michael@0 | 109 | #define AT_SIGN 0x0040 |
michael@0 | 110 | |
michael@0 | 111 | U_CFUNC int32_t |
michael@0 | 112 | getDescription( const UChar* source, int32_t srcLen, |
michael@0 | 113 | UChar** dest, int32_t destCapacity, |
michael@0 | 114 | UErrorCode* status){ |
michael@0 | 115 | if(status == NULL || U_FAILURE(*status)){ |
michael@0 | 116 | return 0; |
michael@0 | 117 | } |
michael@0 | 118 | |
michael@0 | 119 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
michael@0 | 120 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
michael@0 | 121 | UnicodeString src(source, srcLen); |
michael@0 | 122 | |
michael@0 | 123 | if (U_FAILURE(*status)) { |
michael@0 | 124 | return 0; |
michael@0 | 125 | } |
michael@0 | 126 | pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); |
michael@0 | 127 | |
michael@0 | 128 | if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ |
michael@0 | 129 | int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); |
michael@0 | 130 | return trim(*dest, destLen, status); |
michael@0 | 131 | } |
michael@0 | 132 | return 0; |
michael@0 | 133 | } |
michael@0 | 134 | |
michael@0 | 135 | U_CFUNC int32_t |
michael@0 | 136 | getCount(const UChar* source, int32_t srcLen, |
michael@0 | 137 | UParseCommentsOption option, UErrorCode *status){ |
michael@0 | 138 | |
michael@0 | 139 | if(status == NULL || U_FAILURE(*status)){ |
michael@0 | 140 | return 0; |
michael@0 | 141 | } |
michael@0 | 142 | |
michael@0 | 143 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
michael@0 | 144 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
michael@0 | 145 | UnicodeString src (source, srcLen); |
michael@0 | 146 | |
michael@0 | 147 | |
michael@0 | 148 | if (U_FAILURE(*status)) { |
michael@0 | 149 | return 0; |
michael@0 | 150 | } |
michael@0 | 151 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
michael@0 | 152 | |
michael@0 | 153 | RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
michael@0 | 154 | if (U_FAILURE(*status)) { |
michael@0 | 155 | return 0; |
michael@0 | 156 | } |
michael@0 | 157 | int32_t count = 0; |
michael@0 | 158 | for(int32_t i=0; i<retLen; i++){ |
michael@0 | 159 | matcher.reset(stringArray[i]); |
michael@0 | 160 | if(matcher.lookingAt(*status)){ |
michael@0 | 161 | count++; |
michael@0 | 162 | } |
michael@0 | 163 | } |
michael@0 | 164 | if(option == UPC_TRANSLATE && count > 1){ |
michael@0 | 165 | fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); |
michael@0 | 166 | exit(U_UNSUPPORTED_ERROR); |
michael@0 | 167 | } |
michael@0 | 168 | return count; |
michael@0 | 169 | } |
michael@0 | 170 | |
michael@0 | 171 | U_CFUNC int32_t |
michael@0 | 172 | getAt(const UChar* source, int32_t srcLen, |
michael@0 | 173 | UChar** dest, int32_t destCapacity, |
michael@0 | 174 | int32_t index, |
michael@0 | 175 | UParseCommentsOption option, |
michael@0 | 176 | UErrorCode* status){ |
michael@0 | 177 | |
michael@0 | 178 | if(status == NULL || U_FAILURE(*status)){ |
michael@0 | 179 | return 0; |
michael@0 | 180 | } |
michael@0 | 181 | |
michael@0 | 182 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; |
michael@0 | 183 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); |
michael@0 | 184 | UnicodeString src (source, srcLen); |
michael@0 | 185 | |
michael@0 | 186 | |
michael@0 | 187 | if (U_FAILURE(*status)) { |
michael@0 | 188 | return 0; |
michael@0 | 189 | } |
michael@0 | 190 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); |
michael@0 | 191 | |
michael@0 | 192 | RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); |
michael@0 | 193 | if (U_FAILURE(*status)) { |
michael@0 | 194 | return 0; |
michael@0 | 195 | } |
michael@0 | 196 | int32_t count = 0; |
michael@0 | 197 | for(int32_t i=0; i<retLen; i++){ |
michael@0 | 198 | matcher.reset(stringArray[i]); |
michael@0 | 199 | if(matcher.lookingAt(*status)){ |
michael@0 | 200 | if(count == index){ |
michael@0 | 201 | UnicodeString out = matcher.group(1, *status); |
michael@0 | 202 | return out.extract(*dest, destCapacity,*status); |
michael@0 | 203 | } |
michael@0 | 204 | count++; |
michael@0 | 205 | |
michael@0 | 206 | } |
michael@0 | 207 | } |
michael@0 | 208 | return 0; |
michael@0 | 209 | |
michael@0 | 210 | } |
michael@0 | 211 | |
michael@0 | 212 | U_CFUNC int32_t |
michael@0 | 213 | getTranslate( const UChar* source, int32_t srcLen, |
michael@0 | 214 | UChar** dest, int32_t destCapacity, |
michael@0 | 215 | UErrorCode* status){ |
michael@0 | 216 | UnicodeString notePatternString = "^translate\\s*?(.*)"; |
michael@0 | 217 | |
michael@0 | 218 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); |
michael@0 | 219 | return trim(*dest, destLen, status); |
michael@0 | 220 | } |
michael@0 | 221 | |
michael@0 | 222 | U_CFUNC int32_t |
michael@0 | 223 | getNote(const UChar* source, int32_t srcLen, |
michael@0 | 224 | UChar** dest, int32_t destCapacity, |
michael@0 | 225 | UErrorCode* status){ |
michael@0 | 226 | |
michael@0 | 227 | UnicodeString notePatternString = "^note\\s*?(.*)"; |
michael@0 | 228 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); |
michael@0 | 229 | return trim(*dest, destLen, status); |
michael@0 | 230 | |
michael@0 | 231 | } |
michael@0 | 232 | |
michael@0 | 233 | #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ |
michael@0 | 234 |