intl/icu/source/tools/genrb/prscmnts.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2003-2007, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 *
michael@0 7 * File prscmnts.cpp
michael@0 8 *
michael@0 9 * Modification History:
michael@0 10 *
michael@0 11 * Date Name Description
michael@0 12 * 08/22/2003 ram Creation.
michael@0 13 *******************************************************************************
michael@0 14 */
michael@0 15
michael@0 16 #include "unicode/regex.h"
michael@0 17 #include "unicode/unistr.h"
michael@0 18 #include "unicode/parseerr.h"
michael@0 19 #include "prscmnts.h"
michael@0 20 #include <stdio.h>
michael@0 21 #include <stdlib.h>
michael@0 22
michael@0 23 U_NAMESPACE_USE
michael@0 24
michael@0 25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
michael@0 26
michael@0 27 #define MAX_SPLIT_STRINGS 20
michael@0 28
michael@0 29 const char *patternStrings[UPC_LIMIT]={
michael@0 30 "^translate\\s*(.*)",
michael@0 31 "^note\\s*(.*)"
michael@0 32 };
michael@0 33
michael@0 34 U_CFUNC int32_t
michael@0 35 removeText(UChar *source, int32_t srcLen,
michael@0 36 UnicodeString patString,uint32_t options,
michael@0 37 UnicodeString replaceText, UErrorCode *status){
michael@0 38
michael@0 39 if(status == NULL || U_FAILURE(*status)){
michael@0 40 return 0;
michael@0 41 }
michael@0 42
michael@0 43 UnicodeString src(source, srcLen);
michael@0 44
michael@0 45 RegexMatcher myMatcher(patString, src, options, *status);
michael@0 46 if(U_FAILURE(*status)){
michael@0 47 return 0;
michael@0 48 }
michael@0 49 UnicodeString dest;
michael@0 50
michael@0 51
michael@0 52 dest = myMatcher.replaceAll(replaceText,*status);
michael@0 53
michael@0 54
michael@0 55 return dest.extract(source, srcLen, *status);
michael@0 56
michael@0 57 }
michael@0 58 U_CFUNC int32_t
michael@0 59 trim(UChar *src, int32_t srcLen, UErrorCode *status){
michael@0 60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
michael@0 61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
michael@0 62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
michael@0 63 return srcLen;
michael@0 64 }
michael@0 65
michael@0 66 U_CFUNC int32_t
michael@0 67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
michael@0 68 srcLen = trim(source, srcLen, status);
michael@0 69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line
michael@0 70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
michael@0 71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
michael@0 72 }
michael@0 73
michael@0 74 U_CFUNC int32_t
michael@0 75 getText(const UChar* source, int32_t srcLen,
michael@0 76 UChar** dest, int32_t destCapacity,
michael@0 77 UnicodeString patternString,
michael@0 78 UErrorCode* status){
michael@0 79
michael@0 80 if(status == NULL || U_FAILURE(*status)){
michael@0 81 return 0;
michael@0 82 }
michael@0 83
michael@0 84 UnicodeString stringArray[MAX_SPLIT_STRINGS];
michael@0 85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status);
michael@0 86 UnicodeString src (source,srcLen);
michael@0 87
michael@0 88 if (U_FAILURE(*status)) {
michael@0 89 return 0;
michael@0 90 }
michael@0 91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
michael@0 92
michael@0 93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
michael@0 94 if (U_FAILURE(*status)) {
michael@0 95 return 0;
michael@0 96 }
michael@0 97 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
michael@0 98 matcher.reset(stringArray[i]);
michael@0 99 if(matcher.lookingAt(*status)){
michael@0 100 UnicodeString out = matcher.group(1, *status);
michael@0 101
michael@0 102 return out.extract(*dest, destCapacity,*status);
michael@0 103 }
michael@0 104 }
michael@0 105 return 0;
michael@0 106 }
michael@0 107
michael@0 108
michael@0 109 #define AT_SIGN 0x0040
michael@0 110
michael@0 111 U_CFUNC int32_t
michael@0 112 getDescription( const UChar* source, int32_t srcLen,
michael@0 113 UChar** dest, int32_t destCapacity,
michael@0 114 UErrorCode* status){
michael@0 115 if(status == NULL || U_FAILURE(*status)){
michael@0 116 return 0;
michael@0 117 }
michael@0 118
michael@0 119 UnicodeString stringArray[MAX_SPLIT_STRINGS];
michael@0 120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
michael@0 121 UnicodeString src(source, srcLen);
michael@0 122
michael@0 123 if (U_FAILURE(*status)) {
michael@0 124 return 0;
michael@0 125 }
michael@0 126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
michael@0 127
michael@0 128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
michael@0 129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
michael@0 130 return trim(*dest, destLen, status);
michael@0 131 }
michael@0 132 return 0;
michael@0 133 }
michael@0 134
michael@0 135 U_CFUNC int32_t
michael@0 136 getCount(const UChar* source, int32_t srcLen,
michael@0 137 UParseCommentsOption option, UErrorCode *status){
michael@0 138
michael@0 139 if(status == NULL || U_FAILURE(*status)){
michael@0 140 return 0;
michael@0 141 }
michael@0 142
michael@0 143 UnicodeString stringArray[MAX_SPLIT_STRINGS];
michael@0 144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
michael@0 145 UnicodeString src (source, srcLen);
michael@0 146
michael@0 147
michael@0 148 if (U_FAILURE(*status)) {
michael@0 149 return 0;
michael@0 150 }
michael@0 151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
michael@0 152
michael@0 153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
michael@0 154 if (U_FAILURE(*status)) {
michael@0 155 return 0;
michael@0 156 }
michael@0 157 int32_t count = 0;
michael@0 158 for(int32_t i=0; i<retLen; i++){
michael@0 159 matcher.reset(stringArray[i]);
michael@0 160 if(matcher.lookingAt(*status)){
michael@0 161 count++;
michael@0 162 }
michael@0 163 }
michael@0 164 if(option == UPC_TRANSLATE && count > 1){
michael@0 165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
michael@0 166 exit(U_UNSUPPORTED_ERROR);
michael@0 167 }
michael@0 168 return count;
michael@0 169 }
michael@0 170
michael@0 171 U_CFUNC int32_t
michael@0 172 getAt(const UChar* source, int32_t srcLen,
michael@0 173 UChar** dest, int32_t destCapacity,
michael@0 174 int32_t index,
michael@0 175 UParseCommentsOption option,
michael@0 176 UErrorCode* status){
michael@0 177
michael@0 178 if(status == NULL || U_FAILURE(*status)){
michael@0 179 return 0;
michael@0 180 }
michael@0 181
michael@0 182 UnicodeString stringArray[MAX_SPLIT_STRINGS];
michael@0 183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
michael@0 184 UnicodeString src (source, srcLen);
michael@0 185
michael@0 186
michael@0 187 if (U_FAILURE(*status)) {
michael@0 188 return 0;
michael@0 189 }
michael@0 190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
michael@0 191
michael@0 192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
michael@0 193 if (U_FAILURE(*status)) {
michael@0 194 return 0;
michael@0 195 }
michael@0 196 int32_t count = 0;
michael@0 197 for(int32_t i=0; i<retLen; i++){
michael@0 198 matcher.reset(stringArray[i]);
michael@0 199 if(matcher.lookingAt(*status)){
michael@0 200 if(count == index){
michael@0 201 UnicodeString out = matcher.group(1, *status);
michael@0 202 return out.extract(*dest, destCapacity,*status);
michael@0 203 }
michael@0 204 count++;
michael@0 205
michael@0 206 }
michael@0 207 }
michael@0 208 return 0;
michael@0 209
michael@0 210 }
michael@0 211
michael@0 212 U_CFUNC int32_t
michael@0 213 getTranslate( const UChar* source, int32_t srcLen,
michael@0 214 UChar** dest, int32_t destCapacity,
michael@0 215 UErrorCode* status){
michael@0 216 UnicodeString notePatternString = "^translate\\s*?(.*)";
michael@0 217
michael@0 218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
michael@0 219 return trim(*dest, destLen, status);
michael@0 220 }
michael@0 221
michael@0 222 U_CFUNC int32_t
michael@0 223 getNote(const UChar* source, int32_t srcLen,
michael@0 224 UChar** dest, int32_t destCapacity,
michael@0 225 UErrorCode* status){
michael@0 226
michael@0 227 UnicodeString notePatternString = "^note\\s*?(.*)";
michael@0 228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
michael@0 229 return trim(*dest, destLen, status);
michael@0 230
michael@0 231 }
michael@0 232
michael@0 233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
michael@0 234

mercurial