michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2003-2007, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * michael@0: * File prscmnts.cpp michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 08/22/2003 ram Creation. michael@0: ******************************************************************************* michael@0: */ michael@0: michael@0: #include "unicode/regex.h" michael@0: #include "unicode/unistr.h" michael@0: #include "unicode/parseerr.h" michael@0: #include "prscmnts.h" michael@0: #include michael@0: #include michael@0: michael@0: U_NAMESPACE_USE michael@0: michael@0: #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ michael@0: michael@0: #define MAX_SPLIT_STRINGS 20 michael@0: michael@0: const char *patternStrings[UPC_LIMIT]={ michael@0: "^translate\\s*(.*)", michael@0: "^note\\s*(.*)" michael@0: }; michael@0: michael@0: U_CFUNC int32_t michael@0: removeText(UChar *source, int32_t srcLen, michael@0: UnicodeString patString,uint32_t options, michael@0: UnicodeString replaceText, UErrorCode *status){ michael@0: michael@0: if(status == NULL || U_FAILURE(*status)){ michael@0: return 0; michael@0: } michael@0: michael@0: UnicodeString src(source, srcLen); michael@0: michael@0: RegexMatcher myMatcher(patString, src, options, *status); michael@0: if(U_FAILURE(*status)){ michael@0: return 0; michael@0: } michael@0: UnicodeString dest; michael@0: michael@0: michael@0: dest = myMatcher.replaceAll(replaceText,*status); michael@0: michael@0: michael@0: return dest.extract(source, srcLen, *status); michael@0: michael@0: } michael@0: U_CFUNC int32_t michael@0: trim(UChar *src, int32_t srcLen, UErrorCode *status){ michael@0: srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines michael@0: srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces michael@0: srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes michael@0: return srcLen; michael@0: } michael@0: michael@0: U_CFUNC int32_t michael@0: removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ michael@0: srcLen = trim(source, srcLen, status); michael@0: UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line michael@0: srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status); michael@0: return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines; michael@0: } michael@0: michael@0: U_CFUNC int32_t michael@0: getText(const UChar* source, int32_t srcLen, michael@0: UChar** dest, int32_t destCapacity, michael@0: UnicodeString patternString, michael@0: UErrorCode* status){ michael@0: michael@0: if(status == NULL || U_FAILURE(*status)){ michael@0: return 0; michael@0: } michael@0: michael@0: UnicodeString stringArray[MAX_SPLIT_STRINGS]; michael@0: RegexPattern *pattern = RegexPattern::compile("@", 0, *status); michael@0: UnicodeString src (source,srcLen); michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); michael@0: michael@0: RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: for(int32_t i=0; isplit(src, stringArray,MAX_SPLIT_STRINGS , *status); michael@0: michael@0: if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ michael@0: int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); michael@0: return trim(*dest, destLen, status); michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: U_CFUNC int32_t michael@0: getCount(const UChar* source, int32_t srcLen, michael@0: UParseCommentsOption option, UErrorCode *status){ michael@0: michael@0: if(status == NULL || U_FAILURE(*status)){ michael@0: return 0; michael@0: } michael@0: michael@0: UnicodeString stringArray[MAX_SPLIT_STRINGS]; michael@0: RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); michael@0: UnicodeString src (source, srcLen); michael@0: michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); michael@0: michael@0: RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: int32_t count = 0; michael@0: for(int32_t i=0; i 1){ michael@0: fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); michael@0: exit(U_UNSUPPORTED_ERROR); michael@0: } michael@0: return count; michael@0: } michael@0: michael@0: U_CFUNC int32_t michael@0: getAt(const UChar* source, int32_t srcLen, michael@0: UChar** dest, int32_t destCapacity, michael@0: int32_t index, michael@0: UParseCommentsOption option, michael@0: UErrorCode* status){ michael@0: michael@0: if(status == NULL || U_FAILURE(*status)){ michael@0: return 0; michael@0: } michael@0: michael@0: UnicodeString stringArray[MAX_SPLIT_STRINGS]; michael@0: RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); michael@0: UnicodeString src (source, srcLen); michael@0: michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); michael@0: michael@0: RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: int32_t count = 0; michael@0: for(int32_t i=0; i