intl/icu/source/tools/genrb/prscmnts.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /*
     2  *******************************************************************************
     3  *   Copyright (C) 2003-2007, International Business Machines
     4  *   Corporation and others.  All Rights Reserved.
     5  *******************************************************************************
     6  *
     7  * File prscmnts.cpp
     8  *
     9  * Modification History:
    10  *
    11  *   Date          Name        Description
    12  *   08/22/2003    ram         Creation.
    13  *******************************************************************************
    14  */
    16 #include "unicode/regex.h"
    17 #include "unicode/unistr.h"
    18 #include "unicode/parseerr.h"
    19 #include "prscmnts.h"
    20 #include <stdio.h>
    21 #include <stdlib.h>
    23 U_NAMESPACE_USE
    25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when regular expressions are not available */
    27 #define MAX_SPLIT_STRINGS 20
    29 const char *patternStrings[UPC_LIMIT]={
    30     "^translate\\s*(.*)",
    31     "^note\\s*(.*)"
    32 };
    34 U_CFUNC int32_t 
    35 removeText(UChar *source, int32_t srcLen, 
    36            UnicodeString patString,uint32_t options,  
    37            UnicodeString replaceText, UErrorCode *status){
    39     if(status == NULL || U_FAILURE(*status)){
    40         return 0;
    41     }
    43     UnicodeString src(source, srcLen);
    45     RegexMatcher    myMatcher(patString, src, options, *status);
    46     if(U_FAILURE(*status)){
    47         return 0;
    48     }
    49     UnicodeString dest;
    52     dest = myMatcher.replaceAll(replaceText,*status);
    55     return dest.extract(source, srcLen, *status);
    57 }
    58 U_CFUNC int32_t
    59 trim(UChar *src, int32_t srcLen, UErrorCode *status){
    60      srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
    61      srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
    62      srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
    63      return srcLen;
    64 }
    66 U_CFUNC int32_t 
    67 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
    68     srcLen = trim(source, srcLen, status);
    69     UnicodeString     patString = "^\\s*?\\*\\s*?";     // remove pattern like " * " at the begining of the line
    70     srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
    71     return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
    72 }
    74 U_CFUNC int32_t 
    75 getText(const UChar* source, int32_t srcLen,
    76         UChar** dest, int32_t destCapacity,
    77         UnicodeString patternString, 
    78         UErrorCode* status){
    80     if(status == NULL || U_FAILURE(*status)){
    81         return 0;
    82     }
    84     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    85     RegexPattern      *pattern = RegexPattern::compile("@", 0, *status);
    86     UnicodeString src (source,srcLen);
    88     if (U_FAILURE(*status)) {
    89         return 0;
    90     }
    91     pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    93     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    94     if (U_FAILURE(*status)) {
    95         return 0;
    96     }
    97     for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
    98         matcher.reset(stringArray[i]);
    99         if(matcher.lookingAt(*status)){
   100             UnicodeString out = matcher.group(1, *status);
   102             return out.extract(*dest, destCapacity,*status);
   103         }
   104     }
   105     return 0;
   106 }
   109 #define AT_SIGN  0x0040
   111 U_CFUNC int32_t
   112 getDescription( const UChar* source, int32_t srcLen,
   113                 UChar** dest, int32_t destCapacity,
   114                 UErrorCode* status){
   115     if(status == NULL || U_FAILURE(*status)){
   116         return 0;
   117     }
   119     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
   120     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
   121     UnicodeString src(source, srcLen);
   123     if (U_FAILURE(*status)) {
   124         return 0;
   125     }
   126     pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
   128     if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
   129         int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
   130         return trim(*dest, destLen, status);
   131     }
   132     return 0;
   133 }
   135 U_CFUNC int32_t
   136 getCount(const UChar* source, int32_t srcLen, 
   137          UParseCommentsOption option, UErrorCode *status){
   139     if(status == NULL || U_FAILURE(*status)){
   140         return 0;
   141     }
   143     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
   144     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
   145     UnicodeString src (source, srcLen);
   148     if (U_FAILURE(*status)) {
   149         return 0;
   150     }
   151     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
   153     RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
   154     if (U_FAILURE(*status)) {
   155         return 0;
   156     } 
   157     int32_t count = 0;
   158     for(int32_t i=0; i<retLen; i++){
   159         matcher.reset(stringArray[i]);
   160         if(matcher.lookingAt(*status)){
   161             count++;
   162         }
   163     }
   164     if(option == UPC_TRANSLATE && count > 1){
   165         fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
   166         exit(U_UNSUPPORTED_ERROR);
   167     }
   168     return count;
   169 }
   171 U_CFUNC int32_t 
   172 getAt(const UChar* source, int32_t srcLen,
   173         UChar** dest, int32_t destCapacity,
   174         int32_t index,
   175         UParseCommentsOption option,
   176         UErrorCode* status){
   178     if(status == NULL || U_FAILURE(*status)){
   179         return 0;
   180     }
   182     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
   183     RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
   184     UnicodeString src (source, srcLen);
   187     if (U_FAILURE(*status)) {
   188         return 0;
   189     }
   190     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
   192     RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
   193     if (U_FAILURE(*status)) {
   194         return 0;
   195     } 
   196     int32_t count = 0;
   197     for(int32_t i=0; i<retLen; i++){
   198         matcher.reset(stringArray[i]);
   199         if(matcher.lookingAt(*status)){
   200             if(count == index){
   201                 UnicodeString out = matcher.group(1, *status);
   202                 return out.extract(*dest, destCapacity,*status);
   203             }
   204             count++;
   206         }
   207     }
   208     return 0;
   210 }
   212 U_CFUNC int32_t
   213 getTranslate( const UChar* source, int32_t srcLen,
   214               UChar** dest, int32_t destCapacity,
   215               UErrorCode* status){
   216     UnicodeString     notePatternString = "^translate\\s*?(.*)"; 
   218     int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
   219     return trim(*dest, destLen, status);
   220 }
   222 U_CFUNC int32_t 
   223 getNote(const UChar* source, int32_t srcLen,
   224         UChar** dest, int32_t destCapacity,
   225         UErrorCode* status){
   227     UnicodeString     notePatternString = "^note\\s*?(.*)"; 
   228     int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
   229     return trim(*dest, destLen, status);
   231 }
   233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */

mercurial