Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2000-2012, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: uparse.c |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2000apr18 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * This file provides a parser for files that are delimited by one single |
michael@0 | 17 | * character like ';' or TAB. Example: the Unicode Character Properties files |
michael@0 | 18 | * like UnicodeData.txt are semicolon-delimited. |
michael@0 | 19 | */ |
michael@0 | 20 | |
michael@0 | 21 | #include "unicode/utypes.h" |
michael@0 | 22 | #include "unicode/uchar.h" |
michael@0 | 23 | #include "unicode/ustring.h" |
michael@0 | 24 | #include "unicode/utf16.h" |
michael@0 | 25 | #include "cstring.h" |
michael@0 | 26 | #include "filestrm.h" |
michael@0 | 27 | #include "uparse.h" |
michael@0 | 28 | #include "ustr_imp.h" |
michael@0 | 29 | |
michael@0 | 30 | #include <stdio.h> |
michael@0 | 31 | |
michael@0 | 32 | U_CAPI const char * U_EXPORT2 |
michael@0 | 33 | u_skipWhitespace(const char *s) { |
michael@0 | 34 | while(U_IS_INV_WHITESPACE(*s)) { |
michael@0 | 35 | ++s; |
michael@0 | 36 | } |
michael@0 | 37 | return s; |
michael@0 | 38 | } |
michael@0 | 39 | |
michael@0 | 40 | U_CAPI char * U_EXPORT2 |
michael@0 | 41 | u_rtrim(char *s) { |
michael@0 | 42 | char *end=uprv_strchr(s, 0); |
michael@0 | 43 | while(s<end && U_IS_INV_WHITESPACE(*(end-1))) { |
michael@0 | 44 | *--end = 0; |
michael@0 | 45 | } |
michael@0 | 46 | return end; |
michael@0 | 47 | } |
michael@0 | 48 | |
/*
 * Recognize a "# @missing:" prefix, with optional white space before and
 * between the tokens '#', '@', "missing" and ':'.
 * If the prefix is present, return a pointer to the first non-whitespace
 * character after the colon; otherwise return s unchanged.
 * Unicode 5.0 adds such lines in some data files to document
 * default property values.
 */
static const char *
getMissingLimit(const char *s) {
    const char *p=u_skipWhitespace(s);

    if(*p!='#') {
        return s;
    }
    p=u_skipWhitespace(p+1);
    if(*p!='@') {
        return s;
    }
    p=u_skipWhitespace(p+1);
    if(0!=strncmp(p, "missing", 7)) {
        return s;
    }
    p=u_skipWhitespace(p+7);
    if(*p!=':') {
        return s;
    }
    return u_skipWhitespace(p+1);
}
michael@0 | 71 | |
michael@0 | 72 | U_CAPI void U_EXPORT2 |
michael@0 | 73 | u_parseDelimitedFile(const char *filename, char delimiter, |
michael@0 | 74 | char *fields[][2], int32_t fieldCount, |
michael@0 | 75 | UParseLineFn *lineFn, void *context, |
michael@0 | 76 | UErrorCode *pErrorCode) { |
michael@0 | 77 | FileStream *file; |
michael@0 | 78 | char line[300]; |
michael@0 | 79 | char *start, *limit; |
michael@0 | 80 | int32_t i, length; |
michael@0 | 81 | |
michael@0 | 82 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 83 | return; |
michael@0 | 84 | } |
michael@0 | 85 | |
michael@0 | 86 | if(fields==NULL || lineFn==NULL || fieldCount<=0) { |
michael@0 | 87 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 88 | return; |
michael@0 | 89 | } |
michael@0 | 90 | |
michael@0 | 91 | if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) { |
michael@0 | 92 | filename=NULL; |
michael@0 | 93 | file=T_FileStream_stdin(); |
michael@0 | 94 | } else { |
michael@0 | 95 | file=T_FileStream_open(filename, "r"); |
michael@0 | 96 | } |
michael@0 | 97 | if(file==NULL) { |
michael@0 | 98 | *pErrorCode=U_FILE_ACCESS_ERROR; |
michael@0 | 99 | return; |
michael@0 | 100 | } |
michael@0 | 101 | |
michael@0 | 102 | while(T_FileStream_readLine(file, line, sizeof(line))!=NULL) { |
michael@0 | 103 | /* remove trailing newline characters */ |
michael@0 | 104 | length=(int32_t)(u_rtrim(line)-line); |
michael@0 | 105 | |
michael@0 | 106 | /* |
michael@0 | 107 | * detect a line with # @missing: |
michael@0 | 108 | * start parsing after that, or else from the beginning of the line |
michael@0 | 109 | * set the default warning for @missing lines |
michael@0 | 110 | */ |
michael@0 | 111 | start=(char *)getMissingLimit(line); |
michael@0 | 112 | if(start==line) { |
michael@0 | 113 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 114 | } else { |
michael@0 | 115 | *pErrorCode=U_USING_DEFAULT_WARNING; |
michael@0 | 116 | } |
michael@0 | 117 | |
michael@0 | 118 | /* skip this line if it is empty or a comment */ |
michael@0 | 119 | if(*start==0 || *start=='#') { |
michael@0 | 120 | continue; |
michael@0 | 121 | } |
michael@0 | 122 | |
michael@0 | 123 | /* remove in-line comments */ |
michael@0 | 124 | limit=uprv_strchr(start, '#'); |
michael@0 | 125 | if(limit!=NULL) { |
michael@0 | 126 | /* get white space before the pound sign */ |
michael@0 | 127 | while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))) { |
michael@0 | 128 | --limit; |
michael@0 | 129 | } |
michael@0 | 130 | |
michael@0 | 131 | /* truncate the line */ |
michael@0 | 132 | *limit=0; |
michael@0 | 133 | } |
michael@0 | 134 | |
michael@0 | 135 | /* skip lines with only whitespace */ |
michael@0 | 136 | if(u_skipWhitespace(start)[0]==0) { |
michael@0 | 137 | continue; |
michael@0 | 138 | } |
michael@0 | 139 | |
michael@0 | 140 | /* for each field, call the corresponding field function */ |
michael@0 | 141 | for(i=0; i<fieldCount; ++i) { |
michael@0 | 142 | /* set the limit pointer of this field */ |
michael@0 | 143 | limit=start; |
michael@0 | 144 | while(*limit!=delimiter && *limit!=0) { |
michael@0 | 145 | ++limit; |
michael@0 | 146 | } |
michael@0 | 147 | |
michael@0 | 148 | /* set the field start and limit in the fields array */ |
michael@0 | 149 | fields[i][0]=start; |
michael@0 | 150 | fields[i][1]=limit; |
michael@0 | 151 | |
michael@0 | 152 | /* set start to the beginning of the next field, if any */ |
michael@0 | 153 | start=limit; |
michael@0 | 154 | if(*start!=0) { |
michael@0 | 155 | ++start; |
michael@0 | 156 | } else if(i+1<fieldCount) { |
michael@0 | 157 | *pErrorCode=U_PARSE_ERROR; |
michael@0 | 158 | limit=line+length; |
michael@0 | 159 | i=fieldCount; |
michael@0 | 160 | break; |
michael@0 | 161 | } |
michael@0 | 162 | } |
michael@0 | 163 | |
michael@0 | 164 | /* error in a field function? */ |
michael@0 | 165 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 166 | break; |
michael@0 | 167 | } |
michael@0 | 168 | |
michael@0 | 169 | /* call the field function */ |
michael@0 | 170 | lineFn(context, fields, fieldCount, pErrorCode); |
michael@0 | 171 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 172 | break; |
michael@0 | 173 | } |
michael@0 | 174 | } |
michael@0 | 175 | |
michael@0 | 176 | if(filename!=NULL) { |
michael@0 | 177 | T_FileStream_close(file); |
michael@0 | 178 | } |
michael@0 | 179 | } |
michael@0 | 180 | |
michael@0 | 181 | /* |
michael@0 | 182 | * parse a list of code points |
michael@0 | 183 | * store them as a UTF-32 string in dest[destCapacity] |
michael@0 | 184 | * return the number of code points |
michael@0 | 185 | */ |
michael@0 | 186 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 187 | u_parseCodePoints(const char *s, |
michael@0 | 188 | uint32_t *dest, int32_t destCapacity, |
michael@0 | 189 | UErrorCode *pErrorCode) { |
michael@0 | 190 | char *end; |
michael@0 | 191 | uint32_t value; |
michael@0 | 192 | int32_t count; |
michael@0 | 193 | |
michael@0 | 194 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 195 | return 0; |
michael@0 | 196 | } |
michael@0 | 197 | if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { |
michael@0 | 198 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 199 | return 0; |
michael@0 | 200 | } |
michael@0 | 201 | |
michael@0 | 202 | count=0; |
michael@0 | 203 | for(;;) { |
michael@0 | 204 | s=u_skipWhitespace(s); |
michael@0 | 205 | if(*s==';' || *s==0) { |
michael@0 | 206 | return count; |
michael@0 | 207 | } |
michael@0 | 208 | |
michael@0 | 209 | /* read one code point */ |
michael@0 | 210 | value=(uint32_t)uprv_strtoul(s, &end, 16); |
michael@0 | 211 | if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { |
michael@0 | 212 | *pErrorCode=U_PARSE_ERROR; |
michael@0 | 213 | return 0; |
michael@0 | 214 | } |
michael@0 | 215 | |
michael@0 | 216 | /* append it to the destination array */ |
michael@0 | 217 | if(count<destCapacity) { |
michael@0 | 218 | dest[count++]=value; |
michael@0 | 219 | } else { |
michael@0 | 220 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
michael@0 | 221 | } |
michael@0 | 222 | |
michael@0 | 223 | /* go to the following characters */ |
michael@0 | 224 | s=end; |
michael@0 | 225 | } |
michael@0 | 226 | } |
michael@0 | 227 | |
michael@0 | 228 | /* |
michael@0 | 229 | * parse a list of code points |
michael@0 | 230 | * store them as a string in dest[destCapacity] |
michael@0 | 231 | * set the first code point in *pFirst |
michael@0 | 232 | * @return The length of the string in numbers of UChars. |
michael@0 | 233 | */ |
michael@0 | 234 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 235 | u_parseString(const char *s, |
michael@0 | 236 | UChar *dest, int32_t destCapacity, |
michael@0 | 237 | uint32_t *pFirst, |
michael@0 | 238 | UErrorCode *pErrorCode) { |
michael@0 | 239 | char *end; |
michael@0 | 240 | uint32_t value; |
michael@0 | 241 | int32_t destLength; |
michael@0 | 242 | |
michael@0 | 243 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 244 | return 0; |
michael@0 | 245 | } |
michael@0 | 246 | if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { |
michael@0 | 247 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 248 | return 0; |
michael@0 | 249 | } |
michael@0 | 250 | |
michael@0 | 251 | if(pFirst!=NULL) { |
michael@0 | 252 | *pFirst=0xffffffff; |
michael@0 | 253 | } |
michael@0 | 254 | |
michael@0 | 255 | destLength=0; |
michael@0 | 256 | for(;;) { |
michael@0 | 257 | s=u_skipWhitespace(s); |
michael@0 | 258 | if(*s==';' || *s==0) { |
michael@0 | 259 | if(destLength<destCapacity) { |
michael@0 | 260 | dest[destLength]=0; |
michael@0 | 261 | } else if(destLength==destCapacity) { |
michael@0 | 262 | *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; |
michael@0 | 263 | } else { |
michael@0 | 264 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
michael@0 | 265 | } |
michael@0 | 266 | return destLength; |
michael@0 | 267 | } |
michael@0 | 268 | |
michael@0 | 269 | /* read one code point */ |
michael@0 | 270 | value=(uint32_t)uprv_strtoul(s, &end, 16); |
michael@0 | 271 | if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { |
michael@0 | 272 | *pErrorCode=U_PARSE_ERROR; |
michael@0 | 273 | return 0; |
michael@0 | 274 | } |
michael@0 | 275 | |
michael@0 | 276 | /* store the first code point */ |
michael@0 | 277 | if(pFirst!=NULL) { |
michael@0 | 278 | *pFirst=value; |
michael@0 | 279 | pFirst=NULL; |
michael@0 | 280 | } |
michael@0 | 281 | |
michael@0 | 282 | /* append it to the destination array */ |
michael@0 | 283 | if((destLength+U16_LENGTH(value))<=destCapacity) { |
michael@0 | 284 | U16_APPEND_UNSAFE(dest, destLength, value); |
michael@0 | 285 | } else { |
michael@0 | 286 | destLength+=U16_LENGTH(value); |
michael@0 | 287 | } |
michael@0 | 288 | |
michael@0 | 289 | /* go to the following characters */ |
michael@0 | 290 | s=end; |
michael@0 | 291 | } |
michael@0 | 292 | } |
michael@0 | 293 | |
michael@0 | 294 | /* read a range like start or start..end */ |
michael@0 | 295 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 296 | u_parseCodePointRangeAnyTerminator(const char *s, |
michael@0 | 297 | uint32_t *pStart, uint32_t *pEnd, |
michael@0 | 298 | const char **terminator, |
michael@0 | 299 | UErrorCode *pErrorCode) { |
michael@0 | 300 | char *end; |
michael@0 | 301 | uint32_t value; |
michael@0 | 302 | |
michael@0 | 303 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 304 | return 0; |
michael@0 | 305 | } |
michael@0 | 306 | if(s==NULL || pStart==NULL || pEnd==NULL) { |
michael@0 | 307 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 308 | return 0; |
michael@0 | 309 | } |
michael@0 | 310 | |
michael@0 | 311 | /* read the start code point */ |
michael@0 | 312 | s=u_skipWhitespace(s); |
michael@0 | 313 | value=(uint32_t)uprv_strtoul(s, &end, 16); |
michael@0 | 314 | if(end<=s || value>=0x110000) { |
michael@0 | 315 | *pErrorCode=U_PARSE_ERROR; |
michael@0 | 316 | return 0; |
michael@0 | 317 | } |
michael@0 | 318 | *pStart=*pEnd=value; |
michael@0 | 319 | |
michael@0 | 320 | /* is there a "..end"? */ |
michael@0 | 321 | s=u_skipWhitespace(end); |
michael@0 | 322 | if(*s!='.' || s[1]!='.') { |
michael@0 | 323 | *terminator=end; |
michael@0 | 324 | return 1; |
michael@0 | 325 | } |
michael@0 | 326 | s=u_skipWhitespace(s+2); |
michael@0 | 327 | |
michael@0 | 328 | /* read the end code point */ |
michael@0 | 329 | value=(uint32_t)uprv_strtoul(s, &end, 16); |
michael@0 | 330 | if(end<=s || value>=0x110000) { |
michael@0 | 331 | *pErrorCode=U_PARSE_ERROR; |
michael@0 | 332 | return 0; |
michael@0 | 333 | } |
michael@0 | 334 | *pEnd=value; |
michael@0 | 335 | |
michael@0 | 336 | /* is this a valid range? */ |
michael@0 | 337 | if(value<*pStart) { |
michael@0 | 338 | *pErrorCode=U_PARSE_ERROR; |
michael@0 | 339 | return 0; |
michael@0 | 340 | } |
michael@0 | 341 | |
michael@0 | 342 | *terminator=end; |
michael@0 | 343 | return value-*pStart+1; |
michael@0 | 344 | } |
michael@0 | 345 | |
michael@0 | 346 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 347 | u_parseCodePointRange(const char *s, |
michael@0 | 348 | uint32_t *pStart, uint32_t *pEnd, |
michael@0 | 349 | UErrorCode *pErrorCode) { |
michael@0 | 350 | const char *terminator; |
michael@0 | 351 | int32_t rangeLength= |
michael@0 | 352 | u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); |
michael@0 | 353 | if(U_SUCCESS(*pErrorCode)) { |
michael@0 | 354 | terminator=u_skipWhitespace(terminator); |
michael@0 | 355 | if(*terminator!=';' && *terminator!=0) { |
michael@0 | 356 | *pErrorCode=U_PARSE_ERROR; |
michael@0 | 357 | return 0; |
michael@0 | 358 | } |
michael@0 | 359 | } |
michael@0 | 360 | return rangeLength; |
michael@0 | 361 | } |
michael@0 | 362 | |
michael@0 | 363 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 364 | u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { |
michael@0 | 365 | const char *read = source; |
michael@0 | 366 | int32_t i = 0; |
michael@0 | 367 | unsigned int value = 0; |
michael@0 | 368 | if(sLen == -1) { |
michael@0 | 369 | sLen = (int32_t)strlen(source); |
michael@0 | 370 | } |
michael@0 | 371 | |
michael@0 | 372 | while(read < source+sLen) { |
michael@0 | 373 | sscanf(read, "%2x", &value); |
michael@0 | 374 | if(i < destCapacity) { |
michael@0 | 375 | dest[i] = (char)value; |
michael@0 | 376 | } |
michael@0 | 377 | i++; |
michael@0 | 378 | read += 2; |
michael@0 | 379 | } |
michael@0 | 380 | return u_terminateChars(dest, destCapacity, i, status); |
michael@0 | 381 | } |