michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include "prefread.h" michael@0: #include "nsString.h" michael@0: #include "nsUTF8Utils.h" michael@0: michael@0: #ifdef TEST_PREFREAD michael@0: #include michael@0: #define NS_WARNING(_s) printf(">>> " _s "!\n") michael@0: #define NS_NOTREACHED(_s) NS_WARNING(_s) michael@0: #else michael@0: #include "nsDebug.h" // for NS_WARNING michael@0: #endif michael@0: michael@0: /* pref parser states */ michael@0: enum { michael@0: PREF_PARSE_INIT, michael@0: PREF_PARSE_MATCH_STRING, michael@0: PREF_PARSE_UNTIL_NAME, michael@0: PREF_PARSE_QUOTED_STRING, michael@0: PREF_PARSE_UNTIL_COMMA, michael@0: PREF_PARSE_UNTIL_VALUE, michael@0: PREF_PARSE_INT_VALUE, michael@0: PREF_PARSE_COMMENT_MAYBE_START, michael@0: PREF_PARSE_COMMENT_BLOCK, michael@0: PREF_PARSE_COMMENT_BLOCK_MAYBE_END, michael@0: PREF_PARSE_ESC_SEQUENCE, michael@0: PREF_PARSE_HEX_ESCAPE, michael@0: PREF_PARSE_UTF16_LOW_SURROGATE, michael@0: PREF_PARSE_UNTIL_OPEN_PAREN, michael@0: PREF_PARSE_UNTIL_CLOSE_PAREN, michael@0: PREF_PARSE_UNTIL_SEMICOLON, michael@0: PREF_PARSE_UNTIL_EOL michael@0: }; michael@0: michael@0: #define UTF16_ESC_NUM_DIGITS 4 michael@0: #define HEX_ESC_NUM_DIGITS 2 michael@0: #define BITS_PER_HEX_DIGIT 4 michael@0: michael@0: static const char kUserPref[] = "user_pref"; michael@0: static const char kPref[] = "pref"; michael@0: static const char kTrue[] = "true"; michael@0: static const char kFalse[] = "false"; michael@0: michael@0: /** michael@0: * pref_GrowBuf michael@0: * michael@0: * this function will increase the size of the buffer owned michael@0: * by the given pref parse state. We currently use a simple michael@0: * doubling algorithm, but the only hard requirement is that michael@0: * it increase the buffer by at least the size of the ps->esctmp michael@0: * buffer used for escape processing (currently 6 bytes). michael@0: * michael@0: * this buffer is used to store partial pref lines. it is michael@0: * freed when the parse state is destroyed. michael@0: * michael@0: * @param ps michael@0: * parse state instance michael@0: * michael@0: * this function updates all pointers that reference an michael@0: * address within lb since realloc may relocate the buffer. michael@0: * michael@0: * @return false if insufficient memory. michael@0: */ michael@0: static bool michael@0: pref_GrowBuf(PrefParseState *ps) michael@0: { michael@0: int bufLen, curPos, valPos; michael@0: michael@0: bufLen = ps->lbend - ps->lb; michael@0: curPos = ps->lbcur - ps->lb; michael@0: valPos = ps->vb - ps->lb; michael@0: michael@0: if (bufLen == 0) michael@0: bufLen = 128; /* default buffer size */ michael@0: else michael@0: bufLen <<= 1; /* double buffer size */ michael@0: michael@0: #ifdef TEST_PREFREAD michael@0: fprintf(stderr, ">>> realloc(%d)\n", bufLen); michael@0: #endif michael@0: michael@0: ps->lb = (char*) realloc(ps->lb, bufLen); michael@0: if (!ps->lb) michael@0: return false; michael@0: michael@0: ps->lbcur = ps->lb + curPos; michael@0: ps->lbend = ps->lb + bufLen; michael@0: ps->vb = ps->lb + valPos; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * pref_DoCallback michael@0: * michael@0: * this function is called when a complete pref name-value pair has michael@0: * been extracted from the input data. michael@0: * michael@0: * @param ps michael@0: * parse state instance michael@0: * michael@0: * @return false to indicate a fatal error. michael@0: */ michael@0: static bool michael@0: pref_DoCallback(PrefParseState *ps) michael@0: { michael@0: PrefValue value; michael@0: michael@0: switch (ps->vtype) { michael@0: case PREF_STRING: michael@0: value.stringVal = ps->vb; michael@0: break; michael@0: case PREF_INT: michael@0: if ((ps->vb[0] == '-' || ps->vb[0] == '+') && ps->vb[1] == '\0') { michael@0: NS_WARNING("malformed integer value"); michael@0: return false; michael@0: } michael@0: value.intVal = atoi(ps->vb); michael@0: break; michael@0: case PREF_BOOL: michael@0: value.boolVal = (ps->vb == kTrue); michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: (*ps->reader)(ps->closure, ps->lb, value, ps->vtype, ps->fdefault); michael@0: return true; michael@0: } michael@0: michael@0: void michael@0: PREF_InitParseState(PrefParseState *ps, PrefReader reader, void *closure) michael@0: { michael@0: memset(ps, 0, sizeof(*ps)); michael@0: ps->reader = reader; michael@0: ps->closure = closure; michael@0: } michael@0: michael@0: void michael@0: PREF_FinalizeParseState(PrefParseState *ps) michael@0: { michael@0: if (ps->lb) michael@0: free(ps->lb); michael@0: } michael@0: michael@0: /** michael@0: * Pseudo-BNF michael@0: * ---------- michael@0: * function = LJUNK function-name JUNK function-args michael@0: * function-name = "user_pref" | "pref" michael@0: * function-args = "(" JUNK pref-name JUNK "," JUNK pref-value JUNK ")" JUNK ";" michael@0: * pref-name = quoted-string michael@0: * pref-value = quoted-string | "true" | "false" | integer-value michael@0: * JUNK = *(WS | comment-block | comment-line) michael@0: * LJUNK = *(WS | comment-block | comment-line | bcomment-line) michael@0: * WS = SP | HT | LF | VT | FF | CR michael@0: * SP = michael@0: * HT = michael@0: * LF = michael@0: * VT = michael@0: * FF = michael@0: * CR = michael@0: * comment-block = michael@0: * comment-line = michael@0: * bcomment-line = michael@0: */ michael@0: bool michael@0: PREF_ParseBuf(PrefParseState *ps, const char *buf, int bufLen) michael@0: { michael@0: const char *end; michael@0: char c; michael@0: char udigit; michael@0: int state; michael@0: michael@0: state = ps->state; michael@0: for (end = buf + bufLen; buf != end; ++buf) { michael@0: c = *buf; michael@0: switch (state) { michael@0: /* initial state */ michael@0: case PREF_PARSE_INIT: michael@0: if (ps->lbcur != ps->lb) { /* reset state */ michael@0: ps->lbcur = ps->lb; michael@0: ps->vb = nullptr; michael@0: ps->vtype = PREF_INVALID; michael@0: ps->fdefault = false; michael@0: } michael@0: switch (c) { michael@0: case '/': /* begin comment block or line? */ michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: break; michael@0: case '#': /* accept shell style comments */ michael@0: state = PREF_PARSE_UNTIL_EOL; michael@0: break; michael@0: case 'u': /* indicating user_pref */ michael@0: case 'p': /* indicating pref */ michael@0: ps->smatch = (c == 'u' ? kUserPref : kPref); michael@0: ps->sindex = 1; michael@0: ps->nextstate = PREF_PARSE_UNTIL_OPEN_PAREN; michael@0: state = PREF_PARSE_MATCH_STRING; michael@0: break; michael@0: /* else skip char */ michael@0: } michael@0: break; michael@0: michael@0: /* string matching */ michael@0: case PREF_PARSE_MATCH_STRING: michael@0: if (c == ps->smatch[ps->sindex++]) { michael@0: /* if we've matched all characters, then move to next state. */ michael@0: if (ps->smatch[ps->sindex] == '\0') { michael@0: state = ps->nextstate; michael@0: ps->nextstate = PREF_PARSE_INIT; /* reset next state */ michael@0: } michael@0: /* else wait for next char */ michael@0: } michael@0: else { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: michael@0: /* quoted string parsing */ michael@0: case PREF_PARSE_QUOTED_STRING: michael@0: /* we assume that the initial quote has already been consumed */ michael@0: if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) michael@0: return false; /* out of memory */ michael@0: if (c == '\\') michael@0: state = PREF_PARSE_ESC_SEQUENCE; michael@0: else if (c == ps->quotechar) { michael@0: *ps->lbcur++ = '\0'; michael@0: state = ps->nextstate; michael@0: ps->nextstate = PREF_PARSE_INIT; /* reset next state */ michael@0: } michael@0: else michael@0: *ps->lbcur++ = c; michael@0: break; michael@0: michael@0: /* name parsing */ michael@0: case PREF_PARSE_UNTIL_NAME: michael@0: if (c == '\"' || c == '\'') { michael@0: ps->fdefault = (ps->smatch == kPref); michael@0: ps->quotechar = c; michael@0: ps->nextstate = PREF_PARSE_UNTIL_COMMA; /* return here when done */ michael@0: state = PREF_PARSE_QUOTED_STRING; michael@0: } michael@0: else if (c == '/') { /* allow embedded comment */ michael@0: ps->nextstate = state; /* return here when done with comment */ michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: } michael@0: else if (!isspace(c)) { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: michael@0: /* parse until we find a comma separating name and value */ michael@0: case PREF_PARSE_UNTIL_COMMA: michael@0: if (c == ',') { michael@0: ps->vb = ps->lbcur; michael@0: state = PREF_PARSE_UNTIL_VALUE; michael@0: } michael@0: else if (c == '/') { /* allow embedded comment */ michael@0: ps->nextstate = state; /* return here when done with comment */ michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: } michael@0: else if (!isspace(c)) { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: michael@0: /* value parsing */ michael@0: case PREF_PARSE_UNTIL_VALUE: michael@0: /* the pref value type is unknown. so, we scan for the first michael@0: * character of the value, and determine the type from that. */ michael@0: if (c == '\"' || c == '\'') { michael@0: ps->vtype = PREF_STRING; michael@0: ps->quotechar = c; michael@0: ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; michael@0: state = PREF_PARSE_QUOTED_STRING; michael@0: } michael@0: else if (c == 't' || c == 'f') { michael@0: ps->vb = (char *) (c == 't' ? kTrue : kFalse); michael@0: ps->vtype = PREF_BOOL; michael@0: ps->smatch = ps->vb; michael@0: ps->sindex = 1; michael@0: ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; michael@0: state = PREF_PARSE_MATCH_STRING; michael@0: } michael@0: else if (isdigit(c) || (c == '-') || (c == '+')) { michael@0: ps->vtype = PREF_INT; michael@0: /* write c to line buffer... */ michael@0: if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) michael@0: return false; /* out of memory */ michael@0: *ps->lbcur++ = c; michael@0: state = PREF_PARSE_INT_VALUE; michael@0: } michael@0: else if (c == '/') { /* allow embedded comment */ michael@0: ps->nextstate = state; /* return here when done with comment */ michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: } michael@0: else if (!isspace(c)) { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: case PREF_PARSE_INT_VALUE: michael@0: /* grow line buffer if necessary... */ michael@0: if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps)) michael@0: return false; /* out of memory */ michael@0: if (isdigit(c)) michael@0: *ps->lbcur++ = c; michael@0: else { michael@0: *ps->lbcur++ = '\0'; /* stomp null terminator; we are done. */ michael@0: if (c == ')') michael@0: state = PREF_PARSE_UNTIL_SEMICOLON; michael@0: else if (c == '/') { /* allow embedded comment */ michael@0: ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN; michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: } michael@0: else if (isspace(c)) michael@0: state = PREF_PARSE_UNTIL_CLOSE_PAREN; michael@0: else { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: } michael@0: break; michael@0: michael@0: /* comment parsing */ michael@0: case PREF_PARSE_COMMENT_MAYBE_START: michael@0: switch (c) { michael@0: case '*': /* comment block */ michael@0: state = PREF_PARSE_COMMENT_BLOCK; michael@0: break; michael@0: case '/': /* comment line */ michael@0: state = PREF_PARSE_UNTIL_EOL; michael@0: break; michael@0: default: michael@0: /* pref file is malformed */ michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: case PREF_PARSE_COMMENT_BLOCK: michael@0: if (c == '*') michael@0: state = PREF_PARSE_COMMENT_BLOCK_MAYBE_END; michael@0: break; michael@0: case PREF_PARSE_COMMENT_BLOCK_MAYBE_END: michael@0: switch (c) { michael@0: case '/': michael@0: state = ps->nextstate; michael@0: ps->nextstate = PREF_PARSE_INIT; michael@0: break; michael@0: case '*': /* stay in this state */ michael@0: break; michael@0: default: michael@0: state = PREF_PARSE_COMMENT_BLOCK; michael@0: } michael@0: break; michael@0: michael@0: /* string escape sequence parsing */ michael@0: case PREF_PARSE_ESC_SEQUENCE: michael@0: /* not necessary to resize buffer here since we should be writing michael@0: * only one character and the resize check would have been done michael@0: * for us in the previous state */ michael@0: switch (c) { michael@0: case '\"': michael@0: case '\'': michael@0: case '\\': michael@0: break; michael@0: case 'r': michael@0: c = '\r'; michael@0: break; michael@0: case 'n': michael@0: c = '\n'; michael@0: break; michael@0: case 'x': /* hex escape -- always interpreted as Latin-1 */ michael@0: case 'u': /* UTF16 escape */ michael@0: ps->esctmp[0] = c; michael@0: ps->esclen = 1; michael@0: ps->utf16[0] = ps->utf16[1] = 0; michael@0: ps->sindex = (c == 'x' ) ? michael@0: HEX_ESC_NUM_DIGITS : michael@0: UTF16_ESC_NUM_DIGITS; michael@0: state = PREF_PARSE_HEX_ESCAPE; michael@0: continue; michael@0: default: michael@0: NS_WARNING("preserving unexpected JS escape sequence"); michael@0: /* Invalid escape sequence so we do have to write more than michael@0: * one character. Grow line buffer if necessary... */ michael@0: if ((ps->lbcur+1) == ps->lbend && !pref_GrowBuf(ps)) michael@0: return false; /* out of memory */ michael@0: *ps->lbcur++ = '\\'; /* preserve the escape sequence */ michael@0: break; michael@0: } michael@0: *ps->lbcur++ = c; michael@0: state = PREF_PARSE_QUOTED_STRING; michael@0: break; michael@0: michael@0: /* parsing a hex (\xHH) or utf16 escape (\uHHHH) */ michael@0: case PREF_PARSE_HEX_ESCAPE: michael@0: if ( c >= '0' && c <= '9' ) michael@0: udigit = (c - '0'); michael@0: else if ( c >= 'A' && c <= 'F' ) michael@0: udigit = (c - 'A') + 10; michael@0: else if ( c >= 'a' && c <= 'f' ) michael@0: udigit = (c - 'a') + 10; michael@0: else { michael@0: /* bad escape sequence found, write out broken escape as-is */ michael@0: NS_WARNING("preserving invalid or incomplete hex escape"); michael@0: *ps->lbcur++ = '\\'; /* original escape slash */ michael@0: if ((ps->lbcur + ps->esclen) >= ps->lbend && !pref_GrowBuf(ps)) michael@0: return false; michael@0: for (int i = 0; i < ps->esclen; ++i) michael@0: *ps->lbcur++ = ps->esctmp[i]; michael@0: michael@0: /* push the non-hex character back for re-parsing. */ michael@0: /* (++buf at the top of the loop keeps this safe) */ michael@0: --buf; michael@0: state = PREF_PARSE_QUOTED_STRING; michael@0: continue; michael@0: } michael@0: michael@0: /* have a digit */ michael@0: ps->esctmp[ps->esclen++] = c; /* preserve it */ michael@0: ps->utf16[1] <<= BITS_PER_HEX_DIGIT; michael@0: ps->utf16[1] |= udigit; michael@0: ps->sindex--; michael@0: if (ps->sindex == 0) { michael@0: /* have the full escape. Convert to UTF8 */ michael@0: int utf16len = 0; michael@0: if (ps->utf16[0]) { michael@0: /* already have a high surrogate, this is a two char seq */ michael@0: utf16len = 2; michael@0: } michael@0: else if (0xD800 == (0xFC00 & ps->utf16[1])) { michael@0: /* a high surrogate, can't convert until we have the low */ michael@0: ps->utf16[0] = ps->utf16[1]; michael@0: ps->utf16[1] = 0; michael@0: state = PREF_PARSE_UTF16_LOW_SURROGATE; michael@0: break; michael@0: } michael@0: else { michael@0: /* a single utf16 character */ michael@0: ps->utf16[0] = ps->utf16[1]; michael@0: utf16len = 1; michael@0: } michael@0: michael@0: /* actual conversion */ michael@0: /* make sure there's room, 6 bytes is max utf8 len (in */ michael@0: /* theory; 4 bytes covers the actual utf16 range) */ michael@0: if (ps->lbcur+6 >= ps->lbend && !pref_GrowBuf(ps)) michael@0: return false; michael@0: michael@0: ConvertUTF16toUTF8 converter(ps->lbcur); michael@0: converter.write(ps->utf16, utf16len); michael@0: ps->lbcur += converter.Size(); michael@0: state = PREF_PARSE_QUOTED_STRING; michael@0: } michael@0: break; michael@0: michael@0: /* looking for beginning of utf16 low surrogate */ michael@0: case PREF_PARSE_UTF16_LOW_SURROGATE: michael@0: if (ps->sindex == 0 && c == '\\') { michael@0: ++ps->sindex; michael@0: } michael@0: else if (ps->sindex == 1 && c == 'u') { michael@0: /* escape sequence is correct, now parse hex */ michael@0: ps->sindex = UTF16_ESC_NUM_DIGITS; michael@0: ps->esctmp[0] = 'u'; michael@0: ps->esclen = 1; michael@0: state = PREF_PARSE_HEX_ESCAPE; michael@0: } michael@0: else { michael@0: /* didn't find expected low surrogate. Ignore high surrogate michael@0: * (it would just get converted to nothing anyway) and start michael@0: * over with this character */ michael@0: --buf; michael@0: if (ps->sindex == 1) michael@0: state = PREF_PARSE_ESC_SEQUENCE; michael@0: else michael@0: state = PREF_PARSE_QUOTED_STRING; michael@0: continue; michael@0: } michael@0: break; michael@0: michael@0: /* function open and close parsing */ michael@0: case PREF_PARSE_UNTIL_OPEN_PAREN: michael@0: /* tolerate only whitespace and embedded comments */ michael@0: if (c == '(') michael@0: state = PREF_PARSE_UNTIL_NAME; michael@0: else if (c == '/') { michael@0: ps->nextstate = state; /* return here when done with comment */ michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: } michael@0: else if (!isspace(c)) { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: case PREF_PARSE_UNTIL_CLOSE_PAREN: michael@0: /* tolerate only whitespace and embedded comments */ michael@0: if (c == ')') michael@0: state = PREF_PARSE_UNTIL_SEMICOLON; michael@0: else if (c == '/') { michael@0: ps->nextstate = state; /* return here when done with comment */ michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: } michael@0: else if (!isspace(c)) { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: michael@0: /* function terminator ';' parsing */ michael@0: case PREF_PARSE_UNTIL_SEMICOLON: michael@0: /* tolerate only whitespace and embedded comments */ michael@0: if (c == ';') { michael@0: if (!pref_DoCallback(ps)) michael@0: return false; michael@0: state = PREF_PARSE_INIT; michael@0: } michael@0: else if (c == '/') { michael@0: ps->nextstate = state; /* return here when done with comment */ michael@0: state = PREF_PARSE_COMMENT_MAYBE_START; michael@0: } michael@0: else if (!isspace(c)) { michael@0: NS_WARNING("malformed pref file"); michael@0: return false; michael@0: } michael@0: break; michael@0: michael@0: /* eol parsing */ michael@0: case PREF_PARSE_UNTIL_EOL: michael@0: /* need to handle mac, unix, or dos line endings. michael@0: * PREF_PARSE_INIT will eat the next \n in case michael@0: * we have \r\n. */ michael@0: if (c == '\r' || c == '\n' || c == 0x1A) { michael@0: state = ps->nextstate; michael@0: ps->nextstate = PREF_PARSE_INIT; /* reset next state */ michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: ps->state = state; michael@0: return true; michael@0: } michael@0: michael@0: #ifdef TEST_PREFREAD michael@0: michael@0: static void michael@0: pref_reader(void *closure, michael@0: const char *pref, michael@0: PrefValue val, michael@0: PrefType type, michael@0: bool defPref) michael@0: { michael@0: printf("%spref(\"%s\", ", defPref ? "" : "user_", pref); michael@0: switch (type) { michael@0: case PREF_STRING: michael@0: printf("\"%s\");\n", val.stringVal); michael@0: break; michael@0: case PREF_INT: michael@0: printf("%i);\n", val.intVal); michael@0: break; michael@0: case PREF_BOOL: michael@0: printf("%s);\n", val.boolVal == false ? "false" : "true"); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: int michael@0: main(int argc, char **argv) michael@0: { michael@0: PrefParseState ps; michael@0: char buf[4096]; /* i/o buffer */ michael@0: FILE *fp; michael@0: int n; michael@0: michael@0: if (argc == 1) { michael@0: printf("usage: prefread file.js\n"); michael@0: return -1; michael@0: } michael@0: michael@0: fp = fopen(argv[1], "r"); michael@0: if (!fp) { michael@0: printf("failed to open file\n"); michael@0: return -1; michael@0: } michael@0: michael@0: PREF_InitParseState(&ps, pref_reader, nullptr); michael@0: michael@0: while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) michael@0: PREF_ParseBuf(&ps, buf, n); michael@0: michael@0: PREF_FinalizeParseState(&ps); michael@0: michael@0: fclose(fp); michael@0: return 0; michael@0: } michael@0: michael@0: #endif /* TEST_PREFREAD */