addon-sdk/source/python-lib/cuddlefish/property_parser.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/addon-sdk/source/python-lib/cuddlefish/property_parser.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,111 @@
     1.4 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.5 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.7 +
     1.8 +import re
     1.9 +import codecs
    1.10 +
    1.11 +class MalformedLocaleFileError(Exception):
    1.12 +    pass
    1.13 +
    1.14 +def parse_file(path):
    1.15 +    return parse(read_file(path), path)
    1.16 +
    1.17 +def read_file(path):
    1.18 +    try:
    1.19 +        return codecs.open( path, "r", "utf-8" ).readlines()
    1.20 +    except UnicodeDecodeError, e:
    1.21 +        raise MalformedLocaleFileError(
    1.22 +          'Following locale file is not a valid ' +
    1.23 +          'UTF-8 file: %s\n%s"' % (path, str(e)))
    1.24 +
    1.25 +COMMENT = re.compile(r'\s*#')
    1.26 +EMPTY = re.compile(r'^\s+$')
    1.27 +KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")
    1.28 +
    1.29 +def parse(lines, path=None):
    1.30 +    lines = iter(lines)
    1.31 +    lineNo = 1
    1.32 +    pairs = dict()
    1.33 +    for line in lines:
    1.34 +        if COMMENT.match(line) or EMPTY.match(line) or len(line) == 0:
    1.35 +            continue
    1.36 +        m = KEYVALUE.match(line)
    1.37 +        if not m:
    1.38 +            raise MalformedLocaleFileError(
    1.39 +                  'Following locale file is not a valid .properties file: %s\n'
    1.40 +                  'Line %d is incorrect:\n%s' % (path, lineNo, line))
    1.41 +
    1.42 +        # All spaces are strip. Spaces at the beginning are stripped
    1.43 +        # by the regular expression. We have to strip spaces at the end.
    1.44 +        key = m.group(1).rstrip()
    1.45 +        val = m.group(3).rstrip()
    1.46 +        val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')
    1.47 +
    1.48 +        # `key` can be empty when key is only made of spaces
    1.49 +        if not key:
    1.50 +            raise MalformedLocaleFileError(
    1.51 +                  'Following locale file is not a valid .properties file: %s\n'
    1.52 +                  'Key is invalid on line %d is incorrect:\n%s' %
    1.53 +                  (path, lineNo, line))
    1.54 +
    1.55 +        # Multiline value: keep reading lines, while lines end with backslash
    1.56 +        # and strip spaces at the beginning of lines except the last line
    1.57 +        # that doesn't end up with backslash, we strip all spaces for this one.
    1.58 +        if val.endswith("\\"):
    1.59 +            val = val[:-1]
    1.60 +            try:
    1.61 +                # remove spaces before/after and especially the \n at EOL
    1.62 +                line = lines.next().strip()
    1.63 +                while line.endswith("\\"):
    1.64 +                    val += line[:-1].lstrip()
    1.65 +                    line = lines.next()
    1.66 +                    lineNo += 1
    1.67 +                val += line.strip()
    1.68 +            except StopIteration:
    1.69 +                raise MalformedLocaleFileError(
    1.70 +                  'Following locale file is not a valid .properties file: %s\n'
    1.71 +                  'Unexpected EOF in multiline sequence at line %d:\n%s' %
    1.72 +                  (path, lineNo, line))
    1.73 +        # Save this new pair
    1.74 +        pairs[key] = val
    1.75 +        lineNo += 1
    1.76 +
    1.77 +    normalize_plural(path, pairs)
    1.78 +    return pairs
    1.79 +
    1.80 +# Plural forms in properties files are defined like this:
    1.81 +#   key = other form
    1.82 +#   key[one] = one form
    1.83 +#   key[...] = ...
    1.84 +# Parse them and merge each key into one object containing all forms:
    1.85 +#   key: {
    1.86 +#     other: "other form",
    1.87 +#     one: "one form",
    1.88 +#     ...: ...
    1.89 +#   }
    1.90 +PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')
    1.91 +def normalize_plural(path, pairs):
    1.92 +    for key in list(pairs.keys()):
    1.93 +        m = PLURAL_FORM.match(key)
    1.94 +        if not m:
    1.95 +            continue
    1.96 +        main_key = m.group(1)
    1.97 +        plural_form = m.group(2)
    1.98 +        # Allows not specifying a generic key (i.e a key without [form])
    1.99 +        if not main_key in pairs:
   1.100 +            pairs[main_key] = {}
   1.101 +            # Ensure that we always have the [other] form
   1.102 +            if not main_key + "[other]" in pairs:
   1.103 +                raise MalformedLocaleFileError(
   1.104 +                      'Following locale file is not a valid UTF-8 file: %s\n'
   1.105 +                      'This plural form doesn\'t have a matching `%s[other]` form:\n'
   1.106 +                      '%s\n'
   1.107 +                      'You have to defined following key:\n%s'
   1.108 +                      % (path, main_key, key, main_key))
   1.109 +        # convert generic form into an object if it is still a string
   1.110 +        if isinstance(pairs[main_key], unicode):
   1.111 +            pairs[main_key] = {"other": pairs[main_key]}
   1.112 +        # then, add this new plural form
   1.113 +        pairs[main_key][plural_form] = pairs[key]
   1.114 +        del pairs[key]

mercurial