intl/icu/source/tools/icu-svnprops-check.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/icu-svnprops-check.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,243 @@
     1.4 +#! /usr/bin/python
     1.5 +
     1.6 +# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
     1.7 +# All rights reserved.
     1.8 +
     1.9 +#
    1.10 +#  Script to check and fix svn property settings for ICU source files.
    1.11 +#  Also check for the correct line endings on files with svn:eol-style = native
    1.12 +#
    1.13 +#  THIS SCRIPT DOES NOT WORK ON WINDOWS
    1.14 +#     It only works correctly on platforms where the native line ending is a plain \n
    1.15 +#
    1.16 +#  usage:
    1.17 +#     icu-svnprops-check.py  [options]
    1.18 +#
    1.19 +#  options:
    1.20 +#     -f | --fix     Fix any problems that are found
    1.21 +#     -h | --help    Print a usage line and exit.
    1.22 +#
    1.23 +#  The tool operates recursively on the directory from which it is run.
    1.24 +#  Only files from the svn repository are checked.
    1.25 +#  No changes are made to the repository; only the working copy will be altered.
    1.26 +
    1.27 +import sys
    1.28 +import os
    1.29 +import os.path
    1.30 +import re
    1.31 +import getopt
    1.32 +
    1.33 +#
    1.34 +#  svn autoprops definitions.
    1.35 +#      Copy and paste here the ICU recommended auto-props from
    1.36 +#      http://icu-project.org/docs/subversion_howto/index.html
    1.37 +#
    1.38 +#  This program will parse this autoprops string, and verify that files in
     1.39 +#  the repository have the recommended properties set.
    1.40 +#
    1.41 +svn_auto_props = """
    1.42 +### Section for configuring automatic properties.
    1.43 +[auto-props]
    1.44 +### The format of the entries is:
    1.45 +###   file-name-pattern = propname[=value][;propname[=value]...]
    1.46 +### The file-name-pattern can contain wildcards (such as '*' and
    1.47 +### '?').  All entries which match will be applied to the file.
    1.48 +### Note that auto-props functionality must be enabled, which
    1.49 +### is typically done by setting the 'enable-auto-props' option.
    1.50 +*.c = svn:eol-style=native
    1.51 +*.cc = svn:eol-style=native
    1.52 +*.cpp = svn:eol-style=native
    1.53 +*.h = svn:eol-style=native
    1.54 +*.rc = svn:eol-style=native
    1.55 +*.dsp = svn:eol-style=native
    1.56 +*.dsw = svn:eol-style=native
    1.57 +*.sln = svn:eol-style=native
    1.58 +*.vcproj = svn:eol-style=native
    1.59 +configure = svn:eol-style=native;svn:executable
    1.60 +*.sh = svn:eol-style=native;svn:executable
    1.61 +*.pl = svn:eol-style=native;svn:executable
    1.62 +*.py = svn:eol-style=native;svn:executable
    1.63 +*.txt = svn:mime-type=text/plain;svn:eol-style=native
    1.64 +*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
    1.65 +*.ucm = svn:eol-style=native
    1.66 +*.html = svn:eol-style=native;svn:mime-type=text/html
    1.67 +*.htm = svn:eol-style=native;svn:mime-type=text/html
    1.68 +*.xml = svn:eol-style=native
    1.69 +Makefile = svn:eol-style=native
    1.70 +*.in = svn:eol-style=native
    1.71 +*.mak = svn:eol-style=native
    1.72 +*.mk = svn:eol-style=native
    1.73 +*.png = svn:mime-type=image/png
    1.74 +*.jpeg = svn:mime-type=image/jpeg
    1.75 +*.jpg = svn:mime-type=image/jpeg
    1.76 +*.bin = svn:mime-type=application/octet-stream
    1.77 +*.brk = svn:mime-type=application/octet-stream
    1.78 +*.cnv = svn:mime-type=application/octet-stream
    1.79 +*.dat = svn:mime-type=application/octet-stream
    1.80 +*.icu = svn:mime-type=application/octet-stream
    1.81 +*.res = svn:mime-type=application/octet-stream
    1.82 +*.spp = svn:mime-type=application/octet-stream
    1.83 +# new additions 2007-dec-5 srl
    1.84 +*.rtf = mime-type=text/rtf
    1.85 +*.pdf = mime-type=application/pdf
    1.86 +# changed 2008-04-08: modified .txt, above, adding mime-type
    1.87 +# changed 2010-11-09: modified .java, adding mime-type
    1.88 +# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
    1.89 +"""
    1.90 +
    1.91 +
    1.92 +# file_types:  The parsed form of the svn auto-props specification.
    1.93 +#              A list of file types - .cc, .cpp, .txt, etc.
    1.94 +#              each element is a [type, proplist]
    1.95 +#              "type" is a regular expression string that will match a file name
    1.96 +#              prop list is another list, one element per property.
    1.97 +#              Each property item is a two element list, [prop name, prop value]
    1.98 +file_types = list()
    1.99 +
   1.100 +def parse_auto_props():
   1.101 +    aprops = svn_auto_props.splitlines()
   1.102 +    for propline in aprops:
   1.103 +        if re.match("\s*(#.*)?$", propline):         # Match comment and blank lines
   1.104 +            continue
   1.105 +        if re.match("\s*\[auto-props\]", propline):  # Match the [auto-props] line.
   1.106 +            continue
   1.107 +        if not re.match("\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
   1.108 +            print "Bad line from autoprops definitions: " + propline
   1.109 +            continue
   1.110 +        file_type, string_proplist = propline.split("=", 1)
   1.111 +
   1.112 +        #transform the file type expression from autoprops into a normal regular expression.
   1.113 +        #  e.g.  "*.cpp"  ==>  ".*\.cpp$"
   1.114 +        file_type = file_type.strip()
   1.115 +        file_type = file_type.replace(".", "\.")
   1.116 +        file_type = file_type.replace("*", ".*")
   1.117 +        file_type = file_type + "$"
   1.118 +
   1.119 +        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
   1.120 +        # split on ';' into a list of properties.  The negative lookahead and lookbehind
   1.121 +        # in the split regexp are to prevent matching on ';;', which is an escaped ';'
   1.122 +        # within a property value.
   1.123 +        string_proplist = re.split("(?<!;);(?!;)", string_proplist)
   1.124 +        proplist = list()
   1.125 +        for prop in string_proplist:
   1.126 +            if prop.find("=") >= 0:
   1.127 +                prop_name, prop_val = prop.split("=", 1)
   1.128 +            else:
   1.129 +                # properties with no explicit value, e.g. svn:executable
   1.130 +                prop_name, prop_val = prop, ""
   1.131 +            prop_name = prop_name.strip()
   1.132 +            prop_val = prop_val.strip()
   1.133 +            # unescape any ";;" in a property value, e.g. the mime-type from
   1.134 +            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
   1.135 +            prop_val = prop_val.replace(";;", ";");
   1.136 +            proplist.append((prop_name, prop_val))
   1.137 +
   1.138 +        file_types.append((file_type, proplist))
   1.139 +    # print file_types
   1.140 +
   1.141 +        
   1.142 +def runCommand(cmd):
   1.143 +    output_file = os.popen(cmd);
   1.144 +    output_text = output_file.read();
   1.145 +    exit_status = output_file.close();
   1.146 +    if exit_status:
   1.147 +        print >>sys.stderr, '"', cmd, '" failed.  Exiting.'
   1.148 +        sys.exit(exit_status)
   1.149 +    return output_text
   1.150 +
   1.151 +
   1.152 +def usage():
   1.153 +    print "usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]"
   1.154 +
   1.155 +    
   1.156 +#
   1.157 +#  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
   1.158 +#    file_name:        name of a text file.
   1.159 +#    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
   1.160 +#    actual_mime_type: existing svn:mime-type property value for the file.
   1.161 +#    return:           svn:mime-type property value, with charset added when appropriate.
   1.162 +#
   1.163 +def check_utf8(file_name, base_mime_type, actual_mime_type):
   1.164 +
   1.165 +    # If the file already has a charset in its mime-type, don't make any change.
   1.166 +
   1.167 +    if actual_mime_type.find("charset=") > 0:
   1.168 +        return actual_mime_type;
   1.169 +
   1.170 +    f = open(file_name, 'r')
   1.171 +    bytes = f.read()
   1.172 +    f.close()
   1.173 +
   1.174 +    if all(ord(byte) < 128 for byte in bytes):
   1.175 +        # pure ASCII.
   1.176 +        # print "Pure ASCII " + file_name
   1.177 +        return base_mime_type
   1.178 +
   1.179 +    try:
   1.180 +        bytes.decode("UTF-8")
   1.181 +    except UnicodeDecodeError:
   1.182 +        print "warning: %s: not ASCII, not UTF-8" % file_name
   1.183 +        return base_mime_type
   1.184 +
   1.185 +    if ord(bytes[0]) != 0xef:
   1.186 +      print "UTF-8 file with no BOM: " + file_name
   1.187 +
   1.188 +    # Append charset=utf-8.
   1.189 +    return base_mime_type + ';charset=utf-8'
   1.190 +
   1.191 +
   1.192 +def main(argv):
   1.193 +    fix_problems = False;
   1.194 +    try:
   1.195 +        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
   1.196 +    except getopt.GetoptError:
   1.197 +        print "unrecognized option: " + argv[0]
   1.198 +        usage()
   1.199 +        sys.exit(2)
   1.200 +    for opt, arg in opts:
   1.201 +        if opt in ("-h", "--help"):
   1.202 +            usage()
   1.203 +            sys.exit()
   1.204 +        if opt in ("-f", "--fix"):
   1.205 +            fix_problems = True
   1.206 +    if args:
   1.207 +        print "unexpected command line argument"
   1.208 +        usage()
   1.209 +        sys.exit()
   1.210 +
   1.211 +    parse_auto_props()
   1.212 +    output = runCommand("svn ls -R ");
   1.213 +    file_list = output.splitlines()
   1.214 +
   1.215 +    for f in file_list:
   1.216 +        if os.path.isdir(f):
   1.217 +            # print "Skipping dir " + f
   1.218 +            continue
   1.219 +        if not os.path.isfile(f):
   1.220 +            print "Repository file not in working copy: " + f
   1.221 +            continue;
   1.222 +
   1.223 +        for file_pattern, props in file_types:
   1.224 +            if re.match(file_pattern, f):
   1.225 +                # print "doing " + f
   1.226 +                for propname, propval in props:
   1.227 +                    actual_propval = runCommand("svn propget --strict " + propname + " " + f)
   1.228 +                    #print propname + ": " + actual_propval
   1.229 +                    if propname == "svn:mime-type" and propval.find("text/") == 0:
   1.230 +                        # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8
   1.231 +                        propval = check_utf8(f, propval, actual_propval)
   1.232 +                    if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
   1.233 +                        print "svn propset %s '%s' %s" % (propname, propval, f)
   1.234 +                        if fix_problems:
   1.235 +                            os.system("svn propset %s '%s' %s" % (propname, propval, f))
   1.236 +                    if propname == "svn:eol-style" and propval == "native":
   1.237 +                        if os.system("grep -q -v \r " + f):
   1.238 +                            if fix_problems:
   1.239 +                                print f + ": Removing DOS CR characters."
   1.240 +                                os.system("sed -i s/\r// " + f);
   1.241 +                            else:
   1.242 +                                print f + " contains DOS CR characters."
   1.243 +
   1.244 +
   1.245 +if __name__ == "__main__":
   1.246 +    main(sys.argv[1:])

mercurial