intl/icu/source/tools/icu-svnprops-check.py

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rwxr-xr-x

Correct previous dual key logic pending first delivery installment.

     1 #! /usr/bin/python
     3 # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
     4 # All rights reserved.
     6 #
     7 #  Script to check and fix svn property settings for ICU source files.
     8 #  Also check for the correct line endings on files with svn:eol-style = native
     9 #
    10 #  THIS SCRIPT DOES NOT WORK ON WINDOWS
    11 #     It only works correctly on platforms where the native line ending is a plain \n
    12 #
    13 #  usage:
    14 #     icu-svnprops-check.py  [options]
    15 #
    16 #  options:
    17 #     -f | --fix     Fix any problems that are found
    18 #     -h | --help    Print a usage line and exit.
    19 #
    20 #  The tool operates recursively on the directory from which it is run.
    21 #  Only files from the svn repository are checked.
    22 #  No changes are made to the repository; only the working copy will be altered.
    24 import sys
    25 import os
    26 import os.path
    27 import re
    28 import getopt
    30 #
    31 #  svn autoprops definitions.
    32 #      Copy and paste here the ICU recommended auto-props from
    33 #      http://icu-project.org/docs/subversion_howto/index.html
    34 #
    35 #  This program will parse this autoprops string, and verify that files in
    36 #  the repository have the recommended properties set.
    37 #
    38 svn_auto_props = """
    39 ### Section for configuring automatic properties.
    40 [auto-props]
    41 ### The format of the entries is:
    42 ###   file-name-pattern = propname[=value][;propname[=value]...]
    43 ### The file-name-pattern can contain wildcards (such as '*' and
    44 ### '?').  All entries which match will be applied to the file.
    45 ### Note that auto-props functionality must be enabled, which
    46 ### is typically done by setting the 'enable-auto-props' option.
    47 *.c = svn:eol-style=native
    48 *.cc = svn:eol-style=native
    49 *.cpp = svn:eol-style=native
    50 *.h = svn:eol-style=native
    51 *.rc = svn:eol-style=native
    52 *.dsp = svn:eol-style=native
    53 *.dsw = svn:eol-style=native
    54 *.sln = svn:eol-style=native
    55 *.vcproj = svn:eol-style=native
    56 configure = svn:eol-style=native;svn:executable
    57 *.sh = svn:eol-style=native;svn:executable
    58 *.pl = svn:eol-style=native;svn:executable
    59 *.py = svn:eol-style=native;svn:executable
    60 *.txt = svn:mime-type=text/plain;svn:eol-style=native
    61 *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
    62 *.ucm = svn:eol-style=native
    63 *.html = svn:eol-style=native;svn:mime-type=text/html
    64 *.htm = svn:eol-style=native;svn:mime-type=text/html
    65 *.xml = svn:eol-style=native
    66 Makefile = svn:eol-style=native
    67 *.in = svn:eol-style=native
    68 *.mak = svn:eol-style=native
    69 *.mk = svn:eol-style=native
    70 *.png = svn:mime-type=image/png
    71 *.jpeg = svn:mime-type=image/jpeg
    72 *.jpg = svn:mime-type=image/jpeg
    73 *.bin = svn:mime-type=application/octet-stream
    74 *.brk = svn:mime-type=application/octet-stream
    75 *.cnv = svn:mime-type=application/octet-stream
    76 *.dat = svn:mime-type=application/octet-stream
    77 *.icu = svn:mime-type=application/octet-stream
    78 *.res = svn:mime-type=application/octet-stream
    79 *.spp = svn:mime-type=application/octet-stream
    80 # new additions 2007-dec-5 srl
    81 *.rtf = mime-type=text/rtf
    82 *.pdf = mime-type=application/pdf
    83 # changed 2008-04-08: modified .txt, above, adding mime-type
    84 # changed 2010-11-09: modified .java, adding mime-type
    85 # Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
    86 """
    89 # file_types:  The parsed form of the svn auto-props specification.
    90 #              A list of file types - .cc, .cpp, .txt, etc.
    91 #              each element is a [type, proplist]
    92 #              "type" is a regular expression string that will match a file name
    93 #              prop list is another list, one element per property.
    94 #              Each property item is a two element list, [prop name, prop value]
    95 file_types = list()
    97 def parse_auto_props():
    98     aprops = svn_auto_props.splitlines()
    99     for propline in aprops:
   100         if re.match("\s*(#.*)?$", propline):         # Match comment and blank lines
   101             continue
   102         if re.match("\s*\[auto-props\]", propline):  # Match the [auto-props] line.
   103             continue
   104         if not re.match("\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
   105             print "Bad line from autoprops definitions: " + propline
   106             continue
   107         file_type, string_proplist = propline.split("=", 1)
   109         #transform the file type expression from autoprops into a normal regular expression.
   110         #  e.g.  "*.cpp"  ==>  ".*\.cpp$"
   111         file_type = file_type.strip()
   112         file_type = file_type.replace(".", "\.")
   113         file_type = file_type.replace("*", ".*")
   114         file_type = file_type + "$"
   116         # example string_proplist at this point: " svn:eol-style=native;svn:executable"
   117         # split on ';' into a list of properties.  The negative lookahead and lookbehind
   118         # in the split regexp are to prevent matching on ';;', which is an escaped ';'
   119         # within a property value.
   120         string_proplist = re.split("(?<!;);(?!;)", string_proplist)
   121         proplist = list()
   122         for prop in string_proplist:
   123             if prop.find("=") >= 0:
   124                 prop_name, prop_val = prop.split("=", 1)
   125             else:
   126                 # properties with no explicit value, e.g. svn:executable
   127                 prop_name, prop_val = prop, ""
   128             prop_name = prop_name.strip()
   129             prop_val = prop_val.strip()
   130             # unescape any ";;" in a property value, e.g. the mime-type from
   131             #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
   132             prop_val = prop_val.replace(";;", ";");
   133             proplist.append((prop_name, prop_val))
   135         file_types.append((file_type, proplist))
   136     # print file_types
   139 def runCommand(cmd):
   140     output_file = os.popen(cmd);
   141     output_text = output_file.read();
   142     exit_status = output_file.close();
   143     if exit_status:
   144         print >>sys.stderr, '"', cmd, '" failed.  Exiting.'
   145         sys.exit(exit_status)
   146     return output_text
   149 def usage():
   150     print "usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]"
   153 #
   154 #  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
   155 #    file_name:        name of a text file.
   156 #    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
   157 #    actual_mime_type: existing svn:mime-type property value for the file.
   158 #    return:           svn:mime-type property value, with charset added when appropriate.
   159 #
   160 def check_utf8(file_name, base_mime_type, actual_mime_type):
   162     # If the file already has a charset in its mime-type, don't make any change.
   164     if actual_mime_type.find("charset=") > 0:
   165         return actual_mime_type;
   167     f = open(file_name, 'r')
   168     bytes = f.read()
   169     f.close()
   171     if all(ord(byte) < 128 for byte in bytes):
   172         # pure ASCII.
   173         # print "Pure ASCII " + file_name
   174         return base_mime_type
   176     try:
   177         bytes.decode("UTF-8")
   178     except UnicodeDecodeError:
   179         print "warning: %s: not ASCII, not UTF-8" % file_name
   180         return base_mime_type
   182     if ord(bytes[0]) != 0xef:
   183       print "UTF-8 file with no BOM: " + file_name
   185     # Append charset=utf-8.
   186     return base_mime_type + ';charset=utf-8'
   189 def main(argv):
   190     fix_problems = False;
   191     try:
   192         opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
   193     except getopt.GetoptError:
   194         print "unrecognized option: " + argv[0]
   195         usage()
   196         sys.exit(2)
   197     for opt, arg in opts:
   198         if opt in ("-h", "--help"):
   199             usage()
   200             sys.exit()
   201         if opt in ("-f", "--fix"):
   202             fix_problems = True
   203     if args:
   204         print "unexpected command line argument"
   205         usage()
   206         sys.exit()
   208     parse_auto_props()
   209     output = runCommand("svn ls -R ");
   210     file_list = output.splitlines()
   212     for f in file_list:
   213         if os.path.isdir(f):
   214             # print "Skipping dir " + f
   215             continue
   216         if not os.path.isfile(f):
   217             print "Repository file not in working copy: " + f
   218             continue;
   220         for file_pattern, props in file_types:
   221             if re.match(file_pattern, f):
   222                 # print "doing " + f
   223                 for propname, propval in props:
   224                     actual_propval = runCommand("svn propget --strict " + propname + " " + f)
   225                     #print propname + ": " + actual_propval
   226                     if propname == "svn:mime-type" and propval.find("text/") == 0:
   227                         # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8
   228                         propval = check_utf8(f, propval, actual_propval)
   229                     if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
   230                         print "svn propset %s '%s' %s" % (propname, propval, f)
   231                         if fix_problems:
   232                             os.system("svn propset %s '%s' %s" % (propname, propval, f))
   233                     if propname == "svn:eol-style" and propval == "native":
   234                         if os.system("grep -q -v \r " + f):
   235                             if fix_problems:
   236                                 print f + ": Removing DOS CR characters."
   237                                 os.system("sed -i s/\r// " + f);
   238                             else:
   239                                 print f + " contains DOS CR characters."
# Run the checker only when this file is executed as a script; main() receives
# the command-line arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])

mercurial