intl/icu/source/tools/icu-svnprops-check.py

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rwxr-xr-x

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 #! /usr/bin/python
michael@0 2
michael@0 3 # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
michael@0 4 # All rights reserved.
michael@0 5
michael@0 6 #
michael@0 7 # Script to check and fix svn property settings for ICU source files.
michael@0 8 # Also check for the correct line endings on files with svn:eol-style = native
michael@0 9 #
michael@0 10 # THIS SCRIPT DOES NOT WORK ON WINDOWS
michael@0 11 # It only works correctly on platforms where the native line ending is a plain \n
michael@0 12 #
michael@0 13 # usage:
michael@0 14 # icu-svnprops-check.py [options]
michael@0 15 #
michael@0 16 # options:
michael@0 17 # -f | --fix Fix any problems that are found
michael@0 18 # -h | --help Print a usage line and exit.
michael@0 19 #
michael@0 20 # The tool operates recursively on the directory from which it is run.
michael@0 21 # Only files from the svn repository are checked.
michael@0 22 # No changes are made to the repository; only the working copy will be altered.
michael@0 23
import getopt
import os
import os.path
import re
import subprocess
import sys
michael@0 29
#
# svn autoprops definitions.
# Copy and paste here the ICU recommended auto-props from
# http://icu-project.org/docs/subversion_howto/index.html
#
# This program will parse this autoprops string, and verify that files in
# the repository have the recommended properties set.
#
# Inside the string, blank lines, lines starting with '#' and the
# "[auto-props]" header are skipped by parse_auto_props(); every other line
# must have the form  <file-name-pattern> = <prop>[=<value>][;<prop>...].
#
# NOTE(review): the *.rtf and *.pdf entries name the property "mime-type",
# not "svn:mime-type" like every other entry.  This looks like a typo carried
# over from the recommended settings -- confirm before changing it.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
### file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?'). All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
michael@0 87
michael@0 88
# file_types: the parsed form of the svn auto-props specification, filled in
# by parse_auto_props().
#   One entry per file type (.cc, .cpp, .txt, ...), in specification order.
#   Each entry is a pair (type, proplist):
#     type      a regular expression string that matches a file name
#     proplist  a list with one (prop name, prop value) pair per property
file_types = []
michael@0 96
def _glob_to_regex(pattern):
    """Translate an auto-props file-name pattern into an anchored regex string.

    '*' becomes '.*', '?' becomes '.', and every other character is escaped so
    it only matches itself, e.g. "*.cpp" ==> ".*\\.cpp$".
    """
    pieces = []
    for char in pattern:
        if char == "*":
            pieces.append(".*")
        elif char == "?":
            pieces.append(".")
        else:
            pieces.append(re.escape(char))
    return "".join(pieces) + "$"


def parse_auto_props(auto_props=None, types=None):
    """Parse an svn auto-props specification into (regex, proplist) entries.

    auto_props: text of the specification; defaults to the module-level
                svn_auto_props string.
    types:      list the parsed entries are appended to; defaults to the
                module-level file_types list.
    return:     the list that was appended to.

    Each appended entry is (file-name regex string, proplist), where proplist
    is a list of (prop name, prop value) tuples.  Properties given without a
    value (e.g. svn:executable) get the empty string as their value.
    Comment lines, blank lines and the "[auto-props]" header are skipped;
    any other line without "<file-type> =" is reported on stdout and skipped.
    """
    if auto_props is None:
        auto_props = svn_auto_props
    if types is None:
        types = file_types

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):          # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):   # the [auto-props] line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):   # minimal check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression.
        file_type = _glob_to_regex(file_type.strip())

        # Example string_proplist at this point:
        #   " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties.  The lookbehind/lookahead
        # keep the split from firing on ';;', which is an escaped ';' inside
        # a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            if "=" in prop:
                prop_name, prop_val = prop.split("=", 1)
            else:
                # Properties with no explicit value, e.g. svn:executable
                prop_name, prop_val = prop, ""
            prop_name = prop_name.strip()
            if not prop_name:
                # A stray or trailing ';' leaves an empty item; it names no property.
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        types.append((file_type, proplist))
    return types
michael@0 137
michael@0 138
def runCommand(cmd):
    """Run cmd through the shell and return everything it wrote to stdout.

    cmd:    a complete shell command line (string).
    return: the command's standard output as text.

    If the command finishes with a non-zero status, a message is written to
    stderr and the script exits with the command's own exit code (or 1 if
    the command was killed by a signal).
    """
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output_text = proc.communicate()[0]
    exit_status = proc.returncode
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # returncode is the real exit code, not an encoded wait status (whose
        # low byte is zero for an ordinary failure), so the failure is passed
        # on to our caller.  Negative means "killed by signal".
        sys.exit(exit_status if exit_status > 0 else 1)
    return output_text
michael@0 147
michael@0 148
def usage():
    """Print the one-line command synopsis for this script on stdout."""
    script_name = sys.argv[0]
    synopsis = "".join(("usage: ", script_name, " [-f | --fix] [-h | --help]"))
    print(synopsis)
michael@0 151
michael@0 152
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """UTF-8 file check: decide which svn:mime-type value a text file should have.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type value from the auto-props definitions
                      (no charset= part).
    actual_mime_type: existing svn:mime-type property value for the file.
    return:           actual_mime_type unchanged if it already names a charset;
                      base_mime_type for pure-ASCII files (including empty
                      ones) and for files that are not valid UTF-8;
                      base_mime_type + ';charset=utf-8' for non-ASCII UTF-8.

    Prints a warning for files that are neither ASCII nor UTF-8, and a note
    for UTF-8 files that do not start with a byte order mark.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes: the whole point is to test how they decode.
    with open(file_name, "rb") as stream:
        data = stream.read()

    try:
        data.decode("ascii")
    except UnicodeDecodeError:
        pass
    else:
        # Pure ASCII needs no charset annotation.
        return base_mime_type

    try:
        data.decode("utf-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare against the complete three-byte BOM, not just its first byte,
    # which also starts ordinary characters in the U+F000..U+FFFF range.
    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Append charset=utf-8.
    return base_mime_type + ';charset=utf-8'
michael@0 187
michael@0 188
def _shell_quote(arg):
    """Return arg quoted so that a POSIX shell reads it back as one literal word."""
    try:
        from shlex import quote     # Python 3.3+
    except ImportError:
        from pipes import quote     # Python 2
    return quote(arg)


def _has_dos_line_endings(path):
    """Return True if the file at path contains at least one CR LF pair."""
    with open(path, "rb") as stream:
        return b"\r\n" in stream.read()


def _strip_dos_line_endings(path):
    """Rewrite the file at path in place with every CR LF turned into LF."""
    with open(path, "rb") as stream:
        data = stream.read()
    with open(path, "wb") as stream:
        stream.write(data.replace(b"\r\n", b"\n"))


def _check_file(path, fix_problems):
    """Check one working-copy file against every matching file_types entry.

    Prints the "svn propset" command for each property whose value differs
    from the configured one, and runs it when fix_problems is true.  For
    files configured with svn:eol-style=native, DOS line endings are reported,
    or removed when fix_problems is true.
    """
    # auto-props patterns describe file names, so match against the last
    # path component; otherwise "configure" or "Makefile" would only ever
    # match in the directory the tool is run from.
    base_name = os.path.basename(path)
    # The path goes into shell command lines; quote it so blanks or shell
    # metacharacters in a file name cannot split or alter the command.
    quoted_path = _shell_quote(path)

    for file_pattern, props in file_types:
        if not re.match(file_pattern, base_name):
            continue
        for propname, propval in props:
            actual_propval = runCommand(
                "svn propget --strict %s %s" % (_shell_quote(propname), quoted_path))
            if propname == "svn:mime-type" and propval.startswith("text/"):
                # Check for UTF-8 text files, which should have
                # svn:mime-type=text/something;charset=utf-8
                propval = check_utf8(path, propval, actual_propval)
            # A property configured without a value (e.g. svn:executable) is
            # also accepted when svn reports its value as "*".
            if not (propval == actual_propval or
                    (propval == "" and actual_propval == "*")):
                propset_cmd = "svn propset %s %s %s" % (
                    _shell_quote(propname), _shell_quote(propval), quoted_path)
                print(propset_cmd)
                if fix_problems:
                    os.system(propset_cmd)
            if propname == "svn:eol-style" and propval == "native":
                if _has_dos_line_endings(path):
                    if fix_problems:
                        print(path + ": Removing DOS CR characters.")
                        _strip_dos_line_endings(path)
                    else:
                        print(path + " contains DOS CR characters.")


def main(argv):
    """Command-line entry point.

    argv: the command-line arguments, without the program name.

    Recognizes -f/--fix (fix the problems that are found) and -h/--help.
    Parses the auto-props definitions, lists the repository files below the
    current directory with "svn ls -R", and checks every one that exists as a
    regular file in the working copy.  Exits with status 2 on a bad option or
    an unexpected argument.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually rejected; it need not be argv[0].
        print(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    output = runCommand("svn ls -R ")
    file_list = output.splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue
        _check_file(f, fix_problems)
michael@0 240
michael@0 241
# Run the checker only when this file is executed as a script; main() gets
# everything on the command line after the program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)

mercurial