#! /usr/bin/python

# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
# All rights reserved.

#
#  Script to check and fix svn property settings for ICU source files.
#  Also check for the correct line endings on files with svn:eol-style = native
#
#  THIS SCRIPT DOES NOT WORK ON WINDOWS
#     It only works correctly on platforms where the native line ending is a plain \n
#
#  usage:
#     icu-svnprops-check.py  [options]
#
#  options:
#     -f | --fix     Fix any problems that are found
#     -h | --help    Print a usage line and exit.
#
#  The tool operates recursively on the directory from which it is run.
#  Only files from the svn repository are checked.
#  No changes are made to the repository; only the working copy will be altered.

import sys
import os
import os.path
import re
import getopt
import subprocess

#
#  svn autoprops definitions.
#      Copy and paste here the ICU recommended auto-props from
#      http://icu-project.org/docs/subversion_howto/index.html
#
#  This program will parse this autoprops string, and verify that files in
#  the repository have the recommended properties set.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
###   file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?').  All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""


# file_types:  The parsed form of the svn auto-props specification.
#              A list of file types - .cc, .cpp, .txt, etc.
#              each element is a (type, proplist) pair
#              "type" is a regular expression string that will match a file name
#              prop list is another list, one element per property.
#              Each property item is a two element tuple, (prop name, prop value)
file_types = []

# Regular expressions used while parsing svn_auto_props.
_BLANK_OR_COMMENT_RE = re.compile(r"\s*(#.*)?$")
_SECTION_HEADER_RE = re.compile(r"\s*\[auto-props\]")
_ENTRY_RE = re.compile(r"\s*[^\s]+\s*=")
# Split on ';' but not on ';;', which is an escaped ';' within a property value.
_PROP_SEPARATOR_RE = re.compile(r"(?<!;);(?!;)")

_UTF8_BOM = b"\xef\xbb\xbf"


def _glob_to_regex(pattern):
    """Convert an auto-props file-name pattern into an anchored regular expression.

    '*' becomes '.*', '?' becomes '.', and every other character is escaped,
    e.g. "*.cpp" ==> ".*\\.cpp$".
    """
    pieces = []
    for ch in pattern:
        if ch == "*":
            pieces.append(".*")
        elif ch == "?":
            pieces.append(".")
        else:
            pieces.append(re.escape(ch))
    return "".join(pieces) + "$"


def parse_auto_props():
    """Parse svn_auto_props and (re)populate the module-level file_types list.

    Comment lines, blank lines and the [auto-props] header are skipped.
    Lines without a "pattern =" prefix are reported and ignored.
    The list is cleared first, so calling this more than once does not
    produce duplicate entries.
    """
    del file_types[:]
    for propline in svn_auto_props.splitlines():
        if _BLANK_OR_COMMENT_RE.match(propline):
            continue
        if _SECTION_HEADER_RE.match(propline):
            continue
        if not _ENTRY_RE.match(propline):    # minimal syntax check for =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)
        file_type = _glob_to_regex(file_type.strip())

        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
        proplist = []
        for prop in _PROP_SEPARATOR_RE.split(string_proplist):
            # Properties with no explicit value, e.g. svn:executable, get "".
            prop_name, _, prop_val = prop.partition("=")
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name.strip(), prop_val))

        file_types.append((file_type, proplist))


def runCommand(cmd):
    """Run a command and return its standard output as text.

    cmd may be a single string, which is run through the shell, or a list
    or tuple of arguments, which is run directly so that file names with
    spaces or shell metacharacters are passed through unchanged.

    If the command fails, a message is written to stderr and the script
    exits with the command's exit code (1 if it was killed by a signal).
    """
    use_shell = not isinstance(cmd, (list, tuple))
    proc = subprocess.Popen(cmd, shell=use_shell, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output_text = proc.communicate()[0]
    if proc.returncode:
        shown = cmd if use_shell else " ".join(cmd)
        sys.stderr.write('"%s" failed.  Exiting.\n' % shown)
        # A raw wait status such as 256 would be truncated to 0 by the OS,
        # so exit with the real exit code instead.
        sys.exit(proc.returncode if proc.returncode > 0 else 1)
    return output_text


def usage():
    """Print a one-line usage summary."""
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")


#
#  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
#    file_name:        name of a text file.
#    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
#    actual_mime_type: existing svn:mime-type property value for the file.
#    return:           svn:mime-type property value, with charset added when appropriate.
#
def check_utf8(file_name, base_mime_type, actual_mime_type):
    # If the file already has a charset in its mime-type, don't make any change.
    if actual_mime_type.find("charset=") > 0:
        return actual_mime_type

    # Read raw bytes; the checks below are about the encoding itself.
    with open(file_name, "rb") as f:
        data = f.read()

    if all(byte < 128 for byte in bytearray(data)):
        # pure ASCII.
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    if not data.startswith(_UTF8_BOM):
        print("UTF-8 file with no BOM: " + file_name)

    # Append charset=utf-8.
    return base_mime_type + ";charset=utf-8"


def _read_bytes(path):
    """Return the entire contents of path as bytes."""
    with open(path, "rb") as f:
        return f.read()


def _has_crlf(path):
    """Return True if the file contains at least one DOS (CR LF) line ending."""
    return b"\r\n" in _read_bytes(path)


def _convert_crlf_to_lf(path):
    """Rewrite the file in place with every CR LF replaced by a plain LF."""
    data = _read_bytes(path)
    with open(path, "wb") as f:
        f.write(data.replace(b"\r\n", b"\n"))


def _check_file(f, fix_problems):
    """Check one working-copy file against every matching auto-props entry.

    Mismatched properties are reported as the "svn propset" command that
    would correct them; when fix_problems is true the command is also run.
    Files that should have native line endings are checked for CR LF.
    """
    # Auto-props patterns describe file names, so match against the base
    # name; otherwise "Makefile" or "configure" would only match at top level.
    base_name = os.path.basename(f)
    for file_pattern, props in file_types:
        if not re.match(file_pattern, base_name):
            continue
        for propname, propval in props:
            actual_propval = runCommand(["svn", "propget", "--strict", propname, f])
            if propname == "svn:mime-type" and propval.startswith("text/"):
                # check for UTF-8 text files, should have
                # svn:mime-type=text/something;charset=utf-8
                propval = check_utf8(f, propval, actual_propval)
            # A valueless property such as svn:executable is reported by svn as "*".
            if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
                print("svn propset %s '%s' %s" % (propname, propval, f))
                if fix_problems:
                    subprocess.call(["svn", "propset", propname, propval, f])
            if propname == "svn:eol-style" and propval == "native":
                if _has_crlf(f):
                    if fix_problems:
                        print(f + ": Removing DOS CR characters.")
                        _convert_crlf_to_lf(f)
                    else:
                        print(f + " contains DOS CR characters.")


def main(argv):
    """Parse command line options, then check every file listed by "svn ls -R".

    argv: the command line arguments, without the program name.
    Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report the option getopt actually rejected, not merely argv[0].
        print(str(err))
        usage()
        sys.exit(2)
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand(["svn", "ls", "-R"]).splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue
        _check_file(f, fix_problems)


if __name__ == "__main__":
    main(sys.argv[1:])