#! /usr/bin/python

# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
# All rights reserved.

#
#  Script to check and fix svn property settings for ICU source files.
#  Also check for the correct line endings on files with svn:eol-style = native
#
#  THIS SCRIPT DOES NOT WORK ON WINDOWS
#     It only works correctly on platforms where the native line ending is a plain \n
#
#  usage:
#     icu-svnprops-check.py  [options]
#
#  options:
#     -f | --fix     Fix any problems that are found
#     -h | --help    Print a usage line and exit.
#
#  The tool operates recursively on the directory from which it is run.
#  Only files from the svn repository are checked.
#  No changes are made to the repository; only the working copy will be altered.
#
#  (Originally added as intl/icu/source/tools/icu-svnprops-check.py.)
#  The code is written to run unchanged on Python 2.6+ and Python 3.

import sys
import os
import os.path
import re
import getopt
import subprocess

#
#  svn autoprops definitions.
#      Copy and paste here the ICU recommended auto-props from
#      http://icu-project.org/docs/subversion_howto/index.html
#
#  This program will parse this autoprops string, and verify that files in
#  the repository have the recommended properties set.
#
#  NOTE(review): the *.rtf and *.pdf entries name the property "mime-type"
#  without the "svn:" prefix; left untouched because the text is meant to be a
#  verbatim copy of the recommended settings -- confirm upstream.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
###   file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?').  All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""


# file_types:  The parsed form of the svn auto-props specification.
#              A list of file types - .cc, .cpp, .txt, etc.
#              each element is a (type, proplist) pair
#              "type" is a regular expression string that will match a file name
#              prop list is another list, one element per property.
#              Each property item is a two element tuple, (prop name, prop value)
file_types = list()


def parse_auto_props():
    """Parse svn_auto_props into the module-level file_types list.

    Comment lines, blank lines and the "[auto-props]" header are skipped.  A
    line that does not look like "<pattern> = ..." is reported on stdout and
    ignored.  The list is emptied first, so calling this function more than
    once does not accumulate duplicate entries.

    Returns:
        The file_types list itself (the same object that is filled in place).
    """
    del file_types[:]   # clear in place; other references to the list stay valid
    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):            # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):     # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):     # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # transform the file type expression from autoprops into a normal regular expression.
        #  e.g.  "*.cpp"  ==>  ".*\.cpp$"
        file_type = file_type.strip()
        file_type = file_type.replace(".", r"\.")
        file_type = file_type.replace("*", ".*")
        file_type = file_type.replace("?", ".")          # '?' wildcard: any single character
        file_type = file_type + "$"

        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # split on ';' into a list of properties.  The negative lookahead and lookbehind
        # in the split regexp are to prevent matching on ';;', which is an escaped ';'
        # within a property value.
        proplist = list()
        for prop in re.split("(?<!;);(?!;)", string_proplist):
            if not prop.strip():
                # e.g. produced by a trailing ';' -- there is no property to record
                continue
            # properties with no explicit value, e.g. svn:executable, get the value ""
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            # unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
    return file_types


def runCommand(cmd):
    """Run an external command and return what it wrote to stdout, as text.

    Args:
        cmd: either a command string, which is run through the shell, or a
            list/tuple of arguments, which is executed directly without a
            shell (safe for file names containing spaces or shell
            metacharacters).

    Returns:
        The command's standard output.

    If the command fails, a message is written to stderr and the script exits
    with the command's exit code (1 when the command was killed by a signal).
    """
    use_shell = not isinstance(cmd, (list, tuple))
    proc = subprocess.Popen(cmd, shell=use_shell, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output_text = proc.communicate()[0]
    if proc.returncode:
        shown = cmd if use_shell else " ".join(cmd)
        sys.stderr.write('" %s " failed. Exiting.\n' % shown)
        # A negative return code means "terminated by signal"; report a plain failure.
        sys.exit(proc.returncode if proc.returncode > 0 else 1)
    return output_text


def usage():
    """Print a one-line usage summary to stdout."""
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")


#
#  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
#    file_name:        name of a text file.
#    base_mime_type:   svn:mime-type property value from the auto-props file
#    actual_mime_type: existing svn:mime-type property value for the file.
#    return:           svn:mime-type property value, with charset added when appropriate.
#
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Return the svn:mime-type value that file_name should carry.

    - If actual_mime_type already names a charset it is returned unchanged.
    - Pure ASCII files, and files that are neither ASCII nor valid UTF-8 (a
      warning is printed for the latter), get base_mime_type.
    - Valid non-ASCII UTF-8 files get base_mime_type with ";charset=utf-8"
      appended, unless base_mime_type already specifies a charset.  A notice is
      printed when such a file does not start with a UTF-8 byte order mark.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Binary mode: the raw bytes are what must be inspected, independent of the
    # platform's default text encoding.
    with open(file_name, 'rb') as f:
        data = f.read()

    try:
        data.decode("ascii")
    except UnicodeDecodeError:
        pass
    else:
        # pure ASCII (this includes the empty file).
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    if not data.startswith(b'\xef\xbb\xbf'):
        print("UTF-8 file with no BOM: " + file_name)

    # Append charset=utf-8, unless the auto-props value already carries a
    # charset (as the *.java entry does); otherwise it would be added twice.
    if "charset=" in base_mime_type:
        return base_mime_type
    return base_mime_type + ';charset=utf-8'


def _has_dos_line_endings(path):
    """Return True if the file at path contains at least one CR LF sequence."""
    with open(path, 'rb') as f:
        return b"\r\n" in f.read()


def _remove_dos_line_endings(path):
    """Rewrite the file at path in place with every CR LF replaced by a plain LF."""
    with open(path, 'rb') as f:
        data = f.read()
    with open(path, 'wb') as f:
        # "\r+" also collapses doubled CRs, so one pass always leaves the file clean.
        f.write(re.sub(b"\r+\n", b"\n", data))


def main(argv):
    """Check (and with -f/--fix, repair) svn properties of the files below the current directory.

    Args:
        argv: command line arguments, without the program name.

    For every file listed by "svn ls -R" that exists in the working copy, each
    auto-props entry whose pattern matches the file's base name is compared
    with the file's actual property values.  The "svn propset" command needed
    to correct a difference is printed, and is run when fixing is enabled.
    Files with svn:eol-style=native are also checked for DOS line endings.
    Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually rejected; argv[0] need not be the culprit.
        print(err.msg)
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand(["svn", "ls", "-R"]).splitlines()

    for f in file_list:
        if os.path.isdir(f):
            # print "Skipping dir " + f
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Match patterns against the base name so that entries without a
        # wildcard ("Makefile", "configure") also apply to files in
        # subdirectories.
        base_name = os.path.basename(f)
        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            # print "doing " + f
            for propname, propval in props:
                actual_propval = runCommand(["svn", "propget", "--strict", propname, f])
                # print propname + ": " + actual_propval
                if propname == "svn:mime-type" and propval.find("text/") == 0:
                    # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8
                    propval = check_utf8(f, propval, actual_propval)
                # Properties given without a value (e.g. svn:executable) are reported as "*".
                if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
                    print("svn propset %s '%s' %s" % (propname, propval, f))
                    if fix_problems:
                        subprocess.call(["svn", "propset", propname, propval, f])
                if propname == "svn:eol-style" and propval == "native":
                    if _has_dos_line_endings(f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            _remove_dos_line_endings(f)
                        else:
                            print(f + " contains DOS CR characters.")


if __name__ == "__main__":
    main(sys.argv[1:])