|
1 #! /usr/bin/python |
|
2 |
|
3 # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others. |
|
4 # All rights reserved. |
|
5 |
|
6 # |
|
7 # Script to check and fix svn property settings for ICU source files. |
|
8 # Also check for the correct line endings on files with svn:eol-style = native |
|
9 # |
|
10 # THIS SCRIPT DOES NOT WORK ON WINDOWS |
|
11 # It only works correctly on platforms where the native line ending is a plain \n |
|
12 # |
|
13 # usage: |
|
14 # icu-svnprops-check.py [options] |
|
15 # |
|
16 # options: |
|
17 # -f | --fix Fix any problems that are found |
|
18 # -h | --help Print a usage line and exit. |
|
19 # |
|
20 # The tool operates recursively on the directory from which it is run. |
|
21 # Only files from the svn repository are checked. |
|
22 # No changes are made to the repository; only the working copy will be altered. |
|
23 |
|
24 import sys |
|
25 import os |
|
26 import os.path |
|
27 import re |
|
28 import getopt |
|
29 |
|
#
# svn autoprops definitions.
# Copy and paste here the ICU recommended auto-props from
# http://icu-project.org/docs/subversion_howto/index.html
#
# This program will parse this autoprops string, and verify that files in
# the repository have the recommended properties set.
#
# Lines in the string that are blank or start with '#', and the
# "[auto-props]" section header, are skipped by parse_auto_props().
#
# NOTE(review): the *.rtf and *.pdf entries name the property "mime-type"
# without the "svn:" prefix that every other entry uses -- confirm against
# the upstream auto-props whether that is intended.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
### file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?'). All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
|
87 |
|
88 |
|
# file_types: the svn auto-props specification after parsing.
#   One entry per file type (.cc, .cpp, .txt, ...), appended by
#   parse_auto_props().  Each entry is a pair (type, proplist):
#     type      a regular expression string that will match a file name
#     proplist  a list with one (prop name, prop value) pair per property
file_types = []
|
96 |
|
def _autoprops_glob_to_regex(glob):
    r"""Translate an auto-props file-name pattern into a regular expression string.

    '*' becomes '.*' and '?' becomes '.'; every other character is escaped so
    that it only matches itself.  The result is anchored at the end with '$'
    and is meant to be used with re.match(), e.g. "*.cpp" ==> ".*\.cpp$"
    """
    pieces = []
    for ch in glob:
        if ch == "*":
            pieces.append(".*")
        elif ch == "?":
            pieces.append(".")
        else:
            pieces.append(re.escape(ch))
    return "".join(pieces) + "$"


def parse_auto_props():
    """Parse the svn_auto_props text and append the result to file_types.

    Blank lines, comment lines and the "[auto-props]" header are skipped.
    A line that does not look like "<file-type> = ..." is reported on
    standard output and skipped.

    For every remaining line one (regex, proplist) tuple is appended to
    file_types, where regex is the file-name pattern translated to a regular
    expression string and proplist is a list of (prop name, prop value)
    tuples.  Properties written without a value (e.g. svn:executable) get
    the value "".
    """
    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):           # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):    # the [auto-props] line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):    # minimal check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)
        file_type = _autoprops_glob_to_regex(file_type.strip())

        # Example string_proplist here: " svn:eol-style=native;svn:executable"
        # Split on ';' into individual properties.  The lookbehind/lookahead
        # keep the split from firing on ';;', which is an escaped ';' inside
        # a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # Empty piece, e.g. from a trailing ';' -- not a property.
                continue
            # Unescape any ";;" in the value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
|
137 |
|
138 |
|
def runCommand(cmd):
    """Run a shell command and return everything it wrote to standard output.

    cmd:    the complete command line, handed to os.popen().
    return: the command's standard output as one string.

    If the command reports a failure, a message is written to stderr and the
    script exits with a non-zero status.
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # close() reports a wait()-style status (exit code in the high byte).
        # Passing that straight to sys.exit() would be truncated to 8 bits,
        # turning e.g. 256 into a "successful" exit code of 0.
        if os.WIFEXITED(exit_status):
            exit_code = os.WEXITSTATUS(exit_status)
        else:
            exit_code = 1
        sys.exit(exit_code or 1)
    return output_text
|
147 |
|
148 |
|
def usage():
    """Print the one-line command synopsis to standard output."""
    program_name = sys.argv[0]
    synopsis = "".join(("usage: ", program_name, " [-f | --fix] [-h | --help]"))
    print(synopsis)
|
151 |
|
152 |
|
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Work out the svn:mime-type a text file should have, adding a charset for UTF-8.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type property value from the auto-props
                      definitions.  It may already carry a charset
                      (the *.java entry does).
    actual_mime_type: existing svn:mime-type property value for the file.
    return:           svn:mime-type property value, with charset added when
                      appropriate.

    The file is only read when actual_mime_type does not already name a
    charset.  Diagnostics for non-UTF-8 content and for UTF-8 content
    without a BOM are printed to standard output.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if actual_mime_type.find("charset=") > 0:
        return actual_mime_type

    # Read raw bytes; the checks below are about the encoded form.
    with open(file_name, "rb") as f:
        data = f.read()

    # bytearray yields integers on both Python 2 and Python 3.
    if all(byte < 128 for byte in bytearray(data)):
        # pure ASCII (this also covers an empty file).
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare against the complete three-byte BOM; a lone 0xEF lead byte
    # also starts ordinary characters.
    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Don't produce "...;charset=utf-8;charset=utf-8" when the auto-props
    # value already specifies a charset.
    if "charset=" in base_mime_type:
        return base_mime_type
    return base_mime_type + ';charset=utf-8'
|
187 |
|
188 |
|
def _shell_quote(text, always=False):
    """Return text as a single POSIX shell word.

    Text made only of characters that are never special to the shell is
    returned unchanged unless `always` is true; anything else (including the
    empty string) is wrapped in single quotes, with embedded single quotes
    escaped.
    """
    if text and not always and re.match(r"[A-Za-z0-9_@%+=:,./-]+$", text):
        return text
    return "'" + text.replace("'", "'\\''") + "'"


def main(argv):
    """Check, and optionally fix, the svn properties of the files below the current directory.

    argv: command line arguments without the program name.
          -f / --fix   apply the fixes instead of only reporting them
          -h / --help  print a usage line and exit

    Every file listed by "svn ls -R" that exists in the working copy is
    compared against each auto-props entry whose pattern matches its name.
    For each property that differs, the corresponding "svn propset" command
    is printed, and run when fixing is enabled.  Files that should have
    svn:eol-style=native are additionally checked for CR characters.

    Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually objected to, not just the first argument.
        print(err.msg)
        usage()
        sys.exit(2)
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)     # a usage error must not look like success

    parse_auto_props()
    output = runCommand("svn ls -R ")
    file_list = output.splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Auto-props patterns apply to the file name, not the whole path, so
        # that entries like "Makefile" also match files in subdirectories.
        base_name = os.path.basename(f)
        # Paths go through the shell below; quote anything that needs it.
        quoted_f = _shell_quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, expected_val in props:
                actual_propval = runCommand("svn propget --strict " + propname + " " + quoted_f)
                if propname == "svn:mime-type" and expected_val.startswith("text/"):
                    # UTF-8 text files should have svn:mime-type=text/something;charset=utf-8
                    expected_val = check_utf8(f, expected_val, actual_propval)
                # A property with no explicit value (svn:executable) reads back as "*".
                matches = (expected_val == actual_propval or
                           (expected_val == "" and actual_propval == "*"))
                if not matches:
                    propset_cmd = "svn propset %s %s %s" % (
                        propname, _shell_quote(expected_val, always=True), quoted_f)
                    print(propset_cmd)
                    if fix_problems:
                        os.system(propset_cmd)
                if propname == "svn:eol-style" and expected_val == "native":
                    # grep -v exits non-zero when no line is free of CR.  An
                    # empty file has no lines at all, so skip it rather than
                    # report it.
                    # NOTE(review): this only fires when every line contains
                    # a CR; files with mixed line endings are not reported --
                    # confirm whether that is intended.
                    if os.path.getsize(f) > 0 and os.system("grep -q -v \r " + quoted_f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            os.system("sed -i s/\r// " + quoted_f)
                        else:
                            print(f + " contains DOS CR characters.")
|
240 |
|
241 |
|
if __name__ == "__main__":
    # Run as a script: pass everything after the program name to main().
    command_line_args = sys.argv[1:]
    main(command_line_args)