michael@0: # vim: set ts=8 sts=4 et sw=4 tw=99: michael@0: # This Source Code Form is subject to the terms of the Mozilla Public michael@0: # License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: # file, You can obtain one at http://mozilla.org/MPL/2.0/. michael@0: michael@0: #---------------------------------------------------------------------------- michael@0: # This script checks various aspects of SpiderMonkey code style. The current checks are as michael@0: # follows. michael@0: # michael@0: # We check the following things in headers. michael@0: # michael@0: # - No cyclic dependencies. michael@0: # michael@0: # - No normal header should #include a inlines.h/-inl.h file. michael@0: # michael@0: # - #ifndef wrappers should have the right form. (XXX: not yet implemented) michael@0: # - Every header file should have one. michael@0: # - The guard name used should be appropriate for the filename. michael@0: # michael@0: # We check the following things in all files. michael@0: # michael@0: # - #includes should have full paths, e.g. "jit/Ion.h", not "Ion.h". michael@0: # michael@0: # - #includes should use the appropriate form for system headers (<...>) and michael@0: # local headers ("..."). michael@0: # michael@0: # - #includes should be ordered correctly. michael@0: # - Each one should be in the correct section. michael@0: # - Alphabetical order should be used within sections. michael@0: # - Sections should be in the right order. michael@0: # Note that the presence of #if/#endif blocks complicates things, to the michael@0: # point that it's not always clear where a conditionally-compiled #include michael@0: # statement should go, even to a human. Therefore, we check the #include michael@0: # statements within each #if/#endif block (including nested ones) in michael@0: # isolation, but don't try to do any order checking between such blocks. michael@0: #---------------------------------------------------------------------------- michael@0: michael@0: from __future__ import print_function michael@0: michael@0: import difflib michael@0: import os michael@0: import re michael@0: import subprocess michael@0: import sys michael@0: import traceback michael@0: michael@0: # We don't bother checking files in these directories, because they're (a) auxiliary or (b) michael@0: # imported code that doesn't follow our coding style. michael@0: ignored_js_src_dirs = [ michael@0: 'js/src/config/', # auxiliary stuff michael@0: 'js/src/ctypes/libffi/', # imported code michael@0: 'js/src/devtools/', # auxiliary stuff michael@0: 'js/src/editline/', # imported code michael@0: 'js/src/gdb/', # auxiliary stuff michael@0: 'js/src/vtune/' # imported code michael@0: ] michael@0: michael@0: # We ignore #includes of these files, because they don't follow the usual rules. michael@0: included_inclnames_to_ignore = set([ michael@0: 'ffi.h', # generated in ctypes/libffi/ michael@0: 'devtools/sharkctl.h', # we ignore devtools/ in general michael@0: 'devtools/Instruments.h', # we ignore devtools/ in general michael@0: 'double-conversion.h', # strange MFBT case michael@0: 'javascript-trace.h', # generated in $OBJDIR if HAVE_DTRACE is defined michael@0: 'jsautokw.h', # generated in $OBJDIR michael@0: 'jscustomallocator.h', # provided by embedders; allowed to be missing michael@0: 'js-config.h', # generated in $OBJDIR michael@0: 'pratom.h', # NSPR michael@0: 'prcvar.h', # NSPR michael@0: 'prinit.h', # NSPR michael@0: 'prlink.h', # NSPR michael@0: 'prlock.h', # NSPR michael@0: 'prprf.h', # NSPR michael@0: 'prthread.h', # NSPR michael@0: 'prtypes.h', # NSPR michael@0: 'selfhosted.out.h', # generated in $OBJDIR michael@0: 'unicode/locid.h', # ICU michael@0: 'unicode/numsys.h', # ICU michael@0: 'unicode/ucal.h', # ICU michael@0: 'unicode/uclean.h', # ICU michael@0: 'unicode/ucol.h', # ICU michael@0: 'unicode/udat.h', # ICU michael@0: 'unicode/udatpg.h', # ICU michael@0: 'unicode/uenum.h', # ICU michael@0: 'unicode/unorm.h', # ICU michael@0: 'unicode/unum.h', # ICU michael@0: 'unicode/ustring.h', # ICU michael@0: 'unicode/utypes.h', # ICU michael@0: 'vtune/VTuneWrapper.h' # VTune michael@0: ]) michael@0: michael@0: # These files have additional constraints on where they are #included, so we michael@0: # ignore #includes of them when checking #include ordering. michael@0: oddly_ordered_inclnames = set([ michael@0: 'ctypes/typedefs.h', # Included multiple times in the body of ctypes/CTypes.h michael@0: 'jsautokw.h', # Included in the body of frontend/TokenStream.h michael@0: 'jswin.h', # Must be #included before michael@0: 'machine/endian.h', # Must be included after on BSD michael@0: 'winbase.h', # Must precede other system headers(?) michael@0: 'windef.h' # Must precede other system headers(?) michael@0: ]) michael@0: michael@0: # The files in tests/style/ contain code that fails this checking in various michael@0: # ways. Here is the output we expect. If the actual output differs from michael@0: # this, one of the following must have happened. michael@0: # - New SpiderMonkey code violates one of the checked rules. michael@0: # - The tests/style/ files have changed without expected_output being changed michael@0: # accordingly. michael@0: # - This script has been broken somehow. michael@0: # michael@0: expected_output = '''\ michael@0: js/src/tests/style/BadIncludes2.h:1: error: michael@0: vanilla header includes an inline-header file "tests/style/BadIncludes2-inl.h" michael@0: michael@0: js/src/tests/style/BadIncludes.h:3: error: michael@0: the file includes itself michael@0: michael@0: js/src/tests/style/BadIncludes.h:6: error: michael@0: "BadIncludes2.h" is included using the wrong path; michael@0: did you forget a prefix, or is the file not yet committed? michael@0: michael@0: js/src/tests/style/BadIncludes.h:8: error: michael@0: should be included using michael@0: the #include "..." form michael@0: michael@0: js/src/tests/style/BadIncludes.h:10: error: michael@0: "stdio.h" is included using the wrong path; michael@0: did you forget a prefix, or is the file not yet committed? michael@0: michael@0: js/src/tests/style/BadIncludesOrder-inl.h:5:6: error: michael@0: "vm/Interpreter-inl.h" should be included after "jsscriptinlines.h" michael@0: michael@0: js/src/tests/style/BadIncludesOrder-inl.h:6:7: error: michael@0: "jsscriptinlines.h" should be included after "js/Value.h" michael@0: michael@0: js/src/tests/style/BadIncludesOrder-inl.h:7:8: error: michael@0: "js/Value.h" should be included after "ds/LifoAlloc.h" michael@0: michael@0: js/src/tests/style/BadIncludesOrder-inl.h:8:9: error: michael@0: "ds/LifoAlloc.h" should be included after "jsapi.h" michael@0: michael@0: js/src/tests/style/BadIncludesOrder-inl.h:9:10: error: michael@0: "jsapi.h" should be included after michael@0: michael@0: js/src/tests/style/BadIncludesOrder-inl.h:10:11: error: michael@0: should be included after "mozilla/HashFunctions.h" michael@0: michael@0: js/src/tests/style/BadIncludesOrder-inl.h:27:28: error: michael@0: "jsobj.h" should be included after "jsfun.h" michael@0: michael@0: (multiple files): error: michael@0: header files form one or more cycles michael@0: michael@0: tests/style/HeaderCycleA1.h michael@0: -> tests/style/HeaderCycleA2.h michael@0: -> tests/style/HeaderCycleA3.h michael@0: -> tests/style/HeaderCycleA1.h michael@0: michael@0: tests/style/HeaderCycleB1-inl.h michael@0: -> tests/style/HeaderCycleB2-inl.h michael@0: -> tests/style/HeaderCycleB3-inl.h michael@0: -> tests/style/HeaderCycleB4-inl.h michael@0: -> tests/style/HeaderCycleB1-inl.h michael@0: -> tests/style/jsheadercycleB5inlines.h michael@0: -> tests/style/HeaderCycleB1-inl.h michael@0: -> tests/style/HeaderCycleB4-inl.h michael@0: michael@0: '''.splitlines(True) michael@0: michael@0: actual_output = [] michael@0: michael@0: michael@0: def out(*lines): michael@0: for line in lines: michael@0: actual_output.append(line + '\n') michael@0: michael@0: michael@0: def error(filename, linenum, *lines): michael@0: location = filename michael@0: if linenum is not None: michael@0: location += ':' + str(linenum) michael@0: out(location + ': error:') michael@0: for line in (lines): michael@0: out(' ' + line) michael@0: out('') michael@0: michael@0: michael@0: class FileKind(object): michael@0: C = 1 michael@0: CPP = 2 michael@0: INL_H = 3 michael@0: H = 4 michael@0: TBL = 5 michael@0: MSG = 6 michael@0: michael@0: @staticmethod michael@0: def get(filename): michael@0: if filename.endswith('.c'): michael@0: return FileKind.C michael@0: michael@0: if filename.endswith('.cpp'): michael@0: return FileKind.CPP michael@0: michael@0: if filename.endswith(('inlines.h', '-inl.h')): michael@0: return FileKind.INL_H michael@0: michael@0: if filename.endswith('.h'): michael@0: return FileKind.H michael@0: michael@0: if filename.endswith('.tbl'): michael@0: return FileKind.TBL michael@0: michael@0: if filename.endswith('.msg'): michael@0: return FileKind.MSG michael@0: michael@0: error(filename, None, 'unknown file kind') michael@0: michael@0: michael@0: def get_all_filenames(): michael@0: '''Get a list of all the files in the (Mercurial or Git) repository.''' michael@0: cmds = [['hg', 'manifest', '-q'], ['git', 'ls-files', '--full-name', '../..']] michael@0: for cmd in cmds: michael@0: try: michael@0: all_filenames = subprocess.check_output(cmd, universal_newlines=True, michael@0: stderr=subprocess.PIPE).split('\n') michael@0: return all_filenames michael@0: except: michael@0: continue michael@0: else: michael@0: raise Exception('failed to run any of the repo manifest commands', cmds) michael@0: michael@0: michael@0: def check_style(): michael@0: # We deal with two kinds of name. michael@0: # - A "filename" is a full path to a file from the repository root. michael@0: # - An "inclname" is how a file is referred to in a #include statement. michael@0: # michael@0: # Examples (filename -> inclname) michael@0: # - "mfbt/Attributes.h" -> "mozilla/Attributes.h" michael@0: # - "js/public/Vector.h" -> "js/Vector.h" michael@0: # - "js/src/vm/String.h" -> "vm/String.h" michael@0: michael@0: mfbt_inclnames = set() # type: set(inclname) michael@0: js_names = dict() # type: dict(filename, inclname) michael@0: michael@0: # Select the appropriate files. michael@0: for filename in get_all_filenames(): michael@0: if filename.startswith('mfbt/') and filename.endswith('.h'): michael@0: inclname = 'mozilla/' + filename[len('mfbt/'):] michael@0: mfbt_inclnames.add(inclname) michael@0: michael@0: if filename.startswith('js/public/') and filename.endswith('.h'): michael@0: inclname = 'js/' + filename[len('js/public/'):] michael@0: js_names[filename] = inclname michael@0: michael@0: if filename.startswith('js/src/') and \ michael@0: not filename.startswith(tuple(ignored_js_src_dirs)) and \ michael@0: filename.endswith(('.c', '.cpp', '.h', '.tbl', '.msg')): michael@0: inclname = filename[len('js/src/'):] michael@0: js_names[filename] = inclname michael@0: michael@0: all_inclnames = mfbt_inclnames | set(js_names.values()) michael@0: michael@0: edges = dict() # type: dict(inclname, set(inclname)) michael@0: michael@0: # We don't care what's inside the MFBT files, but because they are michael@0: # #included from JS files we have to add them to the inclusion graph. michael@0: for inclname in mfbt_inclnames: michael@0: edges[inclname] = set() michael@0: michael@0: # Process all the JS files. michael@0: for filename in js_names.keys(): michael@0: inclname = js_names[filename] michael@0: file_kind = FileKind.get(filename) michael@0: if file_kind == FileKind.C or file_kind == FileKind.CPP or \ michael@0: file_kind == FileKind.H or file_kind == FileKind.INL_H: michael@0: included_h_inclnames = set() # type: set(inclname) michael@0: michael@0: # This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla michael@0: # source tree. michael@0: with open(os.path.join('../..', filename)) as f: michael@0: do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames) michael@0: michael@0: edges[inclname] = included_h_inclnames michael@0: michael@0: find_cycles(all_inclnames, edges) michael@0: michael@0: # Compare expected and actual output. michael@0: difflines = difflib.unified_diff(expected_output, actual_output, michael@0: fromfile='check_spider_monkey_style.py expected output', michael@0: tofile='check_spider_monkey_style.py actual output') michael@0: ok = True michael@0: for diffline in difflines: michael@0: ok = False michael@0: print(diffline, end='') michael@0: michael@0: return ok michael@0: michael@0: michael@0: def module_name(name): michael@0: '''Strip the trailing .cpp, .h, inlines.h or -inl.h from a filename.''' michael@0: michael@0: return name.replace('inlines.h', '').replace('-inl.h', '').replace('.h', '').replace('.cpp', '') michael@0: michael@0: michael@0: def is_module_header(enclosing_inclname, header_inclname): michael@0: '''Determine if an included name is the "module header", i.e. should be michael@0: first in the file.''' michael@0: michael@0: module = module_name(enclosing_inclname) michael@0: michael@0: # Normal case, e.g. module == "foo/Bar", header_inclname == "foo/Bar.h". michael@0: if module == module_name(header_inclname): michael@0: return True michael@0: michael@0: # A public header, e.g. module == "foo/Bar", header_inclname == "js/Bar.h". michael@0: m = re.match(r'js\/(.*)\.h', header_inclname) michael@0: if m is not None and module.endswith('/' + m.group(1)): michael@0: return True michael@0: michael@0: return False michael@0: michael@0: michael@0: class Include(object): michael@0: '''Important information for a single #include statement.''' michael@0: michael@0: def __init__(self, inclname, linenum, is_system): michael@0: self.inclname = inclname michael@0: self.linenum = linenum michael@0: self.is_system = is_system michael@0: michael@0: def isLeaf(self): michael@0: return True michael@0: michael@0: def section(self, enclosing_inclname): michael@0: '''Identify which section inclname belongs to. michael@0: michael@0: The section numbers are as follows. michael@0: 0. Module header (e.g. jsfoo.h or jsfooinlines.h within jsfoo.cpp) michael@0: 1. mozilla/Foo.h michael@0: 2. or michael@0: 3. jsfoo.h, prmjtime.h, etc michael@0: 4. foo/Bar.h michael@0: 5. jsfooinlines.h michael@0: 6. foo/Bar-inl.h michael@0: 7. non-.h, e.g. *.tbl, *.msg michael@0: ''' michael@0: michael@0: if self.is_system: michael@0: return 2 michael@0: michael@0: if not self.inclname.endswith('.h'): michael@0: return 7 michael@0: michael@0: # A couple of modules have the .h file in js/ and the .cpp file elsewhere and so need michael@0: # special handling. michael@0: if is_module_header(enclosing_inclname, self.inclname): michael@0: return 0 michael@0: michael@0: if '/' in self.inclname: michael@0: if self.inclname.startswith('mozilla/'): michael@0: return 1 michael@0: michael@0: if self.inclname.endswith('-inl.h'): michael@0: return 6 michael@0: michael@0: return 4 michael@0: michael@0: if self.inclname.endswith('inlines.h'): michael@0: return 5 michael@0: michael@0: return 3 michael@0: michael@0: def quote(self): michael@0: if self.is_system: michael@0: return '<' + self.inclname + '>' michael@0: else: michael@0: return '"' + self.inclname + '"' michael@0: michael@0: michael@0: class HashIfBlock(object): michael@0: '''Important information about a #if/#endif block. michael@0: michael@0: A #if/#endif block is the contents of a #if/#endif (or similar) section. michael@0: The top-level block, which is not within a #if/#endif pair, is also michael@0: considered a block. michael@0: michael@0: Each leaf is either an Include (representing a #include), or another michael@0: nested HashIfBlock.''' michael@0: def __init__(self): michael@0: self.kids = [] michael@0: michael@0: def isLeaf(self): michael@0: return False michael@0: michael@0: michael@0: def do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames): michael@0: block_stack = [HashIfBlock()] michael@0: michael@0: # Extract the #include statements as a tree of IBlocks and IIncludes. michael@0: for linenum, line in enumerate(f, start=1): michael@0: # Look for a |#include "..."| line. michael@0: m = re.match(r'\s*#\s*include\s+"([^"]*)"', line) michael@0: if m is not None: michael@0: block_stack[-1].kids.append(Include(m.group(1), linenum, False)) michael@0: michael@0: # Look for a |#include <...>| line. michael@0: m = re.match(r'\s*#\s*include\s+<([^>]*)>', line) michael@0: if m is not None: michael@0: block_stack[-1].kids.append(Include(m.group(1), linenum, True)) michael@0: michael@0: # Look for a |#{if,ifdef,ifndef}| line. michael@0: m = re.match(r'\s*#\s*(if|ifdef|ifndef)\b', line) michael@0: if m is not None: michael@0: # Open a new block. michael@0: new_block = HashIfBlock() michael@0: block_stack[-1].kids.append(new_block) michael@0: block_stack.append(new_block) michael@0: michael@0: # Look for a |#{elif,else}| line. michael@0: m = re.match(r'\s*#\s*(elif|else)\b', line) michael@0: if m is not None: michael@0: # Close the current block, and open an adjacent one. michael@0: block_stack.pop() michael@0: new_block = HashIfBlock() michael@0: block_stack[-1].kids.append(new_block) michael@0: block_stack.append(new_block) michael@0: michael@0: # Look for a |#endif| line. michael@0: m = re.match(r'\s*#\s*endif\b', line) michael@0: if m is not None: michael@0: # Close the current block. michael@0: block_stack.pop() michael@0: michael@0: def check_include_statement(include): michael@0: '''Check the style of a single #include statement.''' michael@0: michael@0: if include.is_system: michael@0: # Check it is not a known local file (in which case it's probably a system header). michael@0: if include.inclname in included_inclnames_to_ignore or \ michael@0: include.inclname in all_inclnames: michael@0: error(filename, include.linenum, michael@0: include.quote() + ' should be included using', michael@0: 'the #include "..." form') michael@0: michael@0: else: michael@0: if include.inclname not in included_inclnames_to_ignore: michael@0: included_kind = FileKind.get(include.inclname) michael@0: michael@0: # Check the #include path has the correct form. michael@0: if include.inclname not in all_inclnames: michael@0: error(filename, include.linenum, michael@0: include.quote() + ' is included ' + 'using the wrong path;', michael@0: 'did you forget a prefix, or is the file not yet committed?') michael@0: michael@0: # Record inclusions of .h files for cycle detection later. michael@0: # (Exclude .tbl and .msg files.) michael@0: elif included_kind == FileKind.H or included_kind == FileKind.INL_H: michael@0: included_h_inclnames.add(include.inclname) michael@0: michael@0: # Check a H file doesn't #include an INL_H file. michael@0: if file_kind == FileKind.H and included_kind == FileKind.INL_H: michael@0: error(filename, include.linenum, michael@0: 'vanilla header includes an inline-header file ' + include.quote()) michael@0: michael@0: # Check a file doesn't #include itself. (We do this here because the cycle michael@0: # detection below doesn't detect this case.) michael@0: if inclname == include.inclname: michael@0: error(filename, include.linenum, 'the file includes itself') michael@0: michael@0: def check_includes_order(include1, include2): michael@0: '''Check the ordering of two #include statements.''' michael@0: michael@0: if include1.inclname in oddly_ordered_inclnames or \ michael@0: include2.inclname in oddly_ordered_inclnames: michael@0: return michael@0: michael@0: section1 = include1.section(inclname) michael@0: section2 = include2.section(inclname) michael@0: if (section1 > section2) or \ michael@0: ((section1 == section2) and (include1.inclname.lower() > include2.inclname.lower())): michael@0: error(filename, str(include1.linenum) + ':' + str(include2.linenum), michael@0: include1.quote() + ' should be included after ' + include2.quote()) michael@0: michael@0: # The #include statements in the files in assembler/ and yarr/ have all manner of implicit michael@0: # ordering requirements. Boo. Ignore them. michael@0: skip_order_checking = inclname.startswith(('assembler/', 'yarr/')) michael@0: michael@0: # Check the extracted #include statements, both individually, and the ordering of michael@0: # adjacent pairs that live in the same block. michael@0: def pair_traverse(prev, this): michael@0: if this.isLeaf(): michael@0: check_include_statement(this) michael@0: if prev is not None and prev.isLeaf() and not skip_order_checking: michael@0: check_includes_order(prev, this) michael@0: else: michael@0: for prev2, this2 in zip([None] + this.kids[0:-1], this.kids): michael@0: pair_traverse(prev2, this2) michael@0: michael@0: pair_traverse(None, block_stack[-1]) michael@0: michael@0: michael@0: def find_cycles(all_inclnames, edges): michael@0: '''Find and draw any cycles.''' michael@0: michael@0: SCCs = tarjan(all_inclnames, edges) michael@0: michael@0: # The various sorted() calls below ensure the output is deterministic. michael@0: michael@0: def draw_SCC(c): michael@0: cset = set(c) michael@0: drawn = set() michael@0: def draw(v, indent): michael@0: out(' ' * indent + ('-> ' if indent else ' ') + v) michael@0: if v in drawn: michael@0: return michael@0: drawn.add(v) michael@0: for succ in sorted(edges[v]): michael@0: if succ in cset: michael@0: draw(succ, indent + 1) michael@0: draw(sorted(c)[0], 0) michael@0: out('') michael@0: michael@0: have_drawn_an_SCC = False michael@0: for scc in sorted(SCCs): michael@0: if len(scc) != 1: michael@0: if not have_drawn_an_SCC: michael@0: error('(multiple files)', None, 'header files form one or more cycles') michael@0: have_drawn_an_SCC = True michael@0: michael@0: draw_SCC(scc) michael@0: michael@0: michael@0: # Tarjan's algorithm for finding the strongly connected components (SCCs) of a graph. michael@0: # https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm michael@0: def tarjan(V, E): michael@0: vertex_index = {} michael@0: vertex_lowlink = {} michael@0: index = 0 michael@0: S = [] michael@0: all_SCCs = [] michael@0: michael@0: def strongconnect(v, index): michael@0: # Set the depth index for v to the smallest unused index michael@0: vertex_index[v] = index michael@0: vertex_lowlink[v] = index michael@0: index += 1 michael@0: S.append(v) michael@0: michael@0: # Consider successors of v michael@0: for w in E[v]: michael@0: if w not in vertex_index: michael@0: # Successor w has not yet been visited; recurse on it michael@0: index = strongconnect(w, index) michael@0: vertex_lowlink[v] = min(vertex_lowlink[v], vertex_lowlink[w]) michael@0: elif w in S: michael@0: # Successor w is in stack S and hence in the current SCC michael@0: vertex_lowlink[v] = min(vertex_lowlink[v], vertex_index[w]) michael@0: michael@0: # If v is a root node, pop the stack and generate an SCC michael@0: if vertex_lowlink[v] == vertex_index[v]: michael@0: i = S.index(v) michael@0: scc = S[i:] michael@0: del S[i:] michael@0: all_SCCs.append(scc) michael@0: michael@0: return index michael@0: michael@0: for v in V: michael@0: if v not in vertex_index: michael@0: index = strongconnect(v, index) michael@0: michael@0: return all_SCCs michael@0: michael@0: michael@0: def main(): michael@0: ok = check_style() michael@0: michael@0: if ok: michael@0: print('TEST-PASS | check_spidermonkey_style.py | ok') michael@0: else: michael@0: print('TEST-UNEXPECTED-FAIL | check_spidermonkey_style.py | actual output does not match expected output; diff is above') michael@0: michael@0: sys.exit(0 if ok else 1) michael@0: michael@0: michael@0: if __name__ == '__main__': michael@0: main()