config/check_spidermonkey_style.py

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance requirements documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 # vim: set ts=8 sts=4 et sw=4 tw=99:
     2 # This Source Code Form is subject to the terms of the Mozilla Public
     3 # License, v. 2.0. If a copy of the MPL was not distributed with this
     4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     6 #----------------------------------------------------------------------------
     7 # This script checks various aspects of SpiderMonkey code style.  The current checks are as
     8 # follows.
     9 #
    10 # We check the following things in headers.
    11 #
    12 # - No cyclic dependencies.
    13 #
    14 # - No normal header should #include an inlines.h/-inl.h file.
    15 #
    16 # - #ifndef wrappers should have the right form. (XXX: not yet implemented)
    17 #   - Every header file should have one.
    18 #   - The guard name used should be appropriate for the filename.
    19 #
    20 # We check the following things in all files.
    21 #
    22 # - #includes should have full paths, e.g. "jit/Ion.h", not "Ion.h".
    23 #
    24 # - #includes should use the appropriate form for system headers (<...>) and
    25 #   local headers ("...").
    26 #
    27 # - #includes should be ordered correctly.
    28 #   - Each one should be in the correct section.
    29 #   - Alphabetical order should be used within sections.
    30 #   - Sections should be in the right order.
    31 #   Note that the presence of #if/#endif blocks complicates things, to the
    32 #   point that it's not always clear where a conditionally-compiled #include
    33 #   statement should go, even to a human.  Therefore, we check the #include
    34 #   statements within each #if/#endif block (including nested ones) in
    35 #   isolation, but don't try to do any order checking between such blocks.
    36 #----------------------------------------------------------------------------
    38 from __future__ import print_function
    40 import difflib
    41 import os
    42 import re
    43 import subprocess
    44 import sys
    45 import traceback
    47 # We don't bother checking files in these directories, because they're (a) auxiliary or (b)
    48 # imported code that doesn't follow our coding style.
    49 ignored_js_src_dirs = [
    50    'js/src/config/',            # auxiliary stuff
    51    'js/src/ctypes/libffi/',     # imported code
    52    'js/src/devtools/',          # auxiliary stuff
    53    'js/src/editline/',          # imported code
    54    'js/src/gdb/',               # auxiliary stuff
    55    'js/src/vtune/'              # imported code
    56 ]
    58 # We ignore #includes of these files, because they don't follow the usual rules.
    59 included_inclnames_to_ignore = set([
    60     'ffi.h',                    # generated in ctypes/libffi/
    61     'devtools/sharkctl.h',      # we ignore devtools/ in general
    62     'devtools/Instruments.h',   # we ignore devtools/ in general
    63     'double-conversion.h',      # strange MFBT case
    64     'javascript-trace.h',       # generated in $OBJDIR if HAVE_DTRACE is defined
    65     'jsautokw.h',               # generated in $OBJDIR
    66     'jscustomallocator.h',      # provided by embedders;  allowed to be missing
    67     'js-config.h',              # generated in $OBJDIR
    68     'pratom.h',                 # NSPR
    69     'prcvar.h',                 # NSPR
    70     'prinit.h',                 # NSPR
    71     'prlink.h',                 # NSPR
    72     'prlock.h',                 # NSPR
    73     'prprf.h',                  # NSPR
    74     'prthread.h',               # NSPR
    75     'prtypes.h',                # NSPR
    76     'selfhosted.out.h',         # generated in $OBJDIR
    77     'unicode/locid.h',          # ICU
    78     'unicode/numsys.h',         # ICU
    79     'unicode/ucal.h',           # ICU
    80     'unicode/uclean.h',         # ICU
    81     'unicode/ucol.h',           # ICU
    82     'unicode/udat.h',           # ICU
    83     'unicode/udatpg.h',         # ICU
    84     'unicode/uenum.h',          # ICU
    85     'unicode/unorm.h',          # ICU
    86     'unicode/unum.h',           # ICU
    87     'unicode/ustring.h',        # ICU
    88     'unicode/utypes.h',         # ICU
    89     'vtune/VTuneWrapper.h'      # VTune
    90 ])
    92 # These files have additional constraints on where they are #included, so we
    93 # ignore #includes of them when checking #include ordering.
    94 oddly_ordered_inclnames = set([
    95     'ctypes/typedefs.h',        # Included multiple times in the body of ctypes/CTypes.h
    96     'jsautokw.h',               # Included in the body of frontend/TokenStream.h
    97     'jswin.h',                  # Must be #included before <psapi.h>
    98     'machine/endian.h',         # Must be included after <sys/types.h> on BSD
    99     'winbase.h',                # Must precede other system headers(?)
   100     'windef.h'                  # Must precede other system headers(?)
   101 ])
   103 # The files in tests/style/ contain code that fails this checking in various
   104 # ways.  Here is the output we expect.  If the actual output differs from
   105 # this, one of the following must have happened.
   106 # - New SpiderMonkey code violates one of the checked rules.
   107 # - The tests/style/ files have changed without expected_output being changed
   108 #   accordingly.
   109 # - This script has been broken somehow.
   110 #
   111 expected_output = '''\
   112 js/src/tests/style/BadIncludes2.h:1: error:
   113     vanilla header includes an inline-header file "tests/style/BadIncludes2-inl.h"
   115 js/src/tests/style/BadIncludes.h:3: error:
   116     the file includes itself
   118 js/src/tests/style/BadIncludes.h:6: error:
   119     "BadIncludes2.h" is included using the wrong path;
   120     did you forget a prefix, or is the file not yet committed?
   122 js/src/tests/style/BadIncludes.h:8: error:
   123     <tests/style/BadIncludes2.h> should be included using
   124     the #include "..." form
   126 js/src/tests/style/BadIncludes.h:10: error:
   127     "stdio.h" is included using the wrong path;
   128     did you forget a prefix, or is the file not yet committed?
   130 js/src/tests/style/BadIncludesOrder-inl.h:5:6: error:
   131     "vm/Interpreter-inl.h" should be included after "jsscriptinlines.h"
   133 js/src/tests/style/BadIncludesOrder-inl.h:6:7: error:
   134     "jsscriptinlines.h" should be included after "js/Value.h"
   136 js/src/tests/style/BadIncludesOrder-inl.h:7:8: error:
   137     "js/Value.h" should be included after "ds/LifoAlloc.h"
   139 js/src/tests/style/BadIncludesOrder-inl.h:8:9: error:
   140     "ds/LifoAlloc.h" should be included after "jsapi.h"
   142 js/src/tests/style/BadIncludesOrder-inl.h:9:10: error:
   143     "jsapi.h" should be included after <stdio.h>
   145 js/src/tests/style/BadIncludesOrder-inl.h:10:11: error:
   146     <stdio.h> should be included after "mozilla/HashFunctions.h"
   148 js/src/tests/style/BadIncludesOrder-inl.h:27:28: error:
   149     "jsobj.h" should be included after "jsfun.h"
   151 (multiple files): error:
   152     header files form one or more cycles
   154    tests/style/HeaderCycleA1.h
   155    -> tests/style/HeaderCycleA2.h
   156       -> tests/style/HeaderCycleA3.h
   157          -> tests/style/HeaderCycleA1.h
   159    tests/style/HeaderCycleB1-inl.h
   160    -> tests/style/HeaderCycleB2-inl.h
   161       -> tests/style/HeaderCycleB3-inl.h
   162          -> tests/style/HeaderCycleB4-inl.h
   163             -> tests/style/HeaderCycleB1-inl.h
   164             -> tests/style/jsheadercycleB5inlines.h
   165                -> tests/style/HeaderCycleB1-inl.h
   166       -> tests/style/HeaderCycleB4-inl.h
   168 '''.splitlines(True)
   170 actual_output = []
   173 def out(*lines):
   174     for line in lines:
   175         actual_output.append(line + '\n')
   178 def error(filename, linenum, *lines):
   179     location = filename
   180     if linenum is not None:
   181         location += ':' + str(linenum)
   182     out(location + ': error:')
   183     for line in (lines):
   184         out('    ' + line)
   185     out('')
   188 class FileKind(object):
   189     C = 1
   190     CPP = 2
   191     INL_H = 3
   192     H = 4
   193     TBL = 5
   194     MSG = 6
   196     @staticmethod
   197     def get(filename):
   198         if filename.endswith('.c'):
   199             return FileKind.C
   201         if filename.endswith('.cpp'):
   202             return FileKind.CPP
   204         if filename.endswith(('inlines.h', '-inl.h')):
   205             return FileKind.INL_H
   207         if filename.endswith('.h'):
   208             return FileKind.H
   210         if filename.endswith('.tbl'):
   211             return FileKind.TBL
   213         if filename.endswith('.msg'):
   214             return FileKind.MSG
   216         error(filename, None, 'unknown file kind')
   219 def get_all_filenames():
   220     '''Get a list of all the files in the (Mercurial or Git) repository.'''
   221     cmds = [['hg', 'manifest', '-q'], ['git', 'ls-files', '--full-name', '../..']]
   222     for cmd in cmds:
   223         try:
   224             all_filenames = subprocess.check_output(cmd, universal_newlines=True,
   225                                                     stderr=subprocess.PIPE).split('\n')
   226             return all_filenames
   227         except:
   228             continue
   229     else:
   230         raise Exception('failed to run any of the repo manifest commands', cmds)
   233 def check_style():
   234     # We deal with two kinds of name.
   235     # - A "filename" is a full path to a file from the repository root.
   236     # - An "inclname" is how a file is referred to in a #include statement.
   237     #
   238     # Examples (filename -> inclname)
   239     # - "mfbt/Attributes.h"  -> "mozilla/Attributes.h"
   240     # - "js/public/Vector.h" -> "js/Vector.h"
   241     # - "js/src/vm/String.h" -> "vm/String.h"
   243     mfbt_inclnames = set()      # type: set(inclname)
   244     js_names = dict()           # type: dict(filename, inclname)
   246     # Select the appropriate files.
   247     for filename in get_all_filenames():
   248         if filename.startswith('mfbt/') and filename.endswith('.h'):
   249             inclname = 'mozilla/' + filename[len('mfbt/'):]
   250             mfbt_inclnames.add(inclname)
   252         if filename.startswith('js/public/') and filename.endswith('.h'):
   253             inclname = 'js/' + filename[len('js/public/'):]
   254             js_names[filename] = inclname
   256         if filename.startswith('js/src/') and \
   257            not filename.startswith(tuple(ignored_js_src_dirs)) and \
   258            filename.endswith(('.c', '.cpp', '.h', '.tbl', '.msg')):
   259             inclname = filename[len('js/src/'):]
   260             js_names[filename] = inclname
   262     all_inclnames = mfbt_inclnames | set(js_names.values())
   264     edges = dict()      # type: dict(inclname, set(inclname))
   266     # We don't care what's inside the MFBT files, but because they are
   267     # #included from JS files we have to add them to the inclusion graph.
   268     for inclname in mfbt_inclnames:
   269         edges[inclname] = set()
   271     # Process all the JS files.
   272     for filename in js_names.keys():
   273         inclname = js_names[filename]
   274         file_kind = FileKind.get(filename)
   275         if file_kind == FileKind.C or file_kind == FileKind.CPP or \
   276            file_kind == FileKind.H or file_kind == FileKind.INL_H:
   277             included_h_inclnames = set()    # type: set(inclname)
   279             # This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla
   280             # source tree.
   281             with open(os.path.join('../..', filename)) as f:
   282                 do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames)
   284         edges[inclname] = included_h_inclnames
   286     find_cycles(all_inclnames, edges)
   288     # Compare expected and actual output.
   289     difflines = difflib.unified_diff(expected_output, actual_output,
   290                                      fromfile='check_spider_monkey_style.py expected output',
   291                                        tofile='check_spider_monkey_style.py actual output')
   292     ok = True
   293     for diffline in difflines:
   294         ok = False
   295         print(diffline, end='')
   297     return ok
   300 def module_name(name):
   301     '''Strip the trailing .cpp, .h, inlines.h or -inl.h from a filename.'''
   303     return name.replace('inlines.h', '').replace('-inl.h', '').replace('.h', '').replace('.cpp', '')
   306 def is_module_header(enclosing_inclname, header_inclname):
   307     '''Determine if an included name is the "module header", i.e. should be
   308     first in the file.'''
   310     module = module_name(enclosing_inclname)
   312     # Normal case, e.g. module == "foo/Bar", header_inclname == "foo/Bar.h".
   313     if module == module_name(header_inclname):
   314         return True
   316     # A public header, e.g. module == "foo/Bar", header_inclname == "js/Bar.h".
   317     m = re.match(r'js\/(.*)\.h', header_inclname)
   318     if m is not None and module.endswith('/' + m.group(1)):
   319         return True
   321     return False
   324 class Include(object):
   325     '''Important information for a single #include statement.'''
   327     def __init__(self, inclname, linenum, is_system):
   328         self.inclname = inclname
   329         self.linenum = linenum
   330         self.is_system = is_system
   332     def isLeaf(self):
   333         return True
   335     def section(self, enclosing_inclname):
   336         '''Identify which section inclname belongs to.
   338         The section numbers are as follows.
   339           0. Module header (e.g. jsfoo.h or jsfooinlines.h within jsfoo.cpp)
   340           1. mozilla/Foo.h
   341           2. <foo.h> or <foo>
   342           3. jsfoo.h, prmjtime.h, etc
   343           4. foo/Bar.h
   344           5. jsfooinlines.h
   345           6. foo/Bar-inl.h
   346           7. non-.h, e.g. *.tbl, *.msg
   347         '''
   349         if self.is_system:
   350             return 2
   352         if not self.inclname.endswith('.h'):
   353             return 7
   355         # A couple of modules have the .h file in js/ and the .cpp file elsewhere and so need
   356         # special handling.
   357         if is_module_header(enclosing_inclname, self.inclname):
   358             return 0
   360         if '/' in self.inclname:
   361             if self.inclname.startswith('mozilla/'):
   362                 return 1
   364             if self.inclname.endswith('-inl.h'):
   365                 return 6
   367             return 4
   369         if self.inclname.endswith('inlines.h'):
   370             return 5
   372         return 3
   374     def quote(self):
   375         if self.is_system:
   376             return '<' + self.inclname + '>'
   377         else:
   378             return '"' + self.inclname + '"'
   381 class HashIfBlock(object):
   382     '''Important information about a #if/#endif block.
   384     A #if/#endif block is the contents of a #if/#endif (or similar) section.
   385     The top-level block, which is not within a #if/#endif pair, is also
   386     considered a block.
   388     Each leaf is either an Include (representing a #include), or another
   389     nested HashIfBlock.'''
   390     def __init__(self):
   391         self.kids = []
   393     def isLeaf(self):
   394         return False
   397 def do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames):
   398     block_stack = [HashIfBlock()]
   400     # Extract the #include statements as a tree of IBlocks and IIncludes.
   401     for linenum, line in enumerate(f, start=1):
   402         # Look for a |#include "..."| line.
   403         m = re.match(r'\s*#\s*include\s+"([^"]*)"', line)
   404         if m is not None:
   405             block_stack[-1].kids.append(Include(m.group(1), linenum, False))
   407         # Look for a |#include <...>| line.
   408         m = re.match(r'\s*#\s*include\s+<([^>]*)>', line)
   409         if m is not None:
   410             block_stack[-1].kids.append(Include(m.group(1), linenum, True))
   412         # Look for a |#{if,ifdef,ifndef}| line.
   413         m = re.match(r'\s*#\s*(if|ifdef|ifndef)\b', line)
   414         if m is not None:
   415             # Open a new block.
   416             new_block = HashIfBlock()
   417             block_stack[-1].kids.append(new_block)
   418             block_stack.append(new_block)
   420         # Look for a |#{elif,else}| line.
   421         m = re.match(r'\s*#\s*(elif|else)\b', line)
   422         if m is not None:
   423             # Close the current block, and open an adjacent one.
   424             block_stack.pop()
   425             new_block = HashIfBlock()
   426             block_stack[-1].kids.append(new_block)
   427             block_stack.append(new_block)
   429         # Look for a |#endif| line.
   430         m = re.match(r'\s*#\s*endif\b', line)
   431         if m is not None:
   432             # Close the current block.
   433             block_stack.pop()
   435     def check_include_statement(include):
   436         '''Check the style of a single #include statement.'''
   438         if include.is_system:
   439             # Check it is not a known local file (in which case it's probably a system header).
   440             if include.inclname in included_inclnames_to_ignore or \
   441                include.inclname in all_inclnames:
   442                 error(filename, include.linenum,
   443                       include.quote() + ' should be included using',
   444                       'the #include "..." form')
   446         else:
   447             if include.inclname not in included_inclnames_to_ignore:
   448                 included_kind = FileKind.get(include.inclname)
   450                 # Check the #include path has the correct form.
   451                 if include.inclname not in all_inclnames:
   452                     error(filename, include.linenum,
   453                           include.quote() + ' is included ' + 'using the wrong path;',
   454                           'did you forget a prefix, or is the file not yet committed?')
   456                 # Record inclusions of .h files for cycle detection later.
   457                 # (Exclude .tbl and .msg files.)
   458                 elif included_kind == FileKind.H or included_kind == FileKind.INL_H:
   459                     included_h_inclnames.add(include.inclname)
   461                 # Check a H file doesn't #include an INL_H file.
   462                 if file_kind == FileKind.H and included_kind == FileKind.INL_H:
   463                     error(filename, include.linenum,
   464                           'vanilla header includes an inline-header file ' + include.quote())
   466                 # Check a file doesn't #include itself.  (We do this here because the cycle
   467                 # detection below doesn't detect this case.)
   468                 if inclname == include.inclname:
   469                     error(filename, include.linenum, 'the file includes itself')
   471     def check_includes_order(include1, include2):
   472         '''Check the ordering of two #include statements.'''
   474         if include1.inclname in oddly_ordered_inclnames or \
   475            include2.inclname in oddly_ordered_inclnames:
   476             return
   478         section1 = include1.section(inclname)
   479         section2 = include2.section(inclname)
   480         if (section1 > section2) or \
   481            ((section1 == section2) and (include1.inclname.lower() > include2.inclname.lower())):
   482             error(filename, str(include1.linenum) + ':' + str(include2.linenum),
   483                   include1.quote() + ' should be included after ' + include2.quote())
   485     # The #include statements in the files in assembler/ and yarr/ have all manner of implicit
   486     # ordering requirements.  Boo.  Ignore them.
   487     skip_order_checking = inclname.startswith(('assembler/', 'yarr/'))
   489     # Check the extracted #include statements, both individually, and the ordering of
   490     # adjacent pairs that live in the same block.
   491     def pair_traverse(prev, this):
   492         if this.isLeaf():
   493             check_include_statement(this)
   494             if prev is not None and prev.isLeaf() and not skip_order_checking:
   495                 check_includes_order(prev, this)
   496         else:
   497             for prev2, this2 in zip([None] + this.kids[0:-1], this.kids):
   498                 pair_traverse(prev2, this2)
   500     pair_traverse(None, block_stack[-1])
   503 def find_cycles(all_inclnames, edges):
   504     '''Find and draw any cycles.'''
   506     SCCs = tarjan(all_inclnames, edges)
   508     # The various sorted() calls below ensure the output is deterministic.
   510     def draw_SCC(c):
   511         cset = set(c)
   512         drawn = set()
   513         def draw(v, indent):
   514             out('   ' * indent + ('-> ' if indent else '   ') + v)
   515             if v in drawn:
   516                 return
   517             drawn.add(v)
   518             for succ in sorted(edges[v]):
   519                 if succ in cset:
   520                     draw(succ, indent + 1)
   521         draw(sorted(c)[0], 0)
   522         out('')
   524     have_drawn_an_SCC = False
   525     for scc in sorted(SCCs):
   526         if len(scc) != 1:
   527             if not have_drawn_an_SCC:
   528                 error('(multiple files)', None, 'header files form one or more cycles')
   529                 have_drawn_an_SCC = True
   531             draw_SCC(scc)
   534 # Tarjan's algorithm for finding the strongly connected components (SCCs) of a graph.
   535 # https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
   536 def tarjan(V, E):
   537     vertex_index = {}
   538     vertex_lowlink = {}
   539     index = 0
   540     S = []
   541     all_SCCs = []
   543     def strongconnect(v, index):
   544         # Set the depth index for v to the smallest unused index
   545         vertex_index[v] = index
   546         vertex_lowlink[v] = index
   547         index += 1
   548         S.append(v)
   550         # Consider successors of v
   551         for w in E[v]:
   552             if w not in vertex_index:
   553                 # Successor w has not yet been visited; recurse on it
   554                 index = strongconnect(w, index)
   555                 vertex_lowlink[v] = min(vertex_lowlink[v], vertex_lowlink[w])
   556             elif w in S:
   557                 # Successor w is in stack S and hence in the current SCC
   558                 vertex_lowlink[v] = min(vertex_lowlink[v], vertex_index[w])
   560         # If v is a root node, pop the stack and generate an SCC
   561         if vertex_lowlink[v] == vertex_index[v]:
   562             i = S.index(v)
   563             scc = S[i:]
   564             del S[i:]
   565             all_SCCs.append(scc)
   567         return index
   569     for v in V:
   570         if v not in vertex_index:
   571             index = strongconnect(v, index)
   573     return all_SCCs
   576 def main():
   577     ok = check_style()
   579     if ok:
   580         print('TEST-PASS | check_spidermonkey_style.py | ok')
   581     else:
   582         print('TEST-UNEXPECTED-FAIL | check_spidermonkey_style.py | actual output does not match expected output;  diff is above')
   584     sys.exit(0 if ok else 1)
   587 if __name__ == '__main__':
   588     main()

mercurial