js/src/vm/ForkJoin.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  * vim: set ts=8 sts=4 et sw=4 tw=99:
     3  * This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #ifndef vm_ForkJoin_h
     8 #define vm_ForkJoin_h
    10 #include "mozilla/ThreadLocal.h"
    12 #include "jscntxt.h"
    14 #include "gc/GCInternals.h"
    16 #include "jit/Ion.h"
    18 ///////////////////////////////////////////////////////////////////////////
    19 // Read Me First
    20 //
    21 // The ForkJoin abstraction:
    22 // -------------------------
    23 //
    24 // This is the building block for executing multi-threaded JavaScript with
    25 // shared memory (as distinct from Web Workers).  The idea is that you have
    26 // some (typically data-parallel) operation which you wish to execute in
    27 // parallel across as many threads as you have available.
    28 //
    29 // The ForkJoin abstraction is intended to be used by self-hosted code
    30 // to enable parallel execution.  At the top-level, it consists of a native
    31 // function (exposed as the ForkJoin intrinsic) that is used like so:
    32 //
    33 //     ForkJoin(func, sliceStart, sliceEnd, mode)
    34 //
    35 // The intention of this statement is to start some number (usually the
    36 // number of hardware threads) of copies of |func()| running in parallel. Each
    37 // copy will then do a portion of the total work, depending on
    38 // workstealing-based load balancing.
    39 //
    40 // Typically, each of the N slices runs in a different worker thread, but that
    41 // is not something you should rely upon---if work-stealing is enabled it
    42 // could be that a single worker thread winds up handling multiple slices.
    43 //
    44 // The second and third arguments, |sliceStart| and |sliceEnd|, are the slice
    45 // boundaries. These numbers must each fit inside a uint16_t.
    46 //
    47 // The fourth argument, |mode|, is an internal mode integer giving finer
    48 // control over the behavior of ForkJoin. See the |ForkJoinMode| enum.
    49 //
    50 // func() should expect the following arguments:
    51 //
    52 //     func(workerId, sliceStart, sliceEnd)
    53 //
    54 // The |workerId| parameter is the id of the worker executing the function. It
    55 // is 0 in sequential mode.
    56 //
    57 // The |sliceStart| and |sliceEnd| parameters are the current bounds that
    58 // the worker is handling. In parallel execution, these parameters are not
    59 // used. In sequential execution, they tell the worker what slices should be
    60 // processed. During the warm up phase, sliceEnd == sliceStart + 1.
    61 //
    62 // |func| can keep asking for more work from the scheduler by calling the
    63 // intrinsic |GetForkJoinSlice(sliceStart, sliceEnd, id)|. When there are no
    64 // more slices to hand out, ThreadPool::MAX_SLICE_ID is returned as a sentinel
    65 // value. By exposing this function as an intrinsic, we reduce the number of
    66 // JS-C++ boundary crossings incurred by workstealing, which may have many
    67 // slices.
    68 //
    69 // In sequential execution, |func| should return the maximum computed slice id
    70 // S for which all slices with id < S have already been processed. This is so
    71 // ThreadPool can track the leftmost completed slice id to maintain
    72 // determinism. Slices which have been completed in sequential execution
    73 // cannot be re-run in parallel execution.
    74 //
    75 // In parallel execution, |func| MUST PROCESS ALL SLICES BEFORE RETURNING!
    76 // Not doing so is an error and is protected by debug asserts in ThreadPool.
    77 //
    78 // Warmups and Sequential Fallbacks
    79 // --------------------------------
    80 //
    81 // ForkJoin can only execute code in parallel when it has been
    82 // ion-compiled in Parallel Execution Mode. ForkJoin handles this part
    83 // for you. However, because ion relies on having decent type
    84 // information available, it is necessary to run the code sequentially
    85 // for a few iterations first to get the various type sets "primed"
    86 // with reasonable information.  We try to make do with just a few
    87 // runs, under the hypothesis that parallel execution code will reach
    88 // type stability relatively quickly.
    89 //
    90 // The general strategy of ForkJoin is as follows:
    91 //
    92 // - If the code has not yet been run, invoke `func` sequentially with
    93 //   warmup set to true.  When warmup is true, `func` should try and
    94 //   do less work than normal---just enough to prime type sets. (See
    95 //   ParallelArray.js for a discussion of specifically how we do this
    96 //   in the case of ParallelArray).
    97 //
    98 // - Try to execute the code in parallel.  Parallel execution mode has
    99 //   three possible results: success, fatal error, or bailout.  If a
   100 //   bailout occurs, it means that the code attempted some action
   101 //   which is not possible in parallel mode.  This might be a
   102 //   modification to shared state, but it might also be that it
   103 //   attempted to take some theoretically pure action that has not been
   104 //   made threadsafe (yet?).
   105 //
   106 // - If parallel execution is successful, ForkJoin returns true.
   107 //
   108 // - If parallel execution results in a fatal error, ForkJoin returns false.
   109 //
   110 // - If parallel execution results in a *bailout*, this is when things
   111 //   get interesting.  In that case, the semantics of parallel
   112 //   execution guarantee us that no visible side effects have occurred
   113 //   (unless they were performed with the intrinsic
   114 //   |UnsafePutElements()|, which can only be used in self-hosted
   115 //   code).  We therefore reinvoke |func()| but with warmup set to
   116 //   true.  The idea here is that often parallel bailouts result from
   117 //   a failed type guard or other similar assumption, so rerunning the
   118 //   warmup sequentially gives us a chance to recompile with more
   119 //   data.  Because warmup is true, we do not expect this sequential
   120 //   call to process all remaining data, just a chunk.  After this
   121 //   recovery execution is complete, we again attempt parallel
   122 //   execution.
   123 //
   124 // - If more than a fixed number of bailouts occur, we give up on
   125 //   parallelization and just invoke |func()| N times in a row (once
   126 //   for each worker) but with |warmup| set to false.
   127 //
   128 // Interrupts:
   129 //
   130 // During parallel execution, |cx.check()| must be periodically invoked to
   131 // check for interrupts. This is automatically done by the Ion-generated
   132 // code. If an interrupt has been requested |cx.check()| aborts parallel
   133 // execution.
   134 //
   135 // Transitive compilation:
   136 //
   137 // One of the challenges for parallel compilation is that we
   138 // (currently) have to abort when we encounter an uncompiled script.
   139 // Therefore, we try to compile everything that might be needed
   140 // beforehand. The exact strategy is described in `ParallelDo::apply()`
   141 // in ForkJoin.cpp, but at the highest level the idea is:
   142 //
   143 // 1. We maintain a flag on every script telling us if that script and
   144 //    its transitive callees are believed to be compiled. If that flag
   145 //    is set, we can skip the initial compilation.
   146 // 2. Otherwise, we maintain a worklist that begins with the main
   147 //    script. We compile it and then examine the generated parallel IonScript,
   148 //    which will have a list of callees. We enqueue those. Some of these
   149 //    compilations may take place off the main thread, in which case
   150 //    we will run warmup iterations while we wait for them to complete.
   151 // 3. If the warmup iterations finish all the work, we're done.
   152 // 4. If compilations fail, we fall back to sequential.
   153 // 5. Otherwise, we will try running in parallel once we're all done.
   154 //
   155 // Bailout tracing and recording:
   156 //
   157 // When a bailout occurs, we record a bit of state so that we can
   158 // recover with grace. Each |ForkJoinContext| has a pointer to a
   159 // |ParallelBailoutRecord| pre-allocated for this purpose. This
   160 // structure is used to record the cause of the bailout, the JSScript
   161 // which was executing, as well as the location in the source where
   162 // the bailout occurred (in principle, we can record a full stack
   163 // trace, but right now we only record the top-most frame). Note that
   164 // the error location might not be in the same JSScript as the one
   165 // which was executing due to inlining.
   166 //
   167 // Garbage collection and allocation:
   168 //
   169 // Code which executes on these parallel threads must be very careful
   170 // with respect to garbage collection and allocation.  The typical
   171 // allocation paths are UNSAFE in parallel code because they access
   172 // shared state (the compartment's arena lists and so forth) without
   173 // any synchronization.  They can also trigger GC in an ad-hoc way.
   174 //
   175 // To deal with this, the forkjoin code creates a distinct |Allocator|
   176 // object for each slice.  You can access the appropriate object via
   177 // the |ForkJoinContext| object that is provided to the callbacks.  Once
   178 // the execution is complete, all the objects found in these distinct
   179 // |Allocator|s are merged back into the main compartment lists and
   180 // things proceed normally.
   181 //
   182 // In Ion-generated code, we will do allocation through the
   183 // |Allocator| found in |ForkJoinContext| (which is obtained via TLS).
   184 // Also, no write barriers are emitted.  Conceptually, we should never
   185 // need a write barrier because we only permit writes to objects that
   186 // are newly allocated, and such objects are always black (to use
   187 // incremental GC terminology).  However, to be safe, we also block
   188 // upon entering a parallel section to ensure that any concurrent
   189 // marking or incremental GC has completed.
   190 //
   191 // In the future, it should be possible to lift the restriction that
   192 // we must block until inc. GC has completed and also to permit GC
   193 // during parallel execution. But we're not there yet.
   194 //
   195 // Load balancing (work stealing):
   196 //
   197 // The ForkJoin job is dynamically divided into a fixed number of slices,
   198 // and is submitted for parallel execution in the pool. When the number
   199 // of slices is big enough (typically greater than the number of workers
   200 // in the pool) -and the workload is unbalanced- each worker thread
   201 // will perform load balancing through work stealing. The number
   202 // of slices is computed by the self-hosted function |ComputeNumSlices|
   203 // and can be used to know how many slices will be executed by the
   204 // runtime for an array of the given size.
   205 //
   206 // Current Limitations:
   207 //
   208 // - The API does not support recursive or nested use.  That is, the
   209 //   JavaScript function given to |ForkJoin| should not itself invoke
   210 //   |ForkJoin()|. Instead, use the intrinsic |InParallelSection()| to
   211 //   check for recursive use and execute a sequential fallback.
   212 //
   213 ///////////////////////////////////////////////////////////////////////////
   215 namespace js {
       // Activation subclass associated with a fork join section.  Only the
       // constructor and destructor are public; both are defined out of line
       // (not in this header).
   217 class ForkJoinActivation : public Activation
   218 {
           // NOTE(review): presumably the Ion top-of-stack pointer that was
           // current when this activation was created, saved so that the
           // destructor can restore it -- confirm against ForkJoin.cpp.
   219     uint8_t *prevIonTop_;
   221     // We ensure that incremental GC be finished before we enter into a fork
   222     // join section, but the runtime/zone might still be marked as needing
   223     // barriers due to being in the middle of verifying barriers. Pause
   224     // verification during the fork join section.
   225     gc::AutoStopVerifyingBarriers av_;
   227   public:
           // Takes the main-thread JSContext; the constructor is not marked
           // explicit, so a JSContext* converts implicitly.
   228     ForkJoinActivation(JSContext *cx);
   229     ~ForkJoinActivation();
   230 };
       // Forward declaration; the class itself is defined further down.
   232 class ForkJoinContext;
       // Native entry point behind the ForkJoin intrinsic described in the
       // comment at the top of this file.  Per that comment, the call is made as
       // ForkJoin(func, sliceStart, sliceEnd, mode) and the result is true when
       // execution succeeds and false on a fatal error.
   234 bool ForkJoin(JSContext *cx, CallArgs &args);
       // Plain record identifying one LIR instruction for debug tracing.  Debug
       // builds keep one per ForkJoinContext (the |traceData| member, "the last
       // instr. to execute on this thread"), and parallel::SpewBailoutIR() takes
       // a pointer to one.  Nothing in this header owns the pointed-to strings,
       // script or bytecode.
   236 struct IonLIRTraceData {
           // NOTE(review): presumably the index of the enclosing block and the
           // index of the instruction within the LIR -- confirm at the writer.
   237     uint32_t blockIndex;
   238     uint32_t lirIndex;
           // NOTE(review): looks like an execution-mode enum stored as a plain
           // integer -- confirm which enum.
   239     uint32_t execModeInt;
           // Names of the LIR op and of the MIR op it relates to.
   240     const char *lirOpName;
   241     const char *mirOpName;
           // Script and bytecode position associated with the instruction.
   242     JSScript *script;
   243     jsbytecode *pc;
   244 };
   246 ///////////////////////////////////////////////////////////////////////////
   247 // Bailout tracking
       // Reason recorded in a ParallelBailoutRecord when parallel execution
       // stops.  The enumerators have no explicit values, so they are numbered
       // sequentially from 0 (ParallelBailoutNone); inserting or reordering
       // entries changes every later value.
   249 enum ParallelBailoutCause {
   250     ParallelBailoutNone,
   252     // Compiler returned Method_Skipped
   253     ParallelBailoutCompilationSkipped,
   255     // Compiler returned Method_CantCompile
   256     ParallelBailoutCompilationFailure,
   258     // The periodic interrupt failed, which can mean that either
   259     // another thread canceled, the user interrupted us, etc
   260     ParallelBailoutInterrupt,
   262     // An IC update failed
   263     ParallelBailoutFailedIC,
   265     // Heap busy flag was set during interrupt
   266     ParallelBailoutHeapBusy,
           // NOTE(review): the remaining causes carry no description in this
           // header; what triggers each one can only be inferred from its name
           // here.  Consult the code that sets the cause for the authoritative
           // conditions.
   268     ParallelBailoutMainScriptNotPresent,
   269     ParallelBailoutCalledToUncompiledScript,
   270     ParallelBailoutIllegalWrite,
   271     ParallelBailoutAccessToIntrinsic,
   272     ParallelBailoutOverRecursed,
   273     ParallelBailoutOutOfMemory,
   274     ParallelBailoutUnsupported,
   275     ParallelBailoutUnsupportedVM,
   276     ParallelBailoutUnsupportedStringComparison,
   277     ParallelBailoutRequestedGC,
   278     ParallelBailoutRequestedZoneGC,
   279 };
       // One frame of a bailout trace: a script together with a bytecode
       // position.  ParallelBailoutRecord stores an array of these.
   281 struct ParallelBailoutTrace {
   282     JSScript *script;
           // NOTE(review): presumably a position inside |script| -- confirm.
   283     jsbytecode *bytecode;
   284 };
   286 // See the "Bailout tracing and recording" section in the comment at the
       // top of this file.  A ForkJoinContext holds a pointer to one of these
       // (its |bailoutRecord| member).  All member functions are defined out of
       // line.
   287 struct ParallelBailoutRecord {
           // NOTE(review): presumably the script that was executing when the
           // bailout happened (the outermost one when inlining is involved) --
           // confirm against setCause().
   288     JSScript *topScript;
   289     ParallelBailoutCause cause;
   291     // Eventually we will support deeper traces,
   292     // but for now we gather at most a single frame.
   293     static const uint32_t MaxDepth = 1;
           // NOTE(review): presumably the number of entries of |trace| in use,
           // never more than MaxDepth -- confirm against addTrace().
   294     uint32_t depth;
   295     ParallelBailoutTrace trace[MaxDepth];
   297     void init(JSContext *cx);
   298     void reset(JSContext *cx);
           // setCause() may be called with the cause alone; the script and pc
           // arguments then default to nullptr.  updateCause() takes the same
           // arguments but requires all of them.
   299     void setCause(ParallelBailoutCause cause,
   300                   JSScript *outermostScript = nullptr,   // inliner (if applicable)
   301                   JSScript *currentScript = nullptr,     // inlinee (if applicable)
   302                   jsbytecode *currentPc = nullptr);
   303     void updateCause(ParallelBailoutCause cause,
   304                      JSScript *outermostScript,
   305                      JSScript *currentScript,
   306                      jsbytecode *currentPc);
   307     void addTrace(JSScript *script,
   308                   jsbytecode *pc);
   309 };
       // Forward declaration; only pointers to it are used in this header.
   311 struct ForkJoinShared;
       // Context object for a thread taking part in a fork join operation.  The
       // instance belonging to the calling thread is kept in thread-local
       // storage and returned by ForkJoinContext::current().
       //
       // Do not reorder the data members casually: offsetOfWorker() exposes the
       // byte offset of |worker_| for use when inlining GetForkJoinSlice.
   313 class ForkJoinContext : public ThreadSafeContext
   314 {
   315   public:
   316     // Bailout record used to record the reason this thread stopped executing
   317     ParallelBailoutRecord *const bailoutRecord;
   319 #ifdef DEBUG
   320     // Records the last instr. to execute on this thread.
   321     IonLIRTraceData traceData;
   323     // The maximum worker id.
   324     uint32_t maxWorkerId;
   325 #endif
   327     // When we run a par operation like mapPar, we create an out pointer
   328     // into a specific region of the destination buffer. Even though the
   329     // destination buffer is not thread-local, it is permissible to write into
   330     // it via the handles provided. These two fields identify the memory
   331     // region where writes are allowed so that the write guards can test for
   332     // it.
   333     //
   334     // Note: we only permit writes into the *specific region* that the user
   335     // is supposed to write. Normally, they only have access to this region
   336     // anyhow. But due to sequential fallback it is possible for handles into
   337     // other regions to escape into global variables in the sequential
   338     // execution and then get accessed by later parallel sections. Thus we
   339     // must be careful and ensure that the write is going through a handle
   340     // into the correct *region* of the buffer.
   341     uint8_t *targetRegionStart;
   342     uint8_t *targetRegionEnd;
           // Defined out of line.  The arguments name the per-thread data, the
           // thread pool worker this context belongs to, the allocator to use,
           // the state shared by the whole operation, and the bailout record
           // exposed as |bailoutRecord|.
   344     ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
   345                     Allocator *allocator, ForkJoinShared *shared,
   346                     ParallelBailoutRecord *bailoutRecord);
   348     // Get the worker id. The main thread by convention has the id of the max
   349     // worker thread id + 1.
   350     uint32_t workerId() const { return worker_->id(); }
   352     // Get a slice of work for the worker associated with the context.
           // Forwards to worker_->getSlice(); the slice id is written through
           // |sliceId|.  NOTE(review): presumably returns false once no slice
           // could be obtained -- confirm against ThreadPoolWorker::getSlice().
   353     bool getSlice(uint16_t *sliceId) { return worker_->getSlice(this, sliceId); }
   355     // True if this is the main thread, false if it is one of the parallel workers.
   356     bool isMainThread() const;
   358     // When the code would normally trigger a GC, we don't trigger it
   359     // immediately but instead record that request here.  This will
   360     // cause |ExecuteForkJoinOp()| to invoke |TriggerGC()| or
   361     // |TriggerCompartmentGC()| as appropriate once the parallel
   362     // section is complete. This is done because those routines do
   363     // various preparations that are not thread-safe, and because the
   364     // full set of arenas is not available until the end of the
   365     // parallel section.
   366     void requestGC(JS::gcreason::Reason reason);
   367     void requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason);
   369     // Set the fatal flag for the next abort. Used to distinguish retry or
   370     // fatal aborts from VM functions.
   371     bool setPendingAbortFatal(ParallelBailoutCause cause);
   373     // Reports an unsupported operation, returning false if we are reporting
   374     // an error. Otherwise drop the warning on the floor.
           // More precisely: when |report| has the JSREPORT_ERROR bit set, the
           // result is whatever setPendingAbortFatal(cause) returns; in every
           // other case nothing is recorded and the result is true.
   375     bool reportError(ParallelBailoutCause cause, unsigned report) {
   376         if (report & JSREPORT_ERROR)
   377             return setPendingAbortFatal(cause);
   378         return true;
   379     }
   381     // During the parallel phase, this method should be invoked
   382     // periodically, for example on every backedge, similar to the
   383     // interrupt check.  If it returns false, then the parallel phase
   384     // has been aborted and so you should bailout.  The function may
   385     // also rendezvous to perform GC or do other similar things.
   386     //
   387     // This function is guaranteed to have no effect if
   388     // runtime()->interruptPar is zero.  Ion-generated code takes
   389     // advantage of this by inlining the check on that flag before
   390     // actually calling this function.  If this function ends up
   391     // getting called a lot from outside ion code, we can refactor
   392     // it into an inlined version with this check that calls a slower
   393     // version.
   394     bool check();
   396     // Be wary, the runtime is shared between all threads!
   397     JSRuntime *runtime();
   399     // Acquire and release the JSContext from the runtime.
           // LockedJSContext wraps this pair in a scoped object; see the caveats
           // documented on that class before mutating anything through the
           // returned JSContext.
   400     JSContext *acquireJSContext();
   401     void releaseJSContext();
           // NOTE(review): presumably reports |acquiredJSContext_| -- confirm.
   402     bool hasAcquiredJSContext() const;
   404     // Check the current state of parallel execution.
           // Returns the context stored for the calling thread, or nullptr when
           // none is set (InParallelSection() tests exactly that).
   405     static inline ForkJoinContext *current();
   407     // Initializes the thread-local state.
   408     static bool initialize();
   410     // Used in inlining GetForkJoinSlice.
           // Byte offset of the |worker_| member within a ForkJoinContext.
   411     static size_t offsetOfWorker() {
   412         return offsetof(ForkJoinContext, worker_);
   413     }
   415   private:
   416     friend class AutoSetForkJoinContext;
   418     // Initialized by initialize()
   419     static mozilla::ThreadLocal<ForkJoinContext*> tlsForkJoinContext;
           // State shared by the whole fork join operation (opaque here).
   421     ForkJoinShared *const shared_;
           // Pool worker used by workerId() and getSlice().
   423     ThreadPoolWorker *worker_;
           // NOTE(review): presumably true between acquireJSContext() and
           // releaseJSContext() -- confirm against ForkJoin.cpp.
   425     bool acquiredJSContext_;
   427     // ForkJoinContext is allocated on the stack. It would be dangerous to GC
   428     // with it live because of the GC pointer fields stored in the context.
   429     JS::AutoAssertNoGC nogc_;
   430 };
   432 // Locks a JSContext for its scope. Be very careful, because locking a
   433 // JSContext does *not* allow you to safely mutate the data in the
   434 // JSContext unless you can guarantee that any of the other threads
   435 // that want to access that data will also acquire the lock, which is
   436 // generally not the case. For example, the lock is used in the IC
   437 // code to allow us to atomically patch up the dispatch table, but we
   438 // must be aware that other threads may be reading from the table even
   439 // as we write to it (though they cannot be writing, since they must
   440 // hold the lock to write).
       //
       // The locking only exists when both JS_THREADSAFE and JS_ION are
       // defined.  In any other configuration the object holds a null JSContext
       // and its destructor does nothing.
   441 class LockedJSContext
   442 {
   443 #if defined(JS_THREADSAFE) && defined(JS_ION)
           // Context whose JSContext was acquired; needed again for the release.
   444     ForkJoinContext *cx_;
   445 #endif
           // The acquired JSContext, or nullptr when locking is compiled out.
   446     JSContext *jscx_;
   448   public:
           // Acquires the JSContext through cx->acquireJSContext().  Not marked
           // explicit, so a ForkJoinContext* converts implicitly.
   449     LockedJSContext(ForkJoinContext *cx)
   450 #if defined(JS_THREADSAFE) && defined(JS_ION)
   451       : cx_(cx),
   452         jscx_(cx->acquireJSContext())
   453 #else
   454       : jscx_(nullptr)
   455 #endif
   456     { }
           // Releases the JSContext acquired by the constructor.
   458     ~LockedJSContext() {
   459 #if defined(JS_THREADSAFE) && defined(JS_ION)
   460         cx_->releaseJSContext();
   461 #endif
   462     }
           // Access to the held JSContext; both yield nullptr when locking is
           // compiled out, so operator-> must not be used in that configuration.
   464     operator JSContext *() { return jscx_; }
   465     JSContext *operator->() { return jscx_; }
   466 };
       // The functions below are only declared here; their definitions are not
       // visible in this header, so the notes on them are limited to what the
       // signatures show.
       //
       // NOTE(review): by its name, reports whether the caller is inside an
       // exclusive parallel section -- confirm the exact meaning of "exclusive"
       // against the definition.
   468 bool InExclusiveParallelSection();
       // NOTE(review): presumably a testing hook telling whether parallel tests
       // are expected to succeed in the current configuration -- confirm.
   470 bool ParallelTestsShouldPass(JSContext *cx);
       // Requests an interrupt of fork join execution on |rt| using the given
       // interrupt mode.
   472 void RequestInterruptForForkJoin(JSRuntime *rt, JSRuntime::InterruptMode mode);
       // Natives with the (cx, argc, vp) signature, each paired with a JSJitInfo
       // object defined elsewhere.  NOTE(review): SetForkJoinTargetRegion
       // presumably sets ForkJoinContext::targetRegionStart/targetRegionEnd --
       // confirm against the definition.
   474 bool intrinsic_SetForkJoinTargetRegion(JSContext *cx, unsigned argc, Value *vp);
   475 extern const JSJitInfo intrinsic_SetForkJoinTargetRegionInfo;
   477 bool intrinsic_ClearThreadLocalArenas(JSContext *cx, unsigned argc, Value *vp);
   478 extern const JSJitInfo intrinsic_ClearThreadLocalArenasInfo;
   480 ///////////////////////////////////////////////////////////////////////////
   481 // Debug Spew
       // Forward declaration so that parallel::SpewMIR() below can take a
       // jit::MDefinition pointer.
   483 namespace jit {
   484     class MDefinition;
   485 }
   487 namespace parallel {
       // Overall outcome of an operation; SpewEndOp() takes one of these and
       // returns it.  Values are implicit, starting at 0 with ExecutionFatal.
   489 enum ExecutionStatus {
   490     // Parallel or seq execution terminated in a fatal way, operation failed
   491     ExecutionFatal,
   493     // Parallel exec failed and so we fell back to sequential
   494     ExecutionSequential,
   496     // We completed the work in seq mode before parallel compilation completed
   497     ExecutionWarmup,
   499     // Parallel exec was successful after some number of bailouts
   500     ExecutionParallel
   501 };
       // Categories of debug spew, passed to SpewEnabled() and Spew().
       // NumSpewChannels is not a channel: as the last enumerator of an enum
       // with implicit values it equals the number of channels listed before it
       // (3), so new channels must be added above it.
   503 enum SpewChannel {
   504     SpewOps,
   505     SpewCompile,
   506     SpewBailouts,
   507     NumSpewChannels
   508 };
       // Debug spew entry points.  When DEBUG, JS_THREADSAFE and JS_ION are all
       // defined, these are ordinary functions defined elsewhere.  In every
       // other configuration they are static inline stubs that do nothing, so
       // callers need no #ifdefs of their own.
   510 #if defined(DEBUG) && defined(JS_THREADSAFE) && defined(JS_ION)
   512 bool SpewEnabled(SpewChannel channel);
   513 void Spew(SpewChannel channel, const char *fmt, ...);
   514 void SpewBeginOp(JSContext *cx, const char *name);
   515 void SpewBailout(uint32_t count, HandleScript script, jsbytecode *pc,
   516                  ParallelBailoutCause cause);
   517 ExecutionStatus SpewEndOp(ExecutionStatus status);
   518 void SpewBeginCompile(HandleScript script);
   519 jit::MethodStatus SpewEndCompile(jit::MethodStatus status);
   520 void SpewMIR(jit::MDefinition *mir, const char *fmt, ...);
   521 void SpewBailoutIR(IonLIRTraceData *data);
   523 #else
       // Stub versions: SpewEnabled() always answers false, and the two "End"
       // functions hand their argument back unchanged.
   525 static inline bool SpewEnabled(SpewChannel channel) { return false; }
   526 static inline void Spew(SpewChannel channel, const char *fmt, ...) { }
   527 static inline void SpewBeginOp(JSContext *cx, const char *name) { }
   528 static inline void SpewBailout(uint32_t count, HandleScript script,
   529                                jsbytecode *pc, ParallelBailoutCause cause) {}
   530 static inline ExecutionStatus SpewEndOp(ExecutionStatus status) { return status; }
   531 static inline void SpewBeginCompile(HandleScript script) { }
       // These two stubs mention jit:: types in their signatures and are
       // therefore only provided when JS_ION is defined.
   532 #ifdef JS_ION
   533 static inline jit::MethodStatus SpewEndCompile(jit::MethodStatus status) { return status; }
   534 static inline void SpewMIR(jit::MDefinition *mir, const char *fmt, ...) { }
   535 #endif
   536 static inline void SpewBailoutIR(IonLIRTraceData *data) { }
   538 #endif // DEBUG && JS_THREADSAFE && JS_ION
   540 } // namespace parallel
   541 } // namespace js
       // Returns the ForkJoinContext held in the thread-local slot
       // |tlsForkJoinContext| for the calling thread.  InParallelSection()
       // below treats a null result as "not in a parallel section".
   543 /* static */ inline js::ForkJoinContext *
   544 js::ForkJoinContext::current()
   545 {
   546     return tlsForkJoinContext.get();
   547 }
   549 namespace js {
       // True when the calling thread has a current ForkJoinContext, i.e. when
       // ForkJoinContext::current() is non-null.
   551 static inline bool
   552 InParallelSection()
   553 {
   554     return ForkJoinContext::current() != nullptr;
   555 }
   557 } // namespace js
   559 #endif /* vm_ForkJoin_h */

mercurial