js/src/vm/ForkJoin.h

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:32cc6d314b52
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #ifndef vm_ForkJoin_h
8 #define vm_ForkJoin_h
9
10 #include "mozilla/ThreadLocal.h"
11
12 #include "jscntxt.h"
13
14 #include "gc/GCInternals.h"
15
16 #include "jit/Ion.h"
17
18 ///////////////////////////////////////////////////////////////////////////
19 // Read Me First
20 //
21 // The ForkJoin abstraction:
22 // -------------------------
23 //
24 // This is the building block for executing multi-threaded JavaScript with
25 // shared memory (as distinct from Web Workers). The idea is that you have
26 // some (typically data-parallel) operation which you wish to execute in
27 // parallel across as many threads as you have available.
28 //
29 // The ForkJoin abstraction is intended to be used by self-hosted code
30 // to enable parallel execution. At the top-level, it consists of a native
31 // function (exposed as the ForkJoin intrinsic) that is used like so:
32 //
33 // ForkJoin(func, sliceStart, sliceEnd, mode)
34 //
35 // The intention of this statement is to start some number (usually the
36 // number of hardware threads) of copies of |func()| running in parallel. Each
37 // copy will then do a portion of the total work, depending on
38 // workstealing-based load balancing.
39 //
40 // Typically, each of the N slices runs in a different worker thread, but that
41 // is not something you should rely upon---if work-stealing is enabled it
42 // could be that a single worker thread winds up handling multiple slices.
43 //
44 // The second and third arguments, |sliceStart| and |sliceEnd|, are the slice
45 // boundaries. These numbers must each fit inside a uint16_t.
46 //
47 // The fourth argument, |mode|, is an internal mode integer giving finer
48 // control over the behavior of ForkJoin. See the |ForkJoinMode| enum.
49 //
50 // func() should expect the following arguments:
51 //
52 // func(workerId, sliceStart, sliceEnd)
53 //
54 // The |workerId| parameter is the id of the worker executing the function. It
55 // is 0 in sequential mode.
56 //
57 // The |sliceStart| and |sliceEnd| parameters are the current bounds that
58 // the worker is handling. In parallel execution, these parameters are not
59 // used. In sequential execution, they tell the worker what slices should be
60 // processed. During the warm up phase, sliceEnd == sliceStart + 1.
61 //
62 // |func| can keep asking for more work from the scheduler by calling the
63 // intrinsic |GetForkJoinSlice(sliceStart, sliceEnd, id)|. When there are no
64 // more slices to hand out, ThreadPool::MAX_SLICE_ID is returned as a sentinel
65 // value. By exposing this function as an intrinsic, we reduce the number of
66 // JS-C++ boundary crossings incurred by workstealing, which may have many
67 // slices.
68 //
69 // In sequential execution, |func| should return the maximum computed slice id
70 // S for which all slices with id < S have already been processed. This is so
71 // ThreadPool can track the leftmost completed slice id to maintain
72 // determinism. Slices which have been completed in sequential execution
73 // cannot be re-run in parallel execution.
74 //
75 // In parallel execution, |func| MUST PROCESS ALL SLICES BEFORE RETURNING!
76 // Not doing so is an error and is protected by debug asserts in ThreadPool.
77 //
78 // Warmups and Sequential Fallbacks
79 // --------------------------------
80 //
81 // ForkJoin can only execute code in parallel when it has been
82 // ion-compiled in Parallel Execution Mode. ForkJoin handles this part
83 // for you. However, because ion relies on having decent type
84 // information available, it is necessary to run the code sequentially
85 // for a few iterations first to get the various type sets "primed"
86 // with reasonable information. We try to make do with just a few
87 // runs, under the hypothesis that parallel execution code will reach
88 // type stability relatively quickly.
89 //
90 // The general strategy of ForkJoin is as follows:
91 //
92 // - If the code has not yet been run, invoke `func` sequentially with
93 // warmup set to true. When warmup is true, `func` should try and
94 // do less work than normal---just enough to prime type sets. (See
95 // ParallelArray.js for a discussion of specifically how we do this
96 // in the case of ParallelArray).
97 //
98 // - Try to execute the code in parallel. Parallel execution mode has
99 // three possible results: success, fatal error, or bailout. If a
100 // bailout occurs, it means that the code attempted some action
101 // which is not possible in parallel mode. This might be a
102 // modification to shared state, but it might also be that it
103 // attempted to take some theoretically pure action that has not been
104 // made threadsafe (yet?).
105 //
106 // - If parallel execution is successful, ForkJoin returns true.
107 //
108 // - If parallel execution results in a fatal error, ForkJoin returns false.
109 //
110 // - If parallel execution results in a *bailout*, this is when things
111 // get interesting. In that case, the semantics of parallel
112 // execution guarantee us that no visible side effects have occurred
113 // (unless they were performed with the intrinsic
114 // |UnsafePutElements()|, which can only be used in self-hosted
115 // code). We therefore reinvoke |func()| but with warmup set to
116 // true. The idea here is that often parallel bailouts result from
117 // a failed type guard or other similar assumption, so rerunning the
118 // warmup sequentially gives us a chance to recompile with more
119 // data. Because warmup is true, we do not expect this sequential
120 // call to process all remaining data, just a chunk. After this
121 // recovery execution is complete, we again attempt parallel
122 // execution.
123 //
124 // - If more than a fixed number of bailouts occur, we give up on
125 // parallelization and just invoke |func()| N times in a row (once
126 // for each worker) but with |warmup| set to false.
127 //
128 // Interrupts:
129 //
130 // During parallel execution, |cx.check()| must be periodically invoked to
131 // check for interrupts. This is automatically done by the Ion-generated
132 // code. If an interrupt has been requested |cx.check()| aborts parallel
133 // execution.
134 //
135 // Transitive compilation:
136 //
137 // One of the challenges for parallel compilation is that we
138 // (currently) have to abort when we encounter an uncompiled script.
139 // Therefore, we try to compile everything that might be needed
140 // beforehand. The exact strategy is described in `ParallelDo::apply()`
141 // in ForkJoin.cpp, but at the highest level the idea is:
142 //
143 // 1. We maintain a flag on every script telling us if that script and
144 // its transitive callees are believed to be compiled. If that flag
145 // is set, we can skip the initial compilation.
146 // 2. Otherwise, we maintain a worklist that begins with the main
147 // script. We compile it and then examine the generated parallel IonScript,
148 // which will have a list of callees. We enqueue those. Some of these
149 // compilations may take place off the main thread, in which case
150 // we will run warmup iterations while we wait for them to complete.
151 // 3. If the warmup iterations finish all the work, we're done.
152 // 4. If compilations fail, we fallback to sequential.
153 // 5. Otherwise, we will try running in parallel once we're all done.
154 //
155 // Bailout tracing and recording:
156 //
157 // When a bailout occurs, we record a bit of state so that we can
158 // recover with grace. Each |ForkJoinContext| has a pointer to a
159 // |ParallelBailoutRecord| pre-allocated for this purpose. This
160 // structure is used to record the cause of the bailout, the JSScript
161 // which was executing, as well as the location in the source where
162 // the bailout occurred (in principle, we can record a full stack
163 // trace, but right now we only record the top-most frame). Note that
164 // the error location might not be in the same JSScript as the one
165 // which was executing due to inlining.
166 //
167 // Garbage collection and allocation:
168 //
169 // Code which executes on these parallel threads must be very careful
170 // with respect to garbage collection and allocation. The typical
171 // allocation paths are UNSAFE in parallel code because they access
172 // shared state (the compartment's arena lists and so forth) without
173 // any synchronization. They can also trigger GC in an ad-hoc way.
174 //
175 // To deal with this, the forkjoin code creates a distinct |Allocator|
176 // object for each slice. You can access the appropriate object via
177 // the |ForkJoinContext| object that is provided to the callbacks. Once
178 // the execution is complete, all the objects found in these distinct
179 // |Allocator|s are merged back into the main compartment lists and
180 // things proceed normally.
181 //
182 // In Ion-generated code, we will do allocation through the
183 // |Allocator| found in |ForkJoinContext| (which is obtained via TLS).
184 // Also, no write barriers are emitted. Conceptually, we should never
185 // need a write barrier because we only permit writes to objects that
186 // are newly allocated, and such objects are always black (to use
187 // incremental GC terminology). However, to be safe, we also block
188 // upon entering a parallel section to ensure that any concurrent
189 // marking or incremental GC has completed.
190 //
191 // In the future, it should be possible to lift the restriction that
192 // we must block until inc. GC has completed and also to permit GC
193 // during parallel execution. But we're not there yet.
194 //
195 // Load balancing (work stealing):
196 //
197 // The ForkJoin job is dynamically divided into a fixed number of slices,
198 // and is submitted for parallel execution in the pool. When the number
199 // of slices is big enough (typically greater than the number of workers
200 // in the pool) -and the workload is unbalanced- each worker thread
201 // will perform load balancing through work stealing. The number
202 // of slices is computed by the self-hosted function |ComputeNumSlices|
203 // and can be used to know how many slices will be executed by the
204 // runtime for an array of the given size.
205 //
206 // Current Limitations:
207 //
208 // - The API does not support recursive or nested use. That is, the
209 // JavaScript function given to |ForkJoin| should not itself invoke
210 // |ForkJoin()|. Instead, use the intrinsic |InParallelSection()| to
211 // check for recursive use and execute a sequential fallback.
212 //
213 ///////////////////////////////////////////////////////////////////////////
214
215 namespace js {
216
217 class ForkJoinActivation : public Activation
218 {
219 uint8_t *prevIonTop_;
220
221 // We ensure that incremental GC be finished before we enter into a fork
222 // join section, but the runtime/zone might still be marked as needing
223 // barriers due to being in the middle of verifying barriers. Pause
224 // verification during the fork join section.
225 gc::AutoStopVerifyingBarriers av_;
226
227 public:
228 ForkJoinActivation(JSContext *cx);
229 ~ForkJoinActivation();
230 };
231
232 class ForkJoinContext;
233
234 bool ForkJoin(JSContext *cx, CallArgs &args);
235
236 struct IonLIRTraceData {
237 uint32_t blockIndex;
238 uint32_t lirIndex;
239 uint32_t execModeInt;
240 const char *lirOpName;
241 const char *mirOpName;
242 JSScript *script;
243 jsbytecode *pc;
244 };
245
246 ///////////////////////////////////////////////////////////////////////////
247 // Bailout tracking
248
// Reasons recorded in a ParallelBailoutRecord for leaving parallel execution.
// Enumerators are numbered consecutively starting from zero.
enum ParallelBailoutCause
{
    // No bailout has been recorded.
    ParallelBailoutNone = 0,

    // The compiler returned Method_Skipped.
    ParallelBailoutCompilationSkipped,

    // The compiler returned Method_CantCompile.
    ParallelBailoutCompilationFailure,

    // The periodic interrupt check failed: another thread canceled, the user
    // interrupted us, etc.
    ParallelBailoutInterrupt,

    // An IC update failed.
    ParallelBailoutFailedIC,

    // The heap busy flag was set during an interrupt.
    ParallelBailoutHeapBusy,

    // The remaining causes carry no further documentation in this header.
    ParallelBailoutMainScriptNotPresent,
    ParallelBailoutCalledToUncompiledScript,
    ParallelBailoutIllegalWrite,
    ParallelBailoutAccessToIntrinsic,
    ParallelBailoutOverRecursed,
    ParallelBailoutOutOfMemory,
    ParallelBailoutUnsupported,
    ParallelBailoutUnsupportedVM,
    ParallelBailoutUnsupportedStringComparison,
    ParallelBailoutRequestedGC,
    ParallelBailoutRequestedZoneGC
};
280
281 struct ParallelBailoutTrace {
282 JSScript *script;
283 jsbytecode *bytecode;
284 };
285
286 // See "Bailouts" section in comment above.
287 struct ParallelBailoutRecord {
288 JSScript *topScript;
289 ParallelBailoutCause cause;
290
291 // Eventually we will support deeper traces,
292 // but for now we gather at most a single frame.
293 static const uint32_t MaxDepth = 1;
294 uint32_t depth;
295 ParallelBailoutTrace trace[MaxDepth];
296
297 void init(JSContext *cx);
298 void reset(JSContext *cx);
299 void setCause(ParallelBailoutCause cause,
300 JSScript *outermostScript = nullptr, // inliner (if applicable)
301 JSScript *currentScript = nullptr, // inlinee (if applicable)
302 jsbytecode *currentPc = nullptr);
303 void updateCause(ParallelBailoutCause cause,
304 JSScript *outermostScript,
305 JSScript *currentScript,
306 jsbytecode *currentPc);
307 void addTrace(JSScript *script,
308 jsbytecode *pc);
309 };
310
311 struct ForkJoinShared;
312
313 class ForkJoinContext : public ThreadSafeContext
314 {
315 public:
316 // Bailout record used to record the reason this thread stopped executing
317 ParallelBailoutRecord *const bailoutRecord;
318
319 #ifdef DEBUG
320 // Records the last instr. to execute on this thread.
321 IonLIRTraceData traceData;
322
323 // The maximum worker id.
324 uint32_t maxWorkerId;
325 #endif
326
327 // When we run a par operation like mapPar, we create an out pointer
328 // into a specific region of the destination buffer. Even though the
329 // destination buffer is not thread-local, it is permissible to write into
330 // it via the handles provided. These two fields identify the memory
331 // region where writes are allowed so that the write guards can test for
332 // it.
333 //
334 // Note: we only permit writes into the *specific region* that the user
335 // is supposed to write. Normally, they only have access to this region
336 // anyhow. But due to sequential fallback it is possible for handles into
337 // other regions to escape into global variables in the sequential
338 // execution and then get accessed by later parallel sections. Thus we
339 // must be careful and ensure that the write is going through a handle
340 // into the correct *region* of the buffer.
341 uint8_t *targetRegionStart;
342 uint8_t *targetRegionEnd;
343
344 ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
345 Allocator *allocator, ForkJoinShared *shared,
346 ParallelBailoutRecord *bailoutRecord);
347
348 // Get the worker id. The main thread by convention has the id of the max
349 // worker thread id + 1.
350 uint32_t workerId() const { return worker_->id(); }
351
352 // Get a slice of work for the worker associated with the context.
353 bool getSlice(uint16_t *sliceId) { return worker_->getSlice(this, sliceId); }
354
355 // True if this is the main thread, false if it is one of the parallel workers.
356 bool isMainThread() const;
357
358 // When the code would normally trigger a GC, we don't trigger it
359 // immediately but instead record that request here. This will
360 // cause |ExecuteForkJoinOp()| to invoke |TriggerGC()| or
361 // |TriggerCompartmentGC()| as appropriate once the parallel
362 // section is complete. This is done because those routines do
363 // various preparations that are not thread-safe, and because the
364 // full set of arenas is not available until the end of the
365 // parallel section.
366 void requestGC(JS::gcreason::Reason reason);
367 void requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason);
368
369 // Set the fatal flag for the next abort. Used to distinguish retry or
370 // fatal aborts from VM functions.
371 bool setPendingAbortFatal(ParallelBailoutCause cause);
372
373 // Reports an unsupported operation, returning false if we are reporting
374 // an error. Otherwise drop the warning on the floor.
375 bool reportError(ParallelBailoutCause cause, unsigned report) {
376 if (report & JSREPORT_ERROR)
377 return setPendingAbortFatal(cause);
378 return true;
379 }
380
381 // During the parallel phase, this method should be invoked
382 // periodically, for example on every backedge, similar to the
383 // interrupt check. If it returns false, then the parallel phase
384 // has been aborted and so you should bailout. The function may
385 // also rendesvous to perform GC or do other similar things.
386 //
387 // This function is guaranteed to have no effect if both
388 // runtime()->interruptPar is zero. Ion-generated code takes
389 // advantage of this by inlining the checks on those flags before
390 // actually calling this function. If this function ends up
391 // getting called a lot from outside ion code, we can refactor
392 // it into an inlined version with this check that calls a slower
393 // version.
394 bool check();
395
396 // Be wary, the runtime is shared between all threads!
397 JSRuntime *runtime();
398
399 // Acquire and release the JSContext from the runtime.
400 JSContext *acquireJSContext();
401 void releaseJSContext();
402 bool hasAcquiredJSContext() const;
403
404 // Check the current state of parallel execution.
405 static inline ForkJoinContext *current();
406
407 // Initializes the thread-local state.
408 static bool initialize();
409
410 // Used in inlining GetForkJoinSlice.
411 static size_t offsetOfWorker() {
412 return offsetof(ForkJoinContext, worker_);
413 }
414
415 private:
416 friend class AutoSetForkJoinContext;
417
418 // Initialized by initialize()
419 static mozilla::ThreadLocal<ForkJoinContext*> tlsForkJoinContext;
420
421 ForkJoinShared *const shared_;
422
423 ThreadPoolWorker *worker_;
424
425 bool acquiredJSContext_;
426
427 // ForkJoinContext is allocated on the stack. It would be dangerous to GC
428 // with it live because of the GC pointer fields stored in the context.
429 JS::AutoAssertNoGC nogc_;
430 };
431
432 // Locks a JSContext for its scope. Be very careful, because locking a
433 // JSContext does *not* allow you to safely mutate the data in the
434 // JSContext unless you can guarantee that any of the other threads
435 // that want to access that data will also acquire the lock, which is
436 // generally not the case. For example, the lock is used in the IC
437 // code to allow us to atomically patch up the dispatch table, but we
438 // must be aware that other threads may be reading from the table even
439 // as we write to it (though they cannot be writing, since they must
440 // hold the lock to write).
441 class LockedJSContext
442 {
443 #if defined(JS_THREADSAFE) && defined(JS_ION)
444 ForkJoinContext *cx_;
445 #endif
446 JSContext *jscx_;
447
448 public:
449 LockedJSContext(ForkJoinContext *cx)
450 #if defined(JS_THREADSAFE) && defined(JS_ION)
451 : cx_(cx),
452 jscx_(cx->acquireJSContext())
453 #else
454 : jscx_(nullptr)
455 #endif
456 { }
457
458 ~LockedJSContext() {
459 #if defined(JS_THREADSAFE) && defined(JS_ION)
460 cx_->releaseJSContext();
461 #endif
462 }
463
464 operator JSContext *() { return jscx_; }
465 JSContext *operator->() { return jscx_; }
466 };
467
468 bool InExclusiveParallelSection();
469
470 bool ParallelTestsShouldPass(JSContext *cx);
471
472 void RequestInterruptForForkJoin(JSRuntime *rt, JSRuntime::InterruptMode mode);
473
474 bool intrinsic_SetForkJoinTargetRegion(JSContext *cx, unsigned argc, Value *vp);
475 extern const JSJitInfo intrinsic_SetForkJoinTargetRegionInfo;
476
477 bool intrinsic_ClearThreadLocalArenas(JSContext *cx, unsigned argc, Value *vp);
478 extern const JSJitInfo intrinsic_ClearThreadLocalArenasInfo;
479
480 ///////////////////////////////////////////////////////////////////////////
481 // Debug Spew
482
483 namespace jit {
484 class MDefinition;
485 }
486
487 namespace parallel {
488
// How a fork join operation ended up being executed.
enum ExecutionStatus
{
    // Parallel or sequential execution terminated in a fatal way; the
    // operation failed.
    ExecutionFatal = 0,

    // Parallel execution failed, so we fell back to sequential.
    ExecutionSequential,

    // The work was completed in sequential mode before parallel compilation
    // completed.
    ExecutionWarmup,

    // Parallel execution succeeded after some number of bailouts.
    ExecutionParallel
};
502
// Channels accepted by the Spew functions. NumSpewChannels is not a channel;
// it is the last enumerator and therefore equals the number of channels.
enum SpewChannel
{
    SpewOps = 0,
    SpewCompile,
    SpewBailouts,
    NumSpewChannels
};
509
510 #if defined(DEBUG) && defined(JS_THREADSAFE) && defined(JS_ION)
511
512 bool SpewEnabled(SpewChannel channel);
513 void Spew(SpewChannel channel, const char *fmt, ...);
514 void SpewBeginOp(JSContext *cx, const char *name);
515 void SpewBailout(uint32_t count, HandleScript script, jsbytecode *pc,
516 ParallelBailoutCause cause);
517 ExecutionStatus SpewEndOp(ExecutionStatus status);
518 void SpewBeginCompile(HandleScript script);
519 jit::MethodStatus SpewEndCompile(jit::MethodStatus status);
520 void SpewMIR(jit::MDefinition *mir, const char *fmt, ...);
521 void SpewBailoutIR(IonLIRTraceData *data);
522
523 #else
524
525 static inline bool SpewEnabled(SpewChannel channel) { return false; }
526 static inline void Spew(SpewChannel channel, const char *fmt, ...) { }
527 static inline void SpewBeginOp(JSContext *cx, const char *name) { }
528 static inline void SpewBailout(uint32_t count, HandleScript script,
529 jsbytecode *pc, ParallelBailoutCause cause) {}
530 static inline ExecutionStatus SpewEndOp(ExecutionStatus status) { return status; }
531 static inline void SpewBeginCompile(HandleScript script) { }
532 #ifdef JS_ION
533 static inline jit::MethodStatus SpewEndCompile(jit::MethodStatus status) { return status; }
534 static inline void SpewMIR(jit::MDefinition *mir, const char *fmt, ...) { }
535 #endif
536 static inline void SpewBailoutIR(IonLIRTraceData *data) { }
537
538 #endif // DEBUG && JS_THREADSAFE && JS_ION
539
540 } // namespace parallel
541 } // namespace js
542
543 /* static */ inline js::ForkJoinContext *
544 js::ForkJoinContext::current()
545 {
546 return tlsForkJoinContext.get();
547 }
548
549 namespace js {
550
551 static inline bool
552 InParallelSection()
553 {
554 return ForkJoinContext::current() != nullptr;
555 }
556
557 } // namespace js
558
559 #endif /* vm_ForkJoin_h */

mercurial