/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef vm_ThreadPool_h
#define vm_ThreadPool_h

#include "mozilla/Atomics.h"

#include "jsalloc.h"
#include "jslock.h"
#include "jsmath.h"
#include "jspubtd.h"

#include "js/Vector.h"
#include "vm/Monitor.h"

struct JSRuntime;
struct JSCompartment;

namespace js {

class ThreadPool;

/////////////////////////////////////////////////////////////////////////////
// ThreadPoolWorker
//
// Class for worker threads in the pool. All threads (i.e. helpers and main
// thread) have a worker associated with them. By convention, the worker id of
// the main thread is 0.

class ThreadPoolWorker
{
    const uint32_t workerId_;
    ThreadPool *pool_;

    // Slices this thread is responsible for.
    //
    // This is a uint32 composed of two uint16s (the lower and upper bounds) so
    // that we may do a single CAS. See {Compose,Decompose}SliceBounds
    // functions below (an illustrative sketch of the packing follows this
    // class).
    mozilla::Atomic<uint32_t> sliceBounds_;

    // Current point in the worker's lifecycle.
    volatile enum WorkerState {
        CREATED, ACTIVE, TERMINATED
    } state_;

    // Per-worker scheduler RNG state used for picking a random worker during
    // work stealing.
    uint32_t schedulerRNGState_;

    // The thread's main function.
    static void HelperThreadMain(void *arg);
    void helperLoop();

    bool hasWork() const;
    bool popSliceFront(uint16_t *sliceId);
    bool popSliceBack(uint16_t *sliceId);
    bool stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId);

    // Get a worker at random from the pool using our own thread-local RNG
    // state. This is a weak, but very fast, random function [1]. We choose
    // [a,b,c] = 11,21,13. (A sketch of the update step follows this class.)
    //
    // [1] http://www.jstatsoft.org/v08/i14/paper
  public:
    static const uint32_t XORSHIFT_A = 11;
    static const uint32_t XORSHIFT_B = 21;
    static const uint32_t XORSHIFT_C = 13;

  private:
    ThreadPoolWorker *randomWorker();

  public:
    ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool);

    uint32_t id() const { return workerId_; }
    bool isMainThread() const { return id() == 0; }

    // Submits a new set of slices. Assumes !hasWork().
    void submitSlices(uint16_t sliceStart, uint16_t sliceEnd);

    // Get the next slice; work stealing happens here if work stealing is
    // on. Returns false if there are no more slices to hand out.
    bool getSlice(ForkJoinContext *cx, uint16_t *sliceId);

    // Discard remaining slices. Used for aborting jobs.
    void discardSlices();

    // Invoked from the main thread; signals worker to start.
    bool start();

    // Invoked from the main thread; signals the worker loop to return.
    void terminate(AutoLockMonitor &lock);

    static size_t offsetOfSliceBounds() {
        return offsetof(ThreadPoolWorker, sliceBounds_);
    }

    static size_t offsetOfSchedulerRNGState() {
        return offsetof(ThreadPoolWorker, schedulerRNGState_);
    }
};
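
// The helpers below are illustrative sketches only, not part of the real
// interface (the actual {Compose,Decompose}SliceBounds helpers and the
// scheduler RNG update live with the implementation). They show, under that
// assumption, (1) how two uint16 slice bounds can be packed into the single
// uint32 behind |sliceBounds_| so that both ends can be updated with one CAS,
// and (2) one xorshift update step using the [a,b,c] = 11,21,13 constants
// above. The |Example*| names, the choice of which half holds which bound,
// and the exact order of the shifts are assumptions made for illustration.

static inline uint32_t
ExampleComposeSliceBounds(uint16_t from, uint16_t to)
{
    // Pack the pair into one word: upper half = front of the range,
    // lower half = end of the range (the real convention may differ).
    return (uint32_t(from) << 16) | uint32_t(to);
}

static inline void
ExampleDecomposeSliceBounds(uint32_t bounds, uint16_t *from, uint16_t *to)
{
    // Inverse of the packing above.
    *from = uint16_t(bounds >> 16);
    *to = uint16_t(bounds & 0xffff);
}

static inline uint32_t
ExampleXorShiftStep(uint32_t x)
{
    // One step of the weak-but-fast generator from the paper cited above,
    // written in its standard shift-left/shift-right/shift-left form.
    x ^= x << ThreadPoolWorker::XORSHIFT_A;
    x ^= x >> ThreadPoolWorker::XORSHIFT_B;
    x ^= x << ThreadPoolWorker::XORSHIFT_C;
    return x;
}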

/////////////////////////////////////////////////////////////////////////////
// A ParallelJob is the main runnable abstraction in the ThreadPool.
//
// The unit of work here is in terms of threads, *not* slices. The
// user-provided function has the responsibility of getting slices of work via
// the |ForkJoinGetSlice| intrinsic. (A minimal illustrative subclass follows
// the class declaration below.)

class ParallelJob
{
  public:
    virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) = 0;
    virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) = 0;
};
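
// Illustrative only: the smallest possible ParallelJob. A real job (e.g.
// |ForkJoinShared| in ForkJoin.cpp) arranges for the parallel work to be
// carved into slices and pulled by each thread; this hypothetical subclass
// just shows the two entry points, one of which the pool invokes on each
// participating thread.
class ExampleNopJob : public ParallelJob
{
  public:
    virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) {
        // Per-thread work would go here; |worker->id()| identifies the thread
        // and |stackLimit| bounds the native stack the job may use.
        return true;
    }
    virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) {
        // Same idea, but invoked on the main thread (worker id 0).
        return true;
    }
};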

/////////////////////////////////////////////////////////////////////////////
// ThreadPool used for parallel JavaScript execution. Unless you are building
// a new kind of parallel service, it is very likely that you do not wish to
// interact with the threadpool directly. In particular, if you wish to
// execute JavaScript in parallel, you probably want to look at |js::ForkJoin|
// in |ForkJoin.cpp|.
//
// The ThreadPool always maintains a fixed pool of worker threads. You can
// query the number of worker threads via the method |numWorkers()|. Note
// that this number may be zero (generally if threads are disabled, or when
// manually specified for benchmarking purposes).
//
// The way to submit a job is using |executeJob()|---in this case, the job
// will be executed by all worker threads, including the main thread. This
// does not fail if there are no worker threads, it simply runs all the work
// using the main thread only.
//
// Of course, each thread may have any number of previously submitted things
// that they are already working on, and so they will finish those before they
// get to this job. Therefore it is possible to have some worker threads pick
// up (and even finish) their piece of the job before others have even
// started. The main thread is also used by the pool as a worker thread.
//
// The ThreadPool supports work stealing. Every time a worker completes all
// the slices in its local queue, it tries to acquire some work from other
// workers (including the main thread). Execution terminates when there is no
// work left to be done, i.e., when all the workers have an empty queue. The
// stealing algorithm operates in 2 phases: (1) workers process all the slices
// in their local queue, and then (2) workers try to steal from other peers.
// Since workers start to steal only *after* they have completed all the
// slices in their queue, the design is particularly convenient in the context
// of Fork/Join-like parallelism, where workers receive a bunch of slices to
// be done at the very beginning of the job, and have to wait until all the
// threads have joined back. During phase (1) there is no synchronization
// overhead between workers introduced by the stealing algorithm, and
// therefore the execution overhead introduced is almost zero with balanced
// workloads. The way a |ParallelJob| is divided into multiple slices has to
// be specified by the instance implementing the job (e.g., |ForkJoinShared|
// in |ForkJoin.cpp|).

class ThreadPool : public Monitor
{
  private:
    friend class ThreadPoolWorker;

    // Initialized lazily.
    js::Vector<ThreadPoolWorker *, 8, SystemAllocPolicy> workers_;

    // The number of active workers. Should only be accessed under lock.
    uint32_t activeWorkers_;
    PRCondVar *joinBarrier_;

    // The current job.
    ParallelJob *job_;

#ifdef DEBUG
    // Initialized at startup only.
    JSRuntime *const runtime_;

    // Number of stolen slices in the last parallel job.
    mozilla::Atomic<uint16_t> stolenSlices_;
#endif

    // Number of pending slices in the current job.
    mozilla::Atomic<uint32_t> pendingSlices_;

    // Whether the main thread is currently processing slices.
    bool isMainThreadActive_;

    bool lazyStartWorkers(JSContext *cx);
    void terminateWorkers();
    void terminateWorkersAndReportOOM(JSContext *cx);
    void join(AutoLockMonitor &lock);
    void waitForWorkers(AutoLockMonitor &lock);
    ThreadPoolWorker *mainThreadWorker() { return workers_[0]; }

  public:
#ifdef DEBUG
    static size_t offsetOfStolenSlices() {
        return offsetof(ThreadPool, stolenSlices_);
    }
#endif
    static size_t offsetOfPendingSlices() {
        return offsetof(ThreadPool, pendingSlices_);
    }
    static size_t offsetOfWorkers() {
        return offsetof(ThreadPool, workers_);
    }

    static const uint16_t MAX_SLICE_ID = UINT16_MAX;

    ThreadPool(JSRuntime *rt);
    ~ThreadPool();

    bool init();

    // Return number of worker threads in the pool, counting the main thread.
    uint32_t numWorkers() const;

    // Returns whether we have any pending slices.
    bool hasWork() const { return pendingSlices_ != 0; }

    // Returns the current job. Must have one.
    ParallelJob *job() const {
        MOZ_ASSERT(job_);
        return job_;
    }

    // Returns whether or not the scheduler should perform work stealing.
    bool workStealing() const;

    // Returns whether or not the main thread is working.
    bool isMainThreadActive() const { return isMainThreadActive_; }

#ifdef DEBUG
    // Return the number of stolen slices in the last parallel job.
    uint16_t stolenSlices() { return stolenSlices_; }
#endif

    // Wait until all worker threads have finished their current set
    // of slices and then return. You must not submit new jobs after
    // invoking |terminate()|.
    void terminate();

    // Execute the given ParallelJob using the main thread and any available
    // worker. Blocks until the main thread has completed execution. (A usage
    // sketch follows this class.)
    ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart,
                              uint16_t numSlices);

    // Abort the current job.
    void abortJob();
};
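
// Illustrative sketch only, not part of the real API surface: driving the
// pool directly. Real callers go through |js::ForkJoin|; |ExampleRunJob| is a
// hypothetical helper, and it assumes an already-initialized pool (normally
// the runtime-wide instance) plus a job such as the sketch above.
inline ParallelResult
ExampleRunJob(JSContext *cx, ThreadPool &pool, ParallelJob &job, uint16_t numSlices)
{
    // Slices [0, numSlices) are distributed over the main thread and any
    // helper threads; if the pool has no helpers, everything runs on the main
    // thread. The call blocks until the main thread has finished its share.
    return pool.executeJob(cx, &job, 0, numSlices);
}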

} // namespace js

#endif /* vm_ThreadPool_h */