1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/js/src/vm/ThreadPool.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,257 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99: 1.6 + * This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#ifndef vm_ThreadPool_h 1.11 +#define vm_ThreadPool_h 1.12 + 1.13 +#include "mozilla/Atomics.h" 1.14 + 1.15 +#include "jsalloc.h" 1.16 +#include "jslock.h" 1.17 +#include "jsmath.h" 1.18 +#include "jspubtd.h" 1.19 + 1.20 +#include "js/Vector.h" 1.21 +#include "vm/Monitor.h" 1.22 + 1.23 +struct JSRuntime; 1.24 +struct JSCompartment; 1.25 + 1.26 +namespace js { 1.27 + 1.28 +class ThreadPool; 1.29 + 1.30 +///////////////////////////////////////////////////////////////////////////// 1.31 +// ThreadPoolWorker 1.32 +// 1.33 +// Class for worker threads in the pool. All threads (i.e. helpers and main 1.34 +// thread) have a worker associated with them. By convention, the worker id of 1.35 +// the main thread is 0. 1.36 + 1.37 +class ThreadPoolWorker 1.38 +{ 1.39 + const uint32_t workerId_; 1.40 + ThreadPool *pool_; 1.41 + 1.42 + // Slices this thread is responsible for. 1.43 + // 1.44 + // This is a uint32 composed of two uint16s (the lower and upper bounds) so 1.45 + // that we may do a single CAS. See {Compose,Decompose}SliceBounds 1.46 + // functions below. 1.47 + mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sliceBounds_; 1.48 + 1.49 + // Current point in the worker's lifecycle. 1.50 + volatile enum WorkerState { 1.51 + CREATED, ACTIVE, TERMINATED 1.52 + } state_; 1.53 + 1.54 + // Per-worker scheduler RNG state used for picking a random worker during 1.55 + // work stealing. 1.56 + uint32_t schedulerRNGState_; 1.57 + 1.58 + // The thread's main function. 
1.59 + static void HelperThreadMain(void *arg); 1.60 + void helperLoop(); 1.61 + 1.62 + bool hasWork() const; 1.63 + bool popSliceFront(uint16_t *sliceId); 1.64 + bool popSliceBack(uint16_t *sliceId); 1.65 + bool stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId); 1.66 + 1.67 + // Get a worker at random from the pool using our own thread-local RNG 1.68 + // state. This is a weak, but very fast, random function [1]. We choose 1.69 + // [a,b,c] = 11,21,13. 1.70 + // 1.71 + // [1] http://www.jstatsoft.org/v08/i14/paper 1.72 + public: 1.73 + static const uint32_t XORSHIFT_A = 11; 1.74 + static const uint32_t XORSHIFT_B = 21; 1.75 + static const uint32_t XORSHIFT_C = 13; 1.76 + 1.77 + private: 1.78 + ThreadPoolWorker *randomWorker(); 1.79 + 1.80 + public: 1.81 + ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool); 1.82 + 1.83 + uint32_t id() const { return workerId_; } 1.84 + bool isMainThread() const { return id() == 0; } 1.85 + 1.86 + // Submits a new set of slices. Assumes !hasWork(). 1.87 + void submitSlices(uint16_t sliceStart, uint16_t sliceEnd); 1.88 + 1.89 + // Get the next slice; work stealing happens here if work stealing is 1.90 + // on. Returns false if there are no more slices to hand out. 1.91 + bool getSlice(ForkJoinContext *cx, uint16_t *sliceId); 1.92 + 1.93 + // Discard remaining slices. Used for aborting jobs. 1.94 + void discardSlices(); 1.95 + 1.96 + // Invoked from the main thread; signals worker to start. 1.97 + bool start(); 1.98 + 1.99 + // Invoked from the main thread; signals the worker loop to return. 
1.100 + void terminate(AutoLockMonitor &lock); 1.101 + 1.102 + static size_t offsetOfSliceBounds() { 1.103 + return offsetof(ThreadPoolWorker, sliceBounds_); 1.104 + } 1.105 + 1.106 + static size_t offsetOfSchedulerRNGState() { 1.107 + return offsetof(ThreadPoolWorker, schedulerRNGState_); 1.108 + } 1.109 +}; 1.110 + 1.111 +///////////////////////////////////////////////////////////////////////////// 1.112 +// A ParallelJob is the main runnable abstraction in the ThreadPool. 1.113 +// 1.114 +// The unit of work here is in terms of threads, *not* slices. The 1.115 +// user-provided function has the responsibility of getting slices of work via 1.116 +// the |ForkJoinGetSlice| intrinsic. 1.117 + 1.118 +class ParallelJob 1.119 +{ 1.120 + public: 1.121 + virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) = 0; 1.122 + virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) = 0; 1.123 +}; 1.124 + 1.125 +///////////////////////////////////////////////////////////////////////////// 1.126 +// ThreadPool used for parallel JavaScript execution. Unless you are building 1.127 +// a new kind of parallel service, it is very likely that you do not wish to 1.128 +// interact with the threadpool directly. In particular, if you wish to 1.129 +// execute JavaScript in parallel, you probably want to look at |js::ForkJoin| 1.130 +// in |ForkJoin.cpp|. 1.131 +// 1.132 +// The ThreadPool always maintains a fixed pool of worker threads. You can 1.133 +// query the number of worker threads via the method |numWorkers()|. Note 1.134 +// that this number may be zero (generally if threads are disabled, or when 1.135 +// manually specified for benchmarking purposes). 1.136 +// 1.137 +// The way to submit a job is using |executeJob()|---in this case, the job 1.138 +// will be executed by all worker threads, including the main thread. This 1.139 +// does not fail if there are no worker threads, it simply runs all the work 1.140 +// using the main thread only. 
1.141 +// 1.142 +// Of course, each thread may have any number of previously submitted things 1.143 +// that they are already working on, and so they will finish those before they 1.144 +// get to this job. Therefore it is possible to have some worker threads pick 1.145 +// up (and even finish) their piece of the job before others have even 1.146 +// started. The main thread is also used by the pool as a worker thread. 1.147 +// 1.148 +// The ThreadPool supports work stealing. Every time a worker completes all 1.149 +// the slices in its local queue, it tries to acquire some work from other 1.150 +// workers (including the main thread). Execution terminates when there is no 1.151 +// work left to be done, i.e., when all the workers have an empty queue. The 1.152 +// stealing algorithm operates in 2 phases: (1) workers process all the slices 1.153 +// in their local queue, and then (2) workers try to steal from other peers. 1.154 +// Since workers start to steal only *after* they have completed all the 1.155 +// slices in their queue, the design is particularly convenient in the context 1.156 +// of Fork/Join-like parallelism, where workers receive a bunch of slices to 1.157 +// be done at the very beginning of the job, and have to wait until all the 1.158 +// threads have joined back. During phase (1) there is no synchronization 1.159 +// overhead between workers introduced by the stealing algorithm, and 1.160 +// therefore the execution overhead introduced is almost zero with balanced 1.161 +// workloads. The way a |ParallelJob| is divided into multiple slices has to 1.162 +// be specified by the instance implementing the job (e.g., |ForkJoinShared| 1.163 +// in |ForkJoin.cpp|). 1.164 + 1.165 +class ThreadPool : public Monitor 1.166 +{ 1.167 + private: 1.168 + friend class ThreadPoolWorker; 1.169 + 1.170 + // Initialized lazily. 1.171 + js::Vector<ThreadPoolWorker *, 8, SystemAllocPolicy> workers_; 1.172 + 1.173 + // The number of active workers. 
Should only be accessed under lock. 1.174 + uint32_t activeWorkers_; 1.175 + PRCondVar *joinBarrier_; 1.176 + 1.177 + // The current job. 1.178 + ParallelJob *job_; 1.179 + 1.180 +#ifdef DEBUG 1.181 + // Initialized at startup only. 1.182 + JSRuntime *const runtime_; 1.183 + 1.184 + // Number of stolen slices in the last parallel job. 1.185 + mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stolenSlices_; 1.186 +#endif 1.187 + 1.188 + // Number of pending slices in the current job. 1.189 + mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> pendingSlices_; 1.190 + 1.191 + // Whether the main thread is currently processing slices. 1.192 + bool isMainThreadActive_; 1.193 + 1.194 + bool lazyStartWorkers(JSContext *cx); 1.195 + void terminateWorkers(); 1.196 + void terminateWorkersAndReportOOM(JSContext *cx); 1.197 + void join(AutoLockMonitor &lock); 1.198 + void waitForWorkers(AutoLockMonitor &lock); 1.199 + ThreadPoolWorker *mainThreadWorker() { return workers_[0]; } 1.200 + 1.201 + public: 1.202 +#ifdef DEBUG 1.203 + static size_t offsetOfStolenSlices() { 1.204 + return offsetof(ThreadPool, stolenSlices_); 1.205 + } 1.206 +#endif 1.207 + static size_t offsetOfPendingSlices() { 1.208 + return offsetof(ThreadPool, pendingSlices_); 1.209 + } 1.210 + static size_t offsetOfWorkers() { 1.211 + return offsetof(ThreadPool, workers_); 1.212 + } 1.213 + 1.214 + static const uint16_t MAX_SLICE_ID = UINT16_MAX; 1.215 + 1.216 + ThreadPool(JSRuntime *rt); 1.217 + ~ThreadPool(); 1.218 + 1.219 + bool init(); 1.220 + 1.221 + // Return number of worker threads in the pool, counting the main thread. 1.222 + uint32_t numWorkers() const; 1.223 + 1.224 + // Returns whether we have any pending slices. 1.225 + bool hasWork() const { return pendingSlices_ != 0; } 1.226 + 1.227 + // Returns the current job. Must have one. 
1.228 + ParallelJob *job() const { 1.229 + MOZ_ASSERT(job_); 1.230 + return job_; 1.231 + } 1.232 + 1.233 + // Returns whether or not the scheduler should perform work stealing. 1.234 + bool workStealing() const; 1.235 + 1.236 + // Returns whether or not the main thread is working. 1.237 + bool isMainThreadActive() const { return isMainThreadActive_; } 1.238 + 1.239 +#ifdef DEBUG 1.240 + // Return the number of stolen slices in the last parallel job. 1.241 + uint16_t stolenSlices() { return stolenSlices_; } 1.242 +#endif 1.243 + 1.244 + // Wait until all worker threads have finished their current set 1.245 + // of slices and then return. You must not submit new jobs after 1.246 + // invoking |terminate()|. 1.247 + void terminate(); 1.248 + 1.249 + // Execute the given ParallelJob using the main thread and any available worker. 1.250 + // Blocks until the main thread has completed execution. 1.251 + ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart, 1.252 + uint16_t numSlices); 1.253 + 1.254 + // Abort the current job. 1.255 + void abortJob(); 1.256 +}; 1.257 + 1.258 +} // namespace js 1.259 + 1.260 +#endif /* vm_ThreadPool_h */