|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
|
2 * vim: set ts=8 sts=4 et sw=4 tw=99: |
|
3 * This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #ifndef vm_ThreadPool_h |
|
8 #define vm_ThreadPool_h |
|
9 |
|
10 #include "mozilla/Atomics.h" |
|
11 |
|
12 #include "jsalloc.h" |
|
13 #include "jslock.h" |
|
14 #include "jsmath.h" |
|
15 #include "jspubtd.h" |
|
16 |
|
17 #include "js/Vector.h" |
|
18 #include "vm/Monitor.h" |
|
19 |
|
20 struct JSRuntime; |
|
21 struct JSCompartment; |
|
22 |
|
23 namespace js { |
|
24 |
|
25 class ThreadPool; |
|
26 |
|
27 ///////////////////////////////////////////////////////////////////////////// |
|
28 // ThreadPoolWorker |
|
29 // |
|
30 // Class for worker threads in the pool. All threads (i.e. helpers and main |
|
31 // thread) have a worker associated with them. By convention, the worker id of |
|
32 // the main thread is 0. |
|
33 |
|
// A worker in the thread pool. Helper threads and the main thread each have
// one; the main thread's worker has id 0 (see isMainThread()).
class ThreadPoolWorker
{
    // Immutable id of this worker within the pool; 0 is the main thread.
    const uint32_t workerId_;

    // Back-pointer to the owning pool (not owned by this worker).
    ThreadPool *pool_;

    // Slices this thread is responsible for.
    //
    // This is a uint32 composed of two uint16s (the lower and upper bounds) so
    // that we may do a single CAS. See {Compose,Decompose}SliceBounds
    // functions below.
    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sliceBounds_;

    // Current point in the worker's lifecycle.
    volatile enum WorkerState {
        CREATED, ACTIVE, TERMINATED
    } state_;

    // Per-worker scheduler RNG state used for picking a random worker during
    // work stealing.
    uint32_t schedulerRNGState_;

    // The thread's main function. HelperThreadMain is the raw thread entry
    // point (|arg| is presumably the ThreadPoolWorker* — confirm in the .cpp);
    // helperLoop is the worker's run loop.
    static void HelperThreadMain(void *arg);
    void helperLoop();

    // Local slice-queue operations; declared here, defined in the .cpp.
    // popSliceFront/popSliceBack/stealFrom report the taken slice via
    // |sliceId| and return false when no slice was available.
    bool hasWork() const;
    bool popSliceFront(uint16_t *sliceId);
    bool popSliceBack(uint16_t *sliceId);
    bool stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId);

    // Get a worker at random from the pool using our own thread-local RNG
    // state. This is a weak, but very fast, random function [1]. We choose
    // [a,b,c] = 11,21,13.
    //
    // [1] http://www.jstatsoft.org/v08/i14/paper
  public:
    // Xorshift RNG constants, public so tests/JIT code can share them.
    static const uint32_t XORSHIFT_A = 11;
    static const uint32_t XORSHIFT_B = 21;
    static const uint32_t XORSHIFT_C = 13;

  private:
    ThreadPoolWorker *randomWorker();

  public:
    // |rngSeed| seeds schedulerRNGState_ for the work-stealing scheduler.
    ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool);

    uint32_t id() const { return workerId_; }
    bool isMainThread() const { return id() == 0; }

    // Submits a new set of slices. Assumes !hasWork().
    void submitSlices(uint16_t sliceStart, uint16_t sliceEnd);

    // Get the next slice; work stealing happens here if work stealing is
    // on. Returns false if there are no more slices to hand out.
    bool getSlice(ForkJoinContext *cx, uint16_t *sliceId);

    // Discard remaining slices. Used for aborting jobs.
    void discardSlices();

    // Invoked from the main thread; signals worker to start.
    bool start();

    // Invoked from the main thread; signals the worker loop to return.
    // Caller must already hold the pool's monitor lock (|lock|).
    void terminate(AutoLockMonitor &lock);

    // Raw field offsets — NOTE(review): presumably consumed by JIT-generated
    // code that reads these atomics directly; confirm at the call sites.
    static size_t offsetOfSliceBounds() {
        return offsetof(ThreadPoolWorker, sliceBounds_);
    }

    static size_t offsetOfSchedulerRNGState() {
        return offsetof(ThreadPoolWorker, schedulerRNGState_);
    }
};
|
107 |
|
108 ///////////////////////////////////////////////////////////////////////////// |
|
109 // A ParallelJob is the main runnable abstraction in the ThreadPool. |
|
110 // |
|
111 // The unit of work here is in terms of threads, *not* slices. The |
|
112 // user-provided function has the responsibility of getting slices of work via |
|
113 // the |ForkJoinGetSlice| intrinsic. |
|
114 |
|
// Abstract interface implemented by clients submitting work to the pool.
// Each worker invokes exactly one of these entry points; the job itself pulls
// slices via |ForkJoinGetSlice| (see the comment block above).
class ParallelJob
{
  public:
    // Run on a helper thread's worker. |stackLimit| bounds the native stack
    // available to the job. Presumably returns false on failure/abort —
    // confirm against implementations (e.g. in ForkJoin.cpp).
    virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) = 0;

    // Run on the main thread's worker (worker id 0).
    virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) = 0;
};
|
121 |
|
122 ///////////////////////////////////////////////////////////////////////////// |
|
123 // ThreadPool used for parallel JavaScript execution. Unless you are building |
|
124 // a new kind of parallel service, it is very likely that you do not wish to |
|
125 // interact with the threadpool directly. In particular, if you wish to |
|
126 // execute JavaScript in parallel, you probably want to look at |js::ForkJoin| |
|
127 // in |ForkJoin.cpp|. |
|
128 // |
|
129 // The ThreadPool always maintains a fixed pool of worker threads. You can |
|
130 // query the number of worker threads via the method |numWorkers()|. Note |
|
131 // that this number may be zero (generally if threads are disabled, or when |
|
132 // manually specified for benchmarking purposes). |
|
133 // |
|
134 // The way to submit a job is using |executeJob()|---in this case, the job |
|
135 // will be executed by all worker threads, including the main thread. This |
|
136 // does not fail if there are no worker threads, it simply runs all the work |
|
137 // using the main thread only. |
|
138 // |
|
139 // Of course, each thread may have any number of previously submitted things |
|
140 // that they are already working on, and so they will finish those before they |
|
141 // get to this job. Therefore it is possible to have some worker threads pick |
|
142 // up (and even finish) their piece of the job before others have even |
|
143 // started. The main thread is also used by the pool as a worker thread. |
|
144 // |
|
145 // The ThreadPool supports work stealing. Every time a worker completes all |
|
146 // the slices in its local queue, it tries to acquire some work from other |
|
147 // workers (including the main thread). Execution terminates when there is no |
|
148 // work left to be done, i.e., when all the workers have an empty queue. The |
|
149 // stealing algorithm operates in 2 phases: (1) workers process all the slices |
|
150 // in their local queue, and then (2) workers try to steal from other peers. |
|
151 // Since workers start to steal only *after* they have completed all the |
|
152 // slices in their queue, the design is particularly convenient in the context |
|
153 // of Fork/Join-like parallelism, where workers receive a bunch of slices to |
|
154 // be done at the very beginning of the job, and have to wait until all the |
|
155 // threads have joined back. During phase (1) there is no synchronization |
|
156 // overhead between workers introduced by the stealing algorithm, and |
|
157 // therefore the execution overhead introduced is almost zero with balanced |
|
158 // workloads. The way a |ParallelJob| is divided into multiple slices has to |
|
159 // be specified by the instance implementing the job (e.g., |ForkJoinShared| |
|
160 // in |ForkJoin.cpp|). |
|
161 |
|
// Fixed-size pool of worker threads (including the main thread as worker 0)
// with work stealing; see the long comment block above for the scheduling
// design. Inherits its lock/condvar machinery from Monitor.
class ThreadPool : public Monitor
{
  private:
    friend class ThreadPoolWorker;

    // Initialized lazily.
    js::Vector<ThreadPoolWorker *, 8, SystemAllocPolicy> workers_;

    // The number of active workers. Should only access under lock.
    uint32_t activeWorkers_;

    // Condition variable for joining a job — presumably signaled as workers
    // finish (see join()/waitForWorkers()); confirm in the .cpp.
    PRCondVar *joinBarrier_;

    // The current job.
    ParallelJob *job_;

#ifdef DEBUG
    // Initialized at startup only.
    JSRuntime *const runtime_;

    // Number of stolen slices in the last parallel job.
    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stolenSlices_;
#endif

    // Number of pending slices in the current job.
    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> pendingSlices_;

    // Whether the main thread is currently processing slices.
    bool isMainThreadActive_;

    // Instantiates the helper threads on first use (workers_ is lazily
    // initialized, per the comment above); |cx| is used for error reporting.
    bool lazyStartWorkers(JSContext *cx);
    void terminateWorkers();
    void terminateWorkersAndReportOOM(JSContext *cx);
    void join(AutoLockMonitor &lock);
    void waitForWorkers(AutoLockMonitor &lock);

    // The main thread's worker always occupies slot 0 (worker id 0).
    ThreadPoolWorker *mainThreadWorker() { return workers_[0]; }

  public:
    // Raw field offsets — NOTE(review): presumably consumed by JIT-generated
    // code reading these fields directly; confirm at the call sites.
#ifdef DEBUG
    static size_t offsetOfStolenSlices() {
        return offsetof(ThreadPool, stolenSlices_);
    }
#endif
    static size_t offsetOfPendingSlices() {
        return offsetof(ThreadPool, pendingSlices_);
    }
    static size_t offsetOfWorkers() {
        return offsetof(ThreadPool, workers_);
    }

    // Slice ids are uint16s, so this is the largest representable slice id.
    static const uint16_t MAX_SLICE_ID = UINT16_MAX;

    ThreadPool(JSRuntime *rt);
    ~ThreadPool();

    bool init();

    // Return number of worker threads in the pool, counting the main thread.
    uint32_t numWorkers() const;

    // Returns whether we have any pending slices.
    bool hasWork() const { return pendingSlices_ != 0; }

    // Returns the current job. Must have one.
    ParallelJob *job() const {
        MOZ_ASSERT(job_);
        return job_;
    }

    // Returns whether or not the scheduler should perform work stealing.
    bool workStealing() const;

    // Returns whether or not the main thread is working.
    bool isMainThreadActive() const { return isMainThreadActive_; }

#ifdef DEBUG
    // Return the number of stolen slices in the last parallel job.
    // NOTE(review): stolenSlices_ is a uint32 atomic, so the value is
    // implicitly truncated to uint16 here — confirm this is intentional
    // (it is bounded by MAX_SLICE_ID in practice?).
    uint16_t stolenSlices() { return stolenSlices_; }
#endif

    // Wait until all worker threads have finished their current set
    // of slices and then return. You must not submit new jobs after
    // invoking |terminate()|.
    void terminate();

    // Execute the given ParallelJob using the main thread and any available worker.
    // Blocks until the main thread has completed execution. Slices handed out
    // are in [sliceStart, sliceStart + numSlices).
    ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart,
                              uint16_t numSlices);

    // Abort the current job.
    void abortJob();
};
|
254 |
|
255 } // namespace js |
|
256 |
|
257 #endif /* vm_ThreadPool_h */ |