js/src/vm/ThreadPool.h

changeset 0:6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/vm/ThreadPool.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,257 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99:
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#ifndef vm_ThreadPool_h
    1.11 +#define vm_ThreadPool_h
    1.12 +
    1.13 +#include "mozilla/Atomics.h"
    1.14 +
    1.15 +#include "jsalloc.h"
    1.16 +#include "jslock.h"
    1.17 +#include "jsmath.h"
    1.18 +#include "jspubtd.h"
    1.19 +
    1.20 +#include "js/Vector.h"
    1.21 +#include "vm/Monitor.h"
    1.22 +
    1.23 +struct JSRuntime;
    1.24 +struct JSCompartment;
    1.25 +
    1.26 +namespace js {
    1.27 +
    1.28 +class ThreadPool;
    1.29 +
    1.30 +/////////////////////////////////////////////////////////////////////////////
    1.31 +// ThreadPoolWorker
    1.32 +//
    1.33 +// Class for worker threads in the pool. All threads (i.e. helpers and main
     1.34 +// thread) have a worker associated with them. By convention, the worker id of
    1.35 +// the main thread is 0.
    1.36 +
    1.37 +class ThreadPoolWorker
    1.38 +{
    1.39 +    const uint32_t workerId_;
    1.40 +    ThreadPool *pool_;
    1.41 +
    1.42 +    // Slices this thread is responsible for.
    1.43 +    //
     1.44 +    // This is a uint32 composed of two uint16s (the lower and upper bounds) so
    1.45 +    // that we may do a single CAS. See {Compose,Decompose}SliceBounds
    1.46 +    // functions below.
    1.47 +    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sliceBounds_;
    1.48 +
    1.49 +    // Current point in the worker's lifecycle.
    1.50 +    volatile enum WorkerState {
    1.51 +        CREATED, ACTIVE, TERMINATED
    1.52 +    } state_;
    1.53 +
    1.54 +    // Per-worker scheduler RNG state used for picking a random worker during
    1.55 +    // work stealing.
    1.56 +    uint32_t schedulerRNGState_;
    1.57 +
    1.58 +    // The thread's main function.
    1.59 +    static void HelperThreadMain(void *arg);
    1.60 +    void helperLoop();
    1.61 +
    1.62 +    bool hasWork() const;
    1.63 +    bool popSliceFront(uint16_t *sliceId);
    1.64 +    bool popSliceBack(uint16_t *sliceId);
    1.65 +    bool stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId);
    1.66 +
    1.67 +    // Get a worker at random from the pool using our own thread-local RNG
    1.68 +    // state. This is a weak, but very fast, random function [1]. We choose
    1.69 +    // [a,b,c] = 11,21,13.
    1.70 +    //
    1.71 +    // [1] http://www.jstatsoft.org/v08/i14/paper
    1.72 +  public:
    1.73 +    static const uint32_t XORSHIFT_A = 11;
    1.74 +    static const uint32_t XORSHIFT_B = 21;
    1.75 +    static const uint32_t XORSHIFT_C = 13;
    1.76 +
    1.77 +  private:
    1.78 +    ThreadPoolWorker *randomWorker();
    1.79 +
    1.80 +  public:
    1.81 +    ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool);
    1.82 +
    1.83 +    uint32_t id() const { return workerId_; }
    1.84 +    bool isMainThread() const { return id() == 0; }
    1.85 +
    1.86 +    // Submits a new set of slices. Assumes !hasWork().
    1.87 +    void submitSlices(uint16_t sliceStart, uint16_t sliceEnd);
    1.88 +
    1.89 +    // Get the next slice; work stealing happens here if work stealing is
    1.90 +    // on. Returns false if there are no more slices to hand out.
    1.91 +    bool getSlice(ForkJoinContext *cx, uint16_t *sliceId);
    1.92 +
    1.93 +    // Discard remaining slices. Used for aborting jobs.
    1.94 +    void discardSlices();
    1.95 +
    1.96 +    // Invoked from the main thread; signals worker to start.
    1.97 +    bool start();
    1.98 +
    1.99 +    // Invoked from the main thread; signals the worker loop to return.
   1.100 +    void terminate(AutoLockMonitor &lock);
   1.101 +
   1.102 +    static size_t offsetOfSliceBounds() {
   1.103 +        return offsetof(ThreadPoolWorker, sliceBounds_);
   1.104 +    }
   1.105 +
   1.106 +    static size_t offsetOfSchedulerRNGState() {
   1.107 +        return offsetof(ThreadPoolWorker, schedulerRNGState_);
   1.108 +    }
   1.109 +};
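
As background for the |sliceBounds_| comment above: packing the lower and upper slice bounds into one uint32 lets a worker claim a slice with a single compare-and-swap. Below is a minimal sketch, assuming packing helpers named after the {Compose,Decompose}SliceBounds functions the comment mentions; the authoritative definitions live in ThreadPool.cpp, and the real popSliceFront also updates the pool's pending-slice count.

    // Illustrative sketch only; see ThreadPool.cpp for the real definitions.
    static uint32_t ComposeSliceBounds(uint16_t from, uint16_t to)
    {
        return (uint32_t(from) << 16) | to;
    }

    static void DecomposeSliceBounds(uint32_t bounds, uint16_t *from, uint16_t *to)
    {
        *from = uint16_t(bounds >> 16);
        *to = uint16_t(bounds & 0xffff);
    }

    bool ThreadPoolWorker::popSliceFront(uint16_t *sliceId)
    {
        uint32_t bounds;
        uint16_t from, to;
        do {
            // Re-read the packed bounds on every retry; another thread may
            // have stolen from the back in the meantime.
            bounds = sliceBounds_;
            DecomposeSliceBounds(bounds, &from, &to);
            if (from == to)
                return false;
        } while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from + 1, to)));
        *sliceId = from;
        return true;
    }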
   1.110 +
   1.111 +/////////////////////////////////////////////////////////////////////////////
   1.112 +// A ParallelJob is the main runnable abstraction in the ThreadPool.
   1.113 +//
   1.114 +// The unit of work here is in terms of threads, *not* slices. The
   1.115 +// user-provided function has the responsibility of getting slices of work via
   1.116 +// the |ForkJoinGetSlice| intrinsic.
   1.117 +
   1.118 +class ParallelJob
   1.119 +{
   1.120 +  public:
   1.121 +    virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) = 0;
   1.122 +    virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) = 0;
   1.123 +};
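
To illustrate the interface (this example is not part of the patch), a minimal job only needs to override the two entry points; real jobs such as |ForkJoinShared| in ForkJoin.cpp additionally loop over slice ids from within those entry points, as the comment above describes.

    // Hypothetical example for illustration only.
    class CountingJob : public ParallelJob
    {
        // How many threads (helpers plus the main thread) ran the job.
        mozilla::Atomic<uint32_t> threadsRan_;

      public:
        CountingJob() : threadsRan_(0) {}

        virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) {
            threadsRan_++;
            return true;   // returning false reports failure back to executeJob()
        }

        virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) {
            threadsRan_++;
            return true;
        }

        uint32_t threadsRan() const { return threadsRan_; }
    };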
   1.124 +
   1.125 +/////////////////////////////////////////////////////////////////////////////
   1.126 +// ThreadPool used for parallel JavaScript execution. Unless you are building
   1.127 +// a new kind of parallel service, it is very likely that you do not wish to
   1.128 +// interact with the threadpool directly. In particular, if you wish to
   1.129 +// execute JavaScript in parallel, you probably want to look at |js::ForkJoin|
    1.130 +// in |ForkJoin.cpp|.
   1.131 +//
   1.132 +// The ThreadPool always maintains a fixed pool of worker threads.  You can
   1.133 +// query the number of worker threads via the method |numWorkers()|.  Note
   1.134 +// that this number may be zero (generally if threads are disabled, or when
   1.135 +// manually specified for benchmarking purposes).
   1.136 +//
   1.137 +// The way to submit a job is using |executeJob()|---in this case, the job
   1.138 +// will be executed by all worker threads, including the main thread. This
   1.139 +// does not fail if there are no worker threads, it simply runs all the work
   1.140 +// using the main thread only.
   1.141 +//
    1.142 +// Of course, each thread may have any number of previously submitted things
    1.143 +// that it is already working on, and so it will finish those before it gets
    1.144 +// to this job.  Therefore it is possible to have some worker threads pick
   1.145 +// up (and even finish) their piece of the job before others have even
   1.146 +// started. The main thread is also used by the pool as a worker thread.
   1.147 +//
   1.148 +// The ThreadPool supports work stealing. Every time a worker completes all
   1.149 +// the slices in its local queue, it tries to acquire some work from other
   1.150 +// workers (including the main thread).  Execution terminates when there is no
   1.151 +// work left to be done, i.e., when all the workers have an empty queue. The
   1.152 +// stealing algorithm operates in 2 phases: (1) workers process all the slices
   1.153 +// in their local queue, and then (2) workers try to steal from other peers.
   1.154 +// Since workers start to steal only *after* they have completed all the
   1.155 +// slices in their queue, the design is particularly convenient in the context
   1.156 +// of Fork/Join-like parallelism, where workers receive a bunch of slices to
   1.157 +// be done at the very beginning of the job, and have to wait until all the
   1.158 +// threads have joined back. During phase (1) there is no synchronization
   1.159 +// overhead between workers introduced by the stealing algorithm, and
   1.160 +// therefore the execution overhead introduced is almost zero with balanced
   1.161 +// workloads. The way a |ParallelJob| is divided into multiple slices has to
   1.162 +// be specified by the instance implementing the job (e.g., |ForkJoinShared|
   1.163 +// in |ForkJoin.cpp|).
   1.164 +
   1.165 +class ThreadPool : public Monitor
   1.166 +{
   1.167 +  private:
   1.168 +    friend class ThreadPoolWorker;
   1.169 +
   1.170 +    // Initialized lazily.
   1.171 +    js::Vector<ThreadPoolWorker *, 8, SystemAllocPolicy> workers_;
   1.172 +
    1.173 +    // The number of active workers. Should only be accessed under lock.
   1.174 +    uint32_t activeWorkers_;
   1.175 +    PRCondVar *joinBarrier_;
   1.176 +
   1.177 +    // The current job.
   1.178 +    ParallelJob *job_;
   1.179 +
   1.180 +#ifdef DEBUG
   1.181 +    // Initialized at startup only.
   1.182 +    JSRuntime *const runtime_;
   1.183 +
   1.184 +    // Number of stolen slices in the last parallel job.
   1.185 +    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stolenSlices_;
   1.186 +#endif
   1.187 +
   1.188 +    // Number of pending slices in the current job.
   1.189 +    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> pendingSlices_;
   1.190 +
   1.191 +    // Whether the main thread is currently processing slices.
   1.192 +    bool isMainThreadActive_;
   1.193 +
   1.194 +    bool lazyStartWorkers(JSContext *cx);
   1.195 +    void terminateWorkers();
   1.196 +    void terminateWorkersAndReportOOM(JSContext *cx);
   1.197 +    void join(AutoLockMonitor &lock);
   1.198 +    void waitForWorkers(AutoLockMonitor &lock);
   1.199 +    ThreadPoolWorker *mainThreadWorker() { return workers_[0]; }
   1.200 +
   1.201 +  public:
   1.202 +#ifdef DEBUG
   1.203 +    static size_t offsetOfStolenSlices() {
   1.204 +        return offsetof(ThreadPool, stolenSlices_);
   1.205 +    }
   1.206 +#endif
   1.207 +    static size_t offsetOfPendingSlices() {
   1.208 +        return offsetof(ThreadPool, pendingSlices_);
   1.209 +    }
   1.210 +    static size_t offsetOfWorkers() {
   1.211 +        return offsetof(ThreadPool, workers_);
   1.212 +    }
   1.213 +
   1.214 +    static const uint16_t MAX_SLICE_ID = UINT16_MAX;
   1.215 +
   1.216 +    ThreadPool(JSRuntime *rt);
   1.217 +    ~ThreadPool();
   1.218 +
   1.219 +    bool init();
   1.220 +
   1.221 +    // Return number of worker threads in the pool, counting the main thread.
   1.222 +    uint32_t numWorkers() const;
   1.223 +
   1.224 +    // Returns whether we have any pending slices.
   1.225 +    bool hasWork() const { return pendingSlices_ != 0; }
   1.226 +
   1.227 +    // Returns the current job. Must have one.
   1.228 +    ParallelJob *job() const {
   1.229 +        MOZ_ASSERT(job_);
   1.230 +        return job_;
   1.231 +    }
   1.232 +
   1.233 +    // Returns whether or not the scheduler should perform work stealing.
   1.234 +    bool workStealing() const;
   1.235 +
   1.236 +    // Returns whether or not the main thread is working.
   1.237 +    bool isMainThreadActive() const { return isMainThreadActive_; }
   1.238 +
   1.239 +#ifdef DEBUG
   1.240 +    // Return the number of stolen slices in the last parallel job.
   1.241 +    uint16_t stolenSlices() { return stolenSlices_; }
   1.242 +#endif
   1.243 +
   1.244 +    // Wait until all worker threads have finished their current set
   1.245 +    // of slices and then return.  You must not submit new jobs after
   1.246 +    // invoking |terminate()|.
   1.247 +    void terminate();
   1.248 +
    1.249 +    // Execute the given ParallelJob using the main thread and any available workers.
   1.250 +    // Blocks until the main thread has completed execution.
   1.251 +    ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart,
   1.252 +                              uint16_t numSlices);
   1.253 +
   1.254 +    // Abort the current job.
   1.255 +    void abortJob();
   1.256 +};
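
To make the two-phase stealing scheme described above concrete, here is a rough sketch (assumed, not taken from the patch) of how |randomWorker| and |getSlice| could fit together: one Marsaglia xorshift step over the per-worker RNG state, using the XORSHIFT_{A,B,C} constants, picks a victim, and stealing only begins once the local queue is empty. The authoritative versions live in ThreadPool.cpp.

    // Illustrative sketch only; see ThreadPool.cpp for the real logic.
    ThreadPoolWorker *
    ThreadPoolWorker::randomWorker()
    {
        // One xorshift step, then map the result onto a worker index
        // (the pool includes the main thread's worker at index 0).
        uint32_t x = schedulerRNGState_;
        x ^= x << XORSHIFT_A;
        x ^= x >> XORSHIFT_B;
        x ^= x << XORSHIFT_C;
        schedulerRNGState_ = x;
        return pool_->workers_[x % pool_->numWorkers()];
    }

    bool
    ThreadPoolWorker::getSlice(ForkJoinContext *cx, uint16_t *sliceId)
    {
        // Phase 1: drain the local queue. No cross-worker synchronization is
        // needed here beyond the CAS on this worker's own sliceBounds_.
        if (popSliceFront(sliceId))
            return true;

        // Phase 2: the local queue is empty, so steal from randomly chosen
        // victims while the pool still has pending slices anywhere.
        if (!pool_->workStealing())
            return false;
        while (pool_->hasWork()) {
            if (stealFrom(randomWorker(), sliceId))
                return true;
        }
        return false;
    }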
   1.257 +
   1.258 +} // namespace js
   1.259 +
   1.260 +#endif /* vm_ThreadPool_h */
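
Finally, a hypothetical caller (not part of this patch, reusing the CountingJob sketched earlier and assuming the TP_SUCCESS value of the ParallelResult enum from jspubtd.h) would drive the pool roughly as follows; in practice JavaScript-level users go through |js::ForkJoin| rather than submitting jobs themselves.

    // Hypothetical usage sketch; the slice count of 8 is arbitrary.
    static bool
    RunCountingJob(JSContext *cx, js::ThreadPool &pool)
    {
        CountingJob job;   // the hypothetical job sketched above
        // Every worker, including the main thread, runs the job's entry point
        // and competes for slice ids in [0, 8).
        return pool.executeJob(cx, &job, 0, 8) == js::TP_SUCCESS;
    }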
