js/src/vm/ThreadPool.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0 3 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include "vm/ThreadPool.h"
michael@0 8
michael@0 9 #include "mozilla/Atomics.h"
michael@0 10
michael@0 11 #include "jslock.h"
michael@0 12
michael@0 13 #include "vm/ForkJoin.h"
michael@0 14 #include "vm/Monitor.h"
michael@0 15 #include "vm/Runtime.h"
michael@0 16
michael@0 17 using namespace js;
michael@0 18
michael@0 19 const size_t WORKER_THREAD_STACK_SIZE = 1*1024*1024;
michael@0 20
michael@0 21 static inline uint32_t
michael@0 22 ComposeSliceBounds(uint16_t from, uint16_t to)
michael@0 23 {
michael@0 24 MOZ_ASSERT(from <= to);
michael@0 25 return (uint32_t(from) << 16) | to;
michael@0 26 }
michael@0 27
michael@0 28 static inline void
michael@0 29 DecomposeSliceBounds(uint32_t bounds, uint16_t *from, uint16_t *to)
michael@0 30 {
michael@0 31 *from = bounds >> 16;
michael@0 32 *to = bounds & uint16_t(~0);
michael@0 33 MOZ_ASSERT(*from <= *to);
michael@0 34 }
michael@0 35
michael@0 36 ThreadPoolWorker::ThreadPoolWorker(uint32_t workerId, uint32_t rngSeed, ThreadPool *pool)
michael@0 37 : workerId_(workerId),
michael@0 38 pool_(pool),
michael@0 39 sliceBounds_(0),
michael@0 40 state_(CREATED),
michael@0 41 schedulerRNGState_(rngSeed)
michael@0 42 { }
michael@0 43
michael@0 44 bool
michael@0 45 ThreadPoolWorker::hasWork() const
michael@0 46 {
michael@0 47 uint16_t from, to;
michael@0 48 DecomposeSliceBounds(sliceBounds_, &from, &to);
michael@0 49 return from != to;
michael@0 50 }
michael@0 51
michael@0 52 bool
michael@0 53 ThreadPoolWorker::popSliceFront(uint16_t *sliceId)
michael@0 54 {
michael@0 55 uint32_t bounds;
michael@0 56 uint16_t from, to;
michael@0 57 do {
michael@0 58 bounds = sliceBounds_;
michael@0 59 DecomposeSliceBounds(bounds, &from, &to);
michael@0 60 if (from == to)
michael@0 61 return false;
michael@0 62 } while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from + 1, to)));
michael@0 63
michael@0 64 *sliceId = from;
michael@0 65 pool_->pendingSlices_--;
michael@0 66 return true;
michael@0 67 }
michael@0 68
michael@0 69 bool
michael@0 70 ThreadPoolWorker::popSliceBack(uint16_t *sliceId)
michael@0 71 {
michael@0 72 uint32_t bounds;
michael@0 73 uint16_t from, to;
michael@0 74 do {
michael@0 75 bounds = sliceBounds_;
michael@0 76 DecomposeSliceBounds(bounds, &from, &to);
michael@0 77 if (from == to)
michael@0 78 return false;
michael@0 79 } while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from, to - 1)));
michael@0 80
michael@0 81 *sliceId = to - 1;
michael@0 82 pool_->pendingSlices_--;
michael@0 83 return true;
michael@0 84 }
michael@0 85
michael@0 86 void
michael@0 87 ThreadPoolWorker::discardSlices()
michael@0 88 {
michael@0 89 uint32_t bounds;
michael@0 90 uint16_t from, to;
michael@0 91 do {
michael@0 92 bounds = sliceBounds_;
michael@0 93 DecomposeSliceBounds(bounds, &from, &to);
michael@0 94 } while (!sliceBounds_.compareExchange(bounds, 0));
michael@0 95
michael@0 96 pool_->pendingSlices_ -= to - from;
michael@0 97 }
michael@0 98
michael@0 99 bool
michael@0 100 ThreadPoolWorker::stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId)
michael@0 101 {
michael@0 102 // Instead of popping the slice from the front by incrementing sliceStart_,
michael@0 103 // decrement sliceEnd_. Usually this gives us better locality.
michael@0 104 if (!victim->popSliceBack(sliceId))
michael@0 105 return false;
michael@0 106 #ifdef DEBUG
michael@0 107 pool_->stolenSlices_++;
michael@0 108 #endif
michael@0 109 return true;
michael@0 110 }
michael@0 111
michael@0 112 ThreadPoolWorker *
michael@0 113 ThreadPoolWorker::randomWorker()
michael@0 114 {
michael@0 115 // Perform 32-bit xorshift.
michael@0 116 uint32_t x = schedulerRNGState_;
michael@0 117 x ^= x << XORSHIFT_A;
michael@0 118 x ^= x >> XORSHIFT_B;
michael@0 119 x ^= x << XORSHIFT_C;
michael@0 120 schedulerRNGState_ = x;
michael@0 121 return pool_->workers_[x % pool_->numWorkers()];
michael@0 122 }
michael@0 123
michael@0 124 bool
michael@0 125 ThreadPoolWorker::start()
michael@0 126 {
michael@0 127 #ifndef JS_THREADSAFE
michael@0 128 return false;
michael@0 129 #else
michael@0 130 if (isMainThread())
michael@0 131 return true;
michael@0 132
michael@0 133 MOZ_ASSERT(state_ == CREATED);
michael@0 134
michael@0 135 // Set state to active now, *before* the thread starts:
michael@0 136 state_ = ACTIVE;
michael@0 137
michael@0 138 if (!PR_CreateThread(PR_USER_THREAD,
michael@0 139 HelperThreadMain, this,
michael@0 140 PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD,
michael@0 141 PR_UNJOINABLE_THREAD,
michael@0 142 WORKER_THREAD_STACK_SIZE))
michael@0 143 {
michael@0 144 // If the thread failed to start, call it TERMINATED.
michael@0 145 state_ = TERMINATED;
michael@0 146 return false;
michael@0 147 }
michael@0 148
michael@0 149 return true;
michael@0 150 #endif
michael@0 151 }
michael@0 152
michael@0 153 void
michael@0 154 ThreadPoolWorker::HelperThreadMain(void *arg)
michael@0 155 {
michael@0 156 ThreadPoolWorker *worker = (ThreadPoolWorker*) arg;
michael@0 157 worker->helperLoop();
michael@0 158 }
michael@0 159
michael@0 160 void
michael@0 161 ThreadPoolWorker::helperLoop()
michael@0 162 {
michael@0 163 MOZ_ASSERT(!isMainThread());
michael@0 164
michael@0 165 // This is hokey in the extreme. To compute the stack limit,
michael@0 166 // subtract the size of the stack from the address of a local
michael@0 167 // variable and give a 100k buffer. Is there a better way?
michael@0 168 // (Note: 2k proved to be fine on Mac, but too little on Linux)
michael@0 169 uintptr_t stackLimitOffset = WORKER_THREAD_STACK_SIZE - 100*1024;
michael@0 170 uintptr_t stackLimit = (((uintptr_t)&stackLimitOffset) +
michael@0 171 stackLimitOffset * JS_STACK_GROWTH_DIRECTION);
michael@0 172
michael@0 173
michael@0 174 for (;;) {
michael@0 175 // Wait for work to arrive or for us to terminate.
michael@0 176 {
michael@0 177 AutoLockMonitor lock(*pool_);
michael@0 178 while (state_ == ACTIVE && !pool_->hasWork())
michael@0 179 lock.wait();
michael@0 180
michael@0 181 if (state_ == TERMINATED) {
michael@0 182 pool_->join(lock);
michael@0 183 return;
michael@0 184 }
michael@0 185
michael@0 186 pool_->activeWorkers_++;
michael@0 187 }
michael@0 188
michael@0 189 if (!pool_->job()->executeFromWorker(this, stackLimit))
michael@0 190 pool_->abortJob();
michael@0 191
michael@0 192 // Join the pool.
michael@0 193 {
michael@0 194 AutoLockMonitor lock(*pool_);
michael@0 195 pool_->join(lock);
michael@0 196 }
michael@0 197 }
michael@0 198 }
michael@0 199
michael@0 200 void
michael@0 201 ThreadPoolWorker::submitSlices(uint16_t sliceStart, uint16_t sliceEnd)
michael@0 202 {
michael@0 203 MOZ_ASSERT(!hasWork());
michael@0 204 sliceBounds_ = ComposeSliceBounds(sliceStart, sliceEnd);
michael@0 205 }
michael@0 206
michael@0 207 bool
michael@0 208 ThreadPoolWorker::getSlice(ForkJoinContext *cx, uint16_t *sliceId)
michael@0 209 {
michael@0 210 // First see whether we have any work ourself.
michael@0 211 if (popSliceFront(sliceId))
michael@0 212 return true;
michael@0 213
michael@0 214 // Try to steal work.
michael@0 215 if (!pool_->workStealing())
michael@0 216 return false;
michael@0 217
michael@0 218 do {
michael@0 219 if (!pool_->hasWork())
michael@0 220 return false;
michael@0 221 } while (!stealFrom(randomWorker(), sliceId));
michael@0 222
michael@0 223 return true;
michael@0 224 }
michael@0 225
michael@0 226 void
michael@0 227 ThreadPoolWorker::terminate(AutoLockMonitor &lock)
michael@0 228 {
michael@0 229 MOZ_ASSERT(lock.isFor(*pool_));
michael@0 230 MOZ_ASSERT(state_ != TERMINATED);
michael@0 231 state_ = TERMINATED;
michael@0 232 }
michael@0 233
michael@0 234 /////////////////////////////////////////////////////////////////////////////
michael@0 235 // ThreadPool
michael@0 236 //
michael@0 237 // The |ThreadPool| starts up workers, submits work to them, and shuts
michael@0 238 // them down when requested.
michael@0 239
michael@0 240 ThreadPool::ThreadPool(JSRuntime *rt)
michael@0 241 : activeWorkers_(0),
michael@0 242 joinBarrier_(nullptr),
michael@0 243 job_(nullptr),
michael@0 244 #ifdef DEBUG
michael@0 245 runtime_(rt),
michael@0 246 stolenSlices_(0),
michael@0 247 #endif
michael@0 248 pendingSlices_(0),
michael@0 249 isMainThreadActive_(false)
michael@0 250 { }
michael@0 251
michael@0 252 ThreadPool::~ThreadPool()
michael@0 253 {
michael@0 254 terminateWorkers();
michael@0 255 #ifdef JS_THREADSAFE
michael@0 256 if (joinBarrier_)
michael@0 257 PR_DestroyCondVar(joinBarrier_);
michael@0 258 #endif
michael@0 259 }
michael@0 260
michael@0 261 bool
michael@0 262 ThreadPool::init()
michael@0 263 {
michael@0 264 #ifdef JS_THREADSAFE
michael@0 265 if (!Monitor::init())
michael@0 266 return false;
michael@0 267 joinBarrier_ = PR_NewCondVar(lock_);
michael@0 268 return !!joinBarrier_;
michael@0 269 #else
michael@0 270 return true;
michael@0 271 #endif
michael@0 272 }
michael@0 273
michael@0 274 uint32_t
michael@0 275 ThreadPool::numWorkers() const
michael@0 276 {
michael@0 277 #ifdef JS_THREADSAFE
michael@0 278 return WorkerThreadState().cpuCount;
michael@0 279 #else
michael@0 280 return 1;
michael@0 281 #endif
michael@0 282 }
michael@0 283
michael@0 284 bool
michael@0 285 ThreadPool::workStealing() const
michael@0 286 {
michael@0 287 #ifdef DEBUG
michael@0 288 if (char *stealEnv = getenv("JS_THREADPOOL_STEAL"))
michael@0 289 return !!strtol(stealEnv, nullptr, 10);
michael@0 290 #endif
michael@0 291
michael@0 292 return true;
michael@0 293 }
michael@0 294
michael@0 295 extern uint64_t random_next(uint64_t *, int);
michael@0 296
michael@0 297 bool
michael@0 298 ThreadPool::lazyStartWorkers(JSContext *cx)
michael@0 299 {
michael@0 300 // Starts the workers if they have not already been started. If
michael@0 301 // something goes wrong, reports an error and ensures that all
michael@0 302 // partially started threads are terminated. Therefore, upon exit
michael@0 303 // from this function, the workers array is either full (upon
michael@0 304 // success) or empty (upon failure).
michael@0 305
michael@0 306 #ifdef JS_THREADSAFE
michael@0 307 if (!workers_.empty()) {
michael@0 308 MOZ_ASSERT(workers_.length() == numWorkers());
michael@0 309 return true;
michael@0 310 }
michael@0 311
michael@0 312 // Allocate workers array and then start the worker threads.
michael@0 313 // Note that numWorkers() is the number of *desired* workers,
michael@0 314 // but workers_.length() is the number of *successfully
michael@0 315 // initialized* workers.
michael@0 316 uint64_t rngState = 0;
michael@0 317 for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
michael@0 318 uint32_t rngSeed = uint32_t(random_next(&rngState, 32));
michael@0 319 ThreadPoolWorker *worker = cx->new_<ThreadPoolWorker>(workerId, rngSeed, this);
michael@0 320 if (!worker || !workers_.append(worker)) {
michael@0 321 terminateWorkersAndReportOOM(cx);
michael@0 322 return false;
michael@0 323 }
michael@0 324 }
michael@0 325
michael@0 326 for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
michael@0 327 if (!workers_[workerId]->start()) {
michael@0 328 // Note: do not delete worker here because it has been
michael@0 329 // added to the array and hence will be deleted by
michael@0 330 // |terminateWorkersAndReportOOM()|.
michael@0 331 terminateWorkersAndReportOOM(cx);
michael@0 332 return false;
michael@0 333 }
michael@0 334 }
michael@0 335 #endif
michael@0 336
michael@0 337 return true;
michael@0 338 }
michael@0 339
michael@0 340 void
michael@0 341 ThreadPool::terminateWorkersAndReportOOM(JSContext *cx)
michael@0 342 {
michael@0 343 terminateWorkers();
michael@0 344 MOZ_ASSERT(workers_.empty());
michael@0 345 js_ReportOutOfMemory(cx);
michael@0 346 }
michael@0 347
michael@0 348 void
michael@0 349 ThreadPool::terminateWorkers()
michael@0 350 {
michael@0 351 if (workers_.length() > 0) {
michael@0 352 AutoLockMonitor lock(*this);
michael@0 353
michael@0 354 // Signal to the workers they should quit.
michael@0 355 for (uint32_t i = 0; i < workers_.length(); i++)
michael@0 356 workers_[i]->terminate(lock);
michael@0 357
michael@0 358 // Wake up all the workers. Set the number of active workers to the
michael@0 359 // current number of workers so we can make sure they all join.
michael@0 360 activeWorkers_ = workers_.length() - 1;
michael@0 361 lock.notifyAll();
michael@0 362
michael@0 363 // Wait for all workers to join.
michael@0 364 waitForWorkers(lock);
michael@0 365
michael@0 366 while (workers_.length() > 0)
michael@0 367 js_delete(workers_.popCopy());
michael@0 368 }
michael@0 369 }
michael@0 370
michael@0 371 void
michael@0 372 ThreadPool::terminate()
michael@0 373 {
michael@0 374 terminateWorkers();
michael@0 375 }
michael@0 376
michael@0 377 void
michael@0 378 ThreadPool::join(AutoLockMonitor &lock)
michael@0 379 {
michael@0 380 MOZ_ASSERT(lock.isFor(*this));
michael@0 381 if (--activeWorkers_ == 0)
michael@0 382 lock.notify(joinBarrier_);
michael@0 383 }
michael@0 384
michael@0 385 void
michael@0 386 ThreadPool::waitForWorkers(AutoLockMonitor &lock)
michael@0 387 {
michael@0 388 MOZ_ASSERT(lock.isFor(*this));
michael@0 389 while (activeWorkers_ > 0)
michael@0 390 lock.wait(joinBarrier_);
michael@0 391 job_ = nullptr;
michael@0 392 }
michael@0 393
michael@0 394 ParallelResult
michael@0 395 ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart, uint16_t sliceMax)
michael@0 396 {
michael@0 397 MOZ_ASSERT(sliceStart < sliceMax);
michael@0 398 MOZ_ASSERT(CurrentThreadCanAccessRuntime(runtime_));
michael@0 399 MOZ_ASSERT(activeWorkers_ == 0);
michael@0 400 MOZ_ASSERT(!hasWork());
michael@0 401
michael@0 402 if (!lazyStartWorkers(cx))
michael@0 403 return TP_FATAL;
michael@0 404
michael@0 405 // Evenly distribute slices to the workers.
michael@0 406 uint16_t numSlices = sliceMax - sliceStart;
michael@0 407 uint16_t slicesPerWorker = numSlices / numWorkers();
michael@0 408 uint16_t leftover = numSlices % numWorkers();
michael@0 409 uint16_t sliceEnd = sliceStart;
michael@0 410 for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
michael@0 411 if (leftover > 0) {
michael@0 412 sliceEnd += slicesPerWorker + 1;
michael@0 413 leftover--;
michael@0 414 } else {
michael@0 415 sliceEnd += slicesPerWorker;
michael@0 416 }
michael@0 417 workers_[workerId]->submitSlices(sliceStart, sliceEnd);
michael@0 418 sliceStart = sliceEnd;
michael@0 419 }
michael@0 420 MOZ_ASSERT(leftover == 0);
michael@0 421
michael@0 422 // Notify the worker threads that there's work now.
michael@0 423 {
michael@0 424 job_ = job;
michael@0 425 pendingSlices_ = numSlices;
michael@0 426 #ifdef DEBUG
michael@0 427 stolenSlices_ = 0;
michael@0 428 #endif
michael@0 429 AutoLockMonitor lock(*this);
michael@0 430 lock.notifyAll();
michael@0 431 }
michael@0 432
michael@0 433 // Do work on the main thread.
michael@0 434 isMainThreadActive_ = true;
michael@0 435 if (!job->executeFromMainThread(mainThreadWorker()))
michael@0 436 abortJob();
michael@0 437 isMainThreadActive_ = false;
michael@0 438
michael@0 439 // Wait for all threads to join. While there are no pending slices at this
michael@0 440 // point, the slices themselves may not be finished processing.
michael@0 441 {
michael@0 442 AutoLockMonitor lock(*this);
michael@0 443 waitForWorkers(lock);
michael@0 444 }
michael@0 445
michael@0 446 // Guard against errors in the self-hosted slice processing function. If
michael@0 447 // we still have work at this point, it is the user function's fault.
michael@0 448 MOZ_ASSERT(!hasWork(), "User function did not process all the slices!");
michael@0 449
michael@0 450 // Everything went swimmingly. Give yourself a pat on the back.
michael@0 451 return TP_SUCCESS;
michael@0 452 }
michael@0 453
michael@0 454 void
michael@0 455 ThreadPool::abortJob()
michael@0 456 {
michael@0 457 for (uint32_t workerId = 0; workerId < numWorkers(); workerId++)
michael@0 458 workers_[workerId]->discardSlices();
michael@0 459
michael@0 460 // Spin until pendingSlices_ reaches 0.
michael@0 461 //
michael@0 462 // The reason for this is that while calling discardSlices() clears all
michael@0 463 // workers' bounds, the pendingSlices_ cache might still be > 0 due to
michael@0 464 // still-executing calls to popSliceBack or popSliceFront in other
michael@0 465 // threads. When those finish, we will be sure that !hasWork(), which is
michael@0 466 // important to ensure that an aborted worker does not start again due to
michael@0 467 // the thread pool having more work.
michael@0 468 while (hasWork());
michael@0 469 }

mercurial