1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/profiler/platform-macos.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,422 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#include <dlfcn.h> 1.9 +#include <unistd.h> 1.10 +#include <sys/mman.h> 1.11 +#include <mach/mach_init.h> 1.12 +#include <mach-o/dyld.h> 1.13 +#include <mach-o/getsect.h> 1.14 + 1.15 +#include <AvailabilityMacros.h> 1.16 + 1.17 +#include <pthread.h> 1.18 +#include <semaphore.h> 1.19 +#include <signal.h> 1.20 +#include <libkern/OSAtomic.h> 1.21 +#include <mach/mach.h> 1.22 +#include <mach/semaphore.h> 1.23 +#include <mach/task.h> 1.24 +#include <mach/vm_statistics.h> 1.25 +#include <sys/time.h> 1.26 +#include <sys/resource.h> 1.27 +#include <sys/types.h> 1.28 +#include <sys/sysctl.h> 1.29 +#include <stdarg.h> 1.30 +#include <stdlib.h> 1.31 +#include <string.h> 1.32 +#include <errno.h> 1.33 +#include <math.h> 1.34 + 1.35 +#include "nsThreadUtils.h" 1.36 + 1.37 +#include "platform.h" 1.38 +#include "TableTicker.h" 1.39 +#include "UnwinderThread2.h" /* uwt__register_thread_for_profiling */ 1.40 + 1.41 +// this port is based off of v8 svn revision 9837 1.42 + 1.43 +// XXX: this is a very stubbed out implementation 1.44 +// that only supports a single Sampler 1.45 +struct SamplerRegistry { 1.46 + static void AddActiveSampler(Sampler *sampler) { 1.47 + ASSERT(!SamplerRegistry::sampler); 1.48 + SamplerRegistry::sampler = sampler; 1.49 + } 1.50 + static void RemoveActiveSampler(Sampler *sampler) { 1.51 + SamplerRegistry::sampler = NULL; 1.52 + } 1.53 + static Sampler *sampler; 1.54 +}; 1.55 + 1.56 +Sampler *SamplerRegistry::sampler = NULL; 1.57 + 1.58 +// 0 is never a valid thread id on MacOSX since a ptread_t is 1.59 +// a pointer. 1.60 +static const pthread_t kNoThread = (pthread_t) 0; 1.61 + 1.62 +void OS::Startup() { 1.63 +} 1.64 + 1.65 +void OS::Sleep(int milliseconds) { 1.66 + usleep(1000 * milliseconds); 1.67 +} 1.68 + 1.69 +void OS::SleepMicro(int microseconds) { 1.70 + usleep(microseconds); 1.71 +} 1.72 + 1.73 +Thread::Thread(const char* name) 1.74 + : stack_size_(0) { 1.75 + set_name(name); 1.76 +} 1.77 + 1.78 + 1.79 +Thread::~Thread() { 1.80 +} 1.81 + 1.82 + 1.83 +static void SetThreadName(const char* name) { 1.84 + // pthread_setname_np is only available in 10.6 or later, so test 1.85 + // for it at runtime. 1.86 + int (*dynamic_pthread_setname_np)(const char*); 1.87 + *reinterpret_cast<void**>(&dynamic_pthread_setname_np) = 1.88 + dlsym(RTLD_DEFAULT, "pthread_setname_np"); 1.89 + if (!dynamic_pthread_setname_np) 1.90 + return; 1.91 + 1.92 + // Mac OS X does not expose the length limit of the name, so hardcode it. 1.93 + static const int kMaxNameLength = 63; 1.94 + USE(kMaxNameLength); 1.95 + ASSERT(Thread::kMaxThreadNameLength <= kMaxNameLength); 1.96 + dynamic_pthread_setname_np(name); 1.97 +} 1.98 + 1.99 + 1.100 +static void* ThreadEntry(void* arg) { 1.101 + Thread* thread = reinterpret_cast<Thread*>(arg); 1.102 + 1.103 + thread->thread_ = pthread_self(); 1.104 + SetThreadName(thread->name()); 1.105 + ASSERT(thread->thread_ != kNoThread); 1.106 + thread->Run(); 1.107 + return NULL; 1.108 +} 1.109 + 1.110 + 1.111 +void Thread::set_name(const char* name) { 1.112 + strncpy(name_, name, sizeof(name_)); 1.113 + name_[sizeof(name_) - 1] = '\0'; 1.114 +} 1.115 + 1.116 + 1.117 +void Thread::Start() { 1.118 + pthread_attr_t* attr_ptr = NULL; 1.119 + pthread_attr_t attr; 1.120 + if (stack_size_ > 0) { 1.121 + pthread_attr_init(&attr); 1.122 + pthread_attr_setstacksize(&attr, static_cast<size_t>(stack_size_)); 1.123 + attr_ptr = &attr; 1.124 + } 1.125 + pthread_create(&thread_, attr_ptr, ThreadEntry, this); 1.126 + ASSERT(thread_ != kNoThread); 1.127 +} 1.128 + 1.129 +void Thread::Join() { 1.130 + pthread_join(thread_, NULL); 1.131 +} 1.132 + 1.133 +class PlatformData : public Malloced { 1.134 + public: 1.135 + PlatformData() : profiled_thread_(mach_thread_self()) 1.136 + { 1.137 + profiled_pthread_ = pthread_from_mach_thread_np(profiled_thread_); 1.138 + } 1.139 + 1.140 + ~PlatformData() { 1.141 + // Deallocate Mach port for thread. 1.142 + mach_port_deallocate(mach_task_self(), profiled_thread_); 1.143 + } 1.144 + 1.145 + thread_act_t profiled_thread() { return profiled_thread_; } 1.146 + pthread_t profiled_pthread() { return profiled_pthread_; } 1.147 + 1.148 + private: 1.149 + // Note: for profiled_thread_ Mach primitives are used instead of PThread's 1.150 + // because the latter doesn't provide thread manipulation primitives required. 1.151 + // For details, consult "Mac OS X Internals" book, Section 7.3. 1.152 + thread_act_t profiled_thread_; 1.153 + // we also store the pthread because Mach threads have no concept of stack 1.154 + // and we want to be able to get the stack size when we need to unwind the 1.155 + // stack using frame pointers. 1.156 + pthread_t profiled_pthread_; 1.157 +}; 1.158 + 1.159 +/* static */ PlatformData* 1.160 +Sampler::AllocPlatformData(int aThreadId) 1.161 +{ 1.162 + return new PlatformData; 1.163 +} 1.164 + 1.165 +/* static */ void 1.166 +Sampler::FreePlatformData(PlatformData* aData) 1.167 +{ 1.168 + delete aData; 1.169 +} 1.170 + 1.171 +class SamplerThread : public Thread { 1.172 + public: 1.173 + explicit SamplerThread(double interval) 1.174 + : Thread("SamplerThread") 1.175 + , intervalMicro_(floor(interval * 1000 + 0.5)) 1.176 + { 1.177 + if (intervalMicro_ <= 0) { 1.178 + intervalMicro_ = 1; 1.179 + } 1.180 + } 1.181 + 1.182 + static void AddActiveSampler(Sampler* sampler) { 1.183 + mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); 1.184 + SamplerRegistry::AddActiveSampler(sampler); 1.185 + if (instance_ == NULL) { 1.186 + instance_ = new SamplerThread(sampler->interval()); 1.187 + instance_->Start(); 1.188 + } 1.189 + } 1.190 + 1.191 + static void RemoveActiveSampler(Sampler* sampler) { 1.192 + mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); 1.193 + instance_->Join(); 1.194 + //XXX: unlike v8 we need to remove the active sampler after doing the Join 1.195 + // because we drop the sampler immediately 1.196 + SamplerRegistry::RemoveActiveSampler(sampler); 1.197 + delete instance_; 1.198 + instance_ = NULL; 1.199 + } 1.200 + 1.201 + // Implement Thread::Run(). 1.202 + virtual void Run() { 1.203 + while (SamplerRegistry::sampler->IsActive()) { 1.204 + if (!SamplerRegistry::sampler->IsPaused()) { 1.205 + mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); 1.206 + std::vector<ThreadInfo*> threads = 1.207 + SamplerRegistry::sampler->GetRegisteredThreads(); 1.208 + for (uint32_t i = 0; i < threads.size(); i++) { 1.209 + ThreadInfo* info = threads[i]; 1.210 + 1.211 + // This will be null if we're not interested in profiling this thread. 1.212 + if (!info->Profile()) 1.213 + continue; 1.214 + 1.215 + PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); 1.216 + if (sleeping == PseudoStack::SLEEPING_AGAIN) { 1.217 + info->Profile()->DuplicateLastSample(); 1.218 + //XXX: This causes flushes regardless of jank-only mode 1.219 + info->Profile()->flush(); 1.220 + continue; 1.221 + } 1.222 + 1.223 + ThreadProfile* thread_profile = info->Profile(); 1.224 + 1.225 + SampleContext(SamplerRegistry::sampler, thread_profile); 1.226 + } 1.227 + } 1.228 + OS::SleepMicro(intervalMicro_); 1.229 + } 1.230 + } 1.231 + 1.232 + void SampleContext(Sampler* sampler, ThreadProfile* thread_profile) { 1.233 + thread_act_t profiled_thread = 1.234 + thread_profile->GetPlatformData()->profiled_thread(); 1.235 + 1.236 + TickSample sample_obj; 1.237 + TickSample* sample = &sample_obj; 1.238 + 1.239 + if (KERN_SUCCESS != thread_suspend(profiled_thread)) return; 1.240 + 1.241 +#if V8_HOST_ARCH_X64 1.242 + thread_state_flavor_t flavor = x86_THREAD_STATE64; 1.243 + x86_thread_state64_t state; 1.244 + mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; 1.245 +#if __DARWIN_UNIX03 1.246 +#define REGISTER_FIELD(name) __r ## name 1.247 +#else 1.248 +#define REGISTER_FIELD(name) r ## name 1.249 +#endif // __DARWIN_UNIX03 1.250 +#elif V8_HOST_ARCH_IA32 1.251 + thread_state_flavor_t flavor = i386_THREAD_STATE; 1.252 + i386_thread_state_t state; 1.253 + mach_msg_type_number_t count = i386_THREAD_STATE_COUNT; 1.254 +#if __DARWIN_UNIX03 1.255 +#define REGISTER_FIELD(name) __e ## name 1.256 +#else 1.257 +#define REGISTER_FIELD(name) e ## name 1.258 +#endif // __DARWIN_UNIX03 1.259 +#else 1.260 +#error Unsupported Mac OS X host architecture. 1.261 +#endif // V8_HOST_ARCH 1.262 + 1.263 + if (thread_get_state(profiled_thread, 1.264 + flavor, 1.265 + reinterpret_cast<natural_t*>(&state), 1.266 + &count) == KERN_SUCCESS) { 1.267 + sample->pc = reinterpret_cast<Address>(state.REGISTER_FIELD(ip)); 1.268 + sample->sp = reinterpret_cast<Address>(state.REGISTER_FIELD(sp)); 1.269 + sample->fp = reinterpret_cast<Address>(state.REGISTER_FIELD(bp)); 1.270 + sample->timestamp = mozilla::TimeStamp::Now(); 1.271 + sample->threadProfile = thread_profile; 1.272 + sampler->Tick(sample); 1.273 + } 1.274 + thread_resume(profiled_thread); 1.275 + } 1.276 + 1.277 + int intervalMicro_; 1.278 + //RuntimeProfilerRateLimiter rate_limiter_; 1.279 + 1.280 + static SamplerThread* instance_; 1.281 + 1.282 + DISALLOW_COPY_AND_ASSIGN(SamplerThread); 1.283 +}; 1.284 + 1.285 +#undef REGISTER_FIELD 1.286 + 1.287 +SamplerThread* SamplerThread::instance_ = NULL; 1.288 + 1.289 +Sampler::Sampler(double interval, bool profiling, int entrySize) 1.290 + : // isolate_(isolate), 1.291 + interval_(interval), 1.292 + profiling_(profiling), 1.293 + paused_(false), 1.294 + active_(false), 1.295 + entrySize_(entrySize) /*, 1.296 + samples_taken_(0)*/ { 1.297 +} 1.298 + 1.299 + 1.300 +Sampler::~Sampler() { 1.301 + ASSERT(!IsActive()); 1.302 +} 1.303 + 1.304 + 1.305 +void Sampler::Start() { 1.306 + ASSERT(!IsActive()); 1.307 + SetActive(true); 1.308 + SamplerThread::AddActiveSampler(this); 1.309 +} 1.310 + 1.311 + 1.312 +void Sampler::Stop() { 1.313 + ASSERT(IsActive()); 1.314 + SetActive(false); 1.315 + SamplerThread::RemoveActiveSampler(this); 1.316 +} 1.317 + 1.318 +pthread_t 1.319 +Sampler::GetProfiledThread(PlatformData* aData) 1.320 +{ 1.321 + return aData->profiled_pthread(); 1.322 +} 1.323 + 1.324 +#include <sys/syscall.h> 1.325 +pid_t gettid() 1.326 +{ 1.327 + return (pid_t) syscall(SYS_thread_selfid); 1.328 +} 1.329 + 1.330 +/* static */ Thread::tid_t 1.331 +Thread::GetCurrentId() 1.332 +{ 1.333 + return gettid(); 1.334 +} 1.335 + 1.336 +bool Sampler::RegisterCurrentThread(const char* aName, 1.337 + PseudoStack* aPseudoStack, 1.338 + bool aIsMainThread, void* stackTop) 1.339 +{ 1.340 + if (!Sampler::sRegisteredThreadsMutex) 1.341 + return false; 1.342 + 1.343 + 1.344 + mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); 1.345 + 1.346 + int id = gettid(); 1.347 + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { 1.348 + ThreadInfo* info = sRegisteredThreads->at(i); 1.349 + if (info->ThreadId() == id) { 1.350 + // Thread already registered. This means the first unregister will be 1.351 + // too early. 1.352 + ASSERT(false); 1.353 + return false; 1.354 + } 1.355 + } 1.356 + 1.357 + set_tls_stack_top(stackTop); 1.358 + 1.359 + ThreadInfo* info = new ThreadInfo(aName, id, 1.360 + aIsMainThread, aPseudoStack, stackTop); 1.361 + 1.362 + if (sActiveSampler) { 1.363 + sActiveSampler->RegisterThread(info); 1.364 + } 1.365 + 1.366 + sRegisteredThreads->push_back(info); 1.367 + 1.368 + uwt__register_thread_for_profiling(stackTop); 1.369 + return true; 1.370 +} 1.371 + 1.372 +void Sampler::UnregisterCurrentThread() 1.373 +{ 1.374 + if (!Sampler::sRegisteredThreadsMutex) 1.375 + return; 1.376 + 1.377 + tlsStackTop.set(nullptr); 1.378 + 1.379 + mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); 1.380 + 1.381 + int id = gettid(); 1.382 + 1.383 + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { 1.384 + ThreadInfo* info = sRegisteredThreads->at(i); 1.385 + if (info->ThreadId() == id) { 1.386 + delete info; 1.387 + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); 1.388 + break; 1.389 + } 1.390 + } 1.391 +} 1.392 + 1.393 +void TickSample::PopulateContext(void* aContext) 1.394 +{ 1.395 + // Note that this asm changes if PopulateContext's parameter list is altered 1.396 +#if defined(SPS_PLAT_amd64_darwin) 1.397 + asm ( 1.398 + // Compute caller's %rsp by adding to %rbp: 1.399 + // 8 bytes for previous %rbp, 8 bytes for return address 1.400 + "leaq 0x10(%%rbp), %0\n\t" 1.401 + // Dereference %rbp to get previous %rbp 1.402 + "movq (%%rbp), %1\n\t" 1.403 + : 1.404 + "=r"(sp), 1.405 + "=r"(fp) 1.406 + ); 1.407 +#elif defined(SPS_PLAT_x86_darwin) 1.408 + asm ( 1.409 + // Compute caller's %esp by adding to %ebp: 1.410 + // 4 bytes for aContext + 4 bytes for return address + 1.411 + // 4 bytes for previous %ebp 1.412 + "leal 0xc(%%ebp), %0\n\t" 1.413 + // Dereference %ebp to get previous %ebp 1.414 + "movl (%%ebp), %1\n\t" 1.415 + : 1.416 + "=r"(sp), 1.417 + "=r"(fp) 1.418 + ); 1.419 +#else 1.420 +# error "Unsupported architecture" 1.421 +#endif 1.422 + pc = reinterpret_cast<Address>(__builtin_extract_return_addr( 1.423 + __builtin_return_address(0))); 1.424 +} 1.425 +