1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/js/src/perf/pm_linux.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,310 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/* This variant of nsIPerfMeasurement uses the perf_event interface 1.10 + * added in Linux 2.6.31. We key compilation of this file off the 1.11 + * existence of <linux/perf_event.h>. 1.12 + */ 1.13 + 1.14 +#include <errno.h> 1.15 +#include <linux/perf_event.h> 1.16 +#include <string.h> 1.17 +#include <sys/ioctl.h> 1.18 +#include <sys/syscall.h> 1.19 +#include <unistd.h> 1.20 + 1.21 +#include "perf/jsperf.h" 1.22 + 1.23 +using namespace js; 1.24 + 1.25 +// As of July 2010, this system call has not been added to the 1.26 +// C library, so we have to provide our own wrapper function. 1.27 +// If this code runs on a kernel that does not implement the 1.28 +// system call (2.6.30 or older) nothing unpredictable will 1.29 +// happen - it will just always fail and return -1. 1.30 +static int 1.31 +sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, 1.32 + int group_fd, unsigned long flags) 1.33 +{ 1.34 + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); 1.35 +} 1.36 + 1.37 +namespace { 1.38 + 1.39 +using JS::PerfMeasurement; 1.40 +typedef PerfMeasurement::EventMask EventMask; 1.41 + 1.42 +// Additional state required by this implementation. 1.43 +struct Impl 1.44 +{ 1.45 + // Each active counter corresponds to an open file descriptor. 1.46 + int f_cpu_cycles; 1.47 + int f_instructions; 1.48 + int f_cache_references; 1.49 + int f_cache_misses; 1.50 + int f_branch_instructions; 1.51 + int f_branch_misses; 1.52 + int f_bus_cycles; 1.53 + int f_page_faults; 1.54 + int f_major_page_faults; 1.55 + int f_context_switches; 1.56 + int f_cpu_migrations; 1.57 + 1.58 + // Counter group leader, for Start and Stop. 1.59 + int group_leader; 1.60 + 1.61 + // Whether counters are running. 1.62 + bool running; 1.63 + 1.64 + Impl(); 1.65 + ~Impl(); 1.66 + 1.67 + EventMask init(EventMask toMeasure); 1.68 + void start(); 1.69 + void stop(PerfMeasurement* counters); 1.70 +}; 1.71 + 1.72 +// Mapping from our event bitmask to codes passed into the kernel, and 1.73 +// to fields in the PerfMeasurement and PerfMeasurement::impl structures. 1.74 +static const struct 1.75 +{ 1.76 + EventMask bit; 1.77 + uint32_t type; 1.78 + uint32_t config; 1.79 + uint64_t PerfMeasurement::* counter; 1.80 + int Impl::* fd; 1.81 +} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = { 1.82 +#define HW(mask, constant, fieldname) \ 1.83 + { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \ 1.84 + &PerfMeasurement::fieldname, &Impl::f_##fieldname } 1.85 +#define SW(mask, constant, fieldname) \ 1.86 + { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \ 1.87 + &PerfMeasurement::fieldname, &Impl::f_##fieldname } 1.88 + 1.89 + HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles), 1.90 + HW(INSTRUCTIONS, INSTRUCTIONS, instructions), 1.91 + HW(CACHE_REFERENCES, CACHE_REFERENCES, cache_references), 1.92 + HW(CACHE_MISSES, CACHE_MISSES, cache_misses), 1.93 + HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions), 1.94 + HW(BRANCH_MISSES, BRANCH_MISSES, branch_misses), 1.95 + HW(BUS_CYCLES, BUS_CYCLES, bus_cycles), 1.96 + SW(PAGE_FAULTS, PAGE_FAULTS, page_faults), 1.97 + SW(MAJOR_PAGE_FAULTS, PAGE_FAULTS_MAJ, major_page_faults), 1.98 + SW(CONTEXT_SWITCHES, CONTEXT_SWITCHES, context_switches), 1.99 + SW(CPU_MIGRATIONS, CPU_MIGRATIONS, cpu_migrations), 1.100 + 1.101 +#undef HW 1.102 +#undef SW 1.103 +}; 1.104 + 1.105 +Impl::Impl() 1.106 + : f_cpu_cycles(-1), 1.107 + f_instructions(-1), 1.108 + f_cache_references(-1), 1.109 + f_cache_misses(-1), 1.110 + f_branch_instructions(-1), 1.111 + f_branch_misses(-1), 1.112 + f_bus_cycles(-1), 1.113 + f_page_faults(-1), 1.114 + f_major_page_faults(-1), 1.115 + f_context_switches(-1), 1.116 + f_cpu_migrations(-1), 1.117 + group_leader(-1), 1.118 + running(false) 1.119 +{ 1.120 +} 1.121 + 1.122 +Impl::~Impl() 1.123 +{ 1.124 + // Close all active counter descriptors. Take care to do the group 1.125 + // leader last (this may not be necessary, but it's unclear what 1.126 + // happens if you close the group leader out from under a group). 1.127 + for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) { 1.128 + int fd = this->*(kSlots[i].fd); 1.129 + if (fd != -1 && fd != group_leader) 1.130 + close(fd); 1.131 + } 1.132 + 1.133 + if (group_leader != -1) 1.134 + close(group_leader); 1.135 +} 1.136 + 1.137 +EventMask 1.138 +Impl::init(EventMask toMeasure) 1.139 +{ 1.140 + JS_ASSERT(group_leader == -1); 1.141 + if (!toMeasure) 1.142 + return EventMask(0); 1.143 + 1.144 + EventMask measured = EventMask(0); 1.145 + struct perf_event_attr attr; 1.146 + for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) { 1.147 + if (!(toMeasure & kSlots[i].bit)) 1.148 + continue; 1.149 + 1.150 + memset(&attr, 0, sizeof(attr)); 1.151 + attr.size = sizeof(attr); 1.152 + 1.153 + // Set the type and config fields to indicate the counter we 1.154 + // want to enable. We want read format 0, and we're not using 1.155 + // sampling, so leave those fields unset. 1.156 + attr.type = kSlots[i].type; 1.157 + attr.config = kSlots[i].config; 1.158 + 1.159 + // If this will be the group leader it should start off 1.160 + // disabled. Otherwise it should start off enabled (but blocked 1.161 + // on the group leader). 1.162 + if (group_leader == -1) 1.163 + attr.disabled = 1; 1.164 + 1.165 + // The rest of the bit fields are really poorly documented. 1.166 + // For instance, I have *no idea* whether we should be setting 1.167 + // the inherit, inherit_stat, or task flags. I'm pretty sure 1.168 + // we do want to set mmap and comm, and not any of the ones I 1.169 + // haven't mentioned. 1.170 + attr.mmap = 1; 1.171 + attr.comm = 1; 1.172 + 1.173 + int fd = sys_perf_event_open(&attr, 1.174 + 0 /* trace self */, 1.175 + -1 /* on any cpu */, 1.176 + group_leader, 1.177 + 0 /* no flags presently defined */); 1.178 + if (fd == -1) 1.179 + continue; 1.180 + 1.181 + measured = EventMask(measured | kSlots[i].bit); 1.182 + this->*(kSlots[i].fd) = fd; 1.183 + if (group_leader == -1) 1.184 + group_leader = fd; 1.185 + } 1.186 + return measured; 1.187 +} 1.188 + 1.189 +void 1.190 +Impl::start() 1.191 +{ 1.192 + if (running || group_leader == -1) 1.193 + return; 1.194 + 1.195 + running = true; 1.196 + ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0); 1.197 +} 1.198 + 1.199 +void 1.200 +Impl::stop(PerfMeasurement* counters) 1.201 +{ 1.202 + // This scratch buffer is to ensure that we have read all the 1.203 + // available data, even if that's more than we expect. 1.204 + unsigned char buf[1024]; 1.205 + 1.206 + if (!running || group_leader == -1) 1.207 + return; 1.208 + 1.209 + ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0); 1.210 + running = false; 1.211 + 1.212 + // read out and reset all the counter values 1.213 + for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) { 1.214 + int fd = this->*(kSlots[i].fd); 1.215 + if (fd == -1) 1.216 + continue; 1.217 + 1.218 + if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) { 1.219 + uint64_t cur; 1.220 + memcpy(&cur, buf, sizeof(uint64_t)); 1.221 + counters->*(kSlots[i].counter) += cur; 1.222 + } 1.223 + 1.224 + // Reset the counter regardless of whether the read did what 1.225 + // we expected. 1.226 + ioctl(fd, PERF_EVENT_IOC_RESET, 0); 1.227 + } 1.228 +} 1.229 + 1.230 +} // anonymous namespace 1.231 + 1.232 + 1.233 +namespace JS { 1.234 + 1.235 +#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1) 1.236 + 1.237 +PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure) 1.238 + : impl(js_new<Impl>()), 1.239 + eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure) 1.240 + : EventMask(0)), 1.241 + cpu_cycles(initCtr(CPU_CYCLES)), 1.242 + instructions(initCtr(INSTRUCTIONS)), 1.243 + cache_references(initCtr(CACHE_REFERENCES)), 1.244 + cache_misses(initCtr(CACHE_MISSES)), 1.245 + branch_instructions(initCtr(BRANCH_INSTRUCTIONS)), 1.246 + branch_misses(initCtr(BRANCH_MISSES)), 1.247 + bus_cycles(initCtr(BUS_CYCLES)), 1.248 + page_faults(initCtr(PAGE_FAULTS)), 1.249 + major_page_faults(initCtr(MAJOR_PAGE_FAULTS)), 1.250 + context_switches(initCtr(CONTEXT_SWITCHES)), 1.251 + cpu_migrations(initCtr(CPU_MIGRATIONS)) 1.252 +{ 1.253 +} 1.254 + 1.255 +#undef initCtr 1.256 + 1.257 +PerfMeasurement::~PerfMeasurement() 1.258 +{ 1.259 + js_delete(static_cast<Impl*>(impl)); 1.260 +} 1.261 + 1.262 +void 1.263 +PerfMeasurement::start() 1.264 +{ 1.265 + if (impl) 1.266 + static_cast<Impl*>(impl)->start(); 1.267 +} 1.268 + 1.269 +void 1.270 +PerfMeasurement::stop() 1.271 +{ 1.272 + if (impl) 1.273 + static_cast<Impl*>(impl)->stop(this); 1.274 +} 1.275 + 1.276 +void 1.277 +PerfMeasurement::reset() 1.278 +{ 1.279 + for (int i = 0; i < NUM_MEASURABLE_EVENTS; i++) { 1.280 + if (eventsMeasured & kSlots[i].bit) 1.281 + this->*(kSlots[i].counter) = 0; 1.282 + else 1.283 + this->*(kSlots[i].counter) = -1; 1.284 + } 1.285 +} 1.286 + 1.287 +bool 1.288 +PerfMeasurement::canMeasureSomething() 1.289 +{ 1.290 + // Find out if the kernel implements the performance measurement 1.291 + // API. If it doesn't, syscall(__NR_perf_event_open, ...) is 1.292 + // guaranteed to return -1 and set errno to ENOSYS. 1.293 + // 1.294 + // We set up input parameters that should provoke an EINVAL error 1.295 + // from a kernel that does implement perf_event_open, but we can't 1.296 + // be sure it will (newer kernels might add more event types), so 1.297 + // we have to take care to close any valid fd it might return. 1.298 + 1.299 + struct perf_event_attr attr; 1.300 + memset(&attr, 0, sizeof(attr)); 1.301 + attr.size = sizeof(attr); 1.302 + attr.type = PERF_TYPE_MAX; 1.303 + 1.304 + int fd = sys_perf_event_open(&attr, 0, -1, -1, 0); 1.305 + if (fd >= 0) { 1.306 + close(fd); 1.307 + return true; 1.308 + } else { 1.309 + return errno != ENOSYS; 1.310 + } 1.311 +} 1.312 + 1.313 +} // namespace JS