/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This variant of nsIPerfMeasurement uses the perf_event interface
 * added in Linux 2.6.31.  We key compilation of this file off the
 * existence of <linux/perf_event.h>.
 */

#include <errno.h>
#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#include "perf/jsperf.h"

using namespace js;

// As of July 2010, this system call has not been added to the
// C library, so we have to provide our own wrapper function.
// If this code runs on a kernel that does not implement the
// system call (2.6.30 or older) nothing unpredictable will
// happen - it will just always fail and return -1.
static int
sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
                    int group_fd, unsigned long flags)
{
    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

namespace {

using JS::PerfMeasurement;
typedef PerfMeasurement::EventMask EventMask;

// Additional state required by this implementation.
struct Impl
{
    // Each active counter corresponds to an open file descriptor.
    int f_cpu_cycles;
    int f_instructions;
    int f_cache_references;
    int f_cache_misses;
    int f_branch_instructions;
    int f_branch_misses;
    int f_bus_cycles;
    int f_page_faults;
    int f_major_page_faults;
    int f_context_switches;
    int f_cpu_migrations;

    // Counter group leader, for Start and Stop.
    int group_leader;

    // Whether counters are running.
    bool running;

    Impl();
    ~Impl();

    EventMask init(EventMask toMeasure);
    void start();
    void stop(PerfMeasurement* counters);
};

// Mapping from our event bitmask to codes passed into the kernel, and
// to fields in the PerfMeasurement and PerfMeasurement::impl structures.
static const struct
{
    EventMask bit;
    uint32_t type;
    uint32_t config;
    uint64_t PerfMeasurement::* counter;
    int Impl::* fd;
} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
#define HW(mask, constant, fieldname)                                    \
    { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
#define SW(mask, constant, fieldname)                                    \
    { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }

    HW(CPU_CYCLES,          CPU_CYCLES,          cpu_cycles),
    HW(INSTRUCTIONS,        INSTRUCTIONS,        instructions),
    HW(CACHE_REFERENCES,    CACHE_REFERENCES,    cache_references),
    HW(CACHE_MISSES,        CACHE_MISSES,        cache_misses),
    HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
    HW(BRANCH_MISSES,       BRANCH_MISSES,       branch_misses),
    HW(BUS_CYCLES,          BUS_CYCLES,          bus_cycles),
    SW(PAGE_FAULTS,         PAGE_FAULTS,         page_faults),
    SW(MAJOR_PAGE_FAULTS,   PAGE_FAULTS_MAJ,     major_page_faults),
    SW(CONTEXT_SWITCHES,    CONTEXT_SWITCHES,    context_switches),
    SW(CPU_MIGRATIONS,      CPU_MIGRATIONS,      cpu_migrations),

#undef HW
#undef SW
};
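// For illustration, the first HW() entry above expands to:
//
//     { PerfMeasurement::CPU_CYCLES, PERF_TYPE_HARDWARE,
//       PERF_COUNT_HW_CPU_CYCLES,
//       &PerfMeasurement::cpu_cycles, &Impl::f_cpu_cycles }
//
// i.e. each slot ties one EventMask bit to a kernel event selector
// (type, config) and to pointers-to-member locating both the public
// counter field and the file descriptor that feeds it.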

Impl::Impl()
  : f_cpu_cycles(-1),
    f_instructions(-1),
    f_cache_references(-1),
    f_cache_misses(-1),
    f_branch_instructions(-1),
    f_branch_misses(-1),
    f_bus_cycles(-1),
    f_page_faults(-1),
    f_major_page_faults(-1),
    f_context_switches(-1),
    f_cpu_migrations(-1),
    group_leader(-1),
    running(false)
{
}

Impl::~Impl()
{
    // Close all active counter descriptors.  Take care to do the group
    // leader last (this may not be necessary, but it's unclear what
    // happens if you close the group leader out from under a group).
    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
        int fd = this->*(kSlots[i].fd);
        if (fd != -1 && fd != group_leader)
            close(fd);
    }

    if (group_leader != -1)
        close(group_leader);
}

EventMask
Impl::init(EventMask toMeasure)
{
    JS_ASSERT(group_leader == -1);
    if (!toMeasure)
        return EventMask(0);

    EventMask measured = EventMask(0);
    struct perf_event_attr attr;
    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
        if (!(toMeasure & kSlots[i].bit))
            continue;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);

        // Set the type and config fields to indicate the counter we
        // want to enable.  We want read format 0, and we're not using
        // sampling, so leave those fields unset.
        attr.type = kSlots[i].type;
        attr.config = kSlots[i].config;

        // If this will be the group leader it should start off
        // disabled.  Otherwise it should start off enabled (but blocked
        // on the group leader).
        if (group_leader == -1)
            attr.disabled = 1;

        // The rest of the bit fields are really poorly documented.
        // For instance, I have *no idea* whether we should be setting
        // the inherit, inherit_stat, or task flags.  I'm pretty sure
        // we do want to set mmap and comm, and not any of the ones I
        // haven't mentioned.
        attr.mmap = 1;
        attr.comm = 1;

        int fd = sys_perf_event_open(&attr,
                                     0 /* trace self */,
                                     -1 /* on any cpu */,
                                     group_leader,
                                     0 /* no flags presently defined */);
        if (fd == -1)
            continue;

        measured = EventMask(measured | kSlots[i].bit);
        this->*(kSlots[i].fd) = fd;
        if (group_leader == -1)
            group_leader = fd;
    }
    return measured;
}
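// Note on the group mechanics used by start() and stop() below: init()
// opens the first successful counter disabled and makes it the group
// leader; every later counter is opened enabled but attached to that
// leader, so it does not count while the leader is disabled.  Toggling
// the leader with PERF_EVENT_IOC_ENABLE / PERF_EVENT_IOC_DISABLE
// therefore starts and stops the whole group at once.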
void
Impl::start()
{
    if (running || group_leader == -1)
        return;

    running = true;
    ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
}

void
Impl::stop(PerfMeasurement* counters)
{
    // This scratch buffer is to ensure that we have read all the
    // available data, even if that's more than we expect.
    unsigned char buf[1024];

    if (!running || group_leader == -1)
        return;

    ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
    running = false;

    // Read out and reset all the counter values.
    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
        int fd = this->*(kSlots[i].fd);
        if (fd == -1)
            continue;

        // With read format 0 and no sampling, a successful read()
        // yields exactly one uint64_t: the current counter value.
        if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
            uint64_t cur;
            memcpy(&cur, buf, sizeof(uint64_t));
            counters->*(kSlots[i].counter) += cur;
        }

        // Reset the counter regardless of whether the read did what
        // we expected.
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    }
}

} // anonymous namespace
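// The public PerfMeasurement methods below are thin forwarders into
// Impl.  The counter fields follow a sentinel convention: a field is
// initialized to 0 when its event was successfully opened and to
// uint64_t(-1) otherwise (see the initCtr() macro in the constructor),
// and reset() restores exactly that same state.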

namespace JS {

#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)

PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
  : impl(js_new<Impl>()),
    eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
                        : EventMask(0)),
    cpu_cycles(initCtr(CPU_CYCLES)),
    instructions(initCtr(INSTRUCTIONS)),
    cache_references(initCtr(CACHE_REFERENCES)),
    cache_misses(initCtr(CACHE_MISSES)),
    branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
    branch_misses(initCtr(BRANCH_MISSES)),
    bus_cycles(initCtr(BUS_CYCLES)),
    page_faults(initCtr(PAGE_FAULTS)),
    major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
    context_switches(initCtr(CONTEXT_SWITCHES)),
    cpu_migrations(initCtr(CPU_MIGRATIONS))
{
}

#undef initCtr

PerfMeasurement::~PerfMeasurement()
{
    js_delete(static_cast<Impl*>(impl));
}

void
PerfMeasurement::start()
{
    if (impl)
        static_cast<Impl*>(impl)->start();
}

void
PerfMeasurement::stop()
{
    if (impl)
        static_cast<Impl*>(impl)->stop(this);
}

void
PerfMeasurement::reset()
{
    for (int i = 0; i < NUM_MEASURABLE_EVENTS; i++) {
        if (eventsMeasured & kSlots[i].bit)
            this->*(kSlots[i].counter) = 0;
        else
            this->*(kSlots[i].counter) = -1;
    }
}

bool
PerfMeasurement::canMeasureSomething()
{
    // Find out if the kernel implements the performance measurement
    // API.  If it doesn't, syscall(__NR_perf_event_open, ...) is
    // guaranteed to return -1 and set errno to ENOSYS.
    //
    // We set up input parameters that should provoke an EINVAL error
    // from a kernel that does implement perf_event_open, but we can't
    // be sure it will (newer kernels might add more event types), so
    // we have to take care to close any valid fd it might return.

    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_MAX;

    int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
    if (fd >= 0) {
        close(fd);
        return true;
    } else {
        return errno != ENOSYS;
    }
}

} // namespace JS
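/* Illustrative call sequence (a sketch, not part of this file; it uses
 * only the public names defined in perf/jsperf.h and referenced above,
 * and assumes canMeasureSomething() is callable without an instance, as
 * its lack of member-state use suggests):
 *
 *     if (JS::PerfMeasurement::canMeasureSomething()) {
 *         JS::PerfMeasurement pm(JS::PerfMeasurement::EventMask(
 *             JS::PerfMeasurement::CPU_CYCLES |
 *             JS::PerfMeasurement::INSTRUCTIONS));
 *         pm.start();
 *         // ... workload under measurement ...
 *         pm.stop();
 *         // pm.cpu_cycles and pm.instructions now hold accumulated
 *         // counts; events that could not be opened read as
 *         // uint64_t(-1).
 *     }
 */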