js/src/perf/pm_linux.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/perf/pm_linux.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,310 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/* This variant of nsIPerfMeasurement uses the perf_event interface
    1.10 + * added in Linux 2.6.31.  We key compilation of this file off the
    1.11 + * existence of <linux/perf_event.h>.
    1.12 + */
    1.13 +
    1.14 +#include <errno.h>
    1.15 +#include <linux/perf_event.h>
    1.16 +#include <string.h>
    1.17 +#include <sys/ioctl.h>
    1.18 +#include <sys/syscall.h>
    1.19 +#include <unistd.h>
    1.20 +
    1.21 +#include "perf/jsperf.h"
    1.22 +
    1.23 +using namespace js;
    1.24 +
    // Thin wrapper around the perf_event_open system call.  As of July 2010
    // the C library offered no wrapper for it, so the call is issued through
    // syscall() directly.  On a kernel that lacks the system call (2.6.30 or
    // older) nothing unpredictable happens: the call fails and -1 is returned.
    //
    // Arguments are forwarded to the kernel unchanged; the result is the new
    // file descriptor, or -1 on failure.
    static int
    sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
                        int group_fd, unsigned long flags)
    {
        const long result = syscall(__NR_perf_event_open,
                                    attr, pid, cpu, group_fd, flags);
        return static_cast<int>(result);
    }
    1.36 +
    1.37 +namespace {
    1.38 +
    1.39 +using JS::PerfMeasurement;
    1.40 +typedef PerfMeasurement::EventMask EventMask;
    1.41 +
    1.42 +// Additional state required by this implementation.
    1.43 +struct Impl
    1.44 +{
    1.45 +    // Each active counter corresponds to an open file descriptor.
    1.46 +    int f_cpu_cycles;
    1.47 +    int f_instructions;
    1.48 +    int f_cache_references;
    1.49 +    int f_cache_misses;
    1.50 +    int f_branch_instructions;
    1.51 +    int f_branch_misses;
    1.52 +    int f_bus_cycles;
    1.53 +    int f_page_faults;
    1.54 +    int f_major_page_faults;
    1.55 +    int f_context_switches;
    1.56 +    int f_cpu_migrations;
    1.57 +
    1.58 +    // Counter group leader, for Start and Stop.
    1.59 +    int group_leader;
    1.60 +
    1.61 +    // Whether counters are running.
    1.62 +    bool running;
    1.63 +
    1.64 +    Impl();
    1.65 +    ~Impl();
    1.66 +
    1.67 +    EventMask init(EventMask toMeasure);
    1.68 +    void start();
    1.69 +    void stop(PerfMeasurement* counters);
    1.70 +};
    1.71 +
    1.72 +// Mapping from our event bitmask to codes passed into the kernel, and
    1.73 +// to fields in the PerfMeasurement and PerfMeasurement::impl structures.
    1.74 +static const struct
    1.75 +{
    1.76 +    EventMask bit;
    1.77 +    uint32_t type;
    1.78 +    uint32_t config;
    1.79 +    uint64_t PerfMeasurement::* counter;
    1.80 +    int Impl::* fd;
    1.81 +} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
    1.82 +#define HW(mask, constant, fieldname)                                   \
    1.83 +    { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
    1.84 +      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
    1.85 +#define SW(mask, constant, fieldname)                                   \
    1.86 +    { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
    1.87 +      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
    1.88 +
    1.89 +    HW(CPU_CYCLES,          CPU_CYCLES,          cpu_cycles),
    1.90 +    HW(INSTRUCTIONS,        INSTRUCTIONS,        instructions),
    1.91 +    HW(CACHE_REFERENCES,    CACHE_REFERENCES,    cache_references),
    1.92 +    HW(CACHE_MISSES,        CACHE_MISSES,        cache_misses),
    1.93 +    HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
    1.94 +    HW(BRANCH_MISSES,       BRANCH_MISSES,       branch_misses),
    1.95 +    HW(BUS_CYCLES,          BUS_CYCLES,          bus_cycles),
    1.96 +    SW(PAGE_FAULTS,         PAGE_FAULTS,         page_faults),
    1.97 +    SW(MAJOR_PAGE_FAULTS,   PAGE_FAULTS_MAJ,     major_page_faults),
    1.98 +    SW(CONTEXT_SWITCHES,    CONTEXT_SWITCHES,    context_switches),
    1.99 +    SW(CPU_MIGRATIONS,      CPU_MIGRATIONS,      cpu_migrations),
   1.100 +
   1.101 +#undef HW
   1.102 +#undef SW
   1.103 +};
   1.104 +
   1.105 +Impl::Impl()
   1.106 +  : f_cpu_cycles(-1),
   1.107 +    f_instructions(-1),
   1.108 +    f_cache_references(-1),
   1.109 +    f_cache_misses(-1),
   1.110 +    f_branch_instructions(-1),
   1.111 +    f_branch_misses(-1),
   1.112 +    f_bus_cycles(-1),
   1.113 +    f_page_faults(-1),
   1.114 +    f_major_page_faults(-1),
   1.115 +    f_context_switches(-1),
   1.116 +    f_cpu_migrations(-1),
   1.117 +    group_leader(-1),
   1.118 +    running(false)
   1.119 +{
   1.120 +}
   1.121 +
   1.122 +Impl::~Impl()
   1.123 +{
   1.124 +    // Close all active counter descriptors.  Take care to do the group
   1.125 +    // leader last (this may not be necessary, but it's unclear what
   1.126 +    // happens if you close the group leader out from under a group).
   1.127 +    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
   1.128 +        int fd = this->*(kSlots[i].fd);
   1.129 +        if (fd != -1 && fd != group_leader)
   1.130 +            close(fd);
   1.131 +    }
   1.132 +
   1.133 +    if (group_leader != -1)
   1.134 +        close(group_leader);
   1.135 +}
   1.136 +
   1.137 +EventMask
   1.138 +Impl::init(EventMask toMeasure)
   1.139 +{
   1.140 +    JS_ASSERT(group_leader == -1);
   1.141 +    if (!toMeasure)
   1.142 +        return EventMask(0);
   1.143 +
   1.144 +    EventMask measured = EventMask(0);
   1.145 +    struct perf_event_attr attr;
   1.146 +    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
   1.147 +        if (!(toMeasure & kSlots[i].bit))
   1.148 +            continue;
   1.149 +
   1.150 +        memset(&attr, 0, sizeof(attr));
   1.151 +        attr.size = sizeof(attr);
   1.152 +
   1.153 +        // Set the type and config fields to indicate the counter we
   1.154 +        // want to enable.  We want read format 0, and we're not using
   1.155 +        // sampling, so leave those fields unset.
   1.156 +        attr.type = kSlots[i].type;
   1.157 +        attr.config = kSlots[i].config;
   1.158 +
   1.159 +        // If this will be the group leader it should start off
   1.160 +        // disabled.  Otherwise it should start off enabled (but blocked
   1.161 +        // on the group leader).
   1.162 +        if (group_leader == -1)
   1.163 +            attr.disabled = 1;
   1.164 +
   1.165 +        // The rest of the bit fields are really poorly documented.
   1.166 +        // For instance, I have *no idea* whether we should be setting
   1.167 +        // the inherit, inherit_stat, or task flags.  I'm pretty sure
   1.168 +        // we do want to set mmap and comm, and not any of the ones I
   1.169 +        // haven't mentioned.
   1.170 +        attr.mmap = 1;
   1.171 +        attr.comm = 1;
   1.172 +
   1.173 +        int fd = sys_perf_event_open(&attr,
   1.174 +                                     0 /* trace self */,
   1.175 +                                     -1 /* on any cpu */,
   1.176 +                                     group_leader,
   1.177 +                                     0 /* no flags presently defined */);
   1.178 +        if (fd == -1)
   1.179 +            continue;
   1.180 +
   1.181 +        measured = EventMask(measured | kSlots[i].bit);
   1.182 +        this->*(kSlots[i].fd) = fd;
   1.183 +        if (group_leader == -1)
   1.184 +            group_leader = fd;
   1.185 +    }
   1.186 +    return measured;
   1.187 +}
   1.188 +
   1.189 +void
   1.190 +Impl::start()
   1.191 +{
   1.192 +    if (running || group_leader == -1)
   1.193 +        return;
   1.194 +
   1.195 +    running = true;
   1.196 +    ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
   1.197 +}
   1.198 +
   1.199 +void
   1.200 +Impl::stop(PerfMeasurement* counters)
   1.201 +{
   1.202 +    // This scratch buffer is to ensure that we have read all the
   1.203 +    // available data, even if that's more than we expect.
   1.204 +    unsigned char buf[1024];
   1.205 +
   1.206 +    if (!running || group_leader == -1)
   1.207 +        return;
   1.208 +
   1.209 +    ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
   1.210 +    running = false;
   1.211 +
   1.212 +    // read out and reset all the counter values
   1.213 +    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
   1.214 +        int fd = this->*(kSlots[i].fd);
   1.215 +        if (fd == -1)
   1.216 +            continue;
   1.217 +
   1.218 +        if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
   1.219 +            uint64_t cur;
   1.220 +            memcpy(&cur, buf, sizeof(uint64_t));
   1.221 +            counters->*(kSlots[i].counter) += cur;
   1.222 +        }
   1.223 +
   1.224 +        // Reset the counter regardless of whether the read did what
   1.225 +        // we expected.
   1.226 +        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
   1.227 +    }
   1.228 +}
   1.229 +
   1.230 +} // anonymous namespace
   1.231 +
   1.232 +
   1.233 +namespace JS {
   1.234 +
   1.235 +#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)
   1.236 +
   1.237 +PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
   1.238 +  : impl(js_new<Impl>()),
   1.239 +    eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
   1.240 +                   : EventMask(0)),
   1.241 +    cpu_cycles(initCtr(CPU_CYCLES)),
   1.242 +    instructions(initCtr(INSTRUCTIONS)),
   1.243 +    cache_references(initCtr(CACHE_REFERENCES)),
   1.244 +    cache_misses(initCtr(CACHE_MISSES)),
   1.245 +    branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
   1.246 +    branch_misses(initCtr(BRANCH_MISSES)),
   1.247 +    bus_cycles(initCtr(BUS_CYCLES)),
   1.248 +    page_faults(initCtr(PAGE_FAULTS)),
   1.249 +    major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
   1.250 +    context_switches(initCtr(CONTEXT_SWITCHES)),
   1.251 +    cpu_migrations(initCtr(CPU_MIGRATIONS))
   1.252 +{
   1.253 +}
   1.254 +
   1.255 +#undef initCtr
   1.256 +
   1.257 +PerfMeasurement::~PerfMeasurement()
   1.258 +{
   1.259 +    js_delete(static_cast<Impl*>(impl));
   1.260 +}
   1.261 +
   1.262 +void
   1.263 +PerfMeasurement::start()
   1.264 +{
   1.265 +    if (impl)
   1.266 +        static_cast<Impl*>(impl)->start();
   1.267 +}
   1.268 +
   1.269 +void
   1.270 +PerfMeasurement::stop()
   1.271 +{
   1.272 +    if (impl)
   1.273 +        static_cast<Impl*>(impl)->stop(this);
   1.274 +}
   1.275 +
   1.276 +void
   1.277 +PerfMeasurement::reset()
   1.278 +{
   1.279 +    for (int i = 0; i < NUM_MEASURABLE_EVENTS; i++) {
   1.280 +        if (eventsMeasured & kSlots[i].bit)
   1.281 +            this->*(kSlots[i].counter) = 0;
   1.282 +        else
   1.283 +            this->*(kSlots[i].counter) = -1;
   1.284 +    }
   1.285 +}
   1.286 +
   1.287 +bool
   1.288 +PerfMeasurement::canMeasureSomething()
   1.289 +{
   1.290 +    // Find out if the kernel implements the performance measurement
   1.291 +    // API.  If it doesn't, syscall(__NR_perf_event_open, ...) is
   1.292 +    // guaranteed to return -1 and set errno to ENOSYS.
   1.293 +    //
   1.294 +    // We set up input parameters that should provoke an EINVAL error
   1.295 +    // from a kernel that does implement perf_event_open, but we can't
   1.296 +    // be sure it will (newer kernels might add more event types), so
   1.297 +    // we have to take care to close any valid fd it might return.
   1.298 +
   1.299 +    struct perf_event_attr attr;
   1.300 +    memset(&attr, 0, sizeof(attr));
   1.301 +    attr.size = sizeof(attr);
   1.302 +    attr.type = PERF_TYPE_MAX;
   1.303 +
   1.304 +    int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
   1.305 +    if (fd >= 0) {
   1.306 +        close(fd);
   1.307 +        return true;
   1.308 +    } else {
   1.309 +        return errno != ENOSYS;
   1.310 +    }
   1.311 +}
   1.312 +
   1.313 +} // namespace JS

mercurial