/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This variant of nsIPerfMeasurement uses the perf_event interface
 * added in Linux 2.6.31. We key compilation of this file off the
 * existence of <linux/perf_event.h>.
 */

#include <errno.h>
#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#include "perf/jsperf.h"

using namespace js;

// As of July 2010, this system call has not been added to the
// C library, so we have to provide our own wrapper function.
// If this code runs on a kernel that does not implement the
// system call (2.6.30 or older) nothing unpredictable will
// happen - it will just always fail and return -1.
static int
sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
                    int group_fd, unsigned long flags)
{
    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

namespace {

using JS::PerfMeasurement;
typedef PerfMeasurement::EventMask EventMask;

// Additional state required by this implementation.
struct Impl
{
    // Each active counter corresponds to an open file descriptor.
    int f_cpu_cycles;
    int f_instructions;
    int f_cache_references;
    int f_cache_misses;
    int f_branch_instructions;
    int f_branch_misses;
    int f_bus_cycles;
    int f_page_faults;
    int f_major_page_faults;
    int f_context_switches;
    int f_cpu_migrations;

    // Counter group leader, for Start and Stop.
    int group_leader;

    // Whether counters are running.
    bool running;

    Impl();
    ~Impl();

    EventMask init(EventMask toMeasure);
    void start();
    void stop(PerfMeasurement* counters);
};

// Mapping from our event bitmask to codes passed into the kernel, and
// to fields in the PerfMeasurement and PerfMeasurement::impl structures.
static const struct
{
    EventMask bit;
    uint32_t type;
    uint32_t config;
    uint64_t PerfMeasurement::* counter;
    int Impl::* fd;
} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
#define HW(mask, constant, fieldname) \
    { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
#define SW(mask, constant, fieldname) \
    { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
      &PerfMeasurement::fieldname, &Impl::f_##fieldname }

    HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles),
    HW(INSTRUCTIONS, INSTRUCTIONS, instructions),
    HW(CACHE_REFERENCES, CACHE_REFERENCES, cache_references),
    HW(CACHE_MISSES, CACHE_MISSES, cache_misses),
    HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
    HW(BRANCH_MISSES, BRANCH_MISSES, branch_misses),
    HW(BUS_CYCLES, BUS_CYCLES, bus_cycles),
    SW(PAGE_FAULTS, PAGE_FAULTS, page_faults),
    SW(MAJOR_PAGE_FAULTS, PAGE_FAULTS_MAJ, major_page_faults),
    SW(CONTEXT_SWITCHES, CONTEXT_SWITCHES, context_switches),
    SW(CPU_MIGRATIONS, CPU_MIGRATIONS, cpu_migrations),

#undef HW
#undef SW
};
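
// For illustration, the first HW(...) entry above expands to:
//
//   { PerfMeasurement::CPU_CYCLES, PERF_TYPE_HARDWARE,
//     PERF_COUNT_HW_CPU_CYCLES,
//     &PerfMeasurement::cpu_cycles, &Impl::f_cpu_cycles }
//
// Each slot thus ties one event-mask bit to the kernel's (type, config)
// pair and to pointers-to-member for both the public counter field and
// the private file descriptor.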

Impl::Impl()
  : f_cpu_cycles(-1),
    f_instructions(-1),
    f_cache_references(-1),
    f_cache_misses(-1),
    f_branch_instructions(-1),
    f_branch_misses(-1),
    f_bus_cycles(-1),
    f_page_faults(-1),
    f_major_page_faults(-1),
    f_context_switches(-1),
    f_cpu_migrations(-1),
    group_leader(-1),
    running(false)
{
}

Impl::~Impl()
{
    // Close all active counter descriptors. Take care to do the group
    // leader last (this may not be necessary, but it's unclear what
    // happens if you close the group leader out from under a group).
    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
        int fd = this->*(kSlots[i].fd);
        if (fd != -1 && fd != group_leader)
            close(fd);
    }

    if (group_leader != -1)
        close(group_leader);
}

EventMask
Impl::init(EventMask toMeasure)
{
    JS_ASSERT(group_leader == -1);
    if (!toMeasure)
        return EventMask(0);

    EventMask measured = EventMask(0);
    struct perf_event_attr attr;
    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
        if (!(toMeasure & kSlots[i].bit))
            continue;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);

        // Set the type and config fields to indicate the counter we
        // want to enable. We want read format 0, and we're not using
        // sampling, so leave those fields unset.
        attr.type = kSlots[i].type;
        attr.config = kSlots[i].config;

        // If this will be the group leader it should start off
        // disabled. Otherwise it should start off enabled (but blocked
        // on the group leader).
        if (group_leader == -1)
            attr.disabled = 1;

        // The rest of the bit fields are really poorly documented.
        // For instance, I have *no idea* whether we should be setting
        // the inherit, inherit_stat, or task flags. I'm pretty sure
        // we do want to set mmap and comm, and not any of the ones I
        // haven't mentioned.
        attr.mmap = 1;
        attr.comm = 1;

        int fd = sys_perf_event_open(&attr,
                                     0 /* trace self */,
                                     -1 /* on any cpu */,
                                     group_leader,
                                     0 /* no flags presently defined */);
        if (fd == -1)
            continue;

        measured = EventMask(measured | kSlots[i].bit);
        this->*(kSlots[i].fd) = fd;
        if (group_leader == -1)
            group_leader = fd;
    }
    return measured;
}

void
Impl::start()
{
    if (running || group_leader == -1)
        return;

    running = true;
    ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
}

void
Impl::stop(PerfMeasurement* counters)
{
    // This scratch buffer is to ensure that we have read all the
    // available data, even if that's more than we expect.
    unsigned char buf[1024];

    if (!running || group_leader == -1)
        return;

    ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
    running = false;

    // Read out and reset all the counter values.
    for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
        int fd = this->*(kSlots[i].fd);
        if (fd == -1)
            continue;
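
        // With read_format left at 0 (set up in init above), a read()
        // on one of these counter fds yields a single u64 holding that
        // counter's value, so a successful read is exactly
        // sizeof(uint64_t) bytes.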
        if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
            uint64_t cur;
            memcpy(&cur, buf, sizeof(uint64_t));
            counters->*(kSlots[i].counter) += cur;
        }

        // Reset the counter regardless of whether the read did what
        // we expected.
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    }
}

} // anonymous namespace


namespace JS {
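
// initCtr seeds each public counter field: 0 when the kernel agreed to
// measure that event (so stop() can accumulate into it), and -1 as the
// "not measured" sentinel otherwise. reset() below re-establishes the
// same invariant.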
#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)

PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
  : impl(js_new<Impl>()),
    eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
                        : EventMask(0)),
    cpu_cycles(initCtr(CPU_CYCLES)),
    instructions(initCtr(INSTRUCTIONS)),
    cache_references(initCtr(CACHE_REFERENCES)),
    cache_misses(initCtr(CACHE_MISSES)),
    branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
    branch_misses(initCtr(BRANCH_MISSES)),
    bus_cycles(initCtr(BUS_CYCLES)),
    page_faults(initCtr(PAGE_FAULTS)),
    major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
    context_switches(initCtr(CONTEXT_SWITCHES)),
    cpu_migrations(initCtr(CPU_MIGRATIONS))
{
}

#undef initCtr

PerfMeasurement::~PerfMeasurement()
{
    js_delete(static_cast<Impl*>(impl));
}

void
PerfMeasurement::start()
{
    if (impl)
        static_cast<Impl*>(impl)->start();
}

void
PerfMeasurement::stop()
{
    if (impl)
        static_cast<Impl*>(impl)->stop(this);
}

void
PerfMeasurement::reset()
{
    for (int i = 0; i < NUM_MEASURABLE_EVENTS; i++) {
        if (eventsMeasured & kSlots[i].bit)
            this->*(kSlots[i].counter) = 0;
        else
            this->*(kSlots[i].counter) = -1;
    }
}

bool
PerfMeasurement::canMeasureSomething()
{
    // Find out if the kernel implements the performance measurement
    // API. If it doesn't, syscall(__NR_perf_event_open, ...) is
    // guaranteed to return -1 and set errno to ENOSYS.
    //
    // We set up input parameters that should provoke an EINVAL error
    // from a kernel that does implement perf_event_open, but we can't
    // be sure it will (newer kernels might add more event types), so
    // we have to take care to close any valid fd it might return.

    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_MAX;

    int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
    if (fd >= 0) {
        close(fd);
        return true;
    } else {
        return errno != ENOSYS;
    }
}

} // namespace JS
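
// Usage sketch (illustrative only; the workload placeholder is
// hypothetical): combine events with the EventMask(a | b) cast pattern
// used in Impl::init() above, then bracket the workload with
// start()/stop().
//
//   if (JS::PerfMeasurement::canMeasureSomething()) {
//       JS::PerfMeasurement pm(JS::PerfMeasurement::EventMask(
//           JS::PerfMeasurement::CPU_CYCLES |
//           JS::PerfMeasurement::INSTRUCTIONS));
//       pm.start();
//       /* ... run the workload being measured ... */
//       pm.stop();
//       // pm.cpu_cycles and pm.instructions now hold accumulated
//       // counts; either remains -1 if the kernel refused that counter.
//   }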