|
1 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
2 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
4 |
|
5 /* |
|
6 |
|
7 A program that computes a function ordering for an executable based |
|
8 on runtime profile information. |
|
9 |
|
10 This program is directly based on work done by Roger Chickering |
|
11 <rogc@netscape.com> in |
|
12 |
|
13 <http://bugzilla.mozilla.org/show_bug.cgi?id=65845> |
|
14 |
|
15 to implement Nat Friedman's <nat@nat.org> `grope', |
|
16 |
|
17 _GNU Rope - A Subroutine Position Optimizer_ |
|
18 <http://www.hungry.com/~shaver/grope/grope.ps> |
|
19 |
|
20 Specifically, it implements the procedure re-ordering algorithm |
|
21 described in: |
|
22 |
|
23 K. Pettis and R. Hansen. ``Profile-Guided Core Position.'' In |
|
24 _Proceedings of the Int. Conference on Programming Language Design |
|
25 and Implementation_, pages 16-27, June 1990. |
|
26 |
|
27 */ |
|
28 |
|
29 #include <assert.h> |
|
30 #include <fstream> |
|
31 #include <hash_set> |
|
32 #include <map> |
|
33 #include <set> |
|
34 #include <list> |
|
35 #include <vector> |
|
36 #include <limits.h> |
|
37 #include <unistd.h> |
|
38 #include <stdio.h> |
|
39 #include <fcntl.h> |
|
40 |
|
41 #include "elf_symbol_table.h" |
|
42 |
|
43 #define _GNU_SOURCE |
|
44 #include <getopt.h> |
|
45 |
|
46 elf_symbol_table symtab; |
|
47 |
|
48 // Straight outta plhash.c! |
|
49 #define GOLDEN_RATIO 0x9E3779B9U |
|
50 |
|
51 struct hash<const Elf32_Sym *> |
|
52 { |
|
53 size_t operator()(const Elf32_Sym *sym) const { |
|
54 return (reinterpret_cast<size_t>(sym) >> 4) * GOLDEN_RATIO; } |
|
55 }; |
|
56 |
|
57 typedef unsigned int call_count_t; |
|
58 |
|
59 struct call_graph_arc |
|
60 { |
|
61 const Elf32_Sym *m_from; |
|
62 const Elf32_Sym *m_to; |
|
63 call_count_t m_count; |
|
64 |
|
65 call_graph_arc(const Elf32_Sym *left, const Elf32_Sym *right, call_count_t count = 0) |
|
66 : m_count(count) |
|
67 { |
|
68 assert(left != right); |
|
69 |
|
70 if (left > right) { |
|
71 m_from = left; |
|
72 m_to = right; |
|
73 } |
|
74 else { |
|
75 m_from = right; |
|
76 m_to = left; |
|
77 } |
|
78 } |
|
79 |
|
80 friend bool |
|
81 operator==(const call_graph_arc &lhs, const call_graph_arc &rhs) |
|
82 { |
|
83 return (lhs.m_from == rhs.m_from) && (lhs.m_to == rhs.m_to); |
|
84 } |
|
85 |
|
86 friend ostream & |
|
87 operator<<(ostream &out, const call_graph_arc &arc) |
|
88 { |
|
89 out << &arc << ": "; |
|
90 out.form("<(%s %s) %d>", |
|
91 symtab.get_symbol_name(arc.m_from), |
|
92 symtab.get_symbol_name(arc.m_to), |
|
93 arc.m_count); |
|
94 |
|
95 return out; |
|
96 } |
|
97 }; |
|
98 |
|
99 struct hash<call_graph_arc *> |
|
100 { |
|
101 size_t |
|
102 operator()(const call_graph_arc* arc) const |
|
103 { |
|
104 size_t result; |
|
105 result = reinterpret_cast<unsigned long>(arc->m_from); |
|
106 result ^= reinterpret_cast<unsigned long>(arc->m_to) >> 16; |
|
107 result ^= reinterpret_cast<unsigned long>(arc->m_to) << 16; |
|
108 result *= GOLDEN_RATIO; |
|
109 return result; |
|
110 } |
|
111 }; |
|
112 |
|
113 struct equal_to<call_graph_arc *> |
|
114 { |
|
115 bool |
|
116 operator()(const call_graph_arc* lhs, const call_graph_arc *rhs) const |
|
117 { |
|
118 return *lhs == *rhs; |
|
119 } |
|
120 }; |
|
121 |
|
122 typedef hash_set<call_graph_arc *> arc_container_t; |
|
123 arc_container_t arcs; |
|
124 |
|
125 typedef multimap<const Elf32_Sym *, call_graph_arc *> arc_map_t; |
|
126 arc_map_t from_map; |
|
127 arc_map_t to_map; |
|
128 |
|
129 struct less_call_graph_arc_count |
|
130 { |
|
131 bool |
|
132 operator()(const call_graph_arc *lhs, const call_graph_arc *rhs) const |
|
133 { |
|
134 if (lhs->m_count == rhs->m_count) { |
|
135 if (lhs->m_from == rhs->m_from) |
|
136 return lhs->m_to > rhs->m_to; |
|
137 |
|
138 return lhs->m_from > rhs->m_from; |
|
139 } |
|
140 return lhs->m_count > rhs->m_count; |
|
141 } |
|
142 }; |
|
143 |
|
144 typedef set<call_graph_arc *, less_call_graph_arc_count> arc_count_index_t; |
|
145 |
|
146 bool opt_debug = false; |
|
147 const char *opt_out = "order.out"; |
|
148 int opt_tick = 0; |
|
149 bool opt_verbose = false; |
|
150 int opt_window = 16; |
|
151 |
|
152 static struct option long_options[] = { |
|
153 { "debug", no_argument, 0, 'd' }, |
|
154 { "exe", required_argument, 0, 'e' }, |
|
155 { "help", no_argument, 0, '?' }, |
|
156 { "out", required_argument, 0, 'o' }, |
|
157 { "tick", optional_argument, 0, 't' }, |
|
158 { "verbose", no_argument, 0, 'v' }, |
|
159 { "window", required_argument, 0, 'w' }, |
|
160 { 0, 0, 0, 0 } |
|
161 }; |
|
162 |
|
163 //---------------------------------------------------------------------- |
|
164 |
|
165 static void |
|
166 usage(const char *name) |
|
167 { |
|
168 cerr << "usage: " << name << " [options] [<file> ...]" << endl; |
|
169 cerr << " Options:" << endl; |
|
170 cerr << " --debug, -d" << endl; |
|
171 cerr << " Print lots of verbose debugging cruft." << endl; |
|
172 cerr << " --exe=<image>, -e <image> (required)" << endl; |
|
173 cerr << " Specify the executable image from which to read symbol information." << endl; |
|
174 cerr << " --help, -?" << endl; |
|
175 cerr << " Print this message and exit." << endl; |
|
176 cerr << " --out=<file>, -o <file>" << endl; |
|
177 cerr << " Specify the output file to which to dump the symbol ordering of the" << endl; |
|
178 cerr << " best individual (default is `order.out')." << endl; |
|
179 cerr << " --tick[=<num>], -t [<num>]" << endl; |
|
180 cerr << " When reading address data, print a dot to stderr every <num>th" << endl; |
|
181 cerr << " address processed from the call trace. If specified with no argument," << endl; |
|
182 cerr << " a dot will be printed for every million addresses processed." << endl; |
|
183 cerr << " --verbose, -v" << endl; |
|
184 cerr << " Issue progress messages to stderr." << endl; |
|
185 cerr << " --window=<num>, -w <num>" << endl; |
|
186 cerr << " Use a sliding window instead of pagination to score orderings." << endl; |
|
187 } |
|
188 |
|
189 /** |
|
190 * Using the symbol table, map a stream of address references into a |
|
191 * callgraph. |
|
192 */ |
|
193 static void |
|
194 map_addrs(int fd) |
|
195 { |
|
196 long long total_calls = 0; |
|
197 typedef list<const Elf32_Sym *> window_t; |
|
198 |
|
199 window_t window; |
|
200 int window_size = 0; |
|
201 unsigned int buf[128]; |
|
202 ssize_t cb; |
|
203 |
|
204 while ((cb = read(fd, buf, sizeof buf)) > 0) { |
|
205 if (cb % sizeof buf[0]) |
|
206 fprintf(stderr, "unaligned read\n"); |
|
207 |
|
208 unsigned int *addr = buf; |
|
209 unsigned int *limit = buf + (cb / 4); |
|
210 |
|
211 for (; addr < limit; ++addr) { |
|
212 const Elf32_Sym *sym = symtab.lookup(*addr); |
|
213 if (! sym) |
|
214 continue; |
|
215 |
|
216 ++total_calls; |
|
217 |
|
218 window.push_front(sym); |
|
219 |
|
220 if (window_size >= opt_window) |
|
221 window.pop_back(); |
|
222 else |
|
223 ++window_size; |
|
224 |
|
225 window_t::const_iterator i = window.begin(); |
|
226 window_t::const_iterator end = window.end(); |
|
227 for (; i != end; ++i) { |
|
228 if (sym != *i) { |
|
229 call_graph_arc *arc; |
|
230 call_graph_arc key(sym, *i); |
|
231 arc_container_t::iterator iter = arcs.find(&key); |
|
232 if (iter == arcs.end()) { |
|
233 arc = new call_graph_arc(sym, *i); |
|
234 arcs.insert(arc); |
|
235 from_map.insert(arc_map_t::value_type(arc->m_from, arc)); |
|
236 to_map.insert(arc_map_t::value_type(arc->m_to, arc)); |
|
237 } |
|
238 else |
|
239 arc = const_cast<call_graph_arc *>(*iter); |
|
240 |
|
241 ++(arc->m_count); |
|
242 } |
|
243 } |
|
244 |
|
245 if (opt_verbose && opt_tick && (total_calls % opt_tick == 0)) { |
|
246 cerr << "."; |
|
247 flush(cerr); |
|
248 } |
|
249 } |
|
250 } |
|
251 |
|
252 if (opt_verbose) { |
|
253 if (opt_tick) |
|
254 cerr << endl; |
|
255 |
|
256 cerr << "Total calls: " << total_calls << endl; |
|
257 } |
|
258 } |
|
259 |
|
260 static void |
|
261 remove_from(arc_map_t& map, const Elf32_Sym *sym, const call_graph_arc *arc) |
|
262 { |
|
263 pair<arc_map_t::iterator, arc_map_t::iterator> p |
|
264 = map.equal_range(sym); |
|
265 |
|
266 for (arc_map_t::iterator i = p.first; i != p.second; ++i) { |
|
267 if (i->second == arc) { |
|
268 map.erase(i); |
|
269 break; |
|
270 } |
|
271 } |
|
272 } |
|
273 |
|
274 /** |
|
275 * The main program |
|
276 */ |
|
277 int |
|
278 main(int argc, char *argv[]) |
|
279 { |
|
280 const char *opt_exe = 0; |
|
281 |
|
282 int c; |
|
283 while (1) { |
|
284 int option_index = 0; |
|
285 c = getopt_long(argc, argv, "?de:o:t:vw:", long_options, &option_index); |
|
286 |
|
287 if (c < 0) |
|
288 break; |
|
289 |
|
290 switch (c) { |
|
291 case '?': |
|
292 usage(argv[0]); |
|
293 return 0; |
|
294 |
|
295 case 'd': |
|
296 opt_debug = true; |
|
297 break; |
|
298 |
|
299 case 'e': |
|
300 opt_exe = optarg; |
|
301 break; |
|
302 |
|
303 case 'o': |
|
304 opt_out = optarg; |
|
305 break; |
|
306 |
|
307 case 't': |
|
308 opt_tick = optarg ? atoi(optarg) : 1000000; |
|
309 break; |
|
310 |
|
311 case 'v': |
|
312 opt_verbose = true; |
|
313 break; |
|
314 |
|
315 case 'w': |
|
316 opt_window = atoi(optarg); |
|
317 if (opt_window <= 0) { |
|
318 cerr << "invalid window size: " << opt_window << endl; |
|
319 return 1; |
|
320 } |
|
321 break; |
|
322 |
|
323 default: |
|
324 usage(argv[0]); |
|
325 return 1; |
|
326 } |
|
327 } |
|
328 |
|
329 // Make sure an image was specified |
|
330 if (! opt_exe) { |
|
331 usage(argv[0]); |
|
332 return 1; |
|
333 } |
|
334 |
|
335 // Read the sym table. |
|
336 symtab.init(opt_exe); |
|
337 |
|
338 // Process addresses to construct the weighted call graph. |
|
339 if (optind >= argc) { |
|
340 map_addrs(STDIN_FILENO); |
|
341 } |
|
342 else { |
|
343 do { |
|
344 int fd = open(argv[optind], O_RDONLY); |
|
345 if (fd < 0) { |
|
346 perror(argv[optind]); |
|
347 return 1; |
|
348 } |
|
349 |
|
350 map_addrs(fd); |
|
351 close(fd); |
|
352 } while (++optind < argc); |
|
353 } |
|
354 |
|
355 // Emit the ordering. |
|
356 ofstream out(opt_out); |
|
357 |
|
358 // Collect all of the arcs into a sorted container, with arcs |
|
359 // having the largest weight at the front. |
|
360 arc_count_index_t sorted_arcs(arcs.begin(), arcs.end()); |
|
361 |
|
362 while (sorted_arcs.size()) { |
|
363 if (opt_debug) { |
|
364 cerr << "==========================================" << endl << endl; |
|
365 |
|
366 cerr << "Sorted Arcs:" << endl; |
|
367 for (arc_count_index_t::const_iterator iter = sorted_arcs.begin(); |
|
368 iter != sorted_arcs.end(); |
|
369 ++iter) { |
|
370 cerr << **iter << endl; |
|
371 } |
|
372 |
|
373 cerr << endl << "Arc Container:" << endl; |
|
374 for (arc_container_t::const_iterator iter = arcs.begin(); |
|
375 iter != arcs.end(); |
|
376 ++iter) { |
|
377 cerr << **iter << endl; |
|
378 } |
|
379 |
|
380 cerr << endl << "From Map:" << endl; |
|
381 for (arc_map_t::const_iterator iter = from_map.begin(); |
|
382 iter != from_map.end(); |
|
383 ++iter) { |
|
384 cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; |
|
385 } |
|
386 |
|
387 cerr << endl << "To Map:" << endl; |
|
388 for (arc_map_t::const_iterator iter = to_map.begin(); |
|
389 iter != to_map.end(); |
|
390 ++iter) { |
|
391 cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; |
|
392 } |
|
393 |
|
394 cerr << endl; |
|
395 } |
|
396 |
|
397 // The first arc in the sorted set will have the largest |
|
398 // weight. Pull it out, and emit its sink. |
|
399 arc_count_index_t::iterator max = sorted_arcs.begin(); |
|
400 call_graph_arc *arc = const_cast<call_graph_arc *>(*max); |
|
401 |
|
402 sorted_arcs.erase(max); |
|
403 |
|
404 if (opt_debug) |
|
405 cerr << "pulling " << *arc << endl; |
|
406 |
|
407 arcs.erase(arc); |
|
408 remove_from(from_map, arc->m_from, arc); |
|
409 remove_from(to_map, arc->m_to, arc); |
|
410 |
|
411 out << symtab.get_symbol_name(arc->m_to) << endl; |
|
412 |
|
413 // Merge arc->m_to into arc->m_from. First, modify anything |
|
414 // that used to point to arc->m_to to point to arc->m_from. |
|
415 typedef list<call_graph_arc *> arc_list_t; |
|
416 arc_list_t map_add_queue; |
|
417 |
|
418 pair<arc_map_t::iterator, arc_map_t::iterator> p; |
|
419 |
|
420 // Find all the arcs that point to arc->m_to. |
|
421 p = to_map.equal_range(arc->m_to); |
|
422 for (arc_map_t::iterator i = p.first; i != p.second; ++i) { |
|
423 // Remove the arc that points to arc->m_to (`doomed') from |
|
424 // all of our indicies. |
|
425 call_graph_arc *doomed = i->second; |
|
426 const Elf32_Sym *source = doomed->m_from; |
|
427 |
|
428 sorted_arcs.erase(doomed); |
|
429 arcs.erase(doomed); |
|
430 remove_from(from_map, source, doomed); |
|
431 // N.B. that `doomed' will be removed from the `to_map' |
|
432 // after the loop completes. |
|
433 |
|
434 // Create a new (or locate an existing) arc whose source |
|
435 // was the doomed arc's source, and whose sink is |
|
436 // arc->m_from (i.e., the node that subsumed arc->m_to). |
|
437 call_graph_arc *merge; |
|
438 call_graph_arc key = call_graph_arc(source, arc->m_from); |
|
439 arc_container_t::iterator iter = arcs.find(&key); |
|
440 |
|
441 if (iter == arcs.end()) { |
|
442 merge = new call_graph_arc(source, arc->m_from); |
|
443 |
|
444 arcs.insert(merge); |
|
445 from_map.insert(arc_map_t::value_type(merge->m_from, merge)); |
|
446 map_add_queue.push_back(merge); |
|
447 } |
|
448 else { |
|
449 // We found an existing arc: temporarily remove it |
|
450 // from the sorted index. |
|
451 merge = const_cast<call_graph_arc *>(*iter); |
|
452 sorted_arcs.erase(merge); |
|
453 } |
|
454 |
|
455 // Include the doomed arc's weight in the merged arc, and |
|
456 // (re-)insert it into the sorted index. |
|
457 merge->m_count += doomed->m_count; |
|
458 sorted_arcs.insert(merge); |
|
459 |
|
460 delete doomed; |
|
461 } |
|
462 |
|
463 to_map.erase(p.first, p.second); |
|
464 |
|
465 for (arc_list_t::iterator merge = map_add_queue.begin(); |
|
466 merge != map_add_queue.end(); |
|
467 map_add_queue.erase(merge++)) { |
|
468 to_map.insert(arc_map_t::value_type((*merge)->m_to, *merge)); |
|
469 } |
|
470 |
|
471 // Now, roll anything that arc->m_to used to point at into |
|
472 // what arc->m_from now points at. |
|
473 |
|
474 // Collect all of the arcs that originate at arc->m_to. |
|
475 p = from_map.equal_range(arc->m_to); |
|
476 for (arc_map_t::iterator i = p.first; i != p.second; ++i) { |
|
477 // Remove the arc originating from arc->m_to (`doomed') |
|
478 // from all of our indicies. |
|
479 call_graph_arc *doomed = i->second; |
|
480 const Elf32_Sym *sink = doomed->m_to; |
|
481 |
|
482 // There shouldn't be any self-referential arcs. |
|
483 assert(sink != arc->m_to); |
|
484 |
|
485 sorted_arcs.erase(doomed); |
|
486 arcs.erase(doomed); |
|
487 remove_from(to_map, sink, doomed); |
|
488 // N.B. that `doomed' will be removed from the `from_map' |
|
489 // once the loop completes. |
|
490 |
|
491 // Create a new (or locate an existing) arc whose source |
|
492 // is arc->m_from and whose sink was the removed arc's |
|
493 // sink. |
|
494 call_graph_arc *merge; |
|
495 call_graph_arc key = call_graph_arc(arc->m_from, sink); |
|
496 arc_container_t::iterator iter = arcs.find(&key); |
|
497 |
|
498 if (iter == arcs.end()) { |
|
499 merge = new call_graph_arc(arc->m_from, sink); |
|
500 |
|
501 arcs.insert(merge); |
|
502 |
|
503 map_add_queue.push_back(merge); |
|
504 to_map.insert(arc_map_t::value_type(merge->m_to, merge)); |
|
505 } |
|
506 else { |
|
507 // We found an existing arc: temporarily remove it |
|
508 // from the sorted index. |
|
509 merge = const_cast<call_graph_arc *>(*iter); |
|
510 sorted_arcs.erase(merge); |
|
511 } |
|
512 |
|
513 // Include the doomed arc's weight in the merged arc, and |
|
514 // (re-)insert it into the sorted index. |
|
515 merge->m_count += doomed->m_count; |
|
516 sorted_arcs.insert(merge); |
|
517 |
|
518 delete doomed; |
|
519 } |
|
520 |
|
521 from_map.erase(p.first, p.second); |
|
522 |
|
523 for (arc_list_t::iterator merge = map_add_queue.begin(); |
|
524 merge != map_add_queue.end(); |
|
525 map_add_queue.erase(merge++)) { |
|
526 from_map.insert(arc_map_t::value_type((*merge)->m_from, *merge)); |
|
527 } |
|
528 } |
|
529 |
|
530 out.close(); |
|
531 return 0; |
|
532 } |