michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: michael@0: A program that computes a function ordering for an executable based michael@0: on runtime profile information. michael@0: michael@0: This program is directly based on work done by Roger Chickering michael@0: in michael@0: michael@0: michael@0: michael@0: to implement Nat Friedman's `grope', michael@0: michael@0: _GNU Rope - A Subroutine Position Optimizer_ michael@0: michael@0: michael@0: Specifically, it implements the procedure re-ordering algorithm michael@0: described in: michael@0: michael@0: K. Pettis and R. Hansen. ``Profile-Guided Core Position.'' In michael@0: _Proceedings of the Int. Conference on Programming Language Design michael@0: and Implementation_, pages 16-27, June 1990. michael@0: michael@0: */ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "elf_symbol_table.h" michael@0: michael@0: #define _GNU_SOURCE michael@0: #include michael@0: michael@0: elf_symbol_table symtab; michael@0: michael@0: // Straight outta plhash.c! michael@0: #define GOLDEN_RATIO 0x9E3779B9U michael@0: michael@0: struct hash michael@0: { michael@0: size_t operator()(const Elf32_Sym *sym) const { michael@0: return (reinterpret_cast(sym) >> 4) * GOLDEN_RATIO; } michael@0: }; michael@0: michael@0: typedef unsigned int call_count_t; michael@0: michael@0: struct call_graph_arc michael@0: { michael@0: const Elf32_Sym *m_from; michael@0: const Elf32_Sym *m_to; michael@0: call_count_t m_count; michael@0: michael@0: call_graph_arc(const Elf32_Sym *left, const Elf32_Sym *right, call_count_t count = 0) michael@0: : m_count(count) michael@0: { michael@0: assert(left != right); michael@0: michael@0: if (left > right) { michael@0: m_from = left; michael@0: m_to = right; michael@0: } michael@0: else { michael@0: m_from = right; michael@0: m_to = left; michael@0: } michael@0: } michael@0: michael@0: friend bool michael@0: operator==(const call_graph_arc &lhs, const call_graph_arc &rhs) michael@0: { michael@0: return (lhs.m_from == rhs.m_from) && (lhs.m_to == rhs.m_to); michael@0: } michael@0: michael@0: friend ostream & michael@0: operator<<(ostream &out, const call_graph_arc &arc) michael@0: { michael@0: out << &arc << ": "; michael@0: out.form("<(%s %s) %d>", michael@0: symtab.get_symbol_name(arc.m_from), michael@0: symtab.get_symbol_name(arc.m_to), michael@0: arc.m_count); michael@0: michael@0: return out; michael@0: } michael@0: }; michael@0: michael@0: struct hash michael@0: { michael@0: size_t michael@0: operator()(const call_graph_arc* arc) const michael@0: { michael@0: size_t result; michael@0: result = reinterpret_cast(arc->m_from); michael@0: result ^= reinterpret_cast(arc->m_to) >> 16; michael@0: result ^= reinterpret_cast(arc->m_to) << 16; michael@0: result *= GOLDEN_RATIO; michael@0: return result; michael@0: } michael@0: }; michael@0: michael@0: struct equal_to michael@0: { michael@0: bool michael@0: operator()(const call_graph_arc* lhs, const call_graph_arc *rhs) const michael@0: { michael@0: return *lhs == *rhs; michael@0: } michael@0: }; michael@0: michael@0: typedef hash_set arc_container_t; michael@0: arc_container_t arcs; michael@0: michael@0: typedef multimap arc_map_t; michael@0: arc_map_t from_map; michael@0: arc_map_t to_map; michael@0: michael@0: struct less_call_graph_arc_count michael@0: { michael@0: bool michael@0: operator()(const call_graph_arc *lhs, const call_graph_arc *rhs) const michael@0: { michael@0: if (lhs->m_count == rhs->m_count) { michael@0: if (lhs->m_from == rhs->m_from) michael@0: return lhs->m_to > rhs->m_to; michael@0: michael@0: return lhs->m_from > rhs->m_from; michael@0: } michael@0: return lhs->m_count > rhs->m_count; michael@0: } michael@0: }; michael@0: michael@0: typedef set arc_count_index_t; michael@0: michael@0: bool opt_debug = false; michael@0: const char *opt_out = "order.out"; michael@0: int opt_tick = 0; michael@0: bool opt_verbose = false; michael@0: int opt_window = 16; michael@0: michael@0: static struct option long_options[] = { michael@0: { "debug", no_argument, 0, 'd' }, michael@0: { "exe", required_argument, 0, 'e' }, michael@0: { "help", no_argument, 0, '?' }, michael@0: { "out", required_argument, 0, 'o' }, michael@0: { "tick", optional_argument, 0, 't' }, michael@0: { "verbose", no_argument, 0, 'v' }, michael@0: { "window", required_argument, 0, 'w' }, michael@0: { 0, 0, 0, 0 } michael@0: }; michael@0: michael@0: //---------------------------------------------------------------------- michael@0: michael@0: static void michael@0: usage(const char *name) michael@0: { michael@0: cerr << "usage: " << name << " [options] [ ...]" << endl; michael@0: cerr << " Options:" << endl; michael@0: cerr << " --debug, -d" << endl; michael@0: cerr << " Print lots of verbose debugging cruft." << endl; michael@0: cerr << " --exe=, -e (required)" << endl; michael@0: cerr << " Specify the executable image from which to read symbol information." << endl; michael@0: cerr << " --help, -?" << endl; michael@0: cerr << " Print this message and exit." << endl; michael@0: cerr << " --out=, -o " << endl; michael@0: cerr << " Specify the output file to which to dump the symbol ordering of the" << endl; michael@0: cerr << " best individual (default is `order.out')." << endl; michael@0: cerr << " --tick[=], -t []" << endl; michael@0: cerr << " When reading address data, print a dot to stderr every th" << endl; michael@0: cerr << " address processed from the call trace. If specified with no argument," << endl; michael@0: cerr << " a dot will be printed for every million addresses processed." << endl; michael@0: cerr << " --verbose, -v" << endl; michael@0: cerr << " Issue progress messages to stderr." << endl; michael@0: cerr << " --window=, -w " << endl; michael@0: cerr << " Use a sliding window instead of pagination to score orderings." << endl; michael@0: } michael@0: michael@0: /** michael@0: * Using the symbol table, map a stream of address references into a michael@0: * callgraph. michael@0: */ michael@0: static void michael@0: map_addrs(int fd) michael@0: { michael@0: long long total_calls = 0; michael@0: typedef list window_t; michael@0: michael@0: window_t window; michael@0: int window_size = 0; michael@0: unsigned int buf[128]; michael@0: ssize_t cb; michael@0: michael@0: while ((cb = read(fd, buf, sizeof buf)) > 0) { michael@0: if (cb % sizeof buf[0]) michael@0: fprintf(stderr, "unaligned read\n"); michael@0: michael@0: unsigned int *addr = buf; michael@0: unsigned int *limit = buf + (cb / 4); michael@0: michael@0: for (; addr < limit; ++addr) { michael@0: const Elf32_Sym *sym = symtab.lookup(*addr); michael@0: if (! sym) michael@0: continue; michael@0: michael@0: ++total_calls; michael@0: michael@0: window.push_front(sym); michael@0: michael@0: if (window_size >= opt_window) michael@0: window.pop_back(); michael@0: else michael@0: ++window_size; michael@0: michael@0: window_t::const_iterator i = window.begin(); michael@0: window_t::const_iterator end = window.end(); michael@0: for (; i != end; ++i) { michael@0: if (sym != *i) { michael@0: call_graph_arc *arc; michael@0: call_graph_arc key(sym, *i); michael@0: arc_container_t::iterator iter = arcs.find(&key); michael@0: if (iter == arcs.end()) { michael@0: arc = new call_graph_arc(sym, *i); michael@0: arcs.insert(arc); michael@0: from_map.insert(arc_map_t::value_type(arc->m_from, arc)); michael@0: to_map.insert(arc_map_t::value_type(arc->m_to, arc)); michael@0: } michael@0: else michael@0: arc = const_cast(*iter); michael@0: michael@0: ++(arc->m_count); michael@0: } michael@0: } michael@0: michael@0: if (opt_verbose && opt_tick && (total_calls % opt_tick == 0)) { michael@0: cerr << "."; michael@0: flush(cerr); michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (opt_verbose) { michael@0: if (opt_tick) michael@0: cerr << endl; michael@0: michael@0: cerr << "Total calls: " << total_calls << endl; michael@0: } michael@0: } michael@0: michael@0: static void michael@0: remove_from(arc_map_t& map, const Elf32_Sym *sym, const call_graph_arc *arc) michael@0: { michael@0: pair p michael@0: = map.equal_range(sym); michael@0: michael@0: for (arc_map_t::iterator i = p.first; i != p.second; ++i) { michael@0: if (i->second == arc) { michael@0: map.erase(i); michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * The main program michael@0: */ michael@0: int michael@0: main(int argc, char *argv[]) michael@0: { michael@0: const char *opt_exe = 0; michael@0: michael@0: int c; michael@0: while (1) { michael@0: int option_index = 0; michael@0: c = getopt_long(argc, argv, "?de:o:t:vw:", long_options, &option_index); michael@0: michael@0: if (c < 0) michael@0: break; michael@0: michael@0: switch (c) { michael@0: case '?': michael@0: usage(argv[0]); michael@0: return 0; michael@0: michael@0: case 'd': michael@0: opt_debug = true; michael@0: break; michael@0: michael@0: case 'e': michael@0: opt_exe = optarg; michael@0: break; michael@0: michael@0: case 'o': michael@0: opt_out = optarg; michael@0: break; michael@0: michael@0: case 't': michael@0: opt_tick = optarg ? atoi(optarg) : 1000000; michael@0: break; michael@0: michael@0: case 'v': michael@0: opt_verbose = true; michael@0: break; michael@0: michael@0: case 'w': michael@0: opt_window = atoi(optarg); michael@0: if (opt_window <= 0) { michael@0: cerr << "invalid window size: " << opt_window << endl; michael@0: return 1; michael@0: } michael@0: break; michael@0: michael@0: default: michael@0: usage(argv[0]); michael@0: return 1; michael@0: } michael@0: } michael@0: michael@0: // Make sure an image was specified michael@0: if (! opt_exe) { michael@0: usage(argv[0]); michael@0: return 1; michael@0: } michael@0: michael@0: // Read the sym table. michael@0: symtab.init(opt_exe); michael@0: michael@0: // Process addresses to construct the weighted call graph. michael@0: if (optind >= argc) { michael@0: map_addrs(STDIN_FILENO); michael@0: } michael@0: else { michael@0: do { michael@0: int fd = open(argv[optind], O_RDONLY); michael@0: if (fd < 0) { michael@0: perror(argv[optind]); michael@0: return 1; michael@0: } michael@0: michael@0: map_addrs(fd); michael@0: close(fd); michael@0: } while (++optind < argc); michael@0: } michael@0: michael@0: // Emit the ordering. michael@0: ofstream out(opt_out); michael@0: michael@0: // Collect all of the arcs into a sorted container, with arcs michael@0: // having the largest weight at the front. michael@0: arc_count_index_t sorted_arcs(arcs.begin(), arcs.end()); michael@0: michael@0: while (sorted_arcs.size()) { michael@0: if (opt_debug) { michael@0: cerr << "==========================================" << endl << endl; michael@0: michael@0: cerr << "Sorted Arcs:" << endl; michael@0: for (arc_count_index_t::const_iterator iter = sorted_arcs.begin(); michael@0: iter != sorted_arcs.end(); michael@0: ++iter) { michael@0: cerr << **iter << endl; michael@0: } michael@0: michael@0: cerr << endl << "Arc Container:" << endl; michael@0: for (arc_container_t::const_iterator iter = arcs.begin(); michael@0: iter != arcs.end(); michael@0: ++iter) { michael@0: cerr << **iter << endl; michael@0: } michael@0: michael@0: cerr << endl << "From Map:" << endl; michael@0: for (arc_map_t::const_iterator iter = from_map.begin(); michael@0: iter != from_map.end(); michael@0: ++iter) { michael@0: cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; michael@0: } michael@0: michael@0: cerr << endl << "To Map:" << endl; michael@0: for (arc_map_t::const_iterator iter = to_map.begin(); michael@0: iter != to_map.end(); michael@0: ++iter) { michael@0: cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; michael@0: } michael@0: michael@0: cerr << endl; michael@0: } michael@0: michael@0: // The first arc in the sorted set will have the largest michael@0: // weight. Pull it out, and emit its sink. michael@0: arc_count_index_t::iterator max = sorted_arcs.begin(); michael@0: call_graph_arc *arc = const_cast(*max); michael@0: michael@0: sorted_arcs.erase(max); michael@0: michael@0: if (opt_debug) michael@0: cerr << "pulling " << *arc << endl; michael@0: michael@0: arcs.erase(arc); michael@0: remove_from(from_map, arc->m_from, arc); michael@0: remove_from(to_map, arc->m_to, arc); michael@0: michael@0: out << symtab.get_symbol_name(arc->m_to) << endl; michael@0: michael@0: // Merge arc->m_to into arc->m_from. First, modify anything michael@0: // that used to point to arc->m_to to point to arc->m_from. michael@0: typedef list arc_list_t; michael@0: arc_list_t map_add_queue; michael@0: michael@0: pair p; michael@0: michael@0: // Find all the arcs that point to arc->m_to. michael@0: p = to_map.equal_range(arc->m_to); michael@0: for (arc_map_t::iterator i = p.first; i != p.second; ++i) { michael@0: // Remove the arc that points to arc->m_to (`doomed') from michael@0: // all of our indicies. michael@0: call_graph_arc *doomed = i->second; michael@0: const Elf32_Sym *source = doomed->m_from; michael@0: michael@0: sorted_arcs.erase(doomed); michael@0: arcs.erase(doomed); michael@0: remove_from(from_map, source, doomed); michael@0: // N.B. that `doomed' will be removed from the `to_map' michael@0: // after the loop completes. michael@0: michael@0: // Create a new (or locate an existing) arc whose source michael@0: // was the doomed arc's source, and whose sink is michael@0: // arc->m_from (i.e., the node that subsumed arc->m_to). michael@0: call_graph_arc *merge; michael@0: call_graph_arc key = call_graph_arc(source, arc->m_from); michael@0: arc_container_t::iterator iter = arcs.find(&key); michael@0: michael@0: if (iter == arcs.end()) { michael@0: merge = new call_graph_arc(source, arc->m_from); michael@0: michael@0: arcs.insert(merge); michael@0: from_map.insert(arc_map_t::value_type(merge->m_from, merge)); michael@0: map_add_queue.push_back(merge); michael@0: } michael@0: else { michael@0: // We found an existing arc: temporarily remove it michael@0: // from the sorted index. michael@0: merge = const_cast(*iter); michael@0: sorted_arcs.erase(merge); michael@0: } michael@0: michael@0: // Include the doomed arc's weight in the merged arc, and michael@0: // (re-)insert it into the sorted index. michael@0: merge->m_count += doomed->m_count; michael@0: sorted_arcs.insert(merge); michael@0: michael@0: delete doomed; michael@0: } michael@0: michael@0: to_map.erase(p.first, p.second); michael@0: michael@0: for (arc_list_t::iterator merge = map_add_queue.begin(); michael@0: merge != map_add_queue.end(); michael@0: map_add_queue.erase(merge++)) { michael@0: to_map.insert(arc_map_t::value_type((*merge)->m_to, *merge)); michael@0: } michael@0: michael@0: // Now, roll anything that arc->m_to used to point at into michael@0: // what arc->m_from now points at. michael@0: michael@0: // Collect all of the arcs that originate at arc->m_to. michael@0: p = from_map.equal_range(arc->m_to); michael@0: for (arc_map_t::iterator i = p.first; i != p.second; ++i) { michael@0: // Remove the arc originating from arc->m_to (`doomed') michael@0: // from all of our indicies. michael@0: call_graph_arc *doomed = i->second; michael@0: const Elf32_Sym *sink = doomed->m_to; michael@0: michael@0: // There shouldn't be any self-referential arcs. michael@0: assert(sink != arc->m_to); michael@0: michael@0: sorted_arcs.erase(doomed); michael@0: arcs.erase(doomed); michael@0: remove_from(to_map, sink, doomed); michael@0: // N.B. that `doomed' will be removed from the `from_map' michael@0: // once the loop completes. michael@0: michael@0: // Create a new (or locate an existing) arc whose source michael@0: // is arc->m_from and whose sink was the removed arc's michael@0: // sink. michael@0: call_graph_arc *merge; michael@0: call_graph_arc key = call_graph_arc(arc->m_from, sink); michael@0: arc_container_t::iterator iter = arcs.find(&key); michael@0: michael@0: if (iter == arcs.end()) { michael@0: merge = new call_graph_arc(arc->m_from, sink); michael@0: michael@0: arcs.insert(merge); michael@0: michael@0: map_add_queue.push_back(merge); michael@0: to_map.insert(arc_map_t::value_type(merge->m_to, merge)); michael@0: } michael@0: else { michael@0: // We found an existing arc: temporarily remove it michael@0: // from the sorted index. michael@0: merge = const_cast(*iter); michael@0: sorted_arcs.erase(merge); michael@0: } michael@0: michael@0: // Include the doomed arc's weight in the merged arc, and michael@0: // (re-)insert it into the sorted index. michael@0: merge->m_count += doomed->m_count; michael@0: sorted_arcs.insert(merge); michael@0: michael@0: delete doomed; michael@0: } michael@0: michael@0: from_map.erase(p.first, p.second); michael@0: michael@0: for (arc_list_t::iterator merge = map_add_queue.begin(); michael@0: merge != map_add_queue.end(); michael@0: map_add_queue.erase(merge++)) { michael@0: from_map.insert(arc_map_t::value_type((*merge)->m_from, *merge)); michael@0: } michael@0: } michael@0: michael@0: out.close(); michael@0: return 0; michael@0: }