1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/tools/reorder/grope.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,532 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +/* 1.9 + 1.10 + A program that computes a function ordering for an executable based 1.11 + on runtime profile information. 1.12 + 1.13 + This program is directly based on work done by Roger Chickering 1.14 + <rogc@netscape.com> in 1.15 + 1.16 + <http://bugzilla.mozilla.org/show_bug.cgi?id=65845> 1.17 + 1.18 + to implement Nat Friedman's <nat@nat.org> `grope', 1.19 + 1.20 + _GNU Rope - A Subroutine Position Optimizer_ 1.21 + <http://www.hungry.com/~shaver/grope/grope.ps> 1.22 + 1.23 + Specifically, it implements the procedure re-ordering algorithm 1.24 + described in: 1.25 + 1.26 + K. Pettis and R. Hansen. ``Profile-Guided Core Position.'' In 1.27 + _Proceedings of the Int. Conference on Programming Language Design 1.28 + and Implementation_, pages 16-27, June 1990. 1.29 + 1.30 + */ 1.31 + 1.32 +#include <assert.h> 1.33 +#include <fstream> 1.34 +#include <hash_set> 1.35 +#include <map> 1.36 +#include <set> 1.37 +#include <list> 1.38 +#include <vector> 1.39 +#include <limits.h> 1.40 +#include <unistd.h> 1.41 +#include <stdio.h> 1.42 +#include <fcntl.h> 1.43 + 1.44 +#include "elf_symbol_table.h" 1.45 + 1.46 +#define _GNU_SOURCE 1.47 +#include <getopt.h> 1.48 + 1.49 +elf_symbol_table symtab; 1.50 + 1.51 +// Straight outta plhash.c! 1.52 +#define GOLDEN_RATIO 0x9E3779B9U 1.53 + 1.54 +struct hash<const Elf32_Sym *> 1.55 +{ 1.56 + size_t operator()(const Elf32_Sym *sym) const { 1.57 + return (reinterpret_cast<size_t>(sym) >> 4) * GOLDEN_RATIO; } 1.58 +}; 1.59 + 1.60 +typedef unsigned int call_count_t; 1.61 + 1.62 +struct call_graph_arc 1.63 +{ 1.64 + const Elf32_Sym *m_from; 1.65 + const Elf32_Sym *m_to; 1.66 + call_count_t m_count; 1.67 + 1.68 + call_graph_arc(const Elf32_Sym *left, const Elf32_Sym *right, call_count_t count = 0) 1.69 + : m_count(count) 1.70 + { 1.71 + assert(left != right); 1.72 + 1.73 + if (left > right) { 1.74 + m_from = left; 1.75 + m_to = right; 1.76 + } 1.77 + else { 1.78 + m_from = right; 1.79 + m_to = left; 1.80 + } 1.81 + } 1.82 + 1.83 + friend bool 1.84 + operator==(const call_graph_arc &lhs, const call_graph_arc &rhs) 1.85 + { 1.86 + return (lhs.m_from == rhs.m_from) && (lhs.m_to == rhs.m_to); 1.87 + } 1.88 + 1.89 + friend ostream & 1.90 + operator<<(ostream &out, const call_graph_arc &arc) 1.91 + { 1.92 + out << &arc << ": "; 1.93 + out.form("<(%s %s) %d>", 1.94 + symtab.get_symbol_name(arc.m_from), 1.95 + symtab.get_symbol_name(arc.m_to), 1.96 + arc.m_count); 1.97 + 1.98 + return out; 1.99 + } 1.100 +}; 1.101 + 1.102 +struct hash<call_graph_arc *> 1.103 +{ 1.104 + size_t 1.105 + operator()(const call_graph_arc* arc) const 1.106 + { 1.107 + size_t result; 1.108 + result = reinterpret_cast<unsigned long>(arc->m_from); 1.109 + result ^= reinterpret_cast<unsigned long>(arc->m_to) >> 16; 1.110 + result ^= reinterpret_cast<unsigned long>(arc->m_to) << 16; 1.111 + result *= GOLDEN_RATIO; 1.112 + return result; 1.113 + } 1.114 +}; 1.115 + 1.116 +struct equal_to<call_graph_arc *> 1.117 +{ 1.118 + bool 1.119 + operator()(const call_graph_arc* lhs, const call_graph_arc *rhs) const 1.120 + { 1.121 + return *lhs == *rhs; 1.122 + } 1.123 +}; 1.124 + 1.125 +typedef hash_set<call_graph_arc *> arc_container_t; 1.126 +arc_container_t arcs; 1.127 + 1.128 +typedef multimap<const Elf32_Sym *, call_graph_arc *> arc_map_t; 1.129 +arc_map_t from_map; 1.130 +arc_map_t to_map; 1.131 + 1.132 +struct less_call_graph_arc_count 1.133 +{ 1.134 + bool 1.135 + operator()(const call_graph_arc *lhs, const call_graph_arc *rhs) const 1.136 + { 1.137 + if (lhs->m_count == rhs->m_count) { 1.138 + if (lhs->m_from == rhs->m_from) 1.139 + return lhs->m_to > rhs->m_to; 1.140 + 1.141 + return lhs->m_from > rhs->m_from; 1.142 + } 1.143 + return lhs->m_count > rhs->m_count; 1.144 + } 1.145 +}; 1.146 + 1.147 +typedef set<call_graph_arc *, less_call_graph_arc_count> arc_count_index_t; 1.148 + 1.149 +bool opt_debug = false; 1.150 +const char *opt_out = "order.out"; 1.151 +int opt_tick = 0; 1.152 +bool opt_verbose = false; 1.153 +int opt_window = 16; 1.154 + 1.155 +static struct option long_options[] = { 1.156 + { "debug", no_argument, 0, 'd' }, 1.157 + { "exe", required_argument, 0, 'e' }, 1.158 + { "help", no_argument, 0, '?' }, 1.159 + { "out", required_argument, 0, 'o' }, 1.160 + { "tick", optional_argument, 0, 't' }, 1.161 + { "verbose", no_argument, 0, 'v' }, 1.162 + { "window", required_argument, 0, 'w' }, 1.163 + { 0, 0, 0, 0 } 1.164 +}; 1.165 + 1.166 +//---------------------------------------------------------------------- 1.167 + 1.168 +static void 1.169 +usage(const char *name) 1.170 +{ 1.171 + cerr << "usage: " << name << " [options] [<file> ...]" << endl; 1.172 + cerr << " Options:" << endl; 1.173 + cerr << " --debug, -d" << endl; 1.174 + cerr << " Print lots of verbose debugging cruft." << endl; 1.175 + cerr << " --exe=<image>, -e <image> (required)" << endl; 1.176 + cerr << " Specify the executable image from which to read symbol information." << endl; 1.177 + cerr << " --help, -?" << endl; 1.178 + cerr << " Print this message and exit." << endl; 1.179 + cerr << " --out=<file>, -o <file>" << endl; 1.180 + cerr << " Specify the output file to which to dump the symbol ordering of the" << endl; 1.181 + cerr << " best individual (default is `order.out')." << endl; 1.182 + cerr << " --tick[=<num>], -t [<num>]" << endl; 1.183 + cerr << " When reading address data, print a dot to stderr every <num>th" << endl; 1.184 + cerr << " address processed from the call trace. If specified with no argument," << endl; 1.185 + cerr << " a dot will be printed for every million addresses processed." << endl; 1.186 + cerr << " --verbose, -v" << endl; 1.187 + cerr << " Issue progress messages to stderr." << endl; 1.188 + cerr << " --window=<num>, -w <num>" << endl; 1.189 + cerr << " Use a sliding window instead of pagination to score orderings." << endl; 1.190 +} 1.191 + 1.192 +/** 1.193 + * Using the symbol table, map a stream of address references into a 1.194 + * callgraph. 1.195 + */ 1.196 +static void 1.197 +map_addrs(int fd) 1.198 +{ 1.199 + long long total_calls = 0; 1.200 + typedef list<const Elf32_Sym *> window_t; 1.201 + 1.202 + window_t window; 1.203 + int window_size = 0; 1.204 + unsigned int buf[128]; 1.205 + ssize_t cb; 1.206 + 1.207 + while ((cb = read(fd, buf, sizeof buf)) > 0) { 1.208 + if (cb % sizeof buf[0]) 1.209 + fprintf(stderr, "unaligned read\n"); 1.210 + 1.211 + unsigned int *addr = buf; 1.212 + unsigned int *limit = buf + (cb / 4); 1.213 + 1.214 + for (; addr < limit; ++addr) { 1.215 + const Elf32_Sym *sym = symtab.lookup(*addr); 1.216 + if (! sym) 1.217 + continue; 1.218 + 1.219 + ++total_calls; 1.220 + 1.221 + window.push_front(sym); 1.222 + 1.223 + if (window_size >= opt_window) 1.224 + window.pop_back(); 1.225 + else 1.226 + ++window_size; 1.227 + 1.228 + window_t::const_iterator i = window.begin(); 1.229 + window_t::const_iterator end = window.end(); 1.230 + for (; i != end; ++i) { 1.231 + if (sym != *i) { 1.232 + call_graph_arc *arc; 1.233 + call_graph_arc key(sym, *i); 1.234 + arc_container_t::iterator iter = arcs.find(&key); 1.235 + if (iter == arcs.end()) { 1.236 + arc = new call_graph_arc(sym, *i); 1.237 + arcs.insert(arc); 1.238 + from_map.insert(arc_map_t::value_type(arc->m_from, arc)); 1.239 + to_map.insert(arc_map_t::value_type(arc->m_to, arc)); 1.240 + } 1.241 + else 1.242 + arc = const_cast<call_graph_arc *>(*iter); 1.243 + 1.244 + ++(arc->m_count); 1.245 + } 1.246 + } 1.247 + 1.248 + if (opt_verbose && opt_tick && (total_calls % opt_tick == 0)) { 1.249 + cerr << "."; 1.250 + flush(cerr); 1.251 + } 1.252 + } 1.253 + } 1.254 + 1.255 + if (opt_verbose) { 1.256 + if (opt_tick) 1.257 + cerr << endl; 1.258 + 1.259 + cerr << "Total calls: " << total_calls << endl; 1.260 + } 1.261 +} 1.262 + 1.263 +static void 1.264 +remove_from(arc_map_t& map, const Elf32_Sym *sym, const call_graph_arc *arc) 1.265 +{ 1.266 + pair<arc_map_t::iterator, arc_map_t::iterator> p 1.267 + = map.equal_range(sym); 1.268 + 1.269 + for (arc_map_t::iterator i = p.first; i != p.second; ++i) { 1.270 + if (i->second == arc) { 1.271 + map.erase(i); 1.272 + break; 1.273 + } 1.274 + } 1.275 +} 1.276 + 1.277 +/** 1.278 + * The main program 1.279 + */ 1.280 +int 1.281 +main(int argc, char *argv[]) 1.282 +{ 1.283 + const char *opt_exe = 0; 1.284 + 1.285 + int c; 1.286 + while (1) { 1.287 + int option_index = 0; 1.288 + c = getopt_long(argc, argv, "?de:o:t:vw:", long_options, &option_index); 1.289 + 1.290 + if (c < 0) 1.291 + break; 1.292 + 1.293 + switch (c) { 1.294 + case '?': 1.295 + usage(argv[0]); 1.296 + return 0; 1.297 + 1.298 + case 'd': 1.299 + opt_debug = true; 1.300 + break; 1.301 + 1.302 + case 'e': 1.303 + opt_exe = optarg; 1.304 + break; 1.305 + 1.306 + case 'o': 1.307 + opt_out = optarg; 1.308 + break; 1.309 + 1.310 + case 't': 1.311 + opt_tick = optarg ? atoi(optarg) : 1000000; 1.312 + break; 1.313 + 1.314 + case 'v': 1.315 + opt_verbose = true; 1.316 + break; 1.317 + 1.318 + case 'w': 1.319 + opt_window = atoi(optarg); 1.320 + if (opt_window <= 0) { 1.321 + cerr << "invalid window size: " << opt_window << endl; 1.322 + return 1; 1.323 + } 1.324 + break; 1.325 + 1.326 + default: 1.327 + usage(argv[0]); 1.328 + return 1; 1.329 + } 1.330 + } 1.331 + 1.332 + // Make sure an image was specified 1.333 + if (! opt_exe) { 1.334 + usage(argv[0]); 1.335 + return 1; 1.336 + } 1.337 + 1.338 + // Read the sym table. 1.339 + symtab.init(opt_exe); 1.340 + 1.341 + // Process addresses to construct the weighted call graph. 1.342 + if (optind >= argc) { 1.343 + map_addrs(STDIN_FILENO); 1.344 + } 1.345 + else { 1.346 + do { 1.347 + int fd = open(argv[optind], O_RDONLY); 1.348 + if (fd < 0) { 1.349 + perror(argv[optind]); 1.350 + return 1; 1.351 + } 1.352 + 1.353 + map_addrs(fd); 1.354 + close(fd); 1.355 + } while (++optind < argc); 1.356 + } 1.357 + 1.358 + // Emit the ordering. 1.359 + ofstream out(opt_out); 1.360 + 1.361 + // Collect all of the arcs into a sorted container, with arcs 1.362 + // having the largest weight at the front. 1.363 + arc_count_index_t sorted_arcs(arcs.begin(), arcs.end()); 1.364 + 1.365 + while (sorted_arcs.size()) { 1.366 + if (opt_debug) { 1.367 + cerr << "==========================================" << endl << endl; 1.368 + 1.369 + cerr << "Sorted Arcs:" << endl; 1.370 + for (arc_count_index_t::const_iterator iter = sorted_arcs.begin(); 1.371 + iter != sorted_arcs.end(); 1.372 + ++iter) { 1.373 + cerr << **iter << endl; 1.374 + } 1.375 + 1.376 + cerr << endl << "Arc Container:" << endl; 1.377 + for (arc_container_t::const_iterator iter = arcs.begin(); 1.378 + iter != arcs.end(); 1.379 + ++iter) { 1.380 + cerr << **iter << endl; 1.381 + } 1.382 + 1.383 + cerr << endl << "From Map:" << endl; 1.384 + for (arc_map_t::const_iterator iter = from_map.begin(); 1.385 + iter != from_map.end(); 1.386 + ++iter) { 1.387 + cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; 1.388 + } 1.389 + 1.390 + cerr << endl << "To Map:" << endl; 1.391 + for (arc_map_t::const_iterator iter = to_map.begin(); 1.392 + iter != to_map.end(); 1.393 + ++iter) { 1.394 + cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl; 1.395 + } 1.396 + 1.397 + cerr << endl; 1.398 + } 1.399 + 1.400 + // The first arc in the sorted set will have the largest 1.401 + // weight. Pull it out, and emit its sink. 1.402 + arc_count_index_t::iterator max = sorted_arcs.begin(); 1.403 + call_graph_arc *arc = const_cast<call_graph_arc *>(*max); 1.404 + 1.405 + sorted_arcs.erase(max); 1.406 + 1.407 + if (opt_debug) 1.408 + cerr << "pulling " << *arc << endl; 1.409 + 1.410 + arcs.erase(arc); 1.411 + remove_from(from_map, arc->m_from, arc); 1.412 + remove_from(to_map, arc->m_to, arc); 1.413 + 1.414 + out << symtab.get_symbol_name(arc->m_to) << endl; 1.415 + 1.416 + // Merge arc->m_to into arc->m_from. First, modify anything 1.417 + // that used to point to arc->m_to to point to arc->m_from. 1.418 + typedef list<call_graph_arc *> arc_list_t; 1.419 + arc_list_t map_add_queue; 1.420 + 1.421 + pair<arc_map_t::iterator, arc_map_t::iterator> p; 1.422 + 1.423 + // Find all the arcs that point to arc->m_to. 1.424 + p = to_map.equal_range(arc->m_to); 1.425 + for (arc_map_t::iterator i = p.first; i != p.second; ++i) { 1.426 + // Remove the arc that points to arc->m_to (`doomed') from 1.427 + // all of our indicies. 1.428 + call_graph_arc *doomed = i->second; 1.429 + const Elf32_Sym *source = doomed->m_from; 1.430 + 1.431 + sorted_arcs.erase(doomed); 1.432 + arcs.erase(doomed); 1.433 + remove_from(from_map, source, doomed); 1.434 + // N.B. that `doomed' will be removed from the `to_map' 1.435 + // after the loop completes. 1.436 + 1.437 + // Create a new (or locate an existing) arc whose source 1.438 + // was the doomed arc's source, and whose sink is 1.439 + // arc->m_from (i.e., the node that subsumed arc->m_to). 1.440 + call_graph_arc *merge; 1.441 + call_graph_arc key = call_graph_arc(source, arc->m_from); 1.442 + arc_container_t::iterator iter = arcs.find(&key); 1.443 + 1.444 + if (iter == arcs.end()) { 1.445 + merge = new call_graph_arc(source, arc->m_from); 1.446 + 1.447 + arcs.insert(merge); 1.448 + from_map.insert(arc_map_t::value_type(merge->m_from, merge)); 1.449 + map_add_queue.push_back(merge); 1.450 + } 1.451 + else { 1.452 + // We found an existing arc: temporarily remove it 1.453 + // from the sorted index. 1.454 + merge = const_cast<call_graph_arc *>(*iter); 1.455 + sorted_arcs.erase(merge); 1.456 + } 1.457 + 1.458 + // Include the doomed arc's weight in the merged arc, and 1.459 + // (re-)insert it into the sorted index. 1.460 + merge->m_count += doomed->m_count; 1.461 + sorted_arcs.insert(merge); 1.462 + 1.463 + delete doomed; 1.464 + } 1.465 + 1.466 + to_map.erase(p.first, p.second); 1.467 + 1.468 + for (arc_list_t::iterator merge = map_add_queue.begin(); 1.469 + merge != map_add_queue.end(); 1.470 + map_add_queue.erase(merge++)) { 1.471 + to_map.insert(arc_map_t::value_type((*merge)->m_to, *merge)); 1.472 + } 1.473 + 1.474 + // Now, roll anything that arc->m_to used to point at into 1.475 + // what arc->m_from now points at. 1.476 + 1.477 + // Collect all of the arcs that originate at arc->m_to. 1.478 + p = from_map.equal_range(arc->m_to); 1.479 + for (arc_map_t::iterator i = p.first; i != p.second; ++i) { 1.480 + // Remove the arc originating from arc->m_to (`doomed') 1.481 + // from all of our indicies. 1.482 + call_graph_arc *doomed = i->second; 1.483 + const Elf32_Sym *sink = doomed->m_to; 1.484 + 1.485 + // There shouldn't be any self-referential arcs. 1.486 + assert(sink != arc->m_to); 1.487 + 1.488 + sorted_arcs.erase(doomed); 1.489 + arcs.erase(doomed); 1.490 + remove_from(to_map, sink, doomed); 1.491 + // N.B. that `doomed' will be removed from the `from_map' 1.492 + // once the loop completes. 1.493 + 1.494 + // Create a new (or locate an existing) arc whose source 1.495 + // is arc->m_from and whose sink was the removed arc's 1.496 + // sink. 1.497 + call_graph_arc *merge; 1.498 + call_graph_arc key = call_graph_arc(arc->m_from, sink); 1.499 + arc_container_t::iterator iter = arcs.find(&key); 1.500 + 1.501 + if (iter == arcs.end()) { 1.502 + merge = new call_graph_arc(arc->m_from, sink); 1.503 + 1.504 + arcs.insert(merge); 1.505 + 1.506 + map_add_queue.push_back(merge); 1.507 + to_map.insert(arc_map_t::value_type(merge->m_to, merge)); 1.508 + } 1.509 + else { 1.510 + // We found an existing arc: temporarily remove it 1.511 + // from the sorted index. 1.512 + merge = const_cast<call_graph_arc *>(*iter); 1.513 + sorted_arcs.erase(merge); 1.514 + } 1.515 + 1.516 + // Include the doomed arc's weight in the merged arc, and 1.517 + // (re-)insert it into the sorted index. 1.518 + merge->m_count += doomed->m_count; 1.519 + sorted_arcs.insert(merge); 1.520 + 1.521 + delete doomed; 1.522 + } 1.523 + 1.524 + from_map.erase(p.first, p.second); 1.525 + 1.526 + for (arc_list_t::iterator merge = map_add_queue.begin(); 1.527 + merge != map_add_queue.end(); 1.528 + map_add_queue.erase(merge++)) { 1.529 + from_map.insert(arc_map_t::value_type((*merge)->m_from, *merge)); 1.530 + } 1.531 + } 1.532 + 1.533 + out.close(); 1.534 + return 0; 1.535 +}