tools/reorder/grope.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/tools/reorder/grope.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,532 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +/*
     1.9 +
    1.10 +  A program that computes a function ordering for an executable based
    1.11 +  on runtime profile information.
    1.12 +
    1.13 +  This program is directly based on work done by Roger Chickering
    1.14 +  <rogc@netscape.com> in
    1.15 +
    1.16 +    <http://bugzilla.mozilla.org/show_bug.cgi?id=65845>
    1.17 +
    1.18 +  to implement Nat Friedman's <nat@nat.org> `grope',
    1.19 +
    1.20 +    _GNU Rope - A Subroutine Position Optimizer_
    1.21 +    <http://www.hungry.com/~shaver/grope/grope.ps>
    1.22 +
    1.23 +  Specifically, it implements the procedure re-ordering algorithm
    1.24 +  described in:
    1.25 +
    1.26 +    K. Pettis and R. Hansen. ``Profile-Guided Core Position.'' In
    1.27 +    _Proceedings of the Int. Conference on Programming Language Design
    1.28 +    and Implementation_, pages 16-27, June 1990.
    1.29 +
    1.30 + */
    1.31 +
    1.32 +#include <assert.h>
    1.33 +#include <fstream>
    1.34 +#include <hash_set>
    1.35 +#include <map>
    1.36 +#include <set>
    1.37 +#include <list>
    1.38 +#include <vector>
    1.39 +#include <limits.h>
    1.40 +#include <unistd.h>
    1.41 +#include <stdio.h>
    1.42 +#include <fcntl.h>
    1.43 +
    1.44 +#include "elf_symbol_table.h"
    1.45 +
    1.46 +#define _GNU_SOURCE
    1.47 +#include <getopt.h>
    1.48 +
    1.49 +elf_symbol_table symtab;
    1.50 +
    1.51 +// Straight outta plhash.c!
    1.52 +#define GOLDEN_RATIO 0x9E3779B9U
    1.53 +
    1.54 +struct hash<const Elf32_Sym *>
    1.55 +{
    1.56 +    size_t operator()(const Elf32_Sym *sym) const {
    1.57 +        return (reinterpret_cast<size_t>(sym) >> 4) * GOLDEN_RATIO; }
    1.58 +};
    1.59 +
    1.60 +typedef unsigned int call_count_t;
    1.61 +
    1.62 +struct call_graph_arc
    1.63 +{
    1.64 +    const Elf32_Sym *m_from;
    1.65 +    const Elf32_Sym *m_to;
    1.66 +    call_count_t     m_count;
    1.67 +
    1.68 +    call_graph_arc(const Elf32_Sym *left, const Elf32_Sym *right, call_count_t count = 0)
    1.69 +        : m_count(count)
    1.70 +    {
    1.71 +        assert(left != right);
    1.72 +
    1.73 +        if (left > right) {
    1.74 +            m_from = left;
    1.75 +            m_to   = right;
    1.76 +        }
    1.77 +        else {
    1.78 +            m_from = right;
    1.79 +            m_to   = left;
    1.80 +        }
    1.81 +    }
    1.82 +
    1.83 +    friend bool
    1.84 +    operator==(const call_graph_arc &lhs, const call_graph_arc &rhs)
    1.85 +    {
    1.86 +        return (lhs.m_from == rhs.m_from) && (lhs.m_to == rhs.m_to);
    1.87 +    }
    1.88 +
    1.89 +    friend ostream &
    1.90 +    operator<<(ostream &out, const call_graph_arc &arc)
    1.91 +    {
    1.92 +        out << &arc << ": ";
    1.93 +        out.form("<(%s %s) %d>",
    1.94 +                 symtab.get_symbol_name(arc.m_from),
    1.95 +                 symtab.get_symbol_name(arc.m_to),
    1.96 +                 arc.m_count);
    1.97 +
    1.98 +        return out;
    1.99 +    }
   1.100 +};
   1.101 +
   1.102 +struct hash<call_graph_arc *>
   1.103 +{
   1.104 +    size_t
   1.105 +    operator()(const call_graph_arc* arc) const
   1.106 +    {
   1.107 +        size_t result;
   1.108 +        result = reinterpret_cast<unsigned long>(arc->m_from);
   1.109 +        result ^= reinterpret_cast<unsigned long>(arc->m_to) >> 16;
   1.110 +        result ^= reinterpret_cast<unsigned long>(arc->m_to) << 16;
   1.111 +        result *= GOLDEN_RATIO;
   1.112 +        return result;
   1.113 +    }
   1.114 +};
   1.115 +
   1.116 +struct equal_to<call_graph_arc *>
   1.117 +{
   1.118 +    bool
   1.119 +    operator()(const call_graph_arc* lhs, const call_graph_arc *rhs) const
   1.120 +    {
   1.121 +        return *lhs == *rhs;
   1.122 +    }
   1.123 +};
   1.124 +
   1.125 +typedef hash_set<call_graph_arc *> arc_container_t;
   1.126 +arc_container_t arcs;
   1.127 +
   1.128 +typedef multimap<const Elf32_Sym *, call_graph_arc *> arc_map_t;
   1.129 +arc_map_t from_map;
   1.130 +arc_map_t to_map;
   1.131 +
   1.132 +struct less_call_graph_arc_count
   1.133 +{
   1.134 +    bool
   1.135 +    operator()(const call_graph_arc *lhs, const call_graph_arc *rhs) const
   1.136 +    {
   1.137 +        if (lhs->m_count == rhs->m_count) {
   1.138 +            if (lhs->m_from == rhs->m_from)
   1.139 +                return lhs->m_to > rhs->m_to;
   1.140 +
   1.141 +            return lhs->m_from > rhs->m_from;
   1.142 +        }
   1.143 +        return lhs->m_count > rhs->m_count;
   1.144 +    }
   1.145 +};
   1.146 +
   1.147 +typedef set<call_graph_arc *, less_call_graph_arc_count> arc_count_index_t;
   1.148 +
   1.149 +bool opt_debug = false;
   1.150 +const char *opt_out = "order.out";
   1.151 +int opt_tick = 0;
   1.152 +bool opt_verbose = false;
   1.153 +int opt_window = 16;
   1.154 +
   1.155 +static struct option long_options[] = {
   1.156 +    { "debug",       no_argument,       0, 'd' },
   1.157 +    { "exe",         required_argument, 0, 'e' },
   1.158 +    { "help",        no_argument,       0, '?' },
   1.159 +    { "out",         required_argument, 0, 'o' },
   1.160 +    { "tick",        optional_argument, 0, 't' },
   1.161 +    { "verbose",     no_argument,       0, 'v' },
   1.162 +    { "window",      required_argument, 0, 'w' },
   1.163 +    { 0,             0,                 0, 0   }
   1.164 +};
   1.165 +
   1.166 +//----------------------------------------------------------------------
   1.167 +
   1.168 +static void
   1.169 +usage(const char *name)
   1.170 +{
   1.171 +    cerr << "usage: " << name << " [options] [<file> ...]" << endl;
   1.172 +    cerr << "  Options:" << endl;
   1.173 +    cerr << "  --debug, -d" << endl;
   1.174 +    cerr << "      Print lots of verbose debugging cruft." << endl;
   1.175 +    cerr << "  --exe=<image>, -e <image> (required)" << endl;
   1.176 +    cerr << "      Specify the executable image from which to read symbol information." << endl;
   1.177 +    cerr << "  --help, -?" << endl;
   1.178 +    cerr << "      Print this message and exit." << endl;
   1.179 +    cerr << "  --out=<file>, -o <file>" << endl;
   1.180 +    cerr << "      Specify the output file to which to dump the symbol ordering of the" << endl;
   1.181 +    cerr << "      best individual (default is `order.out')." << endl;
   1.182 +    cerr << "  --tick[=<num>], -t [<num>]" << endl;
   1.183 +    cerr << "      When reading address data, print a dot to stderr every <num>th" << endl;
   1.184 +    cerr << "      address processed from the call trace. If specified with no argument," << endl;
   1.185 +    cerr << "      a dot will be printed for every million addresses processed." << endl;
   1.186 +    cerr << "  --verbose, -v" << endl;
   1.187 +    cerr << "      Issue progress messages to stderr." << endl;
   1.188 +    cerr << "  --window=<num>, -w <num>" << endl;
   1.189 +    cerr << "      Use a sliding window instead of pagination to score orderings." << endl;
   1.190 +}
   1.191 +
   1.192 +/**
   1.193 + * Using the symbol table, map a stream of address references into a
   1.194 + * callgraph.
   1.195 + */
   1.196 +static void
   1.197 +map_addrs(int fd)
   1.198 +{
   1.199 +    long long total_calls = 0;
   1.200 +    typedef list<const Elf32_Sym *> window_t;
   1.201 +
   1.202 +    window_t window;
   1.203 +    int window_size = 0;
   1.204 +    unsigned int buf[128];
   1.205 +    ssize_t cb;
   1.206 +
   1.207 +    while ((cb = read(fd, buf, sizeof buf)) > 0) {
   1.208 +        if (cb % sizeof buf[0])
   1.209 +            fprintf(stderr, "unaligned read\n");
   1.210 +
   1.211 +        unsigned int *addr = buf;
   1.212 +        unsigned int *limit = buf + (cb / 4);
   1.213 +
   1.214 +        for (; addr < limit; ++addr) {
   1.215 +            const Elf32_Sym *sym = symtab.lookup(*addr);
   1.216 +            if (! sym)
   1.217 +                continue;
   1.218 +
   1.219 +            ++total_calls;
   1.220 +
   1.221 +            window.push_front(sym);
   1.222 +
   1.223 +            if (window_size >= opt_window)
   1.224 +                window.pop_back();
   1.225 +            else
   1.226 +                ++window_size;
   1.227 +
   1.228 +            window_t::const_iterator i = window.begin();
   1.229 +            window_t::const_iterator end = window.end();
   1.230 +            for (; i != end; ++i) {
   1.231 +                if (sym != *i) {
   1.232 +                    call_graph_arc *arc;
   1.233 +                    call_graph_arc key(sym, *i);
   1.234 +                    arc_container_t::iterator iter = arcs.find(&key);
   1.235 +                    if (iter == arcs.end()) {
   1.236 +                        arc = new call_graph_arc(sym, *i);
   1.237 +                        arcs.insert(arc);
   1.238 +                        from_map.insert(arc_map_t::value_type(arc->m_from, arc));
   1.239 +                        to_map.insert(arc_map_t::value_type(arc->m_to, arc));
   1.240 +                    }
   1.241 +                    else
   1.242 +                        arc = const_cast<call_graph_arc *>(*iter);
   1.243 +
   1.244 +                    ++(arc->m_count);
   1.245 +                }
   1.246 +            }
   1.247 +
   1.248 +            if (opt_verbose && opt_tick && (total_calls % opt_tick == 0)) {
   1.249 +                cerr << ".";
   1.250 +                flush(cerr);
   1.251 +            }
   1.252 +        }
   1.253 +    }
   1.254 +
   1.255 +    if (opt_verbose) {
   1.256 +        if (opt_tick)
   1.257 +            cerr << endl;
   1.258 +
   1.259 +        cerr << "Total calls: " << total_calls << endl;
   1.260 +    }
   1.261 +}
   1.262 +
   1.263 +static void
   1.264 +remove_from(arc_map_t& map, const Elf32_Sym *sym, const call_graph_arc *arc)
   1.265 +{
   1.266 +    pair<arc_map_t::iterator, arc_map_t::iterator> p
   1.267 +        = map.equal_range(sym);
   1.268 +
   1.269 +    for (arc_map_t::iterator i = p.first; i != p.second; ++i) {
   1.270 +        if (i->second == arc) {
   1.271 +            map.erase(i);
   1.272 +            break;
   1.273 +        }
   1.274 +    }
   1.275 +}
   1.276 +
   1.277 +/**
   1.278 + * The main program
   1.279 + */
   1.280 +int
   1.281 +main(int argc, char *argv[])
   1.282 +{
   1.283 +    const char *opt_exe = 0;
   1.284 +
   1.285 +    int c;
   1.286 +    while (1) {
   1.287 +        int option_index = 0;
   1.288 +        c = getopt_long(argc, argv, "?de:o:t:vw:", long_options, &option_index);
   1.289 +
   1.290 +        if (c < 0)
   1.291 +            break;
   1.292 +
   1.293 +        switch (c) {
   1.294 +        case '?':
   1.295 +            usage(argv[0]);
   1.296 +            return 0;
   1.297 +
   1.298 +        case 'd':
   1.299 +            opt_debug = true;
   1.300 +            break;
   1.301 +
   1.302 +        case 'e':
   1.303 +            opt_exe = optarg;
   1.304 +            break;
   1.305 +
   1.306 +        case 'o':
   1.307 +            opt_out = optarg;
   1.308 +            break;
   1.309 +
   1.310 +        case 't':
   1.311 +            opt_tick = optarg ? atoi(optarg) : 1000000;
   1.312 +            break;
   1.313 +
   1.314 +        case 'v':
   1.315 +            opt_verbose = true;
   1.316 +            break;
   1.317 +
   1.318 +        case 'w':
   1.319 +            opt_window = atoi(optarg);
   1.320 +            if (opt_window <= 0) {
   1.321 +                cerr << "invalid window size: " << opt_window << endl;
   1.322 +                return 1;
   1.323 +            }
   1.324 +            break;
   1.325 +
   1.326 +        default:
   1.327 +            usage(argv[0]);
   1.328 +            return 1;
   1.329 +        }
   1.330 +    }
   1.331 +
   1.332 +    // Make sure an image was specified
   1.333 +    if (! opt_exe) {
   1.334 +        usage(argv[0]);
   1.335 +        return 1;
   1.336 +    }
   1.337 +
   1.338 +    // Read the sym table.
   1.339 +    symtab.init(opt_exe);
   1.340 +
   1.341 +    // Process addresses to construct the weighted call graph.
   1.342 +    if (optind >= argc) {
   1.343 +        map_addrs(STDIN_FILENO);
   1.344 +    }
   1.345 +    else {
   1.346 +        do {
   1.347 +            int fd = open(argv[optind], O_RDONLY);
   1.348 +            if (fd < 0) {
   1.349 +                perror(argv[optind]);
   1.350 +                return 1;
   1.351 +            }
   1.352 +
   1.353 +            map_addrs(fd);
   1.354 +            close(fd);
   1.355 +        } while (++optind < argc);
   1.356 +    }
   1.357 +
   1.358 +    // Emit the ordering.
   1.359 +    ofstream out(opt_out);
   1.360 +
   1.361 +    // Collect all of the arcs into a sorted container, with arcs
   1.362 +    // having the largest weight at the front.
   1.363 +    arc_count_index_t sorted_arcs(arcs.begin(), arcs.end());
   1.364 +
   1.365 +    while (sorted_arcs.size()) {
   1.366 +        if (opt_debug) {
   1.367 +            cerr << "==========================================" << endl << endl;
   1.368 +
   1.369 +            cerr << "Sorted Arcs:" << endl;
   1.370 +            for (arc_count_index_t::const_iterator iter = sorted_arcs.begin();
   1.371 +                 iter != sorted_arcs.end();
   1.372 +                 ++iter) {
   1.373 +                cerr << **iter << endl;
   1.374 +            }
   1.375 +
   1.376 +            cerr << endl << "Arc Container:" << endl;
   1.377 +            for (arc_container_t::const_iterator iter = arcs.begin();
   1.378 +                 iter != arcs.end();
   1.379 +                 ++iter) {
   1.380 +                cerr << **iter << endl;
   1.381 +            }
   1.382 +
   1.383 +            cerr << endl << "From Map:" << endl;
   1.384 +            for (arc_map_t::const_iterator iter = from_map.begin();
   1.385 +                 iter != from_map.end();
   1.386 +                 ++iter) {
   1.387 +                cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl;
   1.388 +            }
   1.389 +
   1.390 +            cerr << endl << "To Map:" << endl;
   1.391 +            for (arc_map_t::const_iterator iter = to_map.begin();
   1.392 +                 iter != to_map.end();
   1.393 +                 ++iter) {
   1.394 +                cerr << symtab.get_symbol_name(iter->first) << ": " << *(iter->second) << endl;
   1.395 +            }
   1.396 +
   1.397 +            cerr << endl;
   1.398 +        }
   1.399 +
   1.400 +        // The first arc in the sorted set will have the largest
   1.401 +        // weight. Pull it out, and emit its sink.
   1.402 +        arc_count_index_t::iterator max = sorted_arcs.begin();
   1.403 +        call_graph_arc *arc = const_cast<call_graph_arc *>(*max);
   1.404 +
   1.405 +        sorted_arcs.erase(max);
   1.406 +
   1.407 +        if (opt_debug)
   1.408 +            cerr << "pulling " << *arc << endl;
   1.409 +
   1.410 +        arcs.erase(arc);
   1.411 +        remove_from(from_map, arc->m_from, arc);
   1.412 +        remove_from(to_map, arc->m_to, arc);
   1.413 +
   1.414 +        out << symtab.get_symbol_name(arc->m_to) << endl;
   1.415 +
   1.416 +        // Merge arc->m_to into arc->m_from. First, modify anything
   1.417 +        // that used to point to arc->m_to to point to arc->m_from.
   1.418 +        typedef list<call_graph_arc *> arc_list_t;
   1.419 +        arc_list_t map_add_queue;
   1.420 +
   1.421 +        pair<arc_map_t::iterator, arc_map_t::iterator> p;
   1.422 +
   1.423 +        // Find all the arcs that point to arc->m_to.
   1.424 +        p = to_map.equal_range(arc->m_to);
   1.425 +        for (arc_map_t::iterator i = p.first; i != p.second; ++i) {
   1.426 +            // Remove the arc that points to arc->m_to (`doomed') from
   1.427 +            // all of our indicies.
   1.428 +            call_graph_arc *doomed = i->second;
   1.429 +            const Elf32_Sym *source = doomed->m_from;
   1.430 +
   1.431 +            sorted_arcs.erase(doomed);
   1.432 +            arcs.erase(doomed);
   1.433 +            remove_from(from_map, source, doomed);
   1.434 +            // N.B. that `doomed' will be removed from the `to_map'
   1.435 +            // after the loop completes.
   1.436 +
   1.437 +            // Create a new (or locate an existing) arc whose source
   1.438 +            // was the doomed arc's source, and whose sink is
   1.439 +            // arc->m_from (i.e., the node that subsumed arc->m_to).
   1.440 +            call_graph_arc *merge;
   1.441 +            call_graph_arc key = call_graph_arc(source, arc->m_from);
   1.442 +            arc_container_t::iterator iter = arcs.find(&key);
   1.443 +
   1.444 +            if (iter == arcs.end()) {
   1.445 +                merge = new call_graph_arc(source, arc->m_from);
   1.446 +
   1.447 +                arcs.insert(merge);
   1.448 +                from_map.insert(arc_map_t::value_type(merge->m_from, merge));
   1.449 +                map_add_queue.push_back(merge);
   1.450 +            }
   1.451 +            else {
   1.452 +                // We found an existing arc: temporarily remove it
   1.453 +                // from the sorted index.
   1.454 +                merge = const_cast<call_graph_arc *>(*iter);
   1.455 +                sorted_arcs.erase(merge);
   1.456 +            }
   1.457 +
   1.458 +            // Include the doomed arc's weight in the merged arc, and
   1.459 +            // (re-)insert it into the sorted index.
   1.460 +            merge->m_count += doomed->m_count;
   1.461 +            sorted_arcs.insert(merge);
   1.462 +
   1.463 +            delete doomed;
   1.464 +        }
   1.465 +
   1.466 +        to_map.erase(p.first, p.second);
   1.467 +
   1.468 +        for (arc_list_t::iterator merge = map_add_queue.begin();
   1.469 +             merge != map_add_queue.end();
   1.470 +             map_add_queue.erase(merge++)) {
   1.471 +            to_map.insert(arc_map_t::value_type((*merge)->m_to, *merge));
   1.472 +        }
   1.473 +
   1.474 +        // Now, roll anything that arc->m_to used to point at into
   1.475 +        // what arc->m_from now points at.
   1.476 +
   1.477 +        // Collect all of the arcs that originate at arc->m_to.
   1.478 +        p = from_map.equal_range(arc->m_to);
   1.479 +        for (arc_map_t::iterator i = p.first; i != p.second; ++i) {
   1.480 +            // Remove the arc originating from arc->m_to (`doomed')
   1.481 +            // from all of our indicies.
   1.482 +            call_graph_arc *doomed = i->second;
   1.483 +            const Elf32_Sym *sink = doomed->m_to;
   1.484 +
   1.485 +            // There shouldn't be any self-referential arcs.
   1.486 +            assert(sink != arc->m_to);
   1.487 +
   1.488 +            sorted_arcs.erase(doomed);
   1.489 +            arcs.erase(doomed);
   1.490 +            remove_from(to_map, sink, doomed);
   1.491 +            // N.B. that `doomed' will be removed from the `from_map'
   1.492 +            // once the loop completes.
   1.493 +
   1.494 +            // Create a new (or locate an existing) arc whose source
   1.495 +            // is arc->m_from and whose sink was the removed arc's
   1.496 +            // sink.
   1.497 +            call_graph_arc *merge;
   1.498 +            call_graph_arc key = call_graph_arc(arc->m_from, sink);
   1.499 +            arc_container_t::iterator iter = arcs.find(&key);
   1.500 +
   1.501 +            if (iter == arcs.end()) {
   1.502 +                merge = new call_graph_arc(arc->m_from, sink);
   1.503 +
   1.504 +                arcs.insert(merge);
   1.505 +
   1.506 +                map_add_queue.push_back(merge);
   1.507 +                to_map.insert(arc_map_t::value_type(merge->m_to, merge));
   1.508 +            }
   1.509 +            else {
   1.510 +                // We found an existing arc: temporarily remove it
   1.511 +                // from the sorted index.
   1.512 +                merge = const_cast<call_graph_arc *>(*iter);
   1.513 +                sorted_arcs.erase(merge);
   1.514 +            }
   1.515 +
   1.516 +            // Include the doomed arc's weight in the merged arc, and
   1.517 +            // (re-)insert it into the sorted index.
   1.518 +            merge->m_count += doomed->m_count;
   1.519 +            sorted_arcs.insert(merge);
   1.520 +
   1.521 +            delete doomed;
   1.522 +        }
   1.523 +
   1.524 +        from_map.erase(p.first, p.second);
   1.525 +
   1.526 +        for (arc_list_t::iterator merge = map_add_queue.begin();
   1.527 +             merge != map_add_queue.end();
   1.528 +             map_add_queue.erase(merge++)) {
   1.529 +            from_map.insert(arc_map_t::value_type((*merge)->m_from, *merge));
   1.530 +        }
   1.531 +    }
   1.532 +
   1.533 +    out.close();
   1.534 +    return 0;
   1.535 +}

mercurial