tools/reorder/garope.cpp

branch
TOR_BUG_9701
changeset 8
97036ab72558
equal deleted inserted replaced
-1:000000000000 0:aaedf3749c6c
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 /*
6
7 A program that attempts to find an optimal function ordering for an
8 executable using a genetic algorithm whose fitness function is
9 computed using runtime profile information.
10
11 The fitness function was inspired by Nat Friedman's <nat@nat.org>
12 work on `grope':
13
14 _GNU Rope - A Subroutine Position Optimizer_
15 <http://www.hungry.com/~shaver/grope/grope.ps>
16
17 Brendan Eich <brendan@mozilla.org> told me tales about Scott Furman
18 doing something like this, which sort of made me want to try it.
19
20 As far as I can tell, it would take a lot of computers a lot of time
21 to actually find something useful on a non-trivial program using
22 this.
23
24 */
25
26 #include <assert.h>
27 #include <fstream>
28 #include <hash_map>
29 #include <vector>
30 #include <limits.h>
31 #include <unistd.h>
32 #include <stdio.h>
33 #include <fcntl.h>
34
35 #include "elf_symbol_table.h"
36
37 #define _GNU_SOURCE
38 #include <getopt.h>
39
40 #define PAGE_SIZE 4096
41 #define SYMBOL_ALIGN 4
42
43 //----------------------------------------------------------------------
44
45 class call_pair
46 {
47 public:
48 const Elf32_Sym *m_lo;
49 const Elf32_Sym *m_hi;
50
51 call_pair(const Elf32_Sym *site1, const Elf32_Sym *site2)
52 {
53 if (site1 < site2) {
54 m_lo = site1;
55 m_hi = site2;
56 }
57 else {
58 m_hi = site1;
59 m_lo = site2;
60 }
61 }
62
63 friend bool
64 operator==(const call_pair &lhs, const call_pair &rhs)
65 {
66 return (lhs.m_lo == rhs.m_lo) && (lhs.m_hi == rhs.m_hi);
67 }
68 };
69
70 // Straight outta plhash.c!
71 #define GOLDEN_RATIO 0x9E3779B9U
72
73 template<>
74 struct hash<call_pair>
75 {
76 size_t operator()(const call_pair &pair) const
77 {
78 size_t h = (reinterpret_cast<size_t>(pair.m_hi) >> 4);
79 h += (reinterpret_cast<size_t>(pair.m_lo) >> 4);
80 h *= GOLDEN_RATIO;
81 return h;
82 }
83 };
84
85 //----------------------------------------------------------------------
86
87 struct hash<const Elf32_Sym *>
88 {
89 size_t operator()(const Elf32_Sym *sym) const
90 {
91 return (reinterpret_cast<size_t>(sym) >> 4) * GOLDEN_RATIO;
92 }
93 };
94
95 //----------------------------------------------------------------------
96
97 typedef hash_map<call_pair, unsigned int> call_graph_t;
98 call_graph_t call_graph;
99
100 typedef hash_map<const Elf32_Sym *, unsigned int> histogram_t;
101 histogram_t histogram;
102 long long total_calls = 0;
103
104 elf_symbol_table symtab;
105
106 bool opt_debug = false;
107 int opt_generations = 10;
108 int opt_mutate = 0;
109 const char *opt_out = "order.out";
110 int opt_population_size = 100;
111 int opt_tick = 0;
112 bool opt_verbose = false;
113 int opt_window = 0;
114
115 static struct option long_options[] = {
116 { "debug", no_argument, 0, 'd' },
117 { "exe", required_argument, 0, 'e' },
118 { "generations", required_argument, 0, 'g' },
119 { "help", no_argument, 0, '?' },
120 { "mutate", required_argument, 0, 'm' },
121 { "out", required_argument, 0, 'o' },
122 { "population", required_argument, 0, 'p' },
123 { "seed", required_argument, 0, 's' },
124 { "tick", optional_argument, 0, 't' },
125 { "verbose", no_argument, 0, 'v' },
126 { "window", required_argument, 0, 'w' },
127 { 0, 0, 0, 0 }
128 };
129
130 //----------------------------------------------------------------------
131
132 static long long
133 llrand()
134 {
135 long long result;
136 result = (long long) rand();
137 result *= (long long) (unsigned int) (RAND_MAX + 1);
138 result += (long long) rand();
139 return result;
140 }
141
142 //----------------------------------------------------------------------
143
144 class symbol_order {
145 public:
146 typedef vector<const Elf32_Sym *> vector_t;
147 typedef long long score_t;
148
149 static const score_t max_score;
150
151 /**
152 * A vector of symbols that is this ordering.
153 */
154 vector_t m_ordering;
155
156 /**
157 * The symbol ordering's score.
158 */
159 score_t m_score;
160
161 symbol_order() : m_score(0) {}
162
163 /**
164 * ``Shuffle'' a symbol ordering, randomizing it.
165 */
166 void shuffle();
167
168 /**
169 * Initialize this symbol ordering by performing a crossover from
170 * two ``parent'' symbol orderings.
171 */
172 void crossover_from(const symbol_order *father, const symbol_order *mother);
173
174 /**
175 * Randomly mutate this symbol ordering.
176 */
177 void mutate();
178
179 /**
180 * Score a symbol ordering based on paginated locality.
181 */
182 score_t compute_score_page();
183
184 /**
185 * Score a symbol ordering based on a sliding window.
186 */
187 score_t compute_score_window(int window_size);
188
189 static score_t compute_score(symbol_order &order);
190
191 /**
192 * Use the symbol table to dump the ordered symbolic constants.
193 */
194 void dump_symbols() const;
195
196 friend ostream &
197 operator<<(ostream &out, const symbol_order &order);
198 };
199
200 const symbol_order::score_t
201 symbol_order::max_score = ~((symbol_order::score_t)1 << ((sizeof(symbol_order::score_t) * 8) - 1));
202
203 symbol_order::score_t
204 symbol_order::compute_score_page()
205 {
206 m_score = 0;
207
208 unsigned int off = 0; // XXX in reality, probably not page-aligned to start
209
210 vector_t::const_iterator end = m_ordering.end(),
211 last = end,
212 sym = m_ordering.begin();
213
214 while (sym != end) {
215 vector_t page;
216
217 // If we had a symbol that spilled over from the last page,
218 // then include it here.
219 if (last != end)
220 page.push_back(*last);
221
222 // Pack symbols into the page
223 do {
224 page.push_back(*sym);
225
226 int size = (*sym)->st_size;
227 size += SYMBOL_ALIGN - 1;
228 size &= ~(SYMBOL_ALIGN - 1);
229
230 off += size;
231 } while (++sym != end && off < PAGE_SIZE);
232
233 // Remember if there was spill-over.
234 off %= PAGE_SIZE;
235 last = (off != 0) ? sym : end;
236
237 // Now score the page as the count of all calls to symbols on
238 // the page, less calls between the symbols on the page.
239 vector_t::const_iterator page_end = page.end();
240 for (vector_t::const_iterator i = page.begin(); i != page_end; ++i) {
241 histogram_t::const_iterator func = histogram.find(*i);
242 if (func == histogram.end())
243 continue;
244
245 m_score += func->second;
246
247 vector_t::const_iterator j = i;
248 for (++j; j != page_end; ++j) {
249 call_graph_t::const_iterator call =
250 call_graph.find(call_pair(*i, *j));
251
252 if (call != call_graph.end())
253 m_score -= call->second;
254 }
255 }
256 }
257
258 assert(m_score >= 0);
259
260 // Integer reciprocal so we minimize instead of maximize.
261 if (m_score == 0)
262 m_score = 1;
263
264 m_score = (total_calls / m_score) + 1;
265
266 return m_score;
267 }
268
269 symbol_order::score_t
270 symbol_order::compute_score_window(int window_size)
271 {
272 m_score = 0;
273
274 vector_t::const_iterator *window = new vector_t::const_iterator[window_size];
275 int window_fill = 0;
276
277 vector_t::const_iterator end = m_ordering.end(),
278 sym = m_ordering.begin();
279
280 for (; sym != end; ++sym) {
281 histogram_t::const_iterator func = histogram.find(*sym);
282 if (func != histogram.end()) {
283 long long scale = ((long long) 1) << window_size;
284
285 m_score += func->second * scale * 2;
286
287 vector_t::const_iterator *limit = window + window_fill;
288 vector_t::const_iterator *iter;
289 for (iter = window ; iter < limit; ++iter) {
290 call_graph_t::const_iterator call =
291 call_graph.find(call_pair(*sym, **iter));
292
293 if (call != call_graph.end())
294 m_score -= (call->second * scale);
295
296 scale >>= 1;
297 }
298 }
299
300 // Slide the window.
301 vector_t::const_iterator *begin = window;
302 vector_t::const_iterator *iter;
303 for (iter = window + (window_size - 1); iter > begin; --iter)
304 *iter = *(iter - 1);
305
306 if (window_fill < window_size)
307 ++window_fill;
308
309 *window = sym;
310 }
311
312 delete[] window;
313
314 assert(m_score >= 0);
315
316 // Integer reciprocal so we minimize instead of maximize.
317 if (m_score == 0)
318 m_score = 1;
319
320 m_score = (total_calls / m_score) + 1;
321
322 return m_score;
323 }
324
325 symbol_order::score_t
326 symbol_order::compute_score(symbol_order &order)
327 {
328 if (opt_window)
329 return order.compute_score_window(opt_window);
330
331 return order.compute_score_page();
332 }
333
334 void
335 symbol_order::shuffle()
336 {
337 vector_t::iterator sym = m_ordering.begin();
338 vector_t::iterator end = m_ordering.end();
339 for (; sym != end; ++sym) {
340 int i = rand() % m_ordering.size();
341 const Elf32_Sym *temp = *sym;
342 *sym = m_ordering[i];
343 m_ordering[i] = temp;
344 }
345 }
346
347 void
348 symbol_order::crossover_from(const symbol_order *father, const symbol_order *mother)
349 {
350 histogram_t used;
351
352 m_ordering = vector_t(father->m_ordering.size(), 0);
353
354 vector_t::const_iterator parent_sym = father->m_ordering.begin();
355 vector_t::iterator sym = m_ordering.begin();
356 vector_t::iterator end = m_ordering.end();
357
358 for (; sym != end; ++sym, ++parent_sym) {
359 if (rand() % 2) {
360 *sym = *parent_sym;
361 used[*parent_sym] = 1;
362 }
363 }
364
365 parent_sym = mother->m_ordering.begin();
366 sym = m_ordering.begin();
367
368 for (; sym != end; ++sym) {
369 if (! *sym) {
370 while (used[*parent_sym])
371 ++parent_sym;
372
373 *sym = *parent_sym++;
374 }
375 }
376 }
377
378 void
379 symbol_order::mutate()
380 {
381 int i, j;
382 i = rand() % m_ordering.size();
383 j = rand() % m_ordering.size();
384
385 const Elf32_Sym *temp = m_ordering[i];
386 m_ordering[i] = m_ordering[j];
387 m_ordering[j] = temp;
388 }
389
390 void
391 symbol_order::dump_symbols() const
392 {
393 ofstream out(opt_out);
394
395 vector_t::const_iterator sym = m_ordering.begin();
396 vector_t::const_iterator end = m_ordering.end();
397 for (; sym != end; ++sym)
398 out << symtab.get_symbol_name(*sym) << endl;
399
400 out.close();
401 }
402
403 ostream &
404 operator<<(ostream &out, const symbol_order &order)
405 {
406 out << "symbol_order(" << order.m_score << ") ";
407
408 symbol_order::vector_t::const_iterator sym = order.m_ordering.begin();
409 symbol_order::vector_t::const_iterator end = order.m_ordering.end();
410 for (; sym != end; ++sym)
411 out.form("%08x ", *sym);
412
413 out << endl;
414
415 return out;
416 }
417
418 //----------------------------------------------------------------------
419
420 static void
421 usage(const char *name)
422 {
423 cerr << "usage: " << name << " [options] [<file> ...]" << endl;
424 cerr << " Options:" << endl;
425 cerr << " --debug, -d" << endl;
426 cerr << " Print lots of verbose debugging cruft." << endl;
427 cerr << " --exe=<image>, -e <image> (required)" << endl;
428 cerr << " Specify the executable image from which to read symbol information." << endl;
429 cerr << " --generations=<num>, -g <num>" << endl;
430 cerr << " Specify the number of generations to run the GA (default is 10)." << endl;
431 cerr << " --help, -?" << endl;
432 cerr << " Print this message and exit." << endl;
433 cerr << " --mutate=<num>, -m <num>" << endl;
434 cerr << " Mutate every <num>th individual, or zero for no mutation (default)." << endl;
435 cerr << " --out=<file>, -o <file>" << endl;
436 cerr << " Specify the output file to which to dump the symbol ordering of the" << endl;
437 cerr << " best individual (default is `order.out')." << endl;
438 cerr << " --population=<num>, -p <num>" << endl;
439 cerr << " Set the population size to <num> individuals (default is 100)." << endl;
440 cerr << " --seed=<num>, -s <num>" << endl;
441 cerr << " Specify a seed to srand()." << endl;
442 cerr << " --tick[=<num>], -t [<num>]" << endl;
443 cerr << " When reading address data, print a dot to stderr every <num>th" << endl;
444 cerr << " address processed from the call trace. If specified with no argument," << endl;
445 cerr << " a dot will be printed for every million addresses processed." << endl;
446 cerr << " --verbose, -v" << endl;
447 cerr << " Issue progress messages to stderr." << endl;
448 cerr << " --window=<num>, -w <num>" << endl;
449 cerr << " Use a sliding window instead of pagination to score orderings." << endl;
450 cerr << endl;
451 cerr << "This program uses a genetic algorithm to produce an `optimal' ordering for" << endl;
452 cerr << "an executable based on call patterns." << endl;
453 cerr << endl;
454 cerr << "Addresses from a call trace are read as binary data from the files" << endl;
455 cerr << "specified, or from stdin if no files are specified. These addresses" << endl;
456 cerr << "are used with the symbolic information from the executable to create" << endl;
457 cerr << "a call graph. This call graph is used to `score' arbitrary symbol" << endl;
458 cerr << "orderings, and provides the fitness function for the GA." << endl;
459 cerr << endl;
460 }
461
462 /**
463 * Using the symbol table, map a stream of address references into a
464 * callgraph and a histogram.
465 */
466 static void
467 map_addrs(int fd)
468 {
469 const Elf32_Sym *last = 0;
470 unsigned int buf[128];
471 ssize_t cb;
472
473 unsigned int count = 0;
474 while ((cb = read(fd, buf, sizeof buf)) > 0) {
475 if (cb % sizeof buf[0])
476 fprintf(stderr, "unaligned read\n");
477
478 unsigned int *addr = buf;
479 unsigned int *limit = buf + (cb / 4);
480
481 for (; addr < limit; ++addr) {
482 const Elf32_Sym *sym = symtab.lookup(*addr);
483
484 if (last && sym && last != sym) {
485 ++total_calls;
486 ++histogram[sym];
487 ++call_graph[call_pair(last, sym)];
488
489 if (opt_tick && (++count % opt_tick == 0)) {
490 cerr << ".";
491 flush(cerr);
492 }
493 }
494
495 last = sym;
496 }
497 }
498
499 if (opt_tick)
500 cerr << endl;
501
502 cerr << "Total calls: " << total_calls << endl;
503 total_calls *= 1024;
504
505 if (opt_window)
506 total_calls <<= (opt_window + 1);
507 }
508
509 static symbol_order *
510 pick_parent(symbol_order *ordering, int max, int index)
511 {
512 while (1) {
513 index -= ordering->m_score;
514 if (index < 0)
515 break;
516
517 ++ordering;
518 }
519
520 return ordering;
521 }
522
523 /**
524 * The main program
525 */
526 int
527 main(int argc, char *argv[])
528 {
529 const char *opt_exe = 0;
530
531 int c;
532 while (1) {
533 int option_index = 0;
534 c = getopt_long(argc, argv, "?de:g:m:o:p:s:t:vw:", long_options, &option_index);
535
536 if (c < 0)
537 break;
538
539 switch (c) {
540 case '?':
541 usage(argv[0]);
542 return 0;
543
544 case 'd':
545 opt_debug = true;
546 break;
547
548 case 'e':
549 opt_exe = optarg;
550 break;
551
552 case 'g':
553 opt_generations = atoi(optarg);
554 break;
555
556 case 'm':
557 opt_mutate = atoi(optarg);
558 break;
559
560 case 'o':
561 opt_out = optarg;
562 break;
563
564 case 'p':
565 opt_population_size = atoi(optarg);
566 break;
567
568 case 's':
569 srand(atoi(optarg));
570 break;
571
572 case 't':
573 opt_tick = optarg ? atoi(optarg) : 1000000;
574 break;
575
576 case 'v':
577 opt_verbose = true;
578 break;
579
580 case 'w':
581 opt_window = atoi(optarg);
582 if (opt_window < 0 || opt_window > 8) {
583 cerr << "invalid window size: " << opt_window << endl;
584 return 1;
585 }
586
587 break;
588
589 default:
590 usage(argv[0]);
591 return 1;
592 }
593 }
594
595 // Make sure an image was specified
596 if (! opt_exe) {
597 usage(argv[0]);
598 return 1;
599 }
600
601 // Read the sym table.
602 symtab.init(opt_exe);
603
604 // Process addresses to construct the call graph.
605 if (optind >= argc) {
606 map_addrs(STDIN_FILENO);
607 }
608 else {
609 do {
610 int fd = open(argv[optind], O_RDONLY);
611 if (fd < 0) {
612 perror(argv[optind]);
613 return 1;
614 }
615
616 map_addrs(fd);
617 close(fd);
618 } while (++optind < argc);
619 }
620
621 if (opt_debug) {
622 cerr << "Call graph:" << endl;
623
624 call_graph_t::const_iterator limit = call_graph.end();
625 call_graph_t::const_iterator i;
626 for (i = call_graph.begin(); i != limit; ++i) {
627 const call_pair& pair = i->first;
628 cerr.form("%08x %08x %10d\n",
629 pair.m_lo->st_value,
630 pair.m_hi->st_value,
631 i->second);
632 }
633 }
634
635 // Collect the symbols into a vector
636 symbol_order::vector_t ordering;
637 elf_symbol_table::const_iterator end = symtab.end();
638 for (elf_symbol_table::const_iterator sym = symtab.begin(); sym != end; ++sym) {
639 if (symtab.is_function(sym))
640 ordering.push_back(sym);
641 }
642
643 if (opt_verbose) {
644 symbol_order initial;
645 initial.m_ordering = ordering;
646 cerr << "created initial ordering, score=" << symbol_order::compute_score(initial) << endl;
647
648 if (opt_debug)
649 cerr << initial;
650 }
651
652 // Create a population.
653 if (opt_verbose)
654 cerr << "creating population" << endl;
655
656 symbol_order *population = new symbol_order[opt_population_size];
657
658 symbol_order::score_t total = 0, min = symbol_order::max_score, max = 0;
659
660 // Score it.
661 symbol_order *order = population;
662 symbol_order *limit = population + opt_population_size;
663 for (; order < limit; ++order) {
664 order->m_ordering = ordering;
665 order->shuffle();
666
667 symbol_order::score_t score = symbol_order::compute_score(*order);
668
669 if (opt_debug)
670 cerr << *order;
671
672 if (min > score)
673 min = score;
674 if (max < score)
675 max = score;
676
677 total += score;
678 }
679
680 if (opt_verbose) {
681 cerr << "Initial population";
682 cerr << ": min=" << min;
683 cerr << ", max=" << max;
684 cerr << " mean=" << (total / opt_population_size);
685 cerr << endl;
686 }
687
688
689 // Run the GA.
690 if (opt_verbose)
691 cerr << "begininng ga" << endl;
692
693 symbol_order::score_t best = 0;
694
695 for (int generation = 1; generation <= opt_generations; ++generation) {
696 // Create a new population.
697 symbol_order *offspring = new symbol_order[opt_population_size];
698
699 symbol_order *kid = offspring;
700 symbol_order *offspring_limit = offspring + opt_population_size;
701 for (; kid < offspring_limit; ++kid) {
702 // Pick parents.
703 symbol_order *father, *mother;
704 father = pick_parent(population, max, llrand() % total);
705 mother = pick_parent(population, max, llrand() % total);
706
707 // Create a kid.
708 kid->crossover_from(father, mother);
709
710 // Mutate, possibly.
711 if (opt_mutate) {
712 if (rand() % opt_mutate == 0)
713 kid->mutate();
714 }
715 }
716
717 delete[] population;
718 population = offspring;
719
720 // Score the new population.
721 total = 0;
722 min = symbol_order::max_score;
723 max = 0;
724
725 symbol_order *fittest = 0;
726
727 limit = offspring_limit;
728 for (order = population; order < limit; ++order) {
729 symbol_order::score_t score = symbol_order::compute_score(*order);
730
731 if (opt_debug)
732 cerr << *order;
733
734 if (min > score)
735 min = score;
736
737 if (max < score)
738 max = score;
739
740 if (best < score) {
741 best = score;
742 fittest = order;
743 }
744
745 total += score;
746 }
747
748 if (opt_verbose) {
749 cerr << "Generation " << generation;
750 cerr << ": min=" << min;
751 cerr << ", max=" << max;
752 if (fittest)
753 cerr << "*";
754 cerr << " mean=" << (total / opt_population_size);
755 cerr << endl;
756 }
757
758 // If we've found a new ``best'' individual, dump it.
759 if (fittest)
760 fittest->dump_symbols();
761 }
762
763 delete[] population;
764 return 0;
765 }

mercurial