tools/footprint/foldelf.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  *
     3  * This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 /* This program reads an ELF file and computes information about
     8  * redundancies. 
     9  */
#include <algorithm>
#include <cctype>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <elf.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <getopt.h>
    24 //----------------------------------------------------------------------
    26 char* opt_type    = "func";
    27 char* opt_section = ".text";
    29 //----------------------------------------------------------------------
    31 static void
    32 hexdump(ostream& out, const char* bytes, size_t count)
    33 {
    34     hex(out);
    36     size_t off = 0;
    37     while (off < count) {
    38         out.form("%08lx: ", off);
    40         const char* p = bytes + off;
    42         int j = 0;
    43         while (j < 16) {
    44             out.form("%02x", p[j++] & 0xff);
    45             if (j + off >= count)
    46                 break;
    48             out.form("%02x ", p[j++] & 0xff);
    49             if (j + off >= count)
    50                 break;
    51         }
    53         // Pad
    54         for (; j < 16; ++j)
    55             out << ((j%2) ? "   " : "  ");
    57         for (j = 0; j < 16; ++j) {
    58             if (j + off < count)
    59                 out.put(isprint(p[j]) ? p[j] : '.');
    60         }
    62         out << endl;
    63         off += 16;
    64     }
    65 }
    67 //----------------------------------------------------------------------
    69 int
    70 verify_elf_header(const Elf32_Ehdr* hdr)
    71 {
    72     if (hdr->e_ident[EI_MAG0] != ELFMAG0
    73         || hdr->e_ident[EI_MAG1] != ELFMAG1
    74         || hdr->e_ident[EI_MAG2] != ELFMAG2
    75         || hdr->e_ident[EI_MAG3] != ELFMAG3) {
    76         cerr << "not an elf file" << endl;
    77         return -1;
    78     }
    80     if (hdr->e_ident[EI_CLASS] != ELFCLASS32) {
    81         cerr << "not a 32-bit elf file" << endl;
    82         return -1;
    83     }
    85     if (hdr->e_ident[EI_DATA] != ELFDATA2LSB) {
    86         cerr << "not a little endian elf file" << endl;
    87         return -1;
    88     }
    90     if (hdr->e_ident[EI_VERSION] != EV_CURRENT) {
    91         cerr << "incompatible version" << endl;
    92         return -1;
    93     }
    95     return 0;
    96 }
    98 //----------------------------------------------------------------------
   100 class elf_symbol : public Elf32_Sym
   101 {
   102 public:
   103     elf_symbol(const Elf32_Sym& sym)
   104     { ::memcpy(static_cast<Elf32_Sym*>(this), &sym, sizeof(Elf32_Sym)); }
   106     friend bool operator==(const elf_symbol& lhs, const elf_symbol& rhs) {
   107         return 0 == ::memcmp(static_cast<const Elf32_Sym*>(&lhs),
   108                              static_cast<const Elf32_Sym*>(&rhs),
   109                              sizeof(Elf32_Sym)); }
   110 };
   112 //----------------------------------------------------------------------
   114 static const char*
   115 st_bind(unsigned char info)
   116 {
   117     switch (ELF32_ST_BIND(info)) {
   118     case STB_LOCAL:      return "local";
   119     case STB_GLOBAL:     return "global";
   120     case STB_WEAK:       return "weak";
   121     default:             return "unknown";
   122     }
   123 }
   125 static const char*
   126 st_type(unsigned char info)
   127 {
   128     switch (ELF32_ST_TYPE(info)) {
   129     case STT_NOTYPE:     return "none";
   130     case STT_OBJECT:     return "object";
   131     case STT_FUNC:       return "func";
   132     case STT_SECTION:    return "section";
   133     case STT_FILE:       return "file";
   134     default:             return "unknown";
   135     }
   136 }
   138 static unsigned char
   139 st_type(const char* type)
   140 {
   141     if (strcmp(type, "none") == 0) {
   142         return STT_NOTYPE;
   143     }
   144     else if (strcmp(type, "object") == 0) {
   145         return STT_OBJECT;
   146     }
   147     else if (strcmp(type, "func") == 0) {
   148         return STT_FUNC;
   149     }
   150     else {
   151         return 0;
   152     }
   153 }
   155 //----------------------------------------------------------------------
   157 typedef vector<elf_symbol> elf_symbol_table;
   158 typedef map< basic_string<char>, elf_symbol_table > elf_text_map;
   160 void
   161 process_mapping(char* mapping, size_t size)
   162 {
   163     const Elf32_Ehdr* ehdr = reinterpret_cast<Elf32_Ehdr*>(mapping);
   164     if (verify_elf_header(ehdr) < 0)
   165         return;
   167     // find the section headers
   168     const Elf32_Shdr* shdrs = reinterpret_cast<Elf32_Shdr*>(mapping + ehdr->e_shoff);
   170     // find the section header string table, .shstrtab
   171     const Elf32_Shdr* shstrtabsh = shdrs + ehdr->e_shstrndx;
   172     const char* shstrtab = mapping + shstrtabsh->sh_offset;
   174     // find the sections we care about
   175     const Elf32_Shdr *symtabsh, *strtabsh, *textsh;
   176     int textndx;
   178     for (int i = 0; i < ehdr->e_shnum; ++i) {
   179         basic_string<char> name(shstrtab + shdrs[i].sh_name);
   180         if (name == opt_section) {
   181             textsh = shdrs + i;
   182             textndx = i;
   183         }
   184         else if (name == ".symtab") {
   185             symtabsh = shdrs + i;
   186         }
   187         else if (name == ".strtab") {
   188             strtabsh = shdrs + i;
   189         }
   190     }
   192     // find the .strtab
   193     char* strtab = mapping + strtabsh->sh_offset;
   195     // find the .text
   196     char* text = mapping + textsh->sh_offset;
   197     int textaddr = textsh->sh_addr;
   199     // find the symbol table
   200     int nentries = symtabsh->sh_size / sizeof(Elf32_Sym);
   201     Elf32_Sym* symtab = reinterpret_cast<Elf32_Sym*>(mapping + symtabsh->sh_offset);
   203     // look for symbols in the .text section
   204     elf_text_map textmap;
   206     for (int i = 0; i < nentries; ++i) {
   207         const Elf32_Sym* sym = symtab + i;
   208         if (sym->st_shndx == textndx &&
   209             ELF32_ST_TYPE(sym->st_info) == st_type(opt_type) &&
   210             sym->st_size) {
   211             basic_string<char> functext(text + sym->st_value - textaddr, sym->st_size);
   213             elf_symbol_table& syms = textmap[functext];
   214             if (syms.end() == find(syms.begin(), syms.end(), elf_symbol(*sym)))
   215                 syms.insert(syms.end(), *sym);
   216         }
   217     }
   219     int uniquebytes = 0, totalbytes = 0;
   220     int uniquecount = 0, totalcount = 0;
   222     for (elf_text_map::const_iterator entry = textmap.begin();
   223          entry != textmap.end();
   224          ++entry) {
   225         const elf_symbol_table& syms = entry->second;
   227         if (syms.size() <= 1)
   228             continue;
   230         int sz = syms.begin()->st_size;
   231         uniquebytes += sz;
   232         totalbytes += sz * syms.size();
   233         uniquecount += 1;
   234         totalcount += syms.size();
   236         for (elf_symbol_table::const_iterator sym = syms.begin(); sym != syms.end(); ++sym)
   237             cout << strtab + sym->st_name << endl;
   239         dec(cout);
   240         cout << syms.size() << " copies of " << sz << " bytes";
   241         cout << " (" << ((syms.size() - 1) * sz) << " redundant bytes)" << endl;
   243         hexdump(cout, entry->first.data(), entry->first.size());
   244         cout << endl;
   245     }
   247     dec(cout);
   248     cout << "bytes unique=" << uniquebytes << ", total=" << totalbytes << endl;
   249     cout << "entries unique=" << uniquecount << ", total=" << totalcount << endl;
   250 }
   252 void
   253 process_file(const char* name)
   254 {
   255     int fd = open(name, O_RDWR);
   256     if (fd >= 0) {
   257         struct stat statbuf;
   258         if (fstat(fd, &statbuf) >= 0) {
   259             size_t size = statbuf.st_size;
   261             void* mapping = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
   262             if (mapping != MAP_FAILED) {
   263                 process_mapping(static_cast<char*>(mapping), size);
   264                 munmap(mapping, size);
   265             }
   266         }
   267         close(fd);
   268     }
   269 }
   271 static void
   272 usage()
   273 {
   274     cerr << "foldelf [--section=<section>] [--type=<type>] [file ...]\n\
   275    --section, -s  the section of the ELF file to scan; defaults\n\
   276                   to ``.text''. Valid values include any section\n\
   277                   of the ELF file.\n\
   278    --type, -t     the type of object to examine in the section;\n\
   279                   defaults to ``func''. Valid values include\n\
   280                   ``none'', ``func'', or ``object''.\n";
   282 }
   284 static struct option opts[] = {
   285     { "type",    required_argument, 0, 't' },
   286     { "section", required_argument, 0, 's' },
   287     { "help",    no_argument,       0, '?' },
   288     { 0,         0, 0, 0 }
   289 };
   291 int
   292 main(int argc, char* argv[])
   293 {
   294     while (1) {
   295         int option_index = 0;
   296         int c = getopt_long(argc, argv, "t:s:", opts, &option_index);
   298         if (c < 0) break;
   300         switch (c) {
   301         case 't':
   302             opt_type = optarg;
   303             break;
   305         case 's':
   306             opt_section = optarg;
   307             break;
   309         case '?':
   310             usage();
   311             break;
   312         }
   313     }
   315     for (int i = optind; i < argc; ++i)
   316         process_file(argv[i]);
   318     return 0;
   319 }

mercurial