tools/footprint/foldelf.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/tools/footprint/foldelf.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,319 @@
     1.4 +/* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + *
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
/* This program reads 32-bit little-endian ELF files and reports symbols
 * in a chosen section whose contents are byte-for-byte identical, i.e.
 * redundant copies of the same code or data.
 */
    1.13 +
#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>
    1.26 +
    1.27 +//----------------------------------------------------------------------
    1.28 +
    1.29 +char* opt_type    = "func";
    1.30 +char* opt_section = ".text";
    1.31 +
    1.32 +//----------------------------------------------------------------------
    1.33 +
    1.34 +static void
    1.35 +hexdump(ostream& out, const char* bytes, size_t count)
    1.36 +{
    1.37 +    hex(out);
    1.38 +
    1.39 +    size_t off = 0;
    1.40 +    while (off < count) {
    1.41 +        out.form("%08lx: ", off);
    1.42 +
    1.43 +        const char* p = bytes + off;
    1.44 +
    1.45 +        int j = 0;
    1.46 +        while (j < 16) {
    1.47 +            out.form("%02x", p[j++] & 0xff);
    1.48 +            if (j + off >= count)
    1.49 +                break;
    1.50 +
    1.51 +            out.form("%02x ", p[j++] & 0xff);
    1.52 +            if (j + off >= count)
    1.53 +                break;
    1.54 +        }
    1.55 +
    1.56 +        // Pad
    1.57 +        for (; j < 16; ++j)
    1.58 +            out << ((j%2) ? "   " : "  ");
    1.59 +
    1.60 +        for (j = 0; j < 16; ++j) {
    1.61 +            if (j + off < count)
    1.62 +                out.put(isprint(p[j]) ? p[j] : '.');
    1.63 +        }
    1.64 +
    1.65 +        out << endl;
    1.66 +        off += 16;
    1.67 +    }
    1.68 +}
    1.69 +
    1.70 +//----------------------------------------------------------------------
    1.71 +
    1.72 +int
    1.73 +verify_elf_header(const Elf32_Ehdr* hdr)
    1.74 +{
    1.75 +    if (hdr->e_ident[EI_MAG0] != ELFMAG0
    1.76 +        || hdr->e_ident[EI_MAG1] != ELFMAG1
    1.77 +        || hdr->e_ident[EI_MAG2] != ELFMAG2
    1.78 +        || hdr->e_ident[EI_MAG3] != ELFMAG3) {
    1.79 +        cerr << "not an elf file" << endl;
    1.80 +        return -1;
    1.81 +    }
    1.82 +
    1.83 +    if (hdr->e_ident[EI_CLASS] != ELFCLASS32) {
    1.84 +        cerr << "not a 32-bit elf file" << endl;
    1.85 +        return -1;
    1.86 +    }
    1.87 +
    1.88 +    if (hdr->e_ident[EI_DATA] != ELFDATA2LSB) {
    1.89 +        cerr << "not a little endian elf file" << endl;
    1.90 +        return -1;
    1.91 +    }
    1.92 +
    1.93 +    if (hdr->e_ident[EI_VERSION] != EV_CURRENT) {
    1.94 +        cerr << "incompatible version" << endl;
    1.95 +        return -1;
    1.96 +    }
    1.97 +
    1.98 +    return 0;
    1.99 +}
   1.100 +
   1.101 +//----------------------------------------------------------------------
   1.102 +
   1.103 +class elf_symbol : public Elf32_Sym
   1.104 +{
   1.105 +public:
   1.106 +    elf_symbol(const Elf32_Sym& sym)
   1.107 +    { ::memcpy(static_cast<Elf32_Sym*>(this), &sym, sizeof(Elf32_Sym)); }
   1.108 +
   1.109 +    friend bool operator==(const elf_symbol& lhs, const elf_symbol& rhs) {
   1.110 +        return 0 == ::memcmp(static_cast<const Elf32_Sym*>(&lhs),
   1.111 +                             static_cast<const Elf32_Sym*>(&rhs),
   1.112 +                             sizeof(Elf32_Sym)); }
   1.113 +};
   1.114 +
   1.115 +//----------------------------------------------------------------------
   1.116 +
   1.117 +static const char*
   1.118 +st_bind(unsigned char info)
   1.119 +{
   1.120 +    switch (ELF32_ST_BIND(info)) {
   1.121 +    case STB_LOCAL:      return "local";
   1.122 +    case STB_GLOBAL:     return "global";
   1.123 +    case STB_WEAK:       return "weak";
   1.124 +    default:             return "unknown";
   1.125 +    }
   1.126 +}
   1.127 +
   1.128 +static const char*
   1.129 +st_type(unsigned char info)
   1.130 +{
   1.131 +    switch (ELF32_ST_TYPE(info)) {
   1.132 +    case STT_NOTYPE:     return "none";
   1.133 +    case STT_OBJECT:     return "object";
   1.134 +    case STT_FUNC:       return "func";
   1.135 +    case STT_SECTION:    return "section";
   1.136 +    case STT_FILE:       return "file";
   1.137 +    default:             return "unknown";
   1.138 +    }
   1.139 +}
   1.140 +
   1.141 +static unsigned char
   1.142 +st_type(const char* type)
   1.143 +{
   1.144 +    if (strcmp(type, "none") == 0) {
   1.145 +        return STT_NOTYPE;
   1.146 +    }
   1.147 +    else if (strcmp(type, "object") == 0) {
   1.148 +        return STT_OBJECT;
   1.149 +    }
   1.150 +    else if (strcmp(type, "func") == 0) {
   1.151 +        return STT_FUNC;
   1.152 +    }
   1.153 +    else {
   1.154 +        return 0;
   1.155 +    }
   1.156 +}
   1.157 +
   1.158 +//----------------------------------------------------------------------
   1.159 +
   1.160 +typedef vector<elf_symbol> elf_symbol_table;
   1.161 +typedef map< basic_string<char>, elf_symbol_table > elf_text_map;
   1.162 +
   1.163 +void
   1.164 +process_mapping(char* mapping, size_t size)
   1.165 +{
   1.166 +    const Elf32_Ehdr* ehdr = reinterpret_cast<Elf32_Ehdr*>(mapping);
   1.167 +    if (verify_elf_header(ehdr) < 0)
   1.168 +        return;
   1.169 +
   1.170 +    // find the section headers
   1.171 +    const Elf32_Shdr* shdrs = reinterpret_cast<Elf32_Shdr*>(mapping + ehdr->e_shoff);
   1.172 +
   1.173 +    // find the section header string table, .shstrtab
   1.174 +    const Elf32_Shdr* shstrtabsh = shdrs + ehdr->e_shstrndx;
   1.175 +    const char* shstrtab = mapping + shstrtabsh->sh_offset;
   1.176 +
   1.177 +    // find the sections we care about
   1.178 +    const Elf32_Shdr *symtabsh, *strtabsh, *textsh;
   1.179 +    int textndx;
   1.180 +
   1.181 +    for (int i = 0; i < ehdr->e_shnum; ++i) {
   1.182 +        basic_string<char> name(shstrtab + shdrs[i].sh_name);
   1.183 +        if (name == opt_section) {
   1.184 +            textsh = shdrs + i;
   1.185 +            textndx = i;
   1.186 +        }
   1.187 +        else if (name == ".symtab") {
   1.188 +            symtabsh = shdrs + i;
   1.189 +        }
   1.190 +        else if (name == ".strtab") {
   1.191 +            strtabsh = shdrs + i;
   1.192 +        }
   1.193 +    }
   1.194 +
   1.195 +    // find the .strtab
   1.196 +    char* strtab = mapping + strtabsh->sh_offset;
   1.197 +
   1.198 +    // find the .text
   1.199 +    char* text = mapping + textsh->sh_offset;
   1.200 +    int textaddr = textsh->sh_addr;
   1.201 +
   1.202 +    // find the symbol table
   1.203 +    int nentries = symtabsh->sh_size / sizeof(Elf32_Sym);
   1.204 +    Elf32_Sym* symtab = reinterpret_cast<Elf32_Sym*>(mapping + symtabsh->sh_offset);
   1.205 +
   1.206 +    // look for symbols in the .text section
   1.207 +    elf_text_map textmap;
   1.208 +
   1.209 +    for (int i = 0; i < nentries; ++i) {
   1.210 +        const Elf32_Sym* sym = symtab + i;
   1.211 +        if (sym->st_shndx == textndx &&
   1.212 +            ELF32_ST_TYPE(sym->st_info) == st_type(opt_type) &&
   1.213 +            sym->st_size) {
   1.214 +            basic_string<char> functext(text + sym->st_value - textaddr, sym->st_size);
   1.215 +
   1.216 +            elf_symbol_table& syms = textmap[functext];
   1.217 +            if (syms.end() == find(syms.begin(), syms.end(), elf_symbol(*sym)))
   1.218 +                syms.insert(syms.end(), *sym);
   1.219 +        }
   1.220 +    }
   1.221 +
   1.222 +    int uniquebytes = 0, totalbytes = 0;
   1.223 +    int uniquecount = 0, totalcount = 0;
   1.224 +
   1.225 +    for (elf_text_map::const_iterator entry = textmap.begin();
   1.226 +         entry != textmap.end();
   1.227 +         ++entry) {
   1.228 +        const elf_symbol_table& syms = entry->second;
   1.229 +
   1.230 +        if (syms.size() <= 1)
   1.231 +            continue;
   1.232 +
   1.233 +        int sz = syms.begin()->st_size;
   1.234 +        uniquebytes += sz;
   1.235 +        totalbytes += sz * syms.size();
   1.236 +        uniquecount += 1;
   1.237 +        totalcount += syms.size();
   1.238 +
   1.239 +        for (elf_symbol_table::const_iterator sym = syms.begin(); sym != syms.end(); ++sym)
   1.240 +            cout << strtab + sym->st_name << endl;
   1.241 +
   1.242 +        dec(cout);
   1.243 +        cout << syms.size() << " copies of " << sz << " bytes";
   1.244 +        cout << " (" << ((syms.size() - 1) * sz) << " redundant bytes)" << endl;
   1.245 +
   1.246 +        hexdump(cout, entry->first.data(), entry->first.size());
   1.247 +        cout << endl;
   1.248 +    }
   1.249 +
   1.250 +    dec(cout);
   1.251 +    cout << "bytes unique=" << uniquebytes << ", total=" << totalbytes << endl;
   1.252 +    cout << "entries unique=" << uniquecount << ", total=" << totalcount << endl;
   1.253 +}
   1.254 +
   1.255 +void
   1.256 +process_file(const char* name)
   1.257 +{
   1.258 +    int fd = open(name, O_RDWR);
   1.259 +    if (fd >= 0) {
   1.260 +        struct stat statbuf;
   1.261 +        if (fstat(fd, &statbuf) >= 0) {
   1.262 +            size_t size = statbuf.st_size;
   1.263 +
   1.264 +            void* mapping = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
   1.265 +            if (mapping != MAP_FAILED) {
   1.266 +                process_mapping(static_cast<char*>(mapping), size);
   1.267 +                munmap(mapping, size);
   1.268 +            }
   1.269 +        }
   1.270 +        close(fd);
   1.271 +    }
   1.272 +}
   1.273 +
   1.274 +static void
   1.275 +usage()
   1.276 +{
   1.277 +    cerr << "foldelf [--section=<section>] [--type=<type>] [file ...]\n\
   1.278 +   --section, -s  the section of the ELF file to scan; defaults\n\
   1.279 +                  to ``.text''. Valid values include any section\n\
   1.280 +                  of the ELF file.\n\
   1.281 +   --type, -t     the type of object to examine in the section;\n\
   1.282 +                  defaults to ``func''. Valid values include\n\
   1.283 +                  ``none'', ``func'', or ``object''.\n";
   1.284 +
   1.285 +}
   1.286 +
   1.287 +static struct option opts[] = {
   1.288 +    { "type",    required_argument, 0, 't' },
   1.289 +    { "section", required_argument, 0, 's' },
   1.290 +    { "help",    no_argument,       0, '?' },
   1.291 +    { 0,         0, 0, 0 }
   1.292 +};
   1.293 +    
   1.294 +int
   1.295 +main(int argc, char* argv[])
   1.296 +{
   1.297 +    while (1) {
   1.298 +        int option_index = 0;
   1.299 +        int c = getopt_long(argc, argv, "t:s:", opts, &option_index);
   1.300 +
   1.301 +        if (c < 0) break;
   1.302 +
   1.303 +        switch (c) {
   1.304 +        case 't':
   1.305 +            opt_type = optarg;
   1.306 +            break;
   1.307 +
   1.308 +        case 's':
   1.309 +            opt_section = optarg;
   1.310 +            break;
   1.311 +
   1.312 +        case '?':
   1.313 +            usage();
   1.314 +            break;
   1.315 +        }
   1.316 +    }
   1.317 +
   1.318 +    for (int i = optind; i < argc; ++i)
   1.319 +        process_file(argv[i]);
   1.320 +
   1.321 +    return 0;
   1.322 +}

mercurial