/* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This program reads an ELF file and computes information about
 * redundancies.
 *
 * For every symbol of the requested type in the requested section, the
 * symbol's bytes are used as a key; symbols whose bytes are identical
 * are grouped together and reported along with a hex dump of the shared
 * bytes and a summary of how many bytes are duplicated.
 */

#include <ctype.h>
#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>

//----------------------------------------------------------------------

// Symbol type to examine; overridden by --type / -t.
const char* opt_type = "func";

// Name of the section to scan; overridden by --section / -s.
const char* opt_section = ".text";

//----------------------------------------------------------------------

/**
 * Writes a hex dump of `count` bytes starting at `bytes` to `out`.
 *
 * Each output line covers up to 16 bytes: an 8-digit hexadecimal offset,
 * the bytes as two-digit hex values grouped in pairs, and the same bytes
 * as characters with non-printable ones shown as '.'. A short final line
 * is padded so the character column stays aligned.
 *
 * The stream's format flags and fill character are restored on return.
 */
static void
hexdump(std::ostream& out, const char* bytes, size_t count)
{
    const std::ios_base::fmtflags saved_flags = out.flags();
    const char saved_fill = out.fill('0');
    out << std::hex;

    // Work on unsigned bytes: values >= 0x80 must not be sign-extended
    // when printed, and passing a negative value to isprint() is undefined.
    const unsigned char* data = reinterpret_cast<const unsigned char*>(bytes);

    for (size_t off = 0; off < count; off += 16) {
        const size_t remaining = count - off;
        const size_t n = remaining < 16 ? remaining : 16;
        const unsigned char* p = data + off;

        out << std::setw(8) << off << ": ";

        for (size_t j = 0; j < 16; ++j) {
            if (j < n)
                out << std::setw(2) << static_cast<unsigned>(p[j]);
            else
                out << "  ";        // pad in place of a missing byte

            if (j % 2)
                out << ' ';         // separator after every byte pair
        }

        for (size_t j = 0; j < n; ++j)
            out.put(isprint(p[j]) ? static_cast<char>(p[j]) : '.');

        out << '\n';
    }

    out.fill(saved_fill);
    out.flags(saved_flags);
}

//----------------------------------------------------------------------

/**
 * Checks the identification bytes of an ELF header.
 *
 * Returns 0 when the header carries the ELF magic number and describes a
 * 32-bit, little-endian file of the current ELF version. Otherwise prints
 * a diagnostic to stderr and returns -1.
 */
int
verify_elf_header(const Elf32_Ehdr* hdr)
{
    if (hdr->e_ident[EI_MAG0] != ELFMAG0
        || hdr->e_ident[EI_MAG1] != ELFMAG1
        || hdr->e_ident[EI_MAG2] != ELFMAG2
        || hdr->e_ident[EI_MAG3] != ELFMAG3) {
        std::cerr << "not an elf file" << std::endl;
        return -1;
    }

    if (hdr->e_ident[EI_CLASS] != ELFCLASS32) {
        std::cerr << "not a 32-bit elf file" << std::endl;
        return -1;
    }

    if (hdr->e_ident[EI_DATA] != ELFDATA2LSB) {
        std::cerr << "not a little endian elf file" << std::endl;
        return -1;
    }

    if (hdr->e_ident[EI_VERSION] != EV_CURRENT) {
        std::cerr << "incompatible version" << std::endl;
        return -1;
    }

    return 0;
}

//----------------------------------------------------------------------

/**
 * An Elf32_Sym that can be compared for equality, so that symbol table
 * entries can be stored in a container and searched with std::find().
 * Two symbols are equal when their Elf32_Sym representations are
 * byte-for-byte identical.
 */
class elf_symbol : public Elf32_Sym
{
public:
    // Deliberately not `explicit`: an Elf32_Sym converts implicitly.
    elf_symbol(const Elf32_Sym& sym) : Elf32_Sym(sym) {}

    friend bool operator==(const elf_symbol& lhs, const elf_symbol& rhs) {
        return 0 == ::memcmp(static_cast<const Elf32_Sym*>(&lhs),
                             static_cast<const Elf32_Sym*>(&rhs),
                             sizeof(Elf32_Sym));
    }
};

//----------------------------------------------------------------------

/** Returns a printable name for the binding encoded in a symbol's st_info. */
static const char*
st_bind(unsigned char info)
{
    switch (ELF32_ST_BIND(info)) {
    case STB_LOCAL:  return "local";
    case STB_GLOBAL: return "global";
    case STB_WEAK:   return "weak";
    default:         return "unknown";
    }
}

/** Returns a printable name for the type encoded in a symbol's st_info. */
static const char*
st_type(unsigned char info)
{
    switch (ELF32_ST_TYPE(info)) {
    case STT_NOTYPE:  return "none";
    case STT_OBJECT:  return "object";
    case STT_FUNC:    return "func";
    case STT_SECTION: return "section";
    case STT_FILE:    return "file";
    default:          return "unknown";
    }
}

/**
 * Maps a type name given on the command line ("none", "object" or "func")
 * to the corresponding STT_* value. Any other name yields 0.
 */
static unsigned char
st_type(const char* type)
{
    if (strcmp(type, "none") == 0)
        return STT_NOTYPE;

    if (strcmp(type, "object") == 0)
        return STT_OBJECT;

    if (strcmp(type, "func") == 0)
        return STT_FUNC;

    return 0;
}

//----------------------------------------------------------------------

typedef std::vector<elf_symbol> elf_symbol_table;
typedef std::map< std::string, elf_symbol_table > elf_text_map;

/**
 * Returns true when the byte range [offset, offset + length) lies inside
 * a region of `size` bytes. Written so that the addition cannot overflow.
 */
static bool
range_in_mapping(size_t offset, size_t length, size_t size)
{
    return offset <= size && length <= size - offset;
}

/**
 * Returns true when the section occupies bytes in the file and those
 * bytes lie inside a mapping of `size` bytes. SHT_NOBITS sections are
 * rejected because they have no contents in the file.
 */
static bool
section_in_mapping(const Elf32_Shdr* sh, size_t size)
{
    return sh->sh_type != SHT_NOBITS
        && range_in_mapping(sh->sh_offset, sh->sh_size, size);
}

/**
 * Returns the string starting at `offset` in a string table of
 * `table_size` bytes, or 0 when the offset is outside the table or the
 * string is not NUL-terminated within it.
 */
static const char*
string_at(const char* table, size_t table_size, size_t offset)
{
    if (offset >= table_size)
        return 0;

    if (!::memchr(table + offset, '\0', table_size - offset))
        return 0;

    return table + offset;
}

/**
 * Scans an ELF image for symbols with identical contents.
 *
 * `mapping` points at the start of the image and `size` is its length in
 * bytes. Symbols in the section named by `opt_section` whose type matches
 * `opt_type` and whose size is non-zero are grouped by their bytes. Each
 * group with more than one member is written to stdout (symbol names,
 * copy count, hex dump), followed by overall totals.
 *
 * Every offset read from the image is validated against `size` before
 * use; on a malformed image a diagnostic is written to stderr and the
 * function returns without producing a report.
 */
void
process_mapping(char* mapping, size_t size)
{
    if (size < sizeof(Elf32_Ehdr)) {
        std::cerr << "not an elf file" << std::endl;
        return;
    }

    const Elf32_Ehdr* ehdr = reinterpret_cast<const Elf32_Ehdr*>(mapping);
    if (verify_elf_header(ehdr) < 0)
        return;

    // find the section headers
    const size_t shnum = ehdr->e_shnum;
    if (shnum == 0
        || ehdr->e_shentsize != sizeof(Elf32_Shdr)
        || ehdr->e_shstrndx >= shnum
        || !range_in_mapping(ehdr->e_shoff, shnum * sizeof(Elf32_Shdr), size)) {
        std::cerr << "missing or corrupt section header table" << std::endl;
        return;
    }

    const Elf32_Shdr* shdrs =
        reinterpret_cast<const Elf32_Shdr*>(mapping + ehdr->e_shoff);

    // find the section header string table, .shstrtab
    const Elf32_Shdr* shstrtabsh = shdrs + ehdr->e_shstrndx;
    if (!section_in_mapping(shstrtabsh, size)) {
        std::cerr << "corrupt section header string table" << std::endl;
        return;
    }

    const char* shstrtab = mapping + shstrtabsh->sh_offset;

    // find the sections we care about
    const Elf32_Shdr* symtabsh = 0;
    const Elf32_Shdr* strtabsh = 0;
    const Elf32_Shdr* textsh = 0;
    size_t textndx = 0;

    for (size_t i = 0; i < shnum; ++i) {
        const char* name =
            string_at(shstrtab, shstrtabsh->sh_size, shdrs[i].sh_name);
        if (!name)
            continue;

        if (strcmp(name, opt_section) == 0) {
            textsh = shdrs + i;
            textndx = i;
        }
        else if (strcmp(name, ".symtab") == 0) {
            symtabsh = shdrs + i;
        }
        else if (strcmp(name, ".strtab") == 0) {
            strtabsh = shdrs + i;
        }
    }

    if (!textsh) {
        std::cerr << "no section named " << opt_section << std::endl;
        return;
    }

    if (!symtabsh || !strtabsh) {
        std::cerr << "no symbol table" << std::endl;
        return;
    }

    if (!section_in_mapping(textsh, size)
        || !section_in_mapping(symtabsh, size)
        || !section_in_mapping(strtabsh, size)) {
        std::cerr << "section contents lie outside the file" << std::endl;
        return;
    }

    // find the .strtab
    const char* strtab = mapping + strtabsh->sh_offset;

    // find the .text
    const char* text = mapping + textsh->sh_offset;
    const Elf32_Addr textaddr = textsh->sh_addr;

    // find the symbol table
    const size_t nentries = symtabsh->sh_size / sizeof(Elf32_Sym);
    const Elf32_Sym* symtab =
        reinterpret_cast<const Elf32_Sym*>(mapping + symtabsh->sh_offset);

    // look for symbols in the .text section
    const unsigned char wanted_type = st_type(opt_type);
    elf_text_map textmap;

    for (size_t i = 0; i < nentries; ++i) {
        const Elf32_Sym* sym = symtab + i;
        if (sym->st_shndx != textndx
            || ELF32_ST_TYPE(sym->st_info) != wanted_type
            || sym->st_size == 0)
            continue;

        // Skip symbols whose bytes are not entirely inside the section.
        if (sym->st_value < textaddr
            || !range_in_mapping(sym->st_value - textaddr, sym->st_size,
                                 textsh->sh_size))
            continue;

        const std::string functext(text + (sym->st_value - textaddr),
                                   sym->st_size);

        elf_symbol_table& syms = textmap[functext];
        const elf_symbol candidate(*sym);
        if (std::find(syms.begin(), syms.end(), candidate) == syms.end())
            syms.push_back(candidate);
    }

    size_t uniquebytes = 0, totalbytes = 0;
    size_t uniquecount = 0, totalcount = 0;

    for (elf_text_map::const_iterator entry = textmap.begin();
         entry != textmap.end();
         ++entry) {
        const elf_symbol_table& syms = entry->second;

        // Only contents shared by two or more symbols are redundant.
        if (syms.size() <= 1)
            continue;

        const size_t sz = syms.front().st_size;
        uniquebytes += sz;
        totalbytes += sz * syms.size();
        uniquecount += 1;
        totalcount += syms.size();

        for (elf_symbol_table::const_iterator sym = syms.begin();
             sym != syms.end();
             ++sym) {
            const char* name =
                string_at(strtab, strtabsh->sh_size, sym->st_name);
            std::cout << (name ? name : "<invalid symbol name>") << std::endl;
        }

        std::cout << syms.size() << " copies of " << sz << " bytes";
        std::cout << " (" << ((syms.size() - 1) * sz) << " redundant bytes)"
                  << std::endl;

        hexdump(std::cout, entry->first.data(), entry->first.size());
        std::cout << std::endl;
    }

    std::cout << "bytes unique=" << uniquebytes
              << ", total=" << totalbytes << std::endl;
    std::cout << "entries unique=" << uniquecount
              << ", total=" << totalcount << std::endl;
}

/**
 * Maps the named file read-only and runs process_mapping() over it.
 * A failure to open, stat or map the file is reported on stderr.
 */
void
process_file(const char* name)
{
    // The mapping is PROT_READ only, so read access is all that is needed.
    const int fd = open(name, O_RDONLY);
    if (fd < 0) {
        std::cerr << name << ": " << strerror(errno) << std::endl;
        return;
    }

    struct stat statbuf;
    if (fstat(fd, &statbuf) < 0) {
        std::cerr << name << ": " << strerror(errno) << std::endl;
    }
    else if (statbuf.st_size <= 0) {
        // A zero-length mapping is rejected by mmap(); report it clearly.
        std::cerr << name << ": empty file" << std::endl;
    }
    else {
        const size_t size = statbuf.st_size;

        void* mapping = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
        if (mapping == MAP_FAILED) {
            std::cerr << name << ": " << strerror(errno) << std::endl;
        }
        else {
            process_mapping(static_cast<char*>(mapping), size);
            munmap(mapping, size);
        }
    }

    close(fd);
}

/** Prints the command line synopsis to stderr. */
static void
usage()
{
    std::cerr <<
        "foldelf [--section=<section>] [--type=<type>] [file ...]\n"
        "    --section, -s  the section of the ELF file to scan; defaults\n"
        "                   to ``.text''. Valid values include any section\n"
        "                   of the ELF file.\n"
        "    --type, -t     the type of object to examine in the section;\n"
        "                   defaults to ``func''. Valid values include\n"
        "                   ``none'', ``func'', or ``object''.\n";
}

// --help has its own code so it can be told apart from the '?' that
// getopt_long() returns for an unrecognized option.
static struct option opts[] = {
    { "type",    required_argument, 0, 't' },
    { "section", required_argument, 0, 's' },
    { "help",    no_argument,       0, 'h' },
    { 0,         0,                 0, 0   }
};

/**
 * Parses the options, then processes each remaining argument as a file.
 * Returns 0 on normal completion or after --help, and 1 when an option
 * could not be parsed.
 */
int
main(int argc, char* argv[])
{
    while (1) {
        int option_index = 0;
        const int c = getopt_long(argc, argv, "t:s:h", opts, &option_index);

        if (c < 0) break;

        switch (c) {
        case 't':
            opt_type = optarg;
            break;

        case 's':
            opt_section = optarg;
            break;

        case 'h':
            usage();
            return 0;

        case '?':
        default:
            // Do not go on to process files after a bad command line.
            usage();
            return 1;
        }
    }

    for (int i = optind; i < argc; ++i)
        process_file(argv[i]);

    return 0;
}