tools/footprint/foldelf.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 *
michael@0 3 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 /* This program reads an ELF file and computes information about
michael@0 8 * redundancies.
michael@0 9 */
michael@0 10
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
michael@0 23
michael@0 24 //----------------------------------------------------------------------
michael@0 25
// Symbol type to examine, as spelled on the command line ("none", "object"
// or "func"); overridden by --type / -t.
// Declared const char* because a string literal may not bind to a plain
// char* in standard C++.
const char* opt_type = "func";

// Name of the ELF section to scan; overridden by --section / -s.
const char* opt_section = ".text";
michael@0 28
michael@0 29 //----------------------------------------------------------------------
michael@0 30
/**
 * Write a hex/ASCII dump of a byte buffer to a stream.
 *
 * Each output row covers up to 16 bytes and has the form
 *
 *     OOOOOOOO: aabb ccdd ... (eight groups of two bytes) ASCII
 *
 * where OOOOOOOO is the zero-padded hexadecimal offset of the row. A short
 * final row is padded with spaces so that the ASCII column always starts at
 * the same position. Bytes that are not printable are shown as '.'.
 *
 * The numbers are formatted with snprintf, so the formatting flags of
 * `out` are neither consulted nor modified.
 *
 * @param out    stream that receives the dump
 * @param bytes  first byte to dump; must be readable for `count` bytes
 * @param count  number of bytes to dump; nothing is written when it is 0
 */
static void
hexdump(std::ostream& out, const char* bytes, size_t count)
{
    const size_t kBytesPerRow = 16;

    for (size_t off = 0; off < count; off += kBytesPerRow) {
        const char* p = bytes + off;
        const size_t remaining = count - off;
        const size_t row = remaining < kBytesPerRow ? remaining : kBytesPerRow;

        char field[32];
        std::snprintf(field, sizeof(field), "%08lx: ",
                      static_cast<unsigned long>(off));
        out << field;

        // Hex column: two digits per byte, one space after every second
        // byte. Missing bytes of a short row are replaced by blanks.
        for (size_t j = 0; j < kBytesPerRow; ++j) {
            if (j < row) {
                std::snprintf(field, sizeof(field), "%02x",
                              static_cast<unsigned>(p[j]) & 0xffu);
                out << field;
            } else {
                out << "  ";
            }
            if (j % 2)
                out << ' ';
        }

        // ASCII column. isprint() requires a value representable as
        // unsigned char, so convert before classifying.
        for (size_t j = 0; j < row; ++j) {
            const unsigned char c = static_cast<unsigned char>(p[j]);
            out.put(std::isprint(c) ? p[j] : '.');
        }

        out << '\n';
    }
}
michael@0 66
michael@0 67 //----------------------------------------------------------------------
michael@0 68
/**
 * Check that an ELF header describes a file this tool can read.
 *
 * The identification bytes must carry the ELF magic number and declare a
 * 32-bit, little-endian file of the current ELF version. The first check
 * that fails is reported on std::cerr.
 *
 * @param hdr  header to examine; at least e_ident must be readable
 * @return 0 when the header is acceptable, -1 otherwise
 */
int
verify_elf_header(const Elf32_Ehdr* hdr)
{
    const unsigned char* ident = hdr->e_ident;

    const bool magic_ok = ident[EI_MAG0] == ELFMAG0
        && ident[EI_MAG1] == ELFMAG1
        && ident[EI_MAG2] == ELFMAG2
        && ident[EI_MAG3] == ELFMAG3;
    if (!magic_ok) {
        std::cerr << "not an elf file" << std::endl;
        return -1;
    }

    if (ident[EI_CLASS] != ELFCLASS32) {
        std::cerr << "not a 32-bit elf file" << std::endl;
        return -1;
    }

    if (ident[EI_DATA] != ELFDATA2LSB) {
        std::cerr << "not a little endian elf file" << std::endl;
        return -1;
    }

    if (ident[EI_VERSION] != EV_CURRENT) {
        std::cerr << "incompatible version" << std::endl;
        return -1;
    }

    return 0;
}
michael@0 97
michael@0 98 //----------------------------------------------------------------------
michael@0 99
/**
 * An ELF symbol table entry that can be compared for equality, so that it
 * can be searched for with std::find.
 *
 * Two symbols are equal when every field of the underlying Elf32_Sym is
 * equal. The comparison is written member by member rather than with
 * memcmp so that it needs no <cstring> declarations and does not depend on
 * the absence of padding in the struct.
 */
class elf_symbol : public Elf32_Sym
{
public:
    // Deliberately not explicit: callers rely on the implicit conversion
    // from Elf32_Sym when inserting raw table entries into containers.
    elf_symbol(const Elf32_Sym& sym)
        : Elf32_Sym(sym)
    {}

    friend bool operator==(const elf_symbol& lhs, const elf_symbol& rhs) {
        return lhs.st_name == rhs.st_name
            && lhs.st_value == rhs.st_value
            && lhs.st_size == rhs.st_size
            && lhs.st_info == rhs.st_info
            && lhs.st_other == rhs.st_other
            && lhs.st_shndx == rhs.st_shndx;
    }
};
michael@0 111
michael@0 112 //----------------------------------------------------------------------
michael@0 113
/**
 * Describe the binding encoded in a symbol's st_info byte.
 *
 * @param info  the st_info field of a symbol table entry
 * @return "local", "global" or "weak"; "unknown" for any other binding
 */
static const char*
st_bind(unsigned char info)
{
    const int binding = ELF32_ST_BIND(info);

    if (binding == STB_LOCAL)
        return "local";
    if (binding == STB_GLOBAL)
        return "global";
    if (binding == STB_WEAK)
        return "weak";

    return "unknown";
}
michael@0 124
/**
 * Describe the symbol type encoded in a symbol's st_info byte.
 *
 * @param info  the st_info field of a symbol table entry
 * @return "none", "object", "func", "section" or "file"; "unknown" for
 *         any other type
 */
static const char*
st_type(unsigned char info)
{
    const int kind = ELF32_ST_TYPE(info);

    if (kind == STT_NOTYPE)
        return "none";
    if (kind == STT_OBJECT)
        return "object";
    if (kind == STT_FUNC)
        return "func";
    if (kind == STT_SECTION)
        return "section";
    if (kind == STT_FILE)
        return "file";

    return "unknown";
}
michael@0 137
/**
 * Translate a symbol type name, as given on the command line, into the
 * corresponding STT_* constant.
 *
 * @param type  NUL-terminated type name
 * @return STT_NOTYPE for "none", STT_OBJECT for "object", STT_FUNC for
 *         "func", and 0 for any name that is not recognised
 */
static unsigned char
st_type(const char* type)
{
    static const struct {
        const char* name;
        unsigned char code;
    } known[] = {
        { "none",   STT_NOTYPE },
        { "object", STT_OBJECT },
        { "func",   STT_FUNC   },
    };

    for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); ++i) {
        if (strcmp(type, known[i].name) == 0)
            return known[i].code;
    }

    return 0;
}
michael@0 154
michael@0 155 //----------------------------------------------------------------------
michael@0 156
michael@0 157 typedef vector<elf_symbol> elf_symbol_table;
michael@0 158 typedef map< basic_string<char>, elf_symbol_table > elf_text_map;
michael@0 159
michael@0 160 void
michael@0 161 process_mapping(char* mapping, size_t size)
michael@0 162 {
michael@0 163 const Elf32_Ehdr* ehdr = reinterpret_cast<Elf32_Ehdr*>(mapping);
michael@0 164 if (verify_elf_header(ehdr) < 0)
michael@0 165 return;
michael@0 166
michael@0 167 // find the section headers
michael@0 168 const Elf32_Shdr* shdrs = reinterpret_cast<Elf32_Shdr*>(mapping + ehdr->e_shoff);
michael@0 169
michael@0 170 // find the section header string table, .shstrtab
michael@0 171 const Elf32_Shdr* shstrtabsh = shdrs + ehdr->e_shstrndx;
michael@0 172 const char* shstrtab = mapping + shstrtabsh->sh_offset;
michael@0 173
michael@0 174 // find the sections we care about
michael@0 175 const Elf32_Shdr *symtabsh, *strtabsh, *textsh;
michael@0 176 int textndx;
michael@0 177
michael@0 178 for (int i = 0; i < ehdr->e_shnum; ++i) {
michael@0 179 basic_string<char> name(shstrtab + shdrs[i].sh_name);
michael@0 180 if (name == opt_section) {
michael@0 181 textsh = shdrs + i;
michael@0 182 textndx = i;
michael@0 183 }
michael@0 184 else if (name == ".symtab") {
michael@0 185 symtabsh = shdrs + i;
michael@0 186 }
michael@0 187 else if (name == ".strtab") {
michael@0 188 strtabsh = shdrs + i;
michael@0 189 }
michael@0 190 }
michael@0 191
michael@0 192 // find the .strtab
michael@0 193 char* strtab = mapping + strtabsh->sh_offset;
michael@0 194
michael@0 195 // find the .text
michael@0 196 char* text = mapping + textsh->sh_offset;
michael@0 197 int textaddr = textsh->sh_addr;
michael@0 198
michael@0 199 // find the symbol table
michael@0 200 int nentries = symtabsh->sh_size / sizeof(Elf32_Sym);
michael@0 201 Elf32_Sym* symtab = reinterpret_cast<Elf32_Sym*>(mapping + symtabsh->sh_offset);
michael@0 202
michael@0 203 // look for symbols in the .text section
michael@0 204 elf_text_map textmap;
michael@0 205
michael@0 206 for (int i = 0; i < nentries; ++i) {
michael@0 207 const Elf32_Sym* sym = symtab + i;
michael@0 208 if (sym->st_shndx == textndx &&
michael@0 209 ELF32_ST_TYPE(sym->st_info) == st_type(opt_type) &&
michael@0 210 sym->st_size) {
michael@0 211 basic_string<char> functext(text + sym->st_value - textaddr, sym->st_size);
michael@0 212
michael@0 213 elf_symbol_table& syms = textmap[functext];
michael@0 214 if (syms.end() == find(syms.begin(), syms.end(), elf_symbol(*sym)))
michael@0 215 syms.insert(syms.end(), *sym);
michael@0 216 }
michael@0 217 }
michael@0 218
michael@0 219 int uniquebytes = 0, totalbytes = 0;
michael@0 220 int uniquecount = 0, totalcount = 0;
michael@0 221
michael@0 222 for (elf_text_map::const_iterator entry = textmap.begin();
michael@0 223 entry != textmap.end();
michael@0 224 ++entry) {
michael@0 225 const elf_symbol_table& syms = entry->second;
michael@0 226
michael@0 227 if (syms.size() <= 1)
michael@0 228 continue;
michael@0 229
michael@0 230 int sz = syms.begin()->st_size;
michael@0 231 uniquebytes += sz;
michael@0 232 totalbytes += sz * syms.size();
michael@0 233 uniquecount += 1;
michael@0 234 totalcount += syms.size();
michael@0 235
michael@0 236 for (elf_symbol_table::const_iterator sym = syms.begin(); sym != syms.end(); ++sym)
michael@0 237 cout << strtab + sym->st_name << endl;
michael@0 238
michael@0 239 dec(cout);
michael@0 240 cout << syms.size() << " copies of " << sz << " bytes";
michael@0 241 cout << " (" << ((syms.size() - 1) * sz) << " redundant bytes)" << endl;
michael@0 242
michael@0 243 hexdump(cout, entry->first.data(), entry->first.size());
michael@0 244 cout << endl;
michael@0 245 }
michael@0 246
michael@0 247 dec(cout);
michael@0 248 cout << "bytes unique=" << uniquebytes << ", total=" << totalbytes << endl;
michael@0 249 cout << "entries unique=" << uniquecount << ", total=" << totalcount << endl;
michael@0 250 }
michael@0 251
michael@0 252 void
michael@0 253 process_file(const char* name)
michael@0 254 {
michael@0 255 int fd = open(name, O_RDWR);
michael@0 256 if (fd >= 0) {
michael@0 257 struct stat statbuf;
michael@0 258 if (fstat(fd, &statbuf) >= 0) {
michael@0 259 size_t size = statbuf.st_size;
michael@0 260
michael@0 261 void* mapping = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
michael@0 262 if (mapping != MAP_FAILED) {
michael@0 263 process_mapping(static_cast<char*>(mapping), size);
michael@0 264 munmap(mapping, size);
michael@0 265 }
michael@0 266 }
michael@0 267 close(fd);
michael@0 268 }
michael@0 269 }
michael@0 270
/**
 * Print the command-line synopsis and option descriptions to std::cerr.
 *
 * The text is built from adjacent string literals rather than
 * backslash-continued lines, so the column alignment of the output does
 * not depend on how this source file is indented.
 */
static void
usage()
{
    std::cerr <<
        "foldelf [--section=<section>] [--type=<type>] [file ...]\n"
        "    --section, -s  the section of the ELF file to scan; defaults\n"
        "                   to ``.text''. Valid values include any section\n"
        "                   of the ELF file.\n"
        "    --type, -t     the type of object to examine in the section;\n"
        "                   defaults to ``func''. Valid values include\n"
        "                   ``none'', ``func'', or ``object''.\n";
}
michael@0 283
// Long options accepted by getopt_long(). Each entry returns the value in
// its last column; the all-zero entry terminates the table.
static option opts[] = {
    /* name       argument           flag  value */
    { "type",    required_argument, 0,    't' },
    { "section", required_argument, 0,    's' },
    { "help",    no_argument,       0,    '?' },
    { 0,         0,                 0,    0   }
};
michael@0 290
michael@0 291 int
michael@0 292 main(int argc, char* argv[])
michael@0 293 {
michael@0 294 while (1) {
michael@0 295 int option_index = 0;
michael@0 296 int c = getopt_long(argc, argv, "t:s:", opts, &option_index);
michael@0 297
michael@0 298 if (c < 0) break;
michael@0 299
michael@0 300 switch (c) {
michael@0 301 case 't':
michael@0 302 opt_type = optarg;
michael@0 303 break;
michael@0 304
michael@0 305 case 's':
michael@0 306 opt_section = optarg;
michael@0 307 break;
michael@0 308
michael@0 309 case '?':
michael@0 310 usage();
michael@0 311 break;
michael@0 312 }
michael@0 313 }
michael@0 314
michael@0 315 for (int i = optind; i < argc; ++i)
michael@0 316 process_file(argv[i]);
michael@0 317
michael@0 318 return 0;
michael@0 319 }

mercurial