Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
michael@0 | 2 | * |
michael@0 | 3 | * This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | /* This program reads an ELF file and computes information about |
michael@0 | 8 | * redundancies. |
michael@0 | 9 | */ |
michael@0 | 10 | |
#include <algorithm>
#include <cctype>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
michael@0 | 23 | |
michael@0 | 24 | //---------------------------------------------------------------------- |
michael@0 | 25 | |
// Command-line selections, overwritten by main() when -t / -s are given.
// They point either at these literals or at getopt's optarg and are never
// written through, hence pointer-to-const (binding a string literal to a
// plain char* is ill-formed since C++11).
const char* opt_type = "func";      // symbol type to examine
const char* opt_section = ".text";  // name of the section to scan
michael@0 | 28 | |
michael@0 | 29 | //---------------------------------------------------------------------- |
michael@0 | 30 | |
// Write a classic hex dump of |count| bytes starting at |bytes| to |out|.
//
// Each output line covers up to 16 bytes and has three parts:
//   - the offset of the first byte as at least eight hex digits and ": ",
//   - the bytes as two hex digits each, grouped in pairs followed by a space,
//   - the same bytes as characters, with '.' for anything not printable.
// A short final line is padded so the character column stays aligned.
//
// The stream's formatting flags and fill character are restored before
// returning. Nothing is written when |count| is zero.
static void
hexdump(std::ostream& out, const char* bytes, size_t count)
{
    const size_t kBytesPerLine = 16;

    // Take over the formatting state we need and remember the caller's.
    const std::ios_base::fmtflags saved_flags = out.flags();
    const char saved_fill = out.fill('0');
    out.setf(std::ios_base::hex, std::ios_base::basefield);
    out.setf(std::ios_base::right, std::ios_base::adjustfield);
    out.unsetf(std::ios_base::showbase | std::ios_base::uppercase);

    for (size_t off = 0; off < count; off += kBytesPerLine) {
        // Work on unsigned bytes: a negative char must not reach isprint(),
        // and must not sign-extend when printed as a number.
        const unsigned char* row =
            reinterpret_cast<const unsigned char*>(bytes) + off;
        const size_t avail = std::min(kBytesPerLine, count - off);

        out << std::setw(8) << off << ": ";

        // Every slot is two columns wide (hex digits or blanks); the second
        // slot of each pair is followed by a separating space. Emitting
        // blanks for the missing slots pads a short last line.
        for (size_t j = 0; j < kBytesPerLine; ++j) {
            if (j < avail)
                out << std::setw(2) << static_cast<unsigned>(row[j]);
            else
                out << "  ";

            if (j % 2)
                out << ' ';
        }

        for (size_t j = 0; j < avail; ++j)
            out.put(std::isprint(row[j]) ? static_cast<char>(row[j]) : '.');

        out << '\n';
    }

    out.fill(saved_fill);
    out.flags(saved_flags);
}
michael@0 | 66 | |
michael@0 | 67 | //---------------------------------------------------------------------- |
michael@0 | 68 | |
// Check that |hdr| identifies a file this tool can read: ELF magic,
// 32-bit class, little-endian data encoding and the current ELF version.
//
// Returns 0 when every check passes. On the first failing check a one-line
// diagnostic is written to stderr and -1 is returned.
int
verify_elf_header(const Elf32_Ehdr* hdr)
{
    const unsigned char* ident = hdr->e_ident;

    // ELFMAG is the four magic bytes, SELFMAG their count.
    if (std::memcmp(ident, ELFMAG, SELFMAG) != 0) {
        std::cerr << "not an elf file" << std::endl;
        return -1;
    }

    if (ident[EI_CLASS] != ELFCLASS32) {
        std::cerr << "not a 32-bit elf file" << std::endl;
        return -1;
    }

    if (ident[EI_DATA] != ELFDATA2LSB) {
        std::cerr << "not a little endian elf file" << std::endl;
        return -1;
    }

    if (ident[EI_VERSION] != EV_CURRENT) {
        std::cerr << "incompatible version" << std::endl;
        return -1;
    }

    return 0;
}
michael@0 | 97 | |
michael@0 | 98 | //---------------------------------------------------------------------- |
michael@0 | 99 | |
// An Elf32_Sym that can be compared for equality, so symbol entries can be
// searched for with std::find.
class elf_symbol : public Elf32_Sym
{
public:
    // Deliberately not explicit: a raw Elf32_Sym converts implicitly, which
    // lets symbol-table entries be inserted into a container of elf_symbol.
    elf_symbol(const Elf32_Sym& sym)
        : Elf32_Sym(sym)
    {}

    // Two symbols are equal when every field of the underlying entry matches.
    friend bool operator==(const elf_symbol& lhs, const elf_symbol& rhs) {
        return lhs.st_name  == rhs.st_name
            && lhs.st_value == rhs.st_value
            && lhs.st_size  == rhs.st_size
            && lhs.st_info  == rhs.st_info
            && lhs.st_other == rhs.st_other
            && lhs.st_shndx == rhs.st_shndx;
    }
};
michael@0 | 111 | |
michael@0 | 112 | //---------------------------------------------------------------------- |
michael@0 | 113 | |
// Human-readable name for the binding encoded in a symbol's st_info byte.
// Returns "unknown" for any binding other than local, global or weak.
static const char*
st_bind(unsigned char info)
{
    const unsigned binding = ELF32_ST_BIND(info);

    if (binding == STB_LOCAL)
        return "local";
    if (binding == STB_GLOBAL)
        return "global";
    if (binding == STB_WEAK)
        return "weak";

    return "unknown";
}
michael@0 | 124 | |
// Human-readable name for the symbol type encoded in a symbol's st_info
// byte. Returns "unknown" for any type not listed in the table below.
static const char*
st_type(unsigned char info)
{
    static const struct {
        unsigned char code;
        const char*   label;
    } known[] = {
        { STT_NOTYPE,  "none"    },
        { STT_OBJECT,  "object"  },
        { STT_FUNC,    "func"    },
        { STT_SECTION, "section" },
        { STT_FILE,    "file"    },
    };

    const unsigned kind = ELF32_ST_TYPE(info);
    for (unsigned i = 0; i < sizeof(known) / sizeof(known[0]); ++i) {
        if (known[i].code == kind)
            return known[i].label;
    }

    return "unknown";
}
michael@0 | 137 | |
// Translate a symbol type name ("none", "object" or "func") into the
// matching STT_* constant. Any other name yields 0.
// NOTE(review): 0 appears to coincide with STT_NOTYPE, which would make an
// unrecognized name indistinguishable from "none" -- confirm and consider
// validating the name at the call site.
static unsigned char
st_type(const char* type)
{
    static const struct {
        const char*   label;
        unsigned char code;
    } names[] = {
        { "none",   STT_NOTYPE },
        { "object", STT_OBJECT },
        { "func",   STT_FUNC   },
    };

    for (unsigned i = 0; i < sizeof(names) / sizeof(names[0]); ++i) {
        if (strcmp(type, names[i].label) == 0)
            return names[i].code;
    }

    return 0;
}
michael@0 | 154 | |
michael@0 | 155 | //---------------------------------------------------------------------- |
michael@0 | 156 | |
michael@0 | 157 | typedef vector<elf_symbol> elf_symbol_table; |
michael@0 | 158 | typedef map< basic_string<char>, elf_symbol_table > elf_text_map; |
michael@0 | 159 | |
michael@0 | 160 | void |
michael@0 | 161 | process_mapping(char* mapping, size_t size) |
michael@0 | 162 | { |
michael@0 | 163 | const Elf32_Ehdr* ehdr = reinterpret_cast<Elf32_Ehdr*>(mapping); |
michael@0 | 164 | if (verify_elf_header(ehdr) < 0) |
michael@0 | 165 | return; |
michael@0 | 166 | |
michael@0 | 167 | // find the section headers |
michael@0 | 168 | const Elf32_Shdr* shdrs = reinterpret_cast<Elf32_Shdr*>(mapping + ehdr->e_shoff); |
michael@0 | 169 | |
michael@0 | 170 | // find the section header string table, .shstrtab |
michael@0 | 171 | const Elf32_Shdr* shstrtabsh = shdrs + ehdr->e_shstrndx; |
michael@0 | 172 | const char* shstrtab = mapping + shstrtabsh->sh_offset; |
michael@0 | 173 | |
michael@0 | 174 | // find the sections we care about |
michael@0 | 175 | const Elf32_Shdr *symtabsh, *strtabsh, *textsh; |
michael@0 | 176 | int textndx; |
michael@0 | 177 | |
michael@0 | 178 | for (int i = 0; i < ehdr->e_shnum; ++i) { |
michael@0 | 179 | basic_string<char> name(shstrtab + shdrs[i].sh_name); |
michael@0 | 180 | if (name == opt_section) { |
michael@0 | 181 | textsh = shdrs + i; |
michael@0 | 182 | textndx = i; |
michael@0 | 183 | } |
michael@0 | 184 | else if (name == ".symtab") { |
michael@0 | 185 | symtabsh = shdrs + i; |
michael@0 | 186 | } |
michael@0 | 187 | else if (name == ".strtab") { |
michael@0 | 188 | strtabsh = shdrs + i; |
michael@0 | 189 | } |
michael@0 | 190 | } |
michael@0 | 191 | |
michael@0 | 192 | // find the .strtab |
michael@0 | 193 | char* strtab = mapping + strtabsh->sh_offset; |
michael@0 | 194 | |
michael@0 | 195 | // find the .text |
michael@0 | 196 | char* text = mapping + textsh->sh_offset; |
michael@0 | 197 | int textaddr = textsh->sh_addr; |
michael@0 | 198 | |
michael@0 | 199 | // find the symbol table |
michael@0 | 200 | int nentries = symtabsh->sh_size / sizeof(Elf32_Sym); |
michael@0 | 201 | Elf32_Sym* symtab = reinterpret_cast<Elf32_Sym*>(mapping + symtabsh->sh_offset); |
michael@0 | 202 | |
michael@0 | 203 | // look for symbols in the .text section |
michael@0 | 204 | elf_text_map textmap; |
michael@0 | 205 | |
michael@0 | 206 | for (int i = 0; i < nentries; ++i) { |
michael@0 | 207 | const Elf32_Sym* sym = symtab + i; |
michael@0 | 208 | if (sym->st_shndx == textndx && |
michael@0 | 209 | ELF32_ST_TYPE(sym->st_info) == st_type(opt_type) && |
michael@0 | 210 | sym->st_size) { |
michael@0 | 211 | basic_string<char> functext(text + sym->st_value - textaddr, sym->st_size); |
michael@0 | 212 | |
michael@0 | 213 | elf_symbol_table& syms = textmap[functext]; |
michael@0 | 214 | if (syms.end() == find(syms.begin(), syms.end(), elf_symbol(*sym))) |
michael@0 | 215 | syms.insert(syms.end(), *sym); |
michael@0 | 216 | } |
michael@0 | 217 | } |
michael@0 | 218 | |
michael@0 | 219 | int uniquebytes = 0, totalbytes = 0; |
michael@0 | 220 | int uniquecount = 0, totalcount = 0; |
michael@0 | 221 | |
michael@0 | 222 | for (elf_text_map::const_iterator entry = textmap.begin(); |
michael@0 | 223 | entry != textmap.end(); |
michael@0 | 224 | ++entry) { |
michael@0 | 225 | const elf_symbol_table& syms = entry->second; |
michael@0 | 226 | |
michael@0 | 227 | if (syms.size() <= 1) |
michael@0 | 228 | continue; |
michael@0 | 229 | |
michael@0 | 230 | int sz = syms.begin()->st_size; |
michael@0 | 231 | uniquebytes += sz; |
michael@0 | 232 | totalbytes += sz * syms.size(); |
michael@0 | 233 | uniquecount += 1; |
michael@0 | 234 | totalcount += syms.size(); |
michael@0 | 235 | |
michael@0 | 236 | for (elf_symbol_table::const_iterator sym = syms.begin(); sym != syms.end(); ++sym) |
michael@0 | 237 | cout << strtab + sym->st_name << endl; |
michael@0 | 238 | |
michael@0 | 239 | dec(cout); |
michael@0 | 240 | cout << syms.size() << " copies of " << sz << " bytes"; |
michael@0 | 241 | cout << " (" << ((syms.size() - 1) * sz) << " redundant bytes)" << endl; |
michael@0 | 242 | |
michael@0 | 243 | hexdump(cout, entry->first.data(), entry->first.size()); |
michael@0 | 244 | cout << endl; |
michael@0 | 245 | } |
michael@0 | 246 | |
michael@0 | 247 | dec(cout); |
michael@0 | 248 | cout << "bytes unique=" << uniquebytes << ", total=" << totalbytes << endl; |
michael@0 | 249 | cout << "entries unique=" << uniquecount << ", total=" << totalcount << endl; |
michael@0 | 250 | } |
michael@0 | 251 | |
michael@0 | 252 | void |
michael@0 | 253 | process_file(const char* name) |
michael@0 | 254 | { |
michael@0 | 255 | int fd = open(name, O_RDWR); |
michael@0 | 256 | if (fd >= 0) { |
michael@0 | 257 | struct stat statbuf; |
michael@0 | 258 | if (fstat(fd, &statbuf) >= 0) { |
michael@0 | 259 | size_t size = statbuf.st_size; |
michael@0 | 260 | |
michael@0 | 261 | void* mapping = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0); |
michael@0 | 262 | if (mapping != MAP_FAILED) { |
michael@0 | 263 | process_mapping(static_cast<char*>(mapping), size); |
michael@0 | 264 | munmap(mapping, size); |
michael@0 | 265 | } |
michael@0 | 266 | } |
michael@0 | 267 | close(fd); |
michael@0 | 268 | } |
michael@0 | 269 | } |
michael@0 | 270 | |
// Print the command-line synopsis and option descriptions to stderr.
static void
usage()
{
    // Adjacent string literals are concatenated by the compiler; unlike
    // backslash line continuations they do not depend on what follows the
    // backslash or on the indentation of the next source line.
    std::cerr <<
        "foldelf [--section=<section>] [--type=<type>] [file ...]\n"
        "--section, -s the section of the ELF file to scan; defaults\n"
        "to ``.text''. Valid values include any section\n"
        "of the ELF file.\n"
        "--type, -t the type of object to examine in the section;\n"
        "defaults to ``func''. Valid values include\n"
        "``none'', ``func'', or ``object''.\n";
}
michael@0 | 283 | |
// Long options accepted by main(), in the layout getopt_long() expects.
// With a zero |flag| field, getopt_long() returns |val| when the option is
// matched; "--help" therefore arrives in main() as '?'.
static struct option opts[] = {
    /* name       has_arg            flag  val */
    { "type",    required_argument, 0,    't' },
    { "section", required_argument, 0,    's' },
    { "help",    no_argument,       0,    '?' },
    { 0,         0,                 0,    0   }   /* terminator */
};
michael@0 | 290 | |
michael@0 | 291 | int |
michael@0 | 292 | main(int argc, char* argv[]) |
michael@0 | 293 | { |
michael@0 | 294 | while (1) { |
michael@0 | 295 | int option_index = 0; |
michael@0 | 296 | int c = getopt_long(argc, argv, "t:s:", opts, &option_index); |
michael@0 | 297 | |
michael@0 | 298 | if (c < 0) break; |
michael@0 | 299 | |
michael@0 | 300 | switch (c) { |
michael@0 | 301 | case 't': |
michael@0 | 302 | opt_type = optarg; |
michael@0 | 303 | break; |
michael@0 | 304 | |
michael@0 | 305 | case 's': |
michael@0 | 306 | opt_section = optarg; |
michael@0 | 307 | break; |
michael@0 | 308 | |
michael@0 | 309 | case '?': |
michael@0 | 310 | usage(); |
michael@0 | 311 | break; |
michael@0 | 312 | } |
michael@0 | 313 | } |
michael@0 | 314 | |
michael@0 | 315 | for (int i = optind; i < argc; ++i) |
michael@0 | 316 | process_file(argv[i]); |
michael@0 | 317 | |
michael@0 | 318 | return 0; |
michael@0 | 319 | } |