/* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This program reads an ELF file and computes information about
 * redundancies.
 */
|
10 |
|
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <ctype.h>
#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
|
23 |
|
24 //---------------------------------------------------------------------- |
|
25 |
|
// Command-line settings; main() repoints them at the --type and --section
// arguments.  They are const char* because they start out pointing at
// string literals, which must never be written through.
const char* opt_type = "func";      // symbol type to examine
const char* opt_section = ".text";  // name of the section to scan
|
28 |
|
29 //---------------------------------------------------------------------- |
|
30 |
|
/**
 * Write a hex dump of the `count` bytes starting at `bytes` to `out`.
 *
 * Each row covers up to 16 bytes: an 8-digit hexadecimal offset, the bytes
 * as two-digit hex values grouped in pairs (every pair followed by one
 * space), then the same bytes as characters with '.' standing in for
 * anything that is not printable.  A short final row is padded with spaces
 * so that its character column lines up with the rows above it.
 *
 * Formatting is done with snprintf(), so the stream's own formatting flags
 * are left untouched.  Nothing is written when `count` is zero.
 */
static void
hexdump(std::ostream& out, const char* bytes, size_t count)
{
    const size_t BYTES_PER_ROW = 16;

    // Big enough for the widest field: "%08lx: " with a 64-bit offset.
    char field[24];

    for (size_t off = 0; off < count; off += BYTES_PER_ROW) {
        // Work on unsigned bytes: isprint() is undefined for negative
        // values, and "%02x" must never see a sign-extended byte.
        const unsigned char* row =
            reinterpret_cast<const unsigned char*>(bytes) + off;
        const size_t left = count - off;
        const size_t rowlen = left < BYTES_PER_ROW ? left : BYTES_PER_ROW;

        snprintf(field, sizeof field, "%08lx: ",
                 static_cast<unsigned long>(off));
        out << field;

        // Hex column: always 16 slots wide, blank where the row is short.
        for (size_t j = 0; j < BYTES_PER_ROW; ++j) {
            if (j < rowlen) {
                snprintf(field, sizeof field, "%02x",
                         static_cast<unsigned>(row[j]));
                out << field;
            }
            else {
                out << "  ";
            }

            // One separator after the second byte of each pair.
            if (j % 2)
                out << ' ';
        }

        // Character column.
        for (size_t j = 0; j < rowlen; ++j)
            out.put(isprint(row[j]) ? static_cast<char>(row[j]) : '.');

        out << std::endl;
    }
}
|
66 |
|
67 //---------------------------------------------------------------------- |
|
68 |
|
/**
 * Check that `hdr` identifies a file this program can read: it must carry
 * the ELF magic number, be a 32-bit object, be little endian, and use the
 * current ELF version.  Only the e_ident bytes are inspected.
 *
 * Returns 0 when every check passes.  Otherwise writes a one-line reason
 * for the first failed check to standard error and returns -1.
 */
int
verify_elf_header(const Elf32_Ehdr* hdr)
{
    const unsigned char* ident = hdr->e_ident;

    const bool has_magic = ident[EI_MAG0] == ELFMAG0
                        && ident[EI_MAG1] == ELFMAG1
                        && ident[EI_MAG2] == ELFMAG2
                        && ident[EI_MAG3] == ELFMAG3;

    // The checks run in a fixed order; only the first failure is reported.
    const char* problem = 0;
    if (!has_magic)
        problem = "not an elf file";
    else if (ident[EI_CLASS] != ELFCLASS32)
        problem = "not a 32-bit elf file";
    else if (ident[EI_DATA] != ELFDATA2LSB)
        problem = "not a little endian elf file";
    else if (ident[EI_VERSION] != EV_CURRENT)
        problem = "incompatible version";

    if (problem) {
        std::cerr << problem << std::endl;
        return -1;
    }

    return 0;
}
|
97 |
|
98 //---------------------------------------------------------------------- |
|
99 |
|
/**
 * An Elf32_Sym with value semantics: it can be built from a raw symbol
 * table entry and compared for equality, which is what lets it be kept in
 * a vector and searched for with find().
 */
class elf_symbol : public Elf32_Sym
{
public:
    // Take a copy of every field of `sym`.
    elf_symbol(const Elf32_Sym& sym)
        : Elf32_Sym(sym)
    {}

    // Two symbols are equal when their Elf32_Sym bytes are identical.
    friend bool operator==(const elf_symbol& lhs, const elf_symbol& rhs) {
        const Elf32_Sym* left = &lhs;
        const Elf32_Sym* right = &rhs;
        return ::memcmp(left, right, sizeof(Elf32_Sym)) == 0;
    }
};
|
111 |
|
112 //---------------------------------------------------------------------- |
|
113 |
|
// Name of the symbol binding encoded in an st_info byte: "local",
// "global" or "weak", and "unknown" for every other binding.
static const char*
st_bind(unsigned char info)
{
    const int binding = ELF32_ST_BIND(info);

    if (binding == STB_LOCAL)
        return "local";
    if (binding == STB_GLOBAL)
        return "global";
    if (binding == STB_WEAK)
        return "weak";

    return "unknown";
}
|
124 |
|
// Name of the symbol type encoded in an st_info byte: "none", "object",
// "func", "section" or "file", and "unknown" for every other type.
static const char*
st_type(unsigned char info)
{
    static const struct {
        int type;
        const char* name;
    } names[] = {
        { STT_NOTYPE,  "none"    },
        { STT_OBJECT,  "object"  },
        { STT_FUNC,    "func"    },
        { STT_SECTION, "section" },
        { STT_FILE,    "file"    },
    };

    const int type = ELF32_ST_TYPE(info);
    for (size_t i = 0; i < sizeof names / sizeof names[0]; ++i) {
        if (names[i].type == type)
            return names[i].name;
    }

    return "unknown";
}
|
137 |
|
// Symbol type constant for a type name: "none", "object" and "func" map to
// STT_NOTYPE, STT_OBJECT and STT_FUNC.  Any other name yields 0.
static unsigned char
st_type(const char* type)
{
    static const struct {
        const char* name;
        unsigned char type;
    } known[] = {
        { "none",   STT_NOTYPE },
        { "object", STT_OBJECT },
        { "func",   STT_FUNC   },
    };

    for (size_t i = 0; i < sizeof known / sizeof known[0]; ++i) {
        if (strcmp(type, known[i].name) == 0)
            return known[i].type;
    }

    return 0;
}
|
154 |
|
155 //---------------------------------------------------------------------- |
|
156 |
|
157 typedef vector<elf_symbol> elf_symbol_table; |
|
158 typedef map< basic_string<char>, elf_symbol_table > elf_text_map; |
|
159 |
|
160 void |
|
161 process_mapping(char* mapping, size_t size) |
|
162 { |
|
163 const Elf32_Ehdr* ehdr = reinterpret_cast<Elf32_Ehdr*>(mapping); |
|
164 if (verify_elf_header(ehdr) < 0) |
|
165 return; |
|
166 |
|
167 // find the section headers |
|
168 const Elf32_Shdr* shdrs = reinterpret_cast<Elf32_Shdr*>(mapping + ehdr->e_shoff); |
|
169 |
|
170 // find the section header string table, .shstrtab |
|
171 const Elf32_Shdr* shstrtabsh = shdrs + ehdr->e_shstrndx; |
|
172 const char* shstrtab = mapping + shstrtabsh->sh_offset; |
|
173 |
|
174 // find the sections we care about |
|
175 const Elf32_Shdr *symtabsh, *strtabsh, *textsh; |
|
176 int textndx; |
|
177 |
|
178 for (int i = 0; i < ehdr->e_shnum; ++i) { |
|
179 basic_string<char> name(shstrtab + shdrs[i].sh_name); |
|
180 if (name == opt_section) { |
|
181 textsh = shdrs + i; |
|
182 textndx = i; |
|
183 } |
|
184 else if (name == ".symtab") { |
|
185 symtabsh = shdrs + i; |
|
186 } |
|
187 else if (name == ".strtab") { |
|
188 strtabsh = shdrs + i; |
|
189 } |
|
190 } |
|
191 |
|
192 // find the .strtab |
|
193 char* strtab = mapping + strtabsh->sh_offset; |
|
194 |
|
195 // find the .text |
|
196 char* text = mapping + textsh->sh_offset; |
|
197 int textaddr = textsh->sh_addr; |
|
198 |
|
199 // find the symbol table |
|
200 int nentries = symtabsh->sh_size / sizeof(Elf32_Sym); |
|
201 Elf32_Sym* symtab = reinterpret_cast<Elf32_Sym*>(mapping + symtabsh->sh_offset); |
|
202 |
|
203 // look for symbols in the .text section |
|
204 elf_text_map textmap; |
|
205 |
|
206 for (int i = 0; i < nentries; ++i) { |
|
207 const Elf32_Sym* sym = symtab + i; |
|
208 if (sym->st_shndx == textndx && |
|
209 ELF32_ST_TYPE(sym->st_info) == st_type(opt_type) && |
|
210 sym->st_size) { |
|
211 basic_string<char> functext(text + sym->st_value - textaddr, sym->st_size); |
|
212 |
|
213 elf_symbol_table& syms = textmap[functext]; |
|
214 if (syms.end() == find(syms.begin(), syms.end(), elf_symbol(*sym))) |
|
215 syms.insert(syms.end(), *sym); |
|
216 } |
|
217 } |
|
218 |
|
219 int uniquebytes = 0, totalbytes = 0; |
|
220 int uniquecount = 0, totalcount = 0; |
|
221 |
|
222 for (elf_text_map::const_iterator entry = textmap.begin(); |
|
223 entry != textmap.end(); |
|
224 ++entry) { |
|
225 const elf_symbol_table& syms = entry->second; |
|
226 |
|
227 if (syms.size() <= 1) |
|
228 continue; |
|
229 |
|
230 int sz = syms.begin()->st_size; |
|
231 uniquebytes += sz; |
|
232 totalbytes += sz * syms.size(); |
|
233 uniquecount += 1; |
|
234 totalcount += syms.size(); |
|
235 |
|
236 for (elf_symbol_table::const_iterator sym = syms.begin(); sym != syms.end(); ++sym) |
|
237 cout << strtab + sym->st_name << endl; |
|
238 |
|
239 dec(cout); |
|
240 cout << syms.size() << " copies of " << sz << " bytes"; |
|
241 cout << " (" << ((syms.size() - 1) * sz) << " redundant bytes)" << endl; |
|
242 |
|
243 hexdump(cout, entry->first.data(), entry->first.size()); |
|
244 cout << endl; |
|
245 } |
|
246 |
|
247 dec(cout); |
|
248 cout << "bytes unique=" << uniquebytes << ", total=" << totalbytes << endl; |
|
249 cout << "entries unique=" << uniquecount << ", total=" << totalcount << endl; |
|
250 } |
|
251 |
|
252 void |
|
253 process_file(const char* name) |
|
254 { |
|
255 int fd = open(name, O_RDWR); |
|
256 if (fd >= 0) { |
|
257 struct stat statbuf; |
|
258 if (fstat(fd, &statbuf) >= 0) { |
|
259 size_t size = statbuf.st_size; |
|
260 |
|
261 void* mapping = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0); |
|
262 if (mapping != MAP_FAILED) { |
|
263 process_mapping(static_cast<char*>(mapping), size); |
|
264 munmap(mapping, size); |
|
265 } |
|
266 } |
|
267 close(fd); |
|
268 } |
|
269 } |
|
270 |
|
// Print the command-line synopsis and option descriptions to standard
// error.  The text is assembled from adjacent string literals so that the
// layout of the output does not depend on how this source is indented.
static void
usage()
{
    std::cerr <<
        "foldelf [--section=<section>] [--type=<type>] [file ...]\n"
        "    --section, -s  the section of the ELF file to scan; defaults\n"
        "                   to ``.text''. Valid values include any section\n"
        "                   of the ELF file.\n"
        "    --type, -t     the type of object to examine in the section;\n"
        "                   defaults to ``func''. Valid values include\n"
        "                   ``none'', ``func'', or ``object''.\n";
}
|
283 |
|
// Long options understood by main().  Each entry makes getopt_long()
// return the listed code; the all-zero entry ends the table.
static struct option opts[] = {
    // name       has_arg             flag  val
    { "type",     required_argument,  0,    't' },
    { "section",  required_argument,  0,    's' },
    { "help",     no_argument,        0,    '?' },
    { 0,          0,                  0,    0   }
};
|
290 |
|
291 int |
|
292 main(int argc, char* argv[]) |
|
293 { |
|
294 while (1) { |
|
295 int option_index = 0; |
|
296 int c = getopt_long(argc, argv, "t:s:", opts, &option_index); |
|
297 |
|
298 if (c < 0) break; |
|
299 |
|
300 switch (c) { |
|
301 case 't': |
|
302 opt_type = optarg; |
|
303 break; |
|
304 |
|
305 case 's': |
|
306 opt_section = optarg; |
|
307 break; |
|
308 |
|
309 case '?': |
|
310 usage(); |
|
311 break; |
|
312 } |
|
313 } |
|
314 |
|
315 for (int i = optind; i < argc; ++i) |
|
316 process_file(argv[i]); |
|
317 |
|
318 return 0; |
|
319 } |