build/unix/elfhack/elfhack.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/build/unix/elfhack/elfhack.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,823 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +#undef NDEBUG
     1.9 +#include <assert.h>
    1.10 +#include <cstring>
    1.11 +#include <cstdlib>
    1.12 +#include <cstdio>
    1.13 +#include "elfxx.h"
    1.14 +
    1.15 +#define ver "0"
    1.16 +#define elfhack_data ".elfhack.data.v" ver
    1.17 +#define elfhack_text ".elfhack.text.v" ver
    1.18 +
    1.19 +#ifndef R_ARM_V4BX
    1.20 +#define R_ARM_V4BX 0x28
    1.21 +#endif
    1.22 +#ifndef R_ARM_CALL
    1.23 +#define R_ARM_CALL 0x1c
    1.24 +#endif
    1.25 +#ifndef R_ARM_JUMP24
    1.26 +#define R_ARM_JUMP24 0x1d
    1.27 +#endif
    1.28 +#ifndef R_ARM_THM_JUMP24
    1.29 +#define R_ARM_THM_JUMP24 0x1e
    1.30 +#endif
    1.31 +
    1.32 +char *rundir = nullptr;
    1.33 +
    1.34 +template <typename T>
    1.35 +struct wrapped {
    1.36 +    T value;
    1.37 +};
    1.38 +
    1.39 +class Elf_Addr_Traits {
    1.40 +public:
    1.41 +    typedef wrapped<Elf32_Addr> Type32;
    1.42 +    typedef wrapped<Elf64_Addr> Type64;
    1.43 +
    1.44 +    template <class endian, typename R, typename T>
    1.45 +    static inline void swap(T &t, R &r) {
    1.46 +        r.value = endian::swap(t.value);
    1.47 +    }
    1.48 +};
    1.49 +
    1.50 +typedef serializable<Elf_Addr_Traits> Elf_Addr;
    1.51 +
    1.52 +class Elf_RelHack_Traits {
    1.53 +public:
    1.54 +    typedef Elf32_Rel Type32;
    1.55 +    typedef Elf32_Rel Type64;
    1.56 +
    1.57 +    template <class endian, typename R, typename T>
    1.58 +    static inline void swap(T &t, R &r) {
    1.59 +        r.r_offset = endian::swap(t.r_offset);
    1.60 +        r.r_info = endian::swap(t.r_info);
    1.61 +    }
    1.62 +};
    1.63 +
    1.64 +typedef serializable<Elf_RelHack_Traits> Elf_RelHack;
    1.65 +
    1.66 +class ElfRelHack_Section: public ElfSection {
    1.67 +public:
    1.68 +    ElfRelHack_Section(Elf_Shdr &s)
    1.69 +    : ElfSection(s, nullptr, nullptr)
    1.70 +    {
    1.71 +        name = elfhack_data;
    1.72 +    };
    1.73 +
    1.74 +    void serialize(std::ofstream &file, char ei_class, char ei_data)
    1.75 +    {
    1.76 +        for (std::vector<Elf_RelHack>::iterator i = rels.begin();
    1.77 +             i != rels.end(); ++i)
    1.78 +            (*i).serialize(file, ei_class, ei_data);
    1.79 +    }
    1.80 +
    1.81 +    bool isRelocatable() {
    1.82 +        return true;
    1.83 +    }
    1.84 +
    1.85 +    void push_back(Elf_RelHack &r) {
    1.86 +        rels.push_back(r);
    1.87 +        shdr.sh_size = rels.size() * shdr.sh_entsize;
    1.88 +    }
    1.89 +private:
    1.90 +    std::vector<Elf_RelHack> rels;
    1.91 +};
    1.92 +
    1.93 +class ElfRelHackCode_Section: public ElfSection {
    1.94 +public:
    1.95 +    ElfRelHackCode_Section(Elf_Shdr &s, Elf &e, unsigned int init)
    1.96 +    : ElfSection(s, nullptr, nullptr), parent(e), init(init) {
    1.97 +        std::string file(rundir);
    1.98 +        file += "/inject/";
    1.99 +        switch (parent.getMachine()) {
   1.100 +        case EM_386:
   1.101 +            file += "x86";
   1.102 +            break;
   1.103 +        case EM_X86_64:
   1.104 +            file += "x86_64";
   1.105 +            break;
   1.106 +        case EM_ARM:
   1.107 +            file += "arm";
   1.108 +            break;
   1.109 +        default:
   1.110 +            throw std::runtime_error("unsupported architecture");
   1.111 +        }
   1.112 +        file += ".o";
   1.113 +        std::ifstream inject(file.c_str(), std::ios::in|std::ios::binary);
   1.114 +        elf = new Elf(inject);
   1.115 +        if (elf->getType() != ET_REL)
   1.116 +            throw std::runtime_error("object for injected code is not ET_REL");
   1.117 +        if (elf->getMachine() != parent.getMachine())
   1.118 +            throw std::runtime_error("architecture of object for injected code doesn't match");
   1.119 +
   1.120 +        ElfSymtab_Section *symtab = nullptr;
   1.121 +
   1.122 +        // Find the symbol table.
   1.123 +        for (ElfSection *section = elf->getSection(1); section != nullptr;
   1.124 +             section = section->getNext()) {
   1.125 +            if (section->getType() == SHT_SYMTAB)
   1.126 +                symtab = (ElfSymtab_Section *) section;
   1.127 +        }
   1.128 +        if (symtab == nullptr)
   1.129 +            throw std::runtime_error("Couldn't find a symbol table for the injected code");
   1.130 +
   1.131 +        // Find the init symbol
   1.132 +        entry_point = -1;
   1.133 +        Elf_SymValue *sym = symtab->lookup(init ? "init" : "init_noinit");
   1.134 +        if (!sym)
   1.135 +            throw std::runtime_error("Couldn't find an 'init' symbol in the injected code");
   1.136 +
   1.137 +        entry_point = sym->value.getValue();
   1.138 +
   1.139 +        // Get all relevant sections from the injected code object.
   1.140 +        add_code_section(sym->value.getSection());
   1.141 +
   1.142 +        // Adjust code sections offsets according to their size
   1.143 +        std::vector<ElfSection *>::iterator c = code.begin();
   1.144 +        (*c)->getShdr().sh_addr = 0;
   1.145 +        for(ElfSection *last = *(c++); c != code.end(); c++) {
   1.146 +            unsigned int addr = last->getShdr().sh_addr + last->getSize();
   1.147 +            if (addr & ((*c)->getAddrAlign() - 1))
   1.148 +                addr = (addr | ((*c)->getAddrAlign() - 1)) + 1;
   1.149 +            (*c)->getShdr().sh_addr = addr;
   1.150 +            // We need to align this section depending on the greater
   1.151 +            // alignment required by code sections.
   1.152 +            if (shdr.sh_addralign < (*c)->getAddrAlign())
   1.153 +                shdr.sh_addralign = (*c)->getAddrAlign();
   1.154 +        }
   1.155 +        shdr.sh_size = code.back()->getAddr() + code.back()->getSize();
   1.156 +        data = new char[shdr.sh_size];
   1.157 +        char *buf = data;
   1.158 +        for (c = code.begin(); c != code.end(); c++) {
   1.159 +            memcpy(buf, (*c)->getData(), (*c)->getSize());
   1.160 +            buf += (*c)->getSize();
   1.161 +        }
   1.162 +        name = elfhack_text;
   1.163 +    }
   1.164 +
   1.165 +    ~ElfRelHackCode_Section() {
   1.166 +        delete elf;
   1.167 +    }
   1.168 +
   1.169 +    void serialize(std::ofstream &file, char ei_class, char ei_data)
   1.170 +    {
   1.171 +        // Readjust code offsets
   1.172 +        for (std::vector<ElfSection *>::iterator c = code.begin(); c != code.end(); c++)
   1.173 +            (*c)->getShdr().sh_addr += getAddr();
   1.174 +
   1.175 +        // Apply relocations
   1.176 +        for (std::vector<ElfSection *>::iterator c = code.begin(); c != code.end(); c++) {
   1.177 +            for (ElfSection *rel = elf->getSection(1); rel != nullptr; rel = rel->getNext())
   1.178 +                if (((rel->getType() == SHT_REL) ||
   1.179 +                     (rel->getType() == SHT_RELA)) &&
   1.180 +                    (rel->getInfo().section == *c)) {
   1.181 +                    if (rel->getType() == SHT_REL)
   1.182 +                        apply_relocations((ElfRel_Section<Elf_Rel> *)rel, *c);
   1.183 +                    else
   1.184 +                        apply_relocations((ElfRel_Section<Elf_Rela> *)rel, *c);
   1.185 +                }
   1.186 +            }
   1.187 +
   1.188 +        ElfSection::serialize(file, ei_class, ei_data);
   1.189 +    }
   1.190 +
   1.191 +    bool isRelocatable() {
   1.192 +        return true;
   1.193 +    }
   1.194 +
   1.195 +    unsigned int getEntryPoint() {
   1.196 +        return entry_point;
   1.197 +    }
   1.198 +private:
   1.199 +    void add_code_section(ElfSection *section)
   1.200 +    {
   1.201 +        if (section) {
   1.202 +            /* Don't add section if it's already been added in the past */
   1.203 +            for (auto s = code.begin(); s != code.end(); ++s) {
   1.204 +                if (section == *s)
   1.205 +                    return;
   1.206 +            }
   1.207 +            code.push_back(section);
   1.208 +            find_code(section);
   1.209 +        }
   1.210 +    }
   1.211 +
   1.212 +    /* Look at the relocations associated to the given section to find other
   1.213 +     * sections that it requires */
   1.214 +    void find_code(ElfSection *section)
   1.215 +    {
   1.216 +        for (ElfSection *s = elf->getSection(1); s != nullptr;
   1.217 +             s = s->getNext()) {
   1.218 +            if (((s->getType() == SHT_REL) ||
   1.219 +                 (s->getType() == SHT_RELA)) &&
   1.220 +                (s->getInfo().section == section)) {
   1.221 +                if (s->getType() == SHT_REL)
   1.222 +                    scan_relocs_for_code((ElfRel_Section<Elf_Rel> *)s);
   1.223 +                else
   1.224 +                    scan_relocs_for_code((ElfRel_Section<Elf_Rela> *)s);
   1.225 +            }
   1.226 +        }
   1.227 +    }
   1.228 +
   1.229 +    template <typename Rel_Type>
   1.230 +    void scan_relocs_for_code(ElfRel_Section<Rel_Type> *rel)
   1.231 +    {
   1.232 +        ElfSymtab_Section *symtab = (ElfSymtab_Section *)rel->getLink();
   1.233 +        for (auto r = rel->rels.begin(); r != rel->rels.end(); r++) {
   1.234 +            ElfSection *section = symtab->syms[ELF32_R_SYM(r->r_info)].value.getSection();
   1.235 +            add_code_section(section);
   1.236 +        }
   1.237 +    }
   1.238 +
   1.239 +    class pc32_relocation {
   1.240 +    public:
   1.241 +        Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
   1.242 +                              Elf32_Word addend, unsigned int addr)
   1.243 +        {
   1.244 +            return addr + addend - offset - base_addr;
   1.245 +        }
   1.246 +    };
   1.247 +
   1.248 +    class arm_plt32_relocation {
   1.249 +    public:
   1.250 +        Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
   1.251 +                              Elf32_Word addend, unsigned int addr)
   1.252 +        {
   1.253 +            // We don't care about sign_extend because the only case where this is
   1.254 +            // going to be used only jumps forward.
   1.255 +            Elf32_Addr tmp = (Elf32_Addr) (addr - offset - base_addr) >> 2;
   1.256 +            tmp = (addend + tmp) & 0x00ffffff;
   1.257 +            return (addend & 0xff000000) | tmp;
   1.258 +        }
   1.259 +    };
   1.260 +
   1.261 +    class arm_thm_jump24_relocation {
   1.262 +    public:
   1.263 +        Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
   1.264 +                              Elf32_Word addend, unsigned int addr)
   1.265 +        {
   1.266 +            /* Follows description of b.w and bl instructions as per
   1.267 +               ARM Architecture Reference Manual ARMĀ® v7-A and ARMĀ® v7-R edition, A8.6.16
   1.268 +               We limit ourselves to Encoding T4 of b.w and Encoding T1 of bl.
   1.269 +               We don't care about sign_extend because the only case where this is
   1.270 +               going to be used only jumps forward. */
   1.271 +            Elf32_Addr tmp = (Elf32_Addr) (addr - offset - base_addr);
   1.272 +            unsigned int word0 = addend & 0xffff,
   1.273 +                         word1 = addend >> 16;
   1.274 +
   1.275 +            /* Encoding T4 of B.W is 10x1 ; Encoding T1 of BL is 11x1. */
   1.276 +            unsigned int type = (word1 & 0xd000) >> 12;
   1.277 +            if (((word0 & 0xf800) != 0xf000) || ((type & 0x9) != 0x9))
   1.278 +                throw std::runtime_error("R_ARM_THM_JUMP24/R_ARM_THM_CALL relocation only supported for B.W <label> and BL <label>");
   1.279 +
   1.280 +            /* When the target address points to ARM code, switch a BL to a
   1.281 +             * BLX. This however can't be done with a B.W without adding a
   1.282 +             * trampoline, which is not supported as of now. */
   1.283 +            if ((addr & 0x1) == 0) {
   1.284 +                if (type == 0x9)
   1.285 +                    throw std::runtime_error("R_ARM_THM_JUMP24/R_ARM_THM_CALL relocation only supported for BL <label> when label points to ARM code");
   1.286 +                /* The address of the target is always relative to a 4-bytes
   1.287 +                 * aligned address, so if the address of the BL instruction is
   1.288 +                 * not 4-bytes aligned, adjust for it. */
   1.289 +                if ((base_addr + offset) & 0x2)
   1.290 +                    tmp += 2;
   1.291 +                /* Encoding T2 of BLX is 11x0. */
   1.292 +                type = 0xc;
   1.293 +            }
   1.294 +
   1.295 +            unsigned int s = (word0 & (1 << 10)) >> 10;
   1.296 +            unsigned int j1 = (word1 & (1 << 13)) >> 13;
   1.297 +            unsigned int j2 = (word1 & (1 << 11)) >> 11;
   1.298 +            unsigned int i1 = j1 ^ s ? 0 : 1;
   1.299 +            unsigned int i2 = j2 ^ s ? 0 : 1;
   1.300 +
   1.301 +            tmp += ((s << 24) | (i1 << 23) | (i2 << 22) | ((word0 & 0x3ff) << 12) | ((word1 & 0x7ff) << 1));
   1.302 +
   1.303 +            s = (tmp & (1 << 24)) >> 24;
   1.304 +            j1 = ((tmp & (1 << 23)) >> 23) ^ !s;
   1.305 +            j2 = ((tmp & (1 << 22)) >> 22) ^ !s;
   1.306 +
   1.307 +            return 0xf000 | (s << 10) | ((tmp & (0x3ff << 12)) >> 12) |
   1.308 +                   (type << 28) | (j1 << 29) | (j2 << 27) | ((tmp & 0xffe) << 15);
   1.309 +        }
   1.310 +    };
   1.311 +
   1.312 +    class gotoff_relocation {
   1.313 +    public:
   1.314 +        Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
   1.315 +                              Elf32_Word addend, unsigned int addr)
   1.316 +        {
   1.317 +            return addr + addend;
   1.318 +        }
   1.319 +    };
   1.320 +
   1.321 +    template <class relocation_type>
   1.322 +    void apply_relocation(ElfSection *the_code, char *base, Elf_Rel *r, unsigned int addr)
   1.323 +    {
   1.324 +        relocation_type relocation;
   1.325 +        Elf32_Addr value;
   1.326 +        memcpy(&value, base + r->r_offset, 4);
   1.327 +        value = relocation(the_code->getAddr(), r->r_offset, value, addr);
   1.328 +        memcpy(base + r->r_offset, &value, 4);
   1.329 +    }
   1.330 +
   1.331 +    template <class relocation_type>
   1.332 +    void apply_relocation(ElfSection *the_code, char *base, Elf_Rela *r, unsigned int addr)
   1.333 +    {
   1.334 +        relocation_type relocation;
   1.335 +        Elf32_Addr value = relocation(the_code->getAddr(), r->r_offset, r->r_addend, addr);
   1.336 +        memcpy(base + r->r_offset, &value, 4);
   1.337 +    }
   1.338 +
   1.339 +    template <typename Rel_Type>
   1.340 +    void apply_relocations(ElfRel_Section<Rel_Type> *rel, ElfSection *the_code)
   1.341 +    {
   1.342 +        assert(rel->getType() == Rel_Type::sh_type);
   1.343 +        char *buf = data + (the_code->getAddr() - code.front()->getAddr());
   1.344 +        // TODO: various checks on the sections
   1.345 +        ElfSymtab_Section *symtab = (ElfSymtab_Section *)rel->getLink();
   1.346 +        for (typename std::vector<Rel_Type>::iterator r = rel->rels.begin(); r != rel->rels.end(); r++) {
   1.347 +            // TODO: various checks on the symbol
   1.348 +            const char *name = symtab->syms[ELF32_R_SYM(r->r_info)].name;
   1.349 +            unsigned int addr;
   1.350 +            if (symtab->syms[ELF32_R_SYM(r->r_info)].value.getSection() == nullptr) {
   1.351 +                if (strcmp(name, "relhack") == 0) {
   1.352 +                    addr = getNext()->getAddr();
   1.353 +                } else if (strcmp(name, "elf_header") == 0) {
   1.354 +                    // TODO: change this ungly hack to something better
   1.355 +                    ElfSection *ehdr = parent.getSection(1)->getPrevious()->getPrevious();
   1.356 +                    addr = ehdr->getAddr();
   1.357 +                } else if (strcmp(name, "original_init") == 0) {
   1.358 +                    addr = init;
   1.359 +                } else if (strcmp(name, "_GLOBAL_OFFSET_TABLE_") == 0) {
   1.360 +                    // We actually don't need a GOT, but need it as a reference for
   1.361 +                    // GOTOFF relocations. We'll just use the start of the ELF file
   1.362 +                    addr = 0;
   1.363 +                } else if (strcmp(name, "") == 0) {
   1.364 +                    // This is for R_ARM_V4BX, until we find something better
   1.365 +                    addr = -1;
   1.366 +                } else {
   1.367 +                    throw std::runtime_error("Unsupported symbol in relocation");
   1.368 +                }
   1.369 +            } else {
   1.370 +                ElfSection *section = symtab->syms[ELF32_R_SYM(r->r_info)].value.getSection();
   1.371 +                assert((section->getType() == SHT_PROGBITS) && (section->getFlags() & SHF_EXECINSTR));
   1.372 +                addr = symtab->syms[ELF32_R_SYM(r->r_info)].value.getValue();
   1.373 +            }
   1.374 +            // Do the relocation
   1.375 +#define REL(machine, type) (EM_ ## machine | (R_ ## machine ## _ ## type << 8))
   1.376 +            switch (elf->getMachine() | (ELF32_R_TYPE(r->r_info) << 8)) {
   1.377 +            case REL(X86_64, PC32):
   1.378 +            case REL(386, PC32):
   1.379 +            case REL(386, GOTPC):
   1.380 +            case REL(ARM, GOTPC):
   1.381 +            case REL(ARM, REL32):
   1.382 +                apply_relocation<pc32_relocation>(the_code, buf, &*r, addr);
   1.383 +                break;
   1.384 +            case REL(ARM, CALL):
   1.385 +            case REL(ARM, JUMP24):
   1.386 +            case REL(ARM, PLT32):
   1.387 +                apply_relocation<arm_plt32_relocation>(the_code, buf, &*r, addr);
   1.388 +                break;
   1.389 +            case REL(ARM, THM_PC22 /* THM_CALL */):
   1.390 +            case REL(ARM, THM_JUMP24):
   1.391 +                apply_relocation<arm_thm_jump24_relocation>(the_code, buf, &*r, addr);
   1.392 +                break;
   1.393 +            case REL(386, GOTOFF):
   1.394 +            case REL(ARM, GOTOFF):
   1.395 +                apply_relocation<gotoff_relocation>(the_code, buf, &*r, addr);
   1.396 +                break;
   1.397 +            case REL(ARM, V4BX):
   1.398 +                // Ignore R_ARM_V4BX relocations
   1.399 +                break;
   1.400 +            default:
   1.401 +                throw std::runtime_error("Unsupported relocation type");
   1.402 +            }
   1.403 +        }
   1.404 +    }
   1.405 +
   1.406 +    Elf *elf, &parent;
   1.407 +    std::vector<ElfSection *> code;
   1.408 +    unsigned int init;
   1.409 +    int entry_point;
   1.410 +};
   1.411 +
   1.412 +unsigned int get_addend(Elf_Rel *rel, Elf *elf) {
   1.413 +    ElfLocation loc(rel->r_offset, elf);
   1.414 +    Elf_Addr addr(loc.getBuffer(), Elf_Addr::size(elf->getClass()), elf->getClass(), elf->getData());
   1.415 +    return addr.value;
   1.416 +}
   1.417 +
   1.418 +unsigned int get_addend(Elf_Rela *rel, Elf *elf) {
   1.419 +    return rel->r_addend;
   1.420 +}
   1.421 +
   1.422 +void set_relative_reloc(Elf_Rel *rel, Elf *elf, unsigned int value) {
   1.423 +    ElfLocation loc(rel->r_offset, elf);
   1.424 +    Elf_Addr addr;
   1.425 +    addr.value = value;
   1.426 +    addr.serialize(const_cast<char *>(loc.getBuffer()), Elf_Addr::size(elf->getClass()), elf->getClass(), elf->getData());
   1.427 +}
   1.428 +
   1.429 +void set_relative_reloc(Elf_Rela *rel, Elf *elf, unsigned int value) {
   1.430 +    // ld puts the value of relocated relocations both in the addend and
   1.431 +    // at r_offset. For consistency, keep it that way.
   1.432 +    set_relative_reloc((Elf_Rel *)rel, elf, value);
   1.433 +    rel->r_addend = value;
   1.434 +}
   1.435 +
   1.436 +void maybe_split_segment(Elf *elf, ElfSegment *segment, bool fill)
   1.437 +{
   1.438 +    std::list<ElfSection *>::iterator it = segment->begin();
   1.439 +    for (ElfSection *last = *(it++); it != segment->end(); last = *(it++)) {
   1.440 +        // When two consecutive non-SHT_NOBITS sections are apart by more
   1.441 +        // than the alignment of the section, the second can be moved closer
   1.442 +        // to the first, but this requires the segment to be split.
   1.443 +        if (((*it)->getType() != SHT_NOBITS) && (last->getType() != SHT_NOBITS) &&
   1.444 +            ((*it)->getOffset() - last->getOffset() - last->getSize() > segment->getAlign())) {
   1.445 +            // Probably very wrong.
   1.446 +            Elf_Phdr phdr;
   1.447 +            phdr.p_type = PT_LOAD;
   1.448 +            phdr.p_vaddr = 0;
   1.449 +            phdr.p_paddr = phdr.p_vaddr + segment->getVPDiff();
   1.450 +            phdr.p_flags = segment->getFlags();
   1.451 +            phdr.p_align = segment->getAlign();
   1.452 +            phdr.p_filesz = (unsigned int)-1;
   1.453 +            phdr.p_memsz = (unsigned int)-1;
   1.454 +            ElfSegment *newSegment = new ElfSegment(&phdr);
   1.455 +            elf->insertSegmentAfter(segment, newSegment);
   1.456 +            ElfSection *section = *it;
   1.457 +            for (; it != segment->end(); ++it) {
   1.458 +                newSegment->addSection(*it);
   1.459 +            }
   1.460 +            for (it = newSegment->begin(); it != newSegment->end(); it++) {
   1.461 +                segment->removeSection(*it);
   1.462 +            }
   1.463 +            // Fill the virtual address space gap left between the two PT_LOADs
   1.464 +            // with a new PT_LOAD with no permissions. This avoids the linker
   1.465 +            // (especially bionic's) filling the gap with anonymous memory,
   1.466 +            // which breakpad doesn't like.
   1.467 +            // /!\ running strip on a elfhacked binary will break this filler
   1.468 +            // PT_LOAD.
   1.469 +            if (!fill)
   1.470 +                break;
   1.471 +            // Insert dummy segment to normalize the entire Elf with the header
   1.472 +            // sizes adjusted, before inserting a filler segment.
   1.473 +            {
   1.474 +              memset(&phdr, 0, sizeof(phdr));
   1.475 +              ElfSegment dummySegment(&phdr);
   1.476 +              elf->insertSegmentAfter(segment, &dummySegment);
   1.477 +              elf->normalize();
   1.478 +              elf->removeSegment(&dummySegment);
   1.479 +            }
   1.480 +            ElfSection *previous = section->getPrevious();
   1.481 +            phdr.p_type = PT_LOAD;
   1.482 +            phdr.p_vaddr = (previous->getAddr() + previous->getSize() + segment->getAlign() - 1) & ~(segment->getAlign() - 1);
   1.483 +            phdr.p_paddr = phdr.p_vaddr + segment->getVPDiff();
   1.484 +            phdr.p_flags = 0;
   1.485 +            phdr.p_align = 0;
   1.486 +            phdr.p_filesz = (section->getAddr() & ~(newSegment->getAlign() - 1)) - phdr.p_vaddr;
   1.487 +            phdr.p_memsz = phdr.p_filesz;
   1.488 +            if (phdr.p_filesz) {
   1.489 +                newSegment = new ElfSegment(&phdr);
   1.490 +                assert(newSegment->isElfHackFillerSegment());
   1.491 +                elf->insertSegmentAfter(segment, newSegment);
   1.492 +            } else {
   1.493 +                elf->normalize();
   1.494 +            }
   1.495 +            break;
   1.496 +        }
   1.497 +    }
   1.498 +}
   1.499 +
   1.500 +template <typename Rel_Type>
   1.501 +int do_relocation_section(Elf *elf, unsigned int rel_type, unsigned int rel_type2, bool force, bool fill)
   1.502 +{
   1.503 +    ElfDynamic_Section *dyn = elf->getDynSection();
   1.504 +    if (dyn == nullptr) {
   1.505 +        fprintf(stderr, "Couldn't find SHT_DYNAMIC section\n");
   1.506 +        return -1;
   1.507 +    }
   1.508 +
   1.509 +    ElfSegment *relro = elf->getSegmentByType(PT_GNU_RELRO);
   1.510 +
   1.511 +    ElfRel_Section<Rel_Type> *section = (ElfRel_Section<Rel_Type> *)dyn->getSectionForType(Rel_Type::d_tag);
   1.512 +    assert(section->getType() == Rel_Type::sh_type);
   1.513 +
   1.514 +    Elf32_Shdr relhack32_section =
   1.515 +        { 0, SHT_PROGBITS, SHF_ALLOC, 0, (Elf32_Off)-1, 0, SHN_UNDEF, 0,
   1.516 +          Elf_RelHack::size(elf->getClass()), Elf_RelHack::size(elf->getClass()) }; // TODO: sh_addralign should be an alignment, not size
   1.517 +    Elf32_Shdr relhackcode32_section =
   1.518 +        { 0, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, 0, (Elf32_Off)-1, 0,
   1.519 +          SHN_UNDEF, 0, 1, 0 };
   1.520 +
   1.521 +    unsigned int entry_sz = Elf_Addr::size(elf->getClass());
   1.522 +
   1.523 +    // The injected code needs to be executed before any init code in the
   1.524 +    // binary. There are three possible cases:
   1.525 +    // - The binary has no init code at all. In this case, we will add a
   1.526 +    //   DT_INIT entry pointing to the injected code.
   1.527 +    // - The binary has a DT_INIT entry. In this case, we will interpose:
   1.528 +    //   we change DT_INIT to point to the injected code, and have the
   1.529 +    //   injected code call the original DT_INIT entry point.
   1.530 +    // - The binary has no DT_INIT entry, but has a DT_INIT_ARRAY. In this
   1.531 +    //   case, we interpose as well, by replacing the first entry in the
   1.532 +    //   array to point to the injected code, and have the injected code
   1.533 +    //   call the original first entry.
   1.534 +    // The binary may have .ctors instead of DT_INIT_ARRAY, for its init
   1.535 +    // functions, but this falls into the second case above, since .ctors
   1.536 +    // are actually run by DT_INIT code.
   1.537 +    ElfValue *value = dyn->getValueForType(DT_INIT);
   1.538 +    unsigned int original_init = value ? value->getValue() : 0;
   1.539 +    ElfSection *init_array = nullptr;
   1.540 +    if (!value || !value->getValue()) {
   1.541 +        value = dyn->getValueForType(DT_INIT_ARRAYSZ);
   1.542 +        if (value && value->getValue() >= entry_sz)
   1.543 +            init_array = dyn->getSectionForType(DT_INIT_ARRAY);
   1.544 +    }
   1.545 +
   1.546 +    Elf_Shdr relhack_section(relhack32_section);
   1.547 +    Elf_Shdr relhackcode_section(relhackcode32_section);
   1.548 +    ElfRelHack_Section *relhack = new ElfRelHack_Section(relhack_section);
   1.549 +
   1.550 +    ElfSymtab_Section *symtab = (ElfSymtab_Section *) section->getLink();
   1.551 +    Elf_SymValue *sym = symtab->lookup("__cxa_pure_virtual");
   1.552 +
   1.553 +    std::vector<Rel_Type> new_rels;
   1.554 +    Elf_RelHack relhack_entry;
   1.555 +    relhack_entry.r_offset = relhack_entry.r_info = 0;
   1.556 +    size_t init_array_reloc = 0;
   1.557 +    for (typename std::vector<Rel_Type>::iterator i = section->rels.begin();
   1.558 +         i != section->rels.end(); i++) {
   1.559 +        // We don't need to keep R_*_NONE relocations
   1.560 +        if (!ELF32_R_TYPE(i->r_info))
   1.561 +            continue;
   1.562 +        ElfLocation loc(i->r_offset, elf);
   1.563 +        // __cxa_pure_virtual is a function used in vtables to point at pure
   1.564 +        // virtual methods. The __cxa_pure_virtual function usually abort()s.
   1.565 +        // These functions are however normally never called. In the case
   1.566 +        // where they would, jumping to the null address instead of calling
   1.567 +        // __cxa_pure_virtual is going to work just as well. So we can remove
   1.568 +        // relocations for the __cxa_pure_virtual symbol and null out the
   1.569 +        // content at the offset pointed by the relocation.
   1.570 +        if (sym) {
   1.571 +            if (sym->defined) {
   1.572 +                // If we are statically linked to libstdc++, the
   1.573 +                // __cxa_pure_virtual symbol is defined in our lib, and we
   1.574 +                // have relative relocations (rel_type) for it.
   1.575 +                if (ELF32_R_TYPE(i->r_info) == rel_type) {
   1.576 +                    Elf_Addr addr(loc.getBuffer(), entry_sz, elf->getClass(), elf->getData());
   1.577 +                    if (addr.value == sym->value.getValue()) {
   1.578 +                        memset((char *)loc.getBuffer(), 0, entry_sz);
   1.579 +                        continue;
   1.580 +                    }
   1.581 +                }
   1.582 +            } else {
   1.583 +                // If we are dynamically linked to libstdc++, the
   1.584 +                // __cxa_pure_virtual symbol is undefined in our lib, and we
   1.585 +                // have absolute relocations (rel_type2) for it.
   1.586 +                if ((ELF32_R_TYPE(i->r_info) == rel_type2) &&
   1.587 +                    (sym == &symtab->syms[ELF32_R_SYM(i->r_info)])) {
   1.588 +                    memset((char *)loc.getBuffer(), 0, entry_sz);
   1.589 +                    continue;
   1.590 +                }
   1.591 +            }
   1.592 +        }
   1.593 +        // Keep track of the relocation associated with the first init_array entry.
   1.594 +        if (init_array && i->r_offset == init_array->getAddr()) {
   1.595 +            if (init_array_reloc) {
   1.596 +                fprintf(stderr, "Found multiple relocations for the first init_array entry. Skipping\n");
   1.597 +                return -1;
   1.598 +            }
   1.599 +            new_rels.push_back(*i);
   1.600 +            init_array_reloc = new_rels.size();
   1.601 +        } else if (!(loc.getSection()->getFlags() & SHF_WRITE) || (ELF32_R_TYPE(i->r_info) != rel_type) ||
   1.602 +                   (relro && (i->r_offset >= relro->getAddr()) &&
   1.603 +                   (i->r_offset < relro->getAddr() + relro->getMemSize()))) {
   1.604 +            // Don't pack relocations happening in non writable sections.
   1.605 +            // Our injected code is likely not to be allowed to write there.
   1.606 +            new_rels.push_back(*i);
   1.607 +        } else {
   1.608 +            // TODO: check that i->r_addend == *i->r_offset
   1.609 +            if (i->r_offset == relhack_entry.r_offset + relhack_entry.r_info * entry_sz) {
   1.610 +                relhack_entry.r_info++;
   1.611 +            } else {
   1.612 +                if (relhack_entry.r_offset)
   1.613 +                    relhack->push_back(relhack_entry);
   1.614 +                relhack_entry.r_offset = i->r_offset;
   1.615 +                relhack_entry.r_info = 1;
   1.616 +            }
   1.617 +        }
   1.618 +    }
   1.619 +    if (relhack_entry.r_offset)
   1.620 +        relhack->push_back(relhack_entry);
   1.621 +    // Last entry must be nullptr
   1.622 +    relhack_entry.r_offset = relhack_entry.r_info = 0;
   1.623 +    relhack->push_back(relhack_entry);
   1.624 +
   1.625 +    unsigned int old_end = section->getOffset() + section->getSize();
   1.626 +
   1.627 +    if (init_array) {
   1.628 +        if (! init_array_reloc) {
   1.629 +            fprintf(stderr, "Didn't find relocation for DT_INIT_ARRAY's first entry. Skipping\n");
   1.630 +            return -1;
   1.631 +        }
   1.632 +        Rel_Type *rel = &new_rels[init_array_reloc - 1];
   1.633 +        unsigned int addend = get_addend(rel, elf);
   1.634 +        // Use relocated value of DT_INIT_ARRAY's first entry for the
   1.635 +        // function to be called by the injected code.
   1.636 +        if (ELF32_R_TYPE(rel->r_info) == rel_type) {
   1.637 +            original_init = addend;
   1.638 +        } else if (ELF32_R_TYPE(rel->r_info) == rel_type2) {
   1.639 +            ElfSymtab_Section *symtab = (ElfSymtab_Section *)section->getLink();
   1.640 +            original_init = symtab->syms[ELF32_R_SYM(rel->r_info)].value.getValue() + addend;
   1.641 +        } else {
   1.642 +            fprintf(stderr, "Unsupported relocation type for DT_INIT_ARRAY's first entry. Skipping\n");
   1.643 +            return -1;
   1.644 +        }
   1.645 +    }
   1.646 +
   1.647 +    section->rels.assign(new_rels.begin(), new_rels.end());
   1.648 +    section->shrink(new_rels.size() * section->getEntSize());
   1.649 +
   1.650 +    ElfRelHackCode_Section *relhackcode = new ElfRelHackCode_Section(relhackcode_section, *elf, original_init);
   1.651 +    relhackcode->insertBefore(section);
   1.652 +    relhack->insertAfter(relhackcode);
   1.653 +    if (section->getOffset() + section->getSize() >= old_end) {
   1.654 +        fprintf(stderr, "No gain. Skipping\n");
   1.655 +        return -1;
   1.656 +    }
   1.657 +
   1.658 +    // Adjust PT_LOAD segments
   1.659 +    for (ElfSegment *segment = elf->getSegmentByType(PT_LOAD); segment;
   1.660 +         segment = elf->getSegmentByType(PT_LOAD, segment)) {
   1.661 +        maybe_split_segment(elf, segment, fill);
   1.662 +    }
   1.663 +
   1.664 +    // Ensure Elf sections will be at their final location.
   1.665 +    elf->normalize();
   1.666 +    ElfLocation *init = new ElfLocation(relhackcode, relhackcode->getEntryPoint());
   1.667 +    if (init_array) {
   1.668 +        // Adjust the first DT_INIT_ARRAY entry to point at the injected code
   1.669 +        // by transforming its relocation into a relative one pointing to the
   1.670 +        // address of the injected code.
   1.671 +        Rel_Type *rel = &section->rels[init_array_reloc - 1];
   1.672 +        rel->r_info = ELF32_R_INFO(0, rel_type); // Set as a relative relocation
   1.673 +        set_relative_reloc(&section->rels[init_array_reloc - 1], elf, init->getValue());
   1.674 +    } else if (!dyn->setValueForType(DT_INIT, init)) {
   1.675 +        fprintf(stderr, "Can't grow .dynamic section to set DT_INIT. Skipping\n");
   1.676 +        return -1;
   1.677 +    }
   1.678 +    // TODO: adjust the value according to the remaining number of relative relocations
   1.679 +    if (dyn->getValueForType(Rel_Type::d_tag_count))
   1.680 +        dyn->setValueForType(Rel_Type::d_tag_count, new ElfPlainValue(0));
   1.681 +
   1.682 +    return 0;
   1.683 +}
   1.684 +
   1.685 +static inline int backup_file(const char *name)
   1.686 +{
   1.687 +    std::string fname(name);
   1.688 +    fname += ".bak";
   1.689 +    return rename(name, fname.c_str());
   1.690 +}
   1.691 +
   1.692 +void do_file(const char *name, bool backup = false, bool force = false, bool fill = false)
   1.693 +{
   1.694 +    std::ifstream file(name, std::ios::in|std::ios::binary);
   1.695 +    Elf elf(file);
   1.696 +    unsigned int size = elf.getSize();
   1.697 +    fprintf(stderr, "%s: ", name);
   1.698 +    if (elf.getType() != ET_DYN) {
   1.699 +        fprintf(stderr, "Not a shared object. Skipping\n");
   1.700 +        return;
   1.701 +    }
   1.702 +
   1.703 +    for (ElfSection *section = elf.getSection(1); section != nullptr;
   1.704 +         section = section->getNext()) {
   1.705 +        if (section->getName() &&
   1.706 +            (strncmp(section->getName(), ".elfhack.", 9) == 0)) {
   1.707 +            fprintf(stderr, "Already elfhacked. Skipping\n");
   1.708 +            return;
   1.709 +        }
   1.710 +    }
   1.711 +
   1.712 +    int exit = -1;
   1.713 +    switch (elf.getMachine()) {
   1.714 +    case EM_386:
   1.715 +        exit = do_relocation_section<Elf_Rel>(&elf, R_386_RELATIVE, R_386_32, force, fill);
   1.716 +        break;
   1.717 +    case EM_X86_64:
   1.718 +        exit = do_relocation_section<Elf_Rela>(&elf, R_X86_64_RELATIVE, R_X86_64_64, force, fill);
   1.719 +        break;
   1.720 +    case EM_ARM:
   1.721 +        exit = do_relocation_section<Elf_Rel>(&elf, R_ARM_RELATIVE, R_ARM_ABS32, force, fill);
   1.722 +        break;
   1.723 +    }
   1.724 +    if (exit == 0) {
   1.725 +        if (!force && (elf.getSize() >= size)) {
   1.726 +            fprintf(stderr, "No gain. Skipping\n");
   1.727 +        } else if (backup && backup_file(name) != 0) {
   1.728 +            fprintf(stderr, "Couln't create backup file\n");
   1.729 +        } else {
   1.730 +            std::ofstream ofile(name, std::ios::out|std::ios::binary|std::ios::trunc);
   1.731 +            elf.write(ofile);
   1.732 +            fprintf(stderr, "Reduced by %d bytes\n", size - elf.getSize());
   1.733 +        }
   1.734 +    }
   1.735 +}
   1.736 +
   1.737 +void undo_file(const char *name, bool backup = false)
   1.738 +{
   1.739 +    std::ifstream file(name, std::ios::in|std::ios::binary);
   1.740 +    Elf elf(file);
   1.741 +    unsigned int size = elf.getSize();
   1.742 +    fprintf(stderr, "%s: ", name);
   1.743 +    if (elf.getType() != ET_DYN) {
   1.744 +        fprintf(stderr, "Not a shared object. Skipping\n");
   1.745 +        return;
   1.746 +    }
   1.747 +
   1.748 +    ElfSection *data = nullptr, *text = nullptr;
   1.749 +    for (ElfSection *section = elf.getSection(1); section != nullptr;
   1.750 +         section = section->getNext()) {
   1.751 +        if (section->getName() &&
   1.752 +            (strcmp(section->getName(), elfhack_data) == 0))
   1.753 +            data = section;
   1.754 +        if (section->getName() &&
   1.755 +            (strcmp(section->getName(), elfhack_text) == 0))
   1.756 +            text = section;
   1.757 +    }
   1.758 +
   1.759 +    if (!data || !text) {
   1.760 +        fprintf(stderr, "Not elfhacked. Skipping\n");
   1.761 +        return;
   1.762 +    }
   1.763 +    if (data != text->getNext()) {
   1.764 +        fprintf(stderr, elfhack_data " section not following " elfhack_text ". Skipping\n");
   1.765 +        return;
   1.766 +    }
   1.767 +
   1.768 +    ElfSegment *first = elf.getSegmentByType(PT_LOAD);
   1.769 +    ElfSegment *second = elf.getSegmentByType(PT_LOAD, first);
   1.770 +    ElfSegment *filler = nullptr;
   1.771 +    // If the second PT_LOAD is a filler from elfhack --fill, check the third.
   1.772 +    if (second->isElfHackFillerSegment()) {
   1.773 +        filler = second;
   1.774 +        second = elf.getSegmentByType(PT_LOAD, filler);
   1.775 +    }
   1.776 +    if (second->getFlags() != first->getFlags()) {
   1.777 +        fprintf(stderr, "Couldn't identify elfhacked PT_LOAD segments. Skipping\n");
   1.778 +        return;
   1.779 +    }
   1.780 +    // Move sections from the second PT_LOAD to the first, and remove the
   1.781 +    // second PT_LOAD segment.
   1.782 +    for (std::list<ElfSection *>::iterator section = second->begin();
   1.783 +         section != second->end(); ++section)
   1.784 +        first->addSection(*section);
   1.785 +
   1.786 +    elf.removeSegment(second);
   1.787 +    if (filler)
   1.788 +        elf.removeSegment(filler);
   1.789 +
   1.790 +    if (backup && backup_file(name) != 0) {
   1.791 +        fprintf(stderr, "Couln't create backup file\n");
   1.792 +    } else {
   1.793 +        std::ofstream ofile(name, std::ios::out|std::ios::binary|std::ios::trunc);
   1.794 +        elf.write(ofile);
   1.795 +        fprintf(stderr, "Grown by %d bytes\n", elf.getSize() - size);
   1.796 +    }
   1.797 +}
   1.798 +
   1.799 +int main(int argc, char *argv[])
   1.800 +{
   1.801 +    int arg;
   1.802 +    bool backup = false;
   1.803 +    bool force = false;
   1.804 +    bool revert = false;
   1.805 +    bool fill = false;
   1.806 +    char *lastSlash = rindex(argv[0], '/');
   1.807 +    if (lastSlash != nullptr)
   1.808 +        rundir = strndup(argv[0], lastSlash - argv[0]);
   1.809 +    for (arg = 1; arg < argc; arg++) {
   1.810 +        if (strcmp(argv[arg], "-f") == 0)
   1.811 +            force = true;
   1.812 +        else if (strcmp(argv[arg], "-b") == 0)
   1.813 +            backup = true;
   1.814 +        else if (strcmp(argv[arg], "-r") == 0)
   1.815 +            revert = true;
   1.816 +        else if (strcmp(argv[arg], "--fill") == 0)
   1.817 +            fill = true;
   1.818 +        else if (revert) {
   1.819 +            undo_file(argv[arg], backup);
   1.820 +        } else
   1.821 +            do_file(argv[arg], backup, force, fill);
   1.822 +    }
   1.823 +
   1.824 +    free(rundir);
   1.825 +    return 0;
   1.826 +}

mercurial