/* GRAPHITE2 LICENSING

    Copyright 2010, SIL International
    All rights reserved.

    This library is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published
    by the Free Software Foundation; either version 2.1 of License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should also have received a copy of the GNU Lesser General Public
    License along with this library in the file named "LICENSE".
    If not, write to the Free Software Foundation, 51 Franklin Street,
    Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
    internet at http://www.fsf.org/licenses/lgpl.html.

    Alternatively, the contents of this file may be used under the terms of the
    Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
    License, as published by the Free Software Foundation, either version 2
    of the License or (at your option) any later version.
*/
// This class represents loaded graphite stack machine code.  It performs
// basic sanity checks on the incoming code to prevent more obvious problems
// from crashing graphite.
michael@0: // Author: Tim Eves michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include "graphite2/Segment.h" michael@0: #include "inc/Code.h" michael@0: #include "inc/Face.h" michael@0: #include "inc/GlyphFace.h" michael@0: #include "inc/GlyphCache.h" michael@0: #include "inc/Machine.h" michael@0: #include "inc/Rule.h" michael@0: #include "inc/Silf.h" michael@0: michael@0: #include michael@0: michael@0: #ifdef NDEBUG michael@0: #ifdef __GNUC__ michael@0: #pragma GCC diagnostic ignored "-Wunused-parameter" michael@0: #endif michael@0: #endif michael@0: michael@0: michael@0: using namespace graphite2; michael@0: using namespace vm; michael@0: michael@0: namespace { michael@0: michael@0: inline bool is_return(const instr i) { michael@0: const opcode_t * opmap = Machine::getOpcodeTable(); michael@0: const instr pop_ret = *opmap[POP_RET].impl, michael@0: ret_zero = *opmap[RET_ZERO].impl, michael@0: ret_true = *opmap[RET_TRUE].impl; michael@0: return i == pop_ret || i == ret_zero || i == ret_true; michael@0: } michael@0: michael@0: struct context michael@0: { michael@0: context(uint8 ref=0) : codeRef(ref) {flags.changed=false; flags.referenced=false; flags.inserted=false;} michael@0: struct { michael@0: uint8 changed:1, michael@0: referenced:1, michael@0: inserted:1; michael@0: } flags; michael@0: uint8 codeRef; michael@0: }; michael@0: michael@0: } // end namespace michael@0: michael@0: michael@0: class Machine::Code::decoder michael@0: { michael@0: public: michael@0: struct limits; michael@0: struct analysis michael@0: { michael@0: uint8 slotref; michael@0: context contexts[256]; michael@0: byte max_ref; michael@0: michael@0: analysis() : slotref(0), max_ref(0) {}; michael@0: void set_ref(int index) throw(); michael@0: void set_changed(int index) throw(); michael@0: michael@0: }; michael@0: michael@0: decoder(const limits & lims, Code &code) throw(); michael@0: michael@0: bool load(const byte * bc_begin, const byte 
* bc_end); michael@0: void apply_analysis(instr * const code, instr * code_end); michael@0: byte max_ref() { return _analysis.max_ref; } michael@0: int pre_context() const { return _pre_context; } michael@0: michael@0: private: michael@0: opcode fetch_opcode(const byte * bc); michael@0: void analyse_opcode(const opcode, const int8 * const dp) throw(); michael@0: bool emit_opcode(opcode opc, const byte * & bc); michael@0: bool validate_opcode(const opcode opc, const byte * const bc); michael@0: bool valid_upto(const uint16 limit, const uint16 x) const throw(); michael@0: void failure(const status_t s) const throw() { _code.failure(s); } michael@0: michael@0: Code & _code; michael@0: int _pre_context; michael@0: uint16 _rule_length; michael@0: instr * _instr; michael@0: byte * _data; michael@0: const limits & _max; michael@0: analysis _analysis; michael@0: }; michael@0: michael@0: michael@0: struct Machine::Code::decoder::limits michael@0: { michael@0: const byte * const bytecode; michael@0: const uint8 pre_context; michael@0: const uint16 rule_length, michael@0: classes, michael@0: glyf_attrs, michael@0: features; michael@0: const byte attrid[gr_slatMax]; michael@0: }; michael@0: michael@0: inline Machine::Code::decoder::decoder(const limits & lims, Code &code) throw() michael@0: : _code(code), michael@0: _pre_context(code._constraint ? 0 : lims.pre_context), michael@0: _rule_length(code._constraint ? 
1 : lims.rule_length), michael@0: _instr(code._code), _data(code._data), _max(lims) michael@0: { } michael@0: michael@0: michael@0: michael@0: Machine::Code::Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end, michael@0: uint8 pre_context, uint16 rule_length, const Silf & silf, const Face & face) michael@0: : _code(0), _data(0), _data_size(0), _instr_count(0), _max_ref(0), _status(loaded), michael@0: _constraint(is_constraint), _modify(false), _delete(false), _own(true) michael@0: { michael@0: #ifdef GRAPHITE2_TELEMETRY michael@0: telemetry::category _code_cat(face.tele.code); michael@0: #endif michael@0: assert(bytecode_begin != 0); michael@0: if (bytecode_begin == bytecode_end) michael@0: { michael@0: ::new (this) Code(); michael@0: return; michael@0: } michael@0: assert(bytecode_end > bytecode_begin); michael@0: const opcode_t * op_to_fn = Machine::getOpcodeTable(); michael@0: michael@0: // Allocate code and dat target buffers, these sizes are a worst case michael@0: // estimate. Once we know their real sizes the we'll shrink them. 
michael@0: _code = static_cast(malloc((bytecode_end - bytecode_begin) michael@0: * sizeof(instr))); michael@0: _data = static_cast(malloc((bytecode_end - bytecode_begin) michael@0: * sizeof(byte))); michael@0: michael@0: if (!_code || !_data) { michael@0: failure(alloc_failed); michael@0: return; michael@0: } michael@0: michael@0: const decoder::limits lims = { michael@0: bytecode_end, michael@0: pre_context, michael@0: rule_length, michael@0: silf.numClasses(), michael@0: face.glyphs().numAttrs(), michael@0: face.numFeatures(), michael@0: {1,1,1,1,1,1,1,1, michael@0: 1,1,1,1,1,1,1,255, michael@0: 1,1,1,1,1,1,1,1, michael@0: 1,1,1,1,1,1,0,0, michael@0: 0,0,0,0,0,0,0,0, michael@0: 0,0,0,0,0,0,0,0, michael@0: 0,0,0,0,0,0,0, silf.numUser()} michael@0: }; michael@0: michael@0: decoder dec(lims, *this); michael@0: if(!dec.load(bytecode_begin, bytecode_end)) michael@0: return; michael@0: michael@0: // Is this an empty program? michael@0: if (_instr_count == 0) michael@0: { michael@0: release_buffers(); michael@0: ::new (this) Code(); michael@0: return; michael@0: } michael@0: michael@0: // When we reach the end check we've terminated it correctly michael@0: if (!is_return(_code[_instr_count-1])) { michael@0: failure(missing_return); michael@0: return; michael@0: } michael@0: michael@0: assert((_constraint && immutable()) || !_constraint); michael@0: dec.apply_analysis(_code, _code + _instr_count); michael@0: _max_ref = dec.max_ref(); michael@0: michael@0: // Now we know exactly how much code and data the program really needs michael@0: // realloc the buffers to exactly the right size so we don't waste any michael@0: // memory. 
michael@0: assert((bytecode_end - bytecode_begin) >= std::ptrdiff_t(_instr_count)); michael@0: assert((bytecode_end - bytecode_begin) >= std::ptrdiff_t(_data_size)); michael@0: _code = static_cast(realloc(_code, (_instr_count+1)*sizeof(instr))); michael@0: _data = static_cast(realloc(_data, _data_size*sizeof(byte))); michael@0: michael@0: if (!_code) michael@0: { michael@0: failure(alloc_failed); michael@0: return; michael@0: } michael@0: michael@0: // Make this RET_ZERO, we should never reach this but just in case ... michael@0: _code[_instr_count] = op_to_fn[RET_ZERO].impl[_constraint]; michael@0: michael@0: #ifdef GRAPHITE2_TELEMETRY michael@0: telemetry::count_bytes(_data_size + (_instr_count+1)*sizeof(instr)); michael@0: #endif michael@0: } michael@0: michael@0: Machine::Code::~Code() throw () michael@0: { michael@0: if (_own) michael@0: release_buffers(); michael@0: } michael@0: michael@0: michael@0: bool Machine::Code::decoder::load(const byte * bc, const byte * bc_end) michael@0: { michael@0: while (bc < bc_end) michael@0: { michael@0: const opcode opc = fetch_opcode(bc++); michael@0: if (opc == vm::MAX_OPCODE) michael@0: return false; michael@0: michael@0: analyse_opcode(opc, reinterpret_cast(bc)); michael@0: michael@0: if (!emit_opcode(opc, bc)) michael@0: return false; michael@0: } michael@0: michael@0: return bool(_code); michael@0: } michael@0: michael@0: // Validation check and fixups. 
michael@0: // michael@0: michael@0: opcode Machine::Code::decoder::fetch_opcode(const byte * bc) michael@0: { michael@0: const opcode opc = opcode(*bc++); michael@0: michael@0: // Do some basic sanity checks based on what we know about the opcode michael@0: if (!validate_opcode(opc, bc)) return MAX_OPCODE; michael@0: michael@0: // And check it's arguments as far as possible michael@0: switch (opc) michael@0: { michael@0: case NOP : michael@0: case PUSH_BYTE : michael@0: case PUSH_BYTEU : michael@0: case PUSH_SHORT : michael@0: case PUSH_SHORTU : michael@0: case PUSH_LONG : michael@0: case ADD : michael@0: case SUB : michael@0: case MUL : michael@0: case DIV : michael@0: case MIN_ : michael@0: case MAX_ : michael@0: case NEG : michael@0: case TRUNC8 : michael@0: case TRUNC16 : michael@0: case COND : michael@0: case AND : michael@0: case OR : michael@0: case NOT : michael@0: case EQUAL : michael@0: case NOT_EQ : michael@0: case LESS : michael@0: case GTR : michael@0: case LESS_EQ : michael@0: case GTR_EQ : michael@0: break; michael@0: case NEXT : michael@0: case NEXT_N : // runtime checked michael@0: case COPY_NEXT : michael@0: ++_pre_context; michael@0: break; michael@0: case PUT_GLYPH_8BIT_OBS : michael@0: valid_upto(_max.classes, bc[0]); michael@0: break; michael@0: case PUT_SUBS_8BIT_OBS : michael@0: valid_upto(_rule_length, _pre_context + int8(bc[0])); michael@0: valid_upto(_max.classes, bc[1]); michael@0: valid_upto(_max.classes, bc[2]); michael@0: break; michael@0: case PUT_COPY : michael@0: valid_upto(_rule_length, _pre_context + int8(bc[0])); michael@0: break; michael@0: case INSERT : michael@0: --_pre_context; michael@0: break; michael@0: case DELETE : michael@0: break; michael@0: case ASSOC : michael@0: for (uint8 num = bc[0]; num; --num) michael@0: valid_upto(_rule_length, _pre_context + int8(bc[num])); michael@0: break; michael@0: case CNTXT_ITEM : michael@0: valid_upto(_max.rule_length, _max.pre_context + int8(bc[0])); michael@0: if (bc + 2 + bc[1] >= 
_max.bytecode) failure(jump_past_end); michael@0: if (_pre_context != 0) failure(nested_context_item); michael@0: break; michael@0: case ATTR_SET : michael@0: case ATTR_ADD : michael@0: case ATTR_SUB : michael@0: case ATTR_SET_SLOT : michael@0: valid_upto(gr_slatMax, bc[0]); michael@0: break; michael@0: case IATTR_SET_SLOT : michael@0: if (valid_upto(gr_slatMax, bc[0])) michael@0: valid_upto(_max.attrid[bc[0]], bc[1]); michael@0: break; michael@0: case PUSH_SLOT_ATTR : michael@0: valid_upto(gr_slatMax, bc[0]); michael@0: valid_upto(_rule_length, _pre_context + int8(bc[1])); michael@0: break; michael@0: case PUSH_GLYPH_ATTR_OBS : michael@0: valid_upto(_max.glyf_attrs, bc[0]); michael@0: valid_upto(_rule_length, _pre_context + int8(bc[1])); michael@0: break; michael@0: case PUSH_GLYPH_METRIC : michael@0: valid_upto(kgmetDescent, bc[0]); michael@0: valid_upto(_rule_length, _pre_context + int8(bc[1])); michael@0: // level: dp[2] no check necessary michael@0: break; michael@0: case PUSH_FEAT : michael@0: valid_upto(_max.features, bc[0]); michael@0: valid_upto(_rule_length, _pre_context + int8(bc[1])); michael@0: break; michael@0: case PUSH_ATT_TO_GATTR_OBS : michael@0: valid_upto(_max.glyf_attrs, bc[0]); michael@0: valid_upto(_rule_length, _pre_context + int8(bc[1])); michael@0: break; michael@0: case PUSH_ATT_TO_GLYPH_METRIC : michael@0: valid_upto(kgmetDescent, bc[0]); michael@0: valid_upto(_rule_length, _pre_context + int8(bc[1])); michael@0: // level: dp[2] no check necessary michael@0: break; michael@0: case PUSH_ISLOT_ATTR : michael@0: if (valid_upto(gr_slatMax, bc[0])) michael@0: { michael@0: valid_upto(_rule_length, _pre_context + int8(bc[1])); michael@0: valid_upto(_max.attrid[bc[0]], bc[2]); michael@0: } michael@0: break; michael@0: case PUSH_IGLYPH_ATTR :// not implemented michael@0: case POP_RET : michael@0: case RET_ZERO : michael@0: case RET_TRUE : michael@0: break; michael@0: case IATTR_SET : michael@0: case IATTR_ADD : michael@0: case IATTR_SUB : 
michael@0: if (valid_upto(gr_slatMax, bc[0])) michael@0: valid_upto(_max.attrid[bc[0]], bc[1]); michael@0: break; michael@0: case PUSH_PROC_STATE : // dummy: dp[0] no check necessary michael@0: case PUSH_VERSION : michael@0: break; michael@0: case PUT_SUBS : michael@0: valid_upto(_rule_length, _pre_context + int8(bc[0])); michael@0: valid_upto(_max.classes, uint16(bc[1]<< 8) | bc[2]); michael@0: valid_upto(_max.classes, uint16(bc[3]<< 8) | bc[4]); michael@0: break; michael@0: case PUT_SUBS2 : // not implemented michael@0: case PUT_SUBS3 : // not implemented michael@0: break; michael@0: case PUT_GLYPH : michael@0: valid_upto(_max.classes, uint16(bc[0]<< 8) | bc[1]); michael@0: break; michael@0: case PUSH_GLYPH_ATTR : michael@0: case PUSH_ATT_TO_GLYPH_ATTR : michael@0: valid_upto(_max.glyf_attrs, uint16(bc[0]<< 8) | bc[1]); michael@0: valid_upto(_rule_length, _pre_context + int8(bc[2])); michael@0: break; michael@0: default: michael@0: failure(invalid_opcode); michael@0: break; michael@0: } michael@0: michael@0: return bool(_code) ? 
opc : MAX_OPCODE; michael@0: } michael@0: michael@0: michael@0: void Machine::Code::decoder::analyse_opcode(const opcode opc, const int8 * arg) throw() michael@0: { michael@0: if (_code._constraint) return; michael@0: michael@0: switch (opc) michael@0: { michael@0: case DELETE : michael@0: _code._delete = true; michael@0: break; michael@0: case PUT_GLYPH_8BIT_OBS : michael@0: case PUT_GLYPH : michael@0: _code._modify = true; michael@0: _analysis.set_changed(_analysis.slotref); michael@0: break; michael@0: case NEXT : michael@0: case COPY_NEXT : michael@0: if (!_analysis.contexts[_analysis.slotref].flags.inserted) michael@0: ++_analysis.slotref; michael@0: _analysis.contexts[_analysis.slotref] = context(_code._instr_count+1); michael@0: if (_analysis.slotref > _analysis.max_ref) _analysis.max_ref = _analysis.slotref; michael@0: break; michael@0: case INSERT : michael@0: _analysis.contexts[_analysis.slotref].flags.inserted = true; michael@0: _code._modify = true; michael@0: break; michael@0: case PUT_SUBS_8BIT_OBS : // slotref on 1st parameter michael@0: case PUT_SUBS : michael@0: _code._modify = true; michael@0: _analysis.set_changed(_analysis.slotref); michael@0: // no break michael@0: case PUT_COPY : michael@0: { michael@0: if (arg[0] != 0) { _analysis.set_changed(_analysis.slotref); _code._modify = true; } michael@0: if (arg[0] <= 0 && -arg[0] <= _analysis.slotref - _analysis.contexts[_analysis.slotref].flags.inserted) michael@0: _analysis.set_ref(_analysis.slotref + arg[0] - _analysis.contexts[_analysis.slotref].flags.inserted); michael@0: else if (_analysis.slotref + arg[0] > _analysis.max_ref) _analysis.max_ref = _analysis.slotref + arg[0]; michael@0: break; michael@0: } michael@0: case PUSH_ATT_TO_GATTR_OBS : // slotref on 2nd parameter michael@0: if (_code._constraint) return; michael@0: // no break michael@0: case PUSH_GLYPH_ATTR_OBS : michael@0: case PUSH_SLOT_ATTR : michael@0: case PUSH_GLYPH_METRIC : michael@0: case PUSH_ATT_TO_GLYPH_METRIC : michael@0: 
case PUSH_ISLOT_ATTR : michael@0: case PUSH_FEAT : michael@0: if (arg[1] <= 0 && -arg[1] <= _analysis.slotref - _analysis.contexts[_analysis.slotref].flags.inserted) michael@0: _analysis.set_ref(_analysis.slotref + arg[1] - _analysis.contexts[_analysis.slotref].flags.inserted); michael@0: else if (_analysis.slotref + arg[1] > _analysis.max_ref) _analysis.max_ref = _analysis.slotref + arg[1]; michael@0: break; michael@0: case PUSH_ATT_TO_GLYPH_ATTR : michael@0: if (_code._constraint) return; michael@0: // no break michael@0: case PUSH_GLYPH_ATTR : michael@0: if (arg[2] <= 0 && -arg[2] <= _analysis.slotref - _analysis.contexts[_analysis.slotref].flags.inserted) michael@0: _analysis.set_ref(_analysis.slotref + arg[2] - _analysis.contexts[_analysis.slotref].flags.inserted); michael@0: else if (_analysis.slotref + arg[2] > _analysis.max_ref) _analysis.max_ref = _analysis.slotref + arg[2]; michael@0: break; michael@0: case ASSOC : // slotrefs in varargs michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: michael@0: bool Machine::Code::decoder::emit_opcode(opcode opc, const byte * & bc) michael@0: { michael@0: const opcode_t * op_to_fn = Machine::getOpcodeTable(); michael@0: const opcode_t & op = op_to_fn[opc]; michael@0: if (op.impl[_code._constraint] == 0) michael@0: { michael@0: failure(unimplemented_opcode_used); michael@0: return false; michael@0: } michael@0: michael@0: const size_t param_sz = op.param_sz == VARARGS ? 
bc[0] + 1 : op.param_sz; michael@0: michael@0: // Add this instruction michael@0: *_instr++ = op.impl[_code._constraint]; michael@0: ++_code._instr_count; michael@0: michael@0: // Grab the parameters michael@0: if (param_sz) { michael@0: memcpy(_data, bc, param_sz * sizeof(byte)); michael@0: bc += param_sz; michael@0: _data += param_sz; michael@0: _code._data_size += param_sz; michael@0: } michael@0: michael@0: // recursively decode a context item so we can split the skip into michael@0: // instruction and data portions. michael@0: if (opc == CNTXT_ITEM) michael@0: { michael@0: assert(_pre_context == 0); michael@0: _pre_context = _max.pre_context + int8(_data[-2]); michael@0: _rule_length = _max.rule_length; michael@0: michael@0: const size_t ctxt_start = _code._instr_count; michael@0: byte & instr_skip = _data[-1]; michael@0: byte & data_skip = *_data++; michael@0: ++_code._data_size; michael@0: michael@0: if (load(bc, bc + instr_skip)) michael@0: { michael@0: bc += instr_skip; michael@0: data_skip = instr_skip - (_code._instr_count - ctxt_start); michael@0: instr_skip = _code._instr_count - ctxt_start; michael@0: michael@0: _rule_length = 1; michael@0: _pre_context = 0; michael@0: } michael@0: } michael@0: michael@0: return bool(_code); michael@0: } michael@0: michael@0: michael@0: void Machine::Code::decoder::apply_analysis(instr * const code, instr * code_end) michael@0: { michael@0: // insert TEMP_COPY commands for slots that need them (that change and are referenced later) michael@0: int tempcount = 0; michael@0: if (_code._constraint) return; michael@0: michael@0: const instr temp_copy = Machine::getOpcodeTable()[TEMP_COPY].impl[0]; michael@0: for (const context * c = _analysis.contexts, * const ce = c + _analysis.slotref; c != ce; ++c) michael@0: { michael@0: if (!c->flags.referenced || !c->flags.changed) continue; michael@0: michael@0: instr * const tip = code + c->codeRef + tempcount; michael@0: memmove(tip+1, tip, (code_end - tip) * sizeof(instr)); 
michael@0: *tip = temp_copy; michael@0: ++code_end; michael@0: ++tempcount; michael@0: } michael@0: michael@0: _code._instr_count = code_end - code; michael@0: } michael@0: michael@0: michael@0: inline michael@0: bool Machine::Code::decoder::validate_opcode(const opcode opc, const byte * const bc) michael@0: { michael@0: if (opc >= MAX_OPCODE) michael@0: { michael@0: failure(invalid_opcode); michael@0: return false; michael@0: } michael@0: const opcode_t & op = Machine::getOpcodeTable()[opc]; michael@0: const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz; michael@0: if (bc + param_sz > _max.bytecode) michael@0: { michael@0: failure(arguments_exhausted); michael@0: return false; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: michael@0: bool Machine::Code::decoder::valid_upto(const uint16 limit, const uint16 x) const throw() michael@0: { michael@0: const bool t = x < limit; michael@0: if (!t) failure(out_of_range_data); michael@0: return t; michael@0: } michael@0: michael@0: michael@0: inline michael@0: void Machine::Code::failure(const status_t s) throw() { michael@0: release_buffers(); michael@0: _status = s; michael@0: } michael@0: michael@0: michael@0: inline michael@0: void Machine::Code::decoder::analysis::set_ref(const int index) throw() { michael@0: contexts[index].flags.referenced = true; michael@0: if (index > max_ref) max_ref = index; michael@0: } michael@0: michael@0: michael@0: inline michael@0: void Machine::Code::decoder::analysis::set_changed(const int index) throw() { michael@0: contexts[index].flags.changed = true; michael@0: if (index > max_ref) max_ref = index; michael@0: } michael@0: michael@0: michael@0: void Machine::Code::release_buffers() throw() michael@0: { michael@0: free(_code); michael@0: free(_data); michael@0: _code = 0; michael@0: _data = 0; michael@0: _own = false; michael@0: } michael@0: michael@0: michael@0: int32 Machine::Code::run(Machine & m, slotref * & map) const michael@0: { 
michael@0: assert(_own); michael@0: assert(*this); // Check we are actually runnable michael@0: michael@0: if (m.slotMap().size() <= size_t(_max_ref + m.slotMap().context())) michael@0: { michael@0: m._status = Machine::slot_offset_out_bounds; michael@0: // return (m.slotMap().end() - map); michael@0: return 1; michael@0: } michael@0: michael@0: return m.run(_code, _data, map); michael@0: } michael@0: