gfx/ots/src/cmap.cc

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
     2 // Use of this source code is governed by a BSD-style license that can be
     3 // found in the LICENSE file.
     5 #include "cmap.h"
     7 #include <algorithm>
     8 #include <set>
     9 #include <utility>
    10 #include <vector>
    12 #include "maxp.h"
    13 #include "os2.h"
    15 // cmap - Character To Glyph Index Mapping Table
    16 // http://www.microsoft.com/typography/otspec/cmap.htm
    18 #define TABLE_NAME "cmap"
    20 namespace {
    22 struct CMAPSubtableHeader {
         // Bookkeeping for one cmap subtable: the three fields of its directory
         // entry plus the format/length/language that ots_cmap_parse reads from
         // the start of the subtable itself.
    23   uint16_t platform;  // platform ID from the directory entry
    24   uint16_t encoding;  // encoding ID from the directory entry
    25   uint32_t offset;    // subtable position, in bytes from the start of the cmap table
    26   uint16_t format;    // first 16-bit field found at |offset|
    27   uint32_t length;    // subtable length; set to 0 for formats the parser does not handle
    28   uint32_t language;  // language field; set to 0 for format 14 and unhandled formats
    29 };
    31 struct Subtable314Range {
         // One segment of a format 4 subtable as collected by ParseFormat4.
    32   uint16_t start_range;      // first character code covered by the segment
    33   uint16_t end_range;        // last character code covered by the segment (inclusive)
    34   int16_t id_delta;          // added to the character code when id_range_offset is 0
    35   uint16_t id_range_offset;  // 0, or a byte offset to the glyph id array entry
         // Position inside the subtable at which id_range_offset itself was read;
         // the glyph id array offset is computed relative to this position.
    36   uint32_t id_range_offset_offset;
    37 };
    39 // The maximum number of groups in format 12, 13 or 14 subtables.
    40 // Note: 0xFFFF is the maximum number of glyphs in a single font file.
    41 const unsigned kMaxCMAPGroups = 0xFFFF;
    43 // Glyph array size for the Mac Roman (format 0) table.
    44 const size_t kFormat0ArraySize = 256;
    46 // The upper limit of Unicode code points.
    47 const uint32_t kUnicodeUpperLimit = 0x10FFFF;
    49 // The maximum number of UVS records: one per variation selector listed
       // below, i.e. 3 + 16 + 240 = 259.
    50 const uint32_t kMaxCMAPSelectorRecords = 259;
    51 // The ranges of variation selectors (UVSes) are:
    52 //   0x180B-0x180D (3 code points)
    53 //   0xFE00-0xFE0F (16 code points)
    54 //   0xE0100-0xE01EF (240 code points)
    55 const uint32_t kMongolianVSStart = 0x180B;
    56 const uint32_t kMongolianVSEnd = 0x180D;
    57 const uint32_t kVSStart = 0xFE00;
    58 const uint32_t kVSEnd = 0xFE0F;
    59 const uint32_t kIVSStart = 0xE0100;
    60 const uint32_t kIVSEnd = 0xE01EF;
       // Largest value that fits in 24 bits. Parse0514 uses it as the upper bound
       // for (unicode_value + additional_count) of a default UVS range.
    61 const uint32_t kUVSUpperLimit = 0xFFFFFF;
    63 // Parses Format 4 tables
       //
       // Validates a format 4 cmap subtable and, when it passes every check,
       // records it verbatim (pointer and length, not a copy) in file->cmap.
       //
       //   file        font being processed; file->os2 must be set and its
       //               first_char_index/last_char_index may be adjusted here.
       //               file->cmap receives the accepted subtable.
       //   platform,
       //   encoding    select the destination slot: 3/0, 3/1 or 0/3. Any other
       //               pair is rejected after validation.
       //   data,length the raw subtable bytes.
       //   num_glyphs  every glyph id reachable through the table must be below
       //               this value.
       //
       // Returns true when the subtable is accepted; every rejection returns the
       // value of OTS_FAILURE_MSG (macro defined outside this file).
    64 bool ParseFormat4(ots::OpenTypeFile *file, int platform, int encoding,
    65               const uint8_t *data, size_t length, uint16_t num_glyphs) {
    66   ots::Buffer subtable(data, length);
    68   // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
    69   // whole thing and recompacting it, we validate it and include it verbatim
    70   // in the output.
    72   if (!file->os2) {
    73     return OTS_FAILURE_MSG("Required OS/2 table missing");
    74   }
         // Step over the 16-bit format and 16-bit length fields; ots_cmap_parse has
         // already read both.
    76   if (!subtable.Skip(4)) {
    77     return OTS_FAILURE_MSG("Can't read 4 bytes at start of cmap format 4 subtable");
    78   }
    79   uint16_t language = 0;
    80   if (!subtable.ReadU16(&language)) {
    81     return OTS_FAILURE_MSG("Can't read language");
    82   }
    83   if (language) {
    84     // Platform ID 3 (windows) subtables should have language '0'.
    85     return OTS_FAILURE_MSG("Languages should be 0 (%d)", language);
    86   }
    88   uint16_t segcountx2, search_range, entry_selector, range_shift;
    89   segcountx2 = search_range = entry_selector = range_shift = 0;
    90   if (!subtable.ReadU16(&segcountx2) ||
    91       !subtable.ReadU16(&search_range) ||
    92       !subtable.ReadU16(&entry_selector) ||
    93       !subtable.ReadU16(&range_shift)) {
    94     return OTS_FAILURE_MSG("Failed to read subcmap structure");
    95   }
         // Both values are "count times two", so neither may be odd.
    97   if (segcountx2 & 1 || search_range & 1) {
    98     return OTS_FAILURE_MSG("Bad subcmap structure");
    99   }
   100   const uint16_t segcount = segcountx2 >> 1;
   101   // There must be at least one segment according to the spec.
   102   if (segcount < 1) {
   103     return OTS_FAILURE_MSG("Segcount < 1 (%d)", segcount);
   104   }
   106   // log2segcount is the maximal x s.t. 2^x <= segcount, i.e.
         // floor(log2(segcount)).
   107   unsigned log2segcount = 0;
   108   while (1u << (log2segcount + 1) <= segcount) {
   109     log2segcount++;
   110   }
         // Evaluates as (2 * 1u) << log2segcount, i.e. 2 * 2^log2segcount. The
         // three binary-search helper fields below must match the values derived
         // from segcount exactly.
   112   const uint16_t expected_search_range = 2 * 1u << log2segcount;
   113   if (expected_search_range != search_range) {
   114     return OTS_FAILURE_MSG("expected search range != search range (%d != %d)", expected_search_range, search_range);
   115   }
   117   if (entry_selector != log2segcount) {
   118     return OTS_FAILURE_MSG("entry selector != log2(segement count) (%d != %d)", entry_selector, log2segcount);
   119   }
   121   const uint16_t expected_range_shift = segcountx2 - search_range;
   122   if (range_shift != expected_range_shift) {
   123     return OTS_FAILURE_MSG("unexpected range shift (%d != %d)", range_shift, expected_range_shift);
   124   }
         // The subtable stores four parallel arrays of segcount entries each
         // (end codes, a zero pad word, start codes, deltas, range offsets). They
         // are read in that order into one struct per segment.
   126   std::vector<Subtable314Range> ranges(segcount);
   128   for (unsigned i = 0; i < segcount; ++i) {
   129     if (!subtable.ReadU16(&ranges[i].end_range)) {
   130       return OTS_FAILURE_MSG("Failed to read segment %d", i);
   131     }
   132   }
   134   uint16_t padding;
   135   if (!subtable.ReadU16(&padding)) {
   136     return OTS_FAILURE_MSG("Failed to read cmap subtable segment padding");
   137   }
   138   if (padding) {
   139     return OTS_FAILURE_MSG("Non zero cmap subtable segment padding (%d)", padding);
   140   }
   142   for (unsigned i = 0; i < segcount; ++i) {
   143     if (!subtable.ReadU16(&ranges[i].start_range)) {
   144       return OTS_FAILURE_MSG("Failed to read segment start range %d", i);
   145     }
   146   }
   147   for (unsigned i = 0; i < segcount; ++i) {
   148     if (!subtable.ReadS16(&ranges[i].id_delta)) {
   149       return OTS_FAILURE_MSG("Failed to read segment delta %d", i);
   150     }
   151   }
   152   for (unsigned i = 0; i < segcount; ++i) {
           // Remember where this offset lives: the glyph id array position is
           // computed relative to it during the lookup simulation below.
   153     ranges[i].id_range_offset_offset = subtable.offset();
   154     if (!subtable.ReadU16(&ranges[i].id_range_offset)) {
   155       return OTS_FAILURE_MSG("Failed to read segment range offset %d", i);
   156     }
           // The offset addresses 16-bit entries, so an odd value is invalid.
   158     if (ranges[i].id_range_offset & 1) {
   159       // Some font generators seem to put 65535 on id_range_offset
   160       // for 0xFFFF-0xFFFF range.
   161       // (e.g., many fonts in http://www.princexml.com/fonts/)
   162       if (i == segcount - 1u) {
   163         OTS_WARNING("bad id_range_offset");
   164         ranges[i].id_range_offset = 0;
   165         // The id_range_offset value in the transcoded font will not change
   166         // since this table is not actually "transcoded" yet.
   167       } else {
   168         return OTS_FAILURE_MSG("Bad segment offset (%d)", ranges[i].id_range_offset);
   169       }
   170     }
   171   }
   173   // ranges must be ascending order, based on the end_code. Ranges may not
   174   // overlap.
         // NOTE(review): this loop starts at segment 1, so segment 0 is never used
         // for the OS/2 first_char_index/last_char_index fix-up further down --
         // confirm that skipping it is intended.
   175   for (unsigned i = 1; i < segcount; ++i) {
   176     if ((i == segcount - 1u) &&
   177         (ranges[i - 1].start_range == 0xffff) &&
   178         (ranges[i - 1].end_range == 0xffff) &&
   179         (ranges[i].start_range == 0xffff) &&
   180         (ranges[i].end_range == 0xffff)) {
   181       // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
   182       // We'll accept them as an exception.
   183       OTS_WARNING("multiple 0xffff terminators found");
   184       continue;
   185     }
   187     // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
   188     // unsorted table...
   189     if (ranges[i].end_range <= ranges[i - 1].end_range) {
   190       return OTS_FAILURE_MSG("Out of order end range (%d <= %d)", ranges[i].end_range, ranges[i-1].end_range);
   191     }
   192     if (ranges[i].start_range <= ranges[i - 1].end_range) {
   193       return OTS_FAILURE_MSG("out of order start range (%d <= %d)", ranges[i].start_range, ranges[i-1].end_range);
   194     }
   196     // On many fonts, the value of {first, last}_char_index are incorrect.
   197     // Fix them.
           // Values of 0xFFFF on either side are left alone; otherwise the OS/2
           // indices are widened so that they enclose this segment.
   198     if (file->os2->first_char_index != 0xFFFF &&
   199         ranges[i].start_range != 0xFFFF &&
   200         file->os2->first_char_index > ranges[i].start_range) {
   201       file->os2->first_char_index = ranges[i].start_range;
   202     }
   203     if (file->os2->last_char_index != 0xFFFF &&
   204         ranges[i].end_range != 0xFFFF &&
   205         file->os2->last_char_index < ranges[i].end_range) {
   206       file->os2->last_char_index = ranges[i].end_range;
   207     }
   208   }
   210   // The last range must end at 0xffff
   211   if (ranges[segcount - 1].start_range != 0xffff || ranges[segcount - 1].end_range != 0xffff) {
   212     return OTS_FAILURE_MSG("Final segment start and end must be 0xFFFF (0x%04X-0x%04X)",
   213                            ranges[segcount - 1].start_range, ranges[segcount - 1].end_range);
   214   }
   216   // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
   217   // each code-point defined in the table and make sure that they are all valid
   218   // glyphs and that we don't access anything out-of-bounds.
   219   for (unsigned i = 0; i < segcount; ++i) {
           // cp is wider than 16 bits so that the loop terminates when end_range
           // is 0xFFFF.
   220     for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
   221       const uint16_t code_point = cp;
   222       if (ranges[i].id_range_offset == 0) {
   223         // this is explicitly allowed to overflow in the spec
   224         const uint16_t glyph = code_point + ranges[i].id_delta;
   225         if (glyph >= num_glyphs) {
   226           return OTS_FAILURE_MSG("Range glyph reference too high (%d > %d)", glyph, num_glyphs - 1);
   227         }
   228       } else {
   229         const uint16_t range_delta = code_point - ranges[i].start_range;
   230         // this might seem odd, but it's true. The offset is relative to the
   231         // location of the offset value itself.
   232         const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
   233                                          ranges[i].id_range_offset +
   234                                          range_delta * 2;
   235         // We need to be able to access a 16-bit value from this offset
   236         if (glyph_id_offset + 1 >= length) {
   237           return OTS_FAILURE_MSG("bad glyph id offset (%d > %ld)", glyph_id_offset, length);
   238         }
               // Read the glyph id straight from the raw bytes and convert it from
               // network (big-endian) to host byte order.
   239         uint16_t glyph;
   240         std::memcpy(&glyph, data + glyph_id_offset, 2);
   241         glyph = ntohs(glyph);
   242         if (glyph >= num_glyphs) {
   243           return OTS_FAILURE_MSG("Range glyph reference too high (%d > %d)", glyph, num_glyphs - 1);
   244         }
   245       }
   246     }
   247   }
   249   // We accept the table.
   250   // TODO(yusukes): transcode the subtable.
         // Only the pointer and length are stored, so |data| has to stay valid for
         // as long as file->cmap refers to it.
   251   if (platform == 3 && encoding == 0) {
   252     file->cmap->subtable_3_0_4_data = data;
   253     file->cmap->subtable_3_0_4_length = length;
   254   } else if (platform == 3 && encoding == 1) {
   255     file->cmap->subtable_3_1_4_data = data;
   256     file->cmap->subtable_3_1_4_length = length;
   257   } else if (platform == 0 && encoding == 3) {
   258     file->cmap->subtable_0_3_4_data = data;
   259     file->cmap->subtable_0_3_4_length = length;
   260   } else {
   261     return OTS_FAILURE_MSG("Unknown cmap subtable type (platform=%d, encoding=%d)", platform, encoding);
   262   }
   264   return true;
   265 }
   267 bool Parse31012(ots::OpenTypeFile *file,
   268                 const uint8_t *data, size_t length, uint16_t num_glyphs) {
         // Validates a format 12 cmap subtable and stores its groups in
         // file->cmap->subtable_3_10_12.
         //
         //   file        font being processed; file->cmap receives the groups.
         //   data,length the raw subtable bytes.
         //   num_glyphs  every glyph id a group maps to must be below this value.
         //
         // Returns true when the subtable is accepted; every rejection returns the
         // value of OTS_FAILURE_MSG (macro defined outside this file).
   269   ots::Buffer subtable(data, length);
   271   // Format 12 tables are simple. We parse these and fully serialise them
   272   // later.
         // Step over the 8 header bytes that precede the language field: the
         // 16-bit format, 2 further bytes and the 32-bit length.
   274   if (!subtable.Skip(8)) {
   275     return OTS_FAILURE_MSG("failed to skip the first 8 bytes of format 12 subtable");
   276   }
   277   uint32_t language = 0;
   278   if (!subtable.ReadU32(&language)) {
   279     return OTS_FAILURE_MSG("can't read format 12 subtable language");
   280   }
   281   if (language) {
   282     return OTS_FAILURE_MSG("format 12 subtable language should be zero (%d)", language);
   283   }
   285   uint32_t num_groups = 0;
   286   if (!subtable.ReadU32(&num_groups)) {
   287     return OTS_FAILURE_MSG("can't read number of format 12 subtable groups");
   288   }
   289   if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
   290     return OTS_FAILURE_MSG("bad format 12 subtable group count %d", num_groups);
   291   }
   293   std::vector<ots::OpenTypeCMAPSubtableRange> &groups
   294       = file->cmap->subtable_3_10_12;
   295   groups.resize(num_groups);
   297   for (unsigned i = 0; i < num_groups; ++i) {
   298     if (!subtable.ReadU32(&groups[i].start_range) ||
   299         !subtable.ReadU32(&groups[i].end_range) ||
   300         !subtable.ReadU32(&groups[i].start_glyph_id)) {
   301       return OTS_FAILURE_MSG("can't read format 12 subtable group");
   302     }
   304     if (groups[i].start_range > kUnicodeUpperLimit ||
   305         groups[i].end_range > kUnicodeUpperLimit ||
   306         groups[i].start_glyph_id > 0xFFFF) {
   307       return OTS_FAILURE_MSG("bad format 12 subtable group (startCharCode=0x%4X, endCharCode=0x%4X, startGlyphID=%d)",
   308                              groups[i].start_range, groups[i].end_range, groups[i].start_glyph_id);
   309     }
   311     // [0xD800, 0xDFFF] are surrogate code points.
           // A group may not start in, end in, or span across that block.
   312     if (groups[i].start_range >= 0xD800 &&
   313         groups[i].start_range <= 0xDFFF) {
   314       return OTS_FAILURE_MSG("format 12 subtable out of range group startCharCode (0x%4X)", groups[i].start_range);
   315     }
   316     if (groups[i].end_range >= 0xD800 &&
   317         groups[i].end_range <= 0xDFFF) {
   318       return OTS_FAILURE_MSG("format 12 subtable out of range group endCharCode (0x%4X)", groups[i].end_range);
   319     }
   320     if (groups[i].start_range < 0xD800 &&
   321         groups[i].end_range > 0xDFFF) {
   322       return OTS_FAILURE_MSG("bad format 12 subtable group startCharCode (0x%4X) or endCharCode (0x%4X)",
   323                              groups[i].start_range, groups[i].end_range);
   324     }
   326     // We assert that the glyph value is within range. Because of the range
   327     // limits, above, we don't need to worry about overflow.
   328     if (groups[i].end_range < groups[i].start_range) {
   329       return OTS_FAILURE_MSG("format 12 subtable group endCharCode before startCharCode (0x%4X < 0x%4X)",
   330                              groups[i].end_range, groups[i].start_range);
   331     }
           // The last character of the group maps to glyph
           // start_glyph_id + (end_range - start_range). Valid glyph ids are
           // 0 .. num_glyphs - 1, so a value equal to num_glyphs is already out of
           // range and must be rejected as well.
   332     if ((groups[i].end_range - groups[i].start_range) +
   333         groups[i].start_glyph_id >= num_glyphs) {
   334       return OTS_FAILURE_MSG("bad format 12 subtable group startGlyphID (%d)", groups[i].start_glyph_id);
   335     }
   336   }
   338   // the groups must be sorted by start code and may not overlap
   339   for (unsigned i = 1; i < num_groups; ++i) {
   340     if (groups[i].start_range <= groups[i - 1].start_range) {
   341       return OTS_FAILURE_MSG("out of order format 12 subtable group (startCharCode=0x%4X <= startCharCode=0x%4X of previous group)",
   342                              groups[i].start_range, groups[i-1].start_range);
   343     }
   344     if (groups[i].start_range <= groups[i - 1].end_range) {
   345       return OTS_FAILURE_MSG("overlapping format 12 subtable groups (startCharCode=0x%4X <= endCharCode=0x%4X of previous group)",
   346                              groups[i].start_range, groups[i-1].end_range);
   347     }
   348   }
   350   return true;
   351 }
   353 bool Parse31013(ots::OpenTypeFile *file,
   354                 const uint8_t *data, size_t length, uint16_t num_glyphs) {
         // Validates a format 13 cmap subtable and stores its groups in
         // file->cmap->subtable_3_10_13.
         //
         //   file        font being processed; file->cmap receives the groups.
         //   data,length the raw subtable bytes.
         //   num_glyphs  each group's glyph id must be below this value.
         //
         // Returns true when the subtable is accepted; every rejection returns the
         // value of OTS_FAILURE_MSG (macro defined outside this file).
   355   ots::Buffer subtable(data, length);
   357   // Format 13 tables are simple. We parse these and fully serialise them
   358   // later.
         // Step over the 8 header bytes that precede the language field: the
         // 16-bit format, 2 further bytes and the 32-bit length.
   360   if (!subtable.Skip(8)) {
   361     return OTS_FAILURE_MSG("Bad cmap subtable length");
   362   }
         // The language field of format 12/13 subtables is 32 bits wide (it is read
         // with ReadU32 in ots_cmap_parse and in Parse31012). Reading only 16 bits
         // here would leave the cursor two bytes short, so the group count and
         // every group after it would be decoded from the wrong position.
   363   uint32_t language = 0;
   364   if (!subtable.ReadU32(&language)) {
   365     return OTS_FAILURE_MSG("Can't read cmap subtable language");
   366   }
   367   if (language) {
   368     return OTS_FAILURE_MSG("Cmap subtable language should be zero but is %d", language);
   369   }
   371   uint32_t num_groups = 0;
   372   if (!subtable.ReadU32(&num_groups)) {
   373     return OTS_FAILURE_MSG("Can't read number of groups in a cmap subtable");
   374   }
   376   // We limit the number of groups in the same way as in 3.10.12 tables: at
   377   // least one group and at most kMaxCMAPGroups.
   378   if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
   379     return OTS_FAILURE_MSG("Bad number of groups (%d) in a cmap subtable", num_groups);
   380   }
   382   std::vector<ots::OpenTypeCMAPSubtableRange> &groups
   383       = file->cmap->subtable_3_10_13;
   384   groups.resize(num_groups);
   386   for (unsigned i = 0; i < num_groups; ++i) {
   387     if (!subtable.ReadU32(&groups[i].start_range) ||
   388         !subtable.ReadU32(&groups[i].end_range) ||
   389         !subtable.ReadU32(&groups[i].start_glyph_id)) {
   390       return OTS_FAILURE_MSG("Can't read subrange structure in a cmap subtable");
   391     }
   393     // We conservatively limit all of the values to protect some parsers from
   394     // overflows
   395     if (groups[i].start_range > kUnicodeUpperLimit ||
   396         groups[i].end_range > kUnicodeUpperLimit ||
   397         groups[i].start_glyph_id > 0xFFFF) {
   398       return OTS_FAILURE_MSG("Bad subrange with start_range=%d, end_range=%d, start_glyph_id=%d", groups[i].start_range, groups[i].end_range, groups[i].start_glyph_id);
   399     }
           // Only the group's single glyph id is bounds-checked here; unlike
           // Parse31012 the size of the character range does not enter the check.
   401     if (groups[i].start_glyph_id >= num_glyphs) {
   402       return OTS_FAILURE_MSG("Subrange starting glyph id too high (%d > %d)", groups[i].start_glyph_id, num_glyphs);
   403     }
   404   }
   406   // the groups must be sorted by start code and may not overlap
   407   for (unsigned i = 1; i < num_groups; ++i) {
   408     if (groups[i].start_range <= groups[i - 1].start_range) {
   409       return OTS_FAILURE_MSG("Overlapping subrange starts (%d >= %d)", groups[i]. start_range, groups[i-1].start_range);
   410     }
   411     if (groups[i].start_range <= groups[i - 1].end_range) {
   412       return OTS_FAILURE_MSG("Overlapping subranges (%d <= %d)", groups[i].start_range, groups[i-1].end_range);
   413     }
   414   }
   416   return true;
   417 }
   419 bool Parse0514(ots::OpenTypeFile *file,
   420                const uint8_t *data, size_t length, uint16_t num_glyphs) {
   421   // Unicode Variation Selector table
         //
         // Validates a format 14 subtable and fills file->cmap->subtable_0_5_14 with
         // one record per variation selector, each holding its default UVS ranges
         // and/or non-default UVS mappings. On success the subtable length is
         // stored in file->cmap->subtable_0_5_14_length.
         //
         //   file        font being processed; file->cmap receives the records.
         //   data,length the raw subtable bytes.
         //   num_glyphs  not referenced anywhere in this function.
         //
         // Returns true when the subtable is accepted; every rejection returns the
         // value of OTS_FAILURE_MSG (macro defined outside this file).
   422   ots::Buffer subtable(data, length);
   424   // Format 14 tables are simple. We parse these and fully serialise them
   425   // later.
   427   // Skip format (USHORT) and length (ULONG)
   428   if (!subtable.Skip(6)) {
   429     return OTS_FAILURE_MSG("Can't read start of cmap subtable");
   430   }
   432   uint32_t num_records = 0;
   433   if (!subtable.ReadU32(&num_records)) {
   434     return OTS_FAILURE_MSG("Can't read number of records in cmap subtable");
   435   }
   436   if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) {
   437     return OTS_FAILURE_MSG("Bad number of records (%d) in cmap subtable", num_records);
   438   }
   440   std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records
   441       = file->cmap->subtable_0_5_14;
   442   records.resize(num_records);
         // First pass: read the fixed-size record array (24-bit selector plus two
         // 32-bit offsets per record) and validate each record on its own.
   444   for (unsigned i = 0; i < num_records; ++i) {
   445     if (!subtable.ReadU24(&records[i].var_selector) ||
   446         !subtable.ReadU32(&records[i].default_offset) ||
   447         !subtable.ReadU32(&records[i].non_default_offset)) {
   448       return OTS_FAILURE_MSG("Can't read record structure of record %d in cmap subtale", i);
   449     }
   450     // Checks the value of variation selector
           // It has to fall inside one of the three selector ranges defined by the
           // k*VSStart/k*VSEnd constants.
   451     if (!((records[i].var_selector >= kMongolianVSStart &&
   452            records[i].var_selector <= kMongolianVSEnd) ||
   453           (records[i].var_selector >= kVSStart &&
   454            records[i].var_selector <= kVSEnd) ||
   455           (records[i].var_selector >= kIVSStart &&
   456            records[i].var_selector <= kIVSEnd))) {
   457       return OTS_FAILURE_MSG("Bad record variation selector (%04X) in record %i", records[i].var_selector, i);
   458     }
           // Selectors must be strictly increasing from record to record.
   459     if (i > 0 &&
   460         records[i-1].var_selector >= records[i].var_selector) {
   461       return OTS_FAILURE_MSG("Out of order variation selector (%04X >= %04X) in record %d", records[i-1].var_selector, records[i].var_selector, i);
   462     }
   464     // Checks offsets
           // An offset of 0 means "table not present"; at least one of the two must
           // be present, and a present offset must lie inside the subtable.
   465     if (!records[i].default_offset && !records[i].non_default_offset) {
   466       return OTS_FAILURE_MSG("No default aoffset in variation selector record %d", i);
   467     }
   468     if (records[i].default_offset &&
   469         records[i].default_offset >= length) {
   470       return OTS_FAILURE_MSG("Default offset too high (%d >= %ld) in record %d", records[i].default_offset, length, i);
   471     }
   472     if (records[i].non_default_offset &&
   473         records[i].non_default_offset >= length) {
   474       return OTS_FAILURE_MSG("Non default offset too high (%d >= %ld) in record %d", records[i].non_default_offset, length, i);
   475     }
   476   }
         // Second pass: follow each record's offsets and parse the tables they
         // point to. The read cursor is repositioned for every table.
   478   for (unsigned i = 0; i < num_records; ++i) {
   479     // Checks default UVS table
   480     if (records[i].default_offset) {
   481       subtable.set_offset(records[i].default_offset);
   482       uint32_t num_ranges = 0;
   483       if (!subtable.ReadU32(&num_ranges)) {
   484         return OTS_FAILURE_MSG("Can't read number of ranges in record %d", i);
   485       }
             // A count of zero is rejected here too, with the same message.
   486       if (!num_ranges || num_ranges > kMaxCMAPGroups) {
   487         return OTS_FAILURE_MSG("number of ranges too high (%d > %d) in record %d", num_ranges, kMaxCMAPGroups, i);
   488       }
   490       uint32_t last_unicode_value = 0;
   491       std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges
   492           = records[i].ranges;
   493       ranges.resize(num_ranges);
   495       for (unsigned j = 0; j < num_ranges; ++j) {
   496         if (!subtable.ReadU24(&ranges[j].unicode_value) ||
   497             !subtable.ReadU8(&ranges[j].additional_count)) {
   498           return OTS_FAILURE_MSG("Can't read range info in variation selector record %d", i);
   499         }
               // check_value is the last code point covered by this range. The range
               // must start at a non-zero code point within Unicode, end within 24
               // bits, and start after the end of the previous range.
   500         const uint32_t check_value =
   501             ranges[j].unicode_value + ranges[j].additional_count;
   502         if (ranges[j].unicode_value == 0 ||
   503             ranges[j].unicode_value > kUnicodeUpperLimit ||
   504             check_value > kUVSUpperLimit ||
   505             (last_unicode_value &&
   506              ranges[j].unicode_value <= last_unicode_value)) {
   507           return OTS_FAILURE_MSG("Bad Unicode value *%04X) in variation selector range %d record %d", ranges[j].unicode_value, j, i);
   508         }
   509         last_unicode_value = check_value;
   510       }
   511     }
   513     // Checks non default UVS table
   514     if (records[i].non_default_offset) {
   515       subtable.set_offset(records[i].non_default_offset);
   516       uint32_t num_mappings = 0;
   517       if (!subtable.ReadU32(&num_mappings)) {
   518         return OTS_FAILURE_MSG("Can't read number of mappings in variation selector record %d", i);
   519       }
             // A count of zero is rejected here too, with the same message.
   520       if (!num_mappings || num_mappings > kMaxCMAPGroups) {
   521         return OTS_FAILURE_MSG("Number of mappings too high (%d) in variation selector record %d", num_mappings, i);
   522       }
   524       uint32_t last_unicode_value = 0;
   525       std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings
   526           = records[i].mappings;
   527       mappings.resize(num_mappings);
   529       for (unsigned j = 0; j < num_mappings; ++j) {
   530         if (!subtable.ReadU24(&mappings[j].unicode_value) ||
   531             !subtable.ReadU16(&mappings[j].glyph_id)) {
   532           return OTS_FAILURE_MSG("Can't read mapping %d in variation selector record %d", j, i);
   533         }
               // Glyph id and code point must be non-zero, the code point must be
               // within Unicode, and code points must be strictly increasing.
               // NOTE(review): glyph_id is not compared against num_glyphs here --
               // confirm whether that check happens elsewhere.
   534         if (mappings[j].glyph_id == 0 ||
   535             mappings[j].unicode_value == 0 ||
   536             mappings[j].unicode_value > kUnicodeUpperLimit ||
   537             (last_unicode_value &&
   538              mappings[j].unicode_value <= last_unicode_value)) {
   539           return OTS_FAILURE_MSG("Bad mapping (%04X -> %d) in mapping %d of variation selector %d", mappings[j].unicode_value, mappings[j].glyph_id, j, i);
   540         }
   541         last_unicode_value = mappings[j].unicode_value;
   542       }
   543     }
   544   }
         // The cursor is wherever the last table read above left it; that position
         // has to coincide exactly with the end of the subtable.
   546   if (subtable.offset() != length) {
   547     return OTS_FAILURE_MSG("Bad subtable offset (%ld != %ld)", subtable.offset(), length);
   548   }
   549   file->cmap->subtable_0_5_14_length = subtable.offset();
   550   return true;
   551 }
   553 bool Parse100(ots::OpenTypeFile *file, const uint8_t *data, size_t length) {
         // Mac Roman subtable: a 6-byte header (4 skipped bytes plus a 16-bit
         // language id) followed by kFormat0ArraySize one-byte glyph ids, which are
         // appended to file->cmap->subtable_1_0_0 in order.
         // Returns true once all glyph ids have been read.
   554   ots::Buffer reader(data, length);
   555   if (!reader.Skip(4)) {
   556     return OTS_FAILURE_MSG("Bad cmap subtable");
   557   }
   558   uint16_t language_id = 0;
   559   if (!reader.ReadU16(&language_id)) {
   560     return OTS_FAILURE_MSG("Can't read language in cmap subtable");
   561   }
         // A non-zero language id only produces a warning (simsun.ttf has one).
   562   if (language_id != 0) {
   563     OTS_WARNING("language id should be zero: %u", language_id);
   564   }
         // Make room for the whole glyph array up front, then copy it byte by byte.
   565   file->cmap->subtable_1_0_0.reserve(kFormat0ArraySize);
   566   size_t index = 0;
   567   while (index < kFormat0ArraySize) {
   568     uint8_t gid = 0;
   569     if (!reader.ReadU8(&gid)) {
   570       return OTS_FAILURE_MSG("Can't read glyph id at array[%ld] in cmap subtable", index);
   571     }
   572     file->cmap->subtable_1_0_0.push_back(gid);
   573     ++index;
   574   }
   575   return true;
   576 }
   581 }  // namespace
   583 namespace ots {
   585 bool ots_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) {
   586   Buffer table(data, length);
   587   file->cmap = new OpenTypeCMAP;
   589   uint16_t version = 0;
   590   uint16_t num_tables = 0;
   591   if (!table.ReadU16(&version) ||
   592       !table.ReadU16(&num_tables)) {
   593     return OTS_FAILURE_MSG("Can't read structure of cmap");
   594   }
   596   if (version != 0) {
   597     return OTS_FAILURE_MSG("Non zero cmap version (%d)", version);
   598   }
   599   if (!num_tables) {
   600     return OTS_FAILURE_MSG("No subtables in cmap!");
   601   }
   603   std::vector<CMAPSubtableHeader> subtable_headers;
   605   // read the subtable headers
   606   subtable_headers.reserve(num_tables);
   607   for (unsigned i = 0; i < num_tables; ++i) {
   608     CMAPSubtableHeader subt;
   610     if (!table.ReadU16(&subt.platform) ||
   611         !table.ReadU16(&subt.encoding) ||
   612         !table.ReadU32(&subt.offset)) {
   613       return OTS_FAILURE_MSG("Can't read subtable information cmap subtable %d", i);
   614     }
   616     subtable_headers.push_back(subt);
   617   }
   619   const size_t data_offset = table.offset();
   621   // make sure that all the offsets are valid.
   622   for (unsigned i = 0; i < num_tables; ++i) {
   623     if (subtable_headers[i].offset > 1024 * 1024 * 1024) {
   624       return OTS_FAILURE_MSG("Bad subtable offset in cmap subtable %d", i);
   625     }
   626     if (subtable_headers[i].offset < data_offset ||
   627         subtable_headers[i].offset >= length) {
   628       return OTS_FAILURE_MSG("Bad subtable offset (%d) in cmap subtable %d", subtable_headers[i].offset, i);
   629     }
   630   }
   632   // the format of the table is the first couple of bytes in the table. The
   633   // length of the table is stored in a format-specific way.
   634   for (unsigned i = 0; i < num_tables; ++i) {
   635     table.set_offset(subtable_headers[i].offset);
   636     if (!table.ReadU16(&subtable_headers[i].format)) {
   637       return OTS_FAILURE_MSG("Can't read cmap subtable header format %d", i);
   638     }
   640     uint16_t len = 0;
   641     uint16_t lang = 0;
   642     switch (subtable_headers[i].format) {
   643       case 0:
   644       case 4:
   645         if (!table.ReadU16(&len)) {
   646           return OTS_FAILURE_MSG("Can't read cmap subtable %d length", i);
   647         }
   648         if (!table.ReadU16(&lang)) {
   649           return OTS_FAILURE_MSG("Can't read cmap subtable %d language", i);
   650         }
   651         subtable_headers[i].length = len;
   652         subtable_headers[i].language = lang;
   653         break;
   654       case 12:
   655       case 13:
   656         if (!table.Skip(2)) {
   657           return OTS_FAILURE_MSG("Bad cmap subtable %d structure", i);
   658         }
   659         if (!table.ReadU32(&subtable_headers[i].length)) {
   660           return OTS_FAILURE_MSG("Can read cmap subtable %d length", i);
   661         }
   662         if (!table.ReadU32(&subtable_headers[i].language)) {
   663           return OTS_FAILURE_MSG("Can't read cmap subtable %d language", i);
   664         }
   665         break;
   666       case 14:
   667         if (!table.ReadU32(&subtable_headers[i].length)) {
   668           return OTS_FAILURE_MSG("Can't read cmap subtable %d length", i);
   669         }
   670         subtable_headers[i].language = 0;
   671         break;
   672       default:
   673         subtable_headers[i].length = 0;
   674         subtable_headers[i].language = 0;
   675         break;
   676     }
   677   }
   679   // check if the table is sorted first by platform ID, then by encoding ID.
   680   uint32_t last_id = 0;
   681   for (unsigned i = 0; i < num_tables; ++i) {
   682     uint32_t current_id
   683         = (subtable_headers[i].platform << 24)
   684         + (subtable_headers[i].encoding << 16)
   685         + subtable_headers[i].language;
   686     if ((i != 0) && (last_id >= current_id)) {
   687       return OTS_FAILURE_MSG("subtable %d with platform ID %d, encoding ID %d, language ID %d "
   688                              "following subtable with platform ID %d, encoding ID %d, language ID %d",
   689                              i,
   690                              (uint8_t)(current_id >> 24), (uint8_t)(current_id >> 16), (uint8_t)(current_id),
   691                              (uint8_t)(last_id >> 24), (uint8_t)(last_id >> 16), (uint8_t)(last_id));
   692     }
   693     last_id = current_id;
   694   }
   696   // Now, verify that all the lengths are sane
   697   for (unsigned i = 0; i < num_tables; ++i) {
   698     if (!subtable_headers[i].length) continue;
   699     if (subtable_headers[i].length > 1024 * 1024 * 1024) {
   700       return OTS_FAILURE_MSG("Bad cmap subtable %d length", i);
   701     }
   702     // We know that both the offset and length are < 1GB, so the following
   703     // addition doesn't overflow
   704     const uint32_t end_byte
   705         = subtable_headers[i].offset + subtable_headers[i].length;
   706     if (end_byte > length) {
   707       return OTS_FAILURE_MSG("Over long cmap subtable %d @ %d for %d", i, subtable_headers[i].offset, subtable_headers[i].length);
   708     }
   709   }
   711   // check that the cmap subtables are not overlapping.
   712   std::set<std::pair<uint32_t, uint32_t> > uniq_checker;
   713   std::vector<std::pair<uint32_t, uint8_t> > overlap_checker;
   714   for (unsigned i = 0; i < num_tables; ++i) {
   715     const uint32_t end_byte
   716         = subtable_headers[i].offset + subtable_headers[i].length;
   718     if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset,
   719                                             end_byte)).second) {
   720       // Sometimes Unicode table and MS table share exactly the same data.
   721       // We'll allow this.
   722       continue;
   723     }
   724     overlap_checker.push_back(
   725         std::make_pair(subtable_headers[i].offset,
   726                        static_cast<uint8_t>(1) /* start */));
   727     overlap_checker.push_back(
   728         std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */));
   729   }
   730   std::sort(overlap_checker.begin(), overlap_checker.end());
   731   int overlap_count = 0;
   732   for (unsigned i = 0; i < overlap_checker.size(); ++i) {
   733     overlap_count += (overlap_checker[i].second ? 1 : -1);
   734     if (overlap_count > 1) {
   735       return OTS_FAILURE_MSG("Excessive overlap count %d", overlap_count);
   736     }
   737   }
   739   // we grab the number of glyphs in the file from the maxp table to make sure
   740   // that the character map isn't referencing anything beyond this range.
   741   if (!file->maxp) {
   742     return OTS_FAILURE_MSG("No maxp table in font! Needed by cmap.");
   743   }
   744   const uint16_t num_glyphs = file->maxp->num_glyphs;
   746   // We only support a subset of the possible character map tables. Microsoft
   747   // 'strongly recommends' that everyone supports the Unicode BMP table with
   748   // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
   749   //   Platform ID   Encoding ID  Format
   750   //   0             0            4       (Unicode Default)
   751   //   0             3            4       (Unicode BMP)
   752   //   0             3            12      (Unicode UCS-4)
   753   //   0             5            14      (Unicode Variation Sequences)
   754   //   1             0            0       (Mac Roman)
   755   //   3             0            4       (MS Symbol)
   756   //   3             1            4       (MS Unicode BMP)
   757   //   3             10           12      (MS Unicode UCS-4)
   758   //   3             10           13      (MS UCS-4 Fallback mapping)
   759   //
   760   // Note:
   761   //  * 0-0-4 table is (usually) written as a 3-1-4 table. If 3-1-4 table
   762   //    also exists, the 0-0-4 table is ignored.
   763   //  * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
   764   //    Some fonts which include a 0-5-14 table seem to require a 0-3-4
   765   //    table. The 0-3-4 table will be written even if a 3-1-4 table also exists.
   766   //  * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
   767   //    exists, the 0-3-12 table is ignored.
   768   //
   770   for (unsigned i = 0; i < num_tables; ++i) {
   771     if (subtable_headers[i].platform == 0) {
   772       // Unicode platform
   774       if ((subtable_headers[i].encoding == 0) &&
   775           (subtable_headers[i].format == 4)) {
   776         // parse and output the 0-0-4 table as 3-1-4 table. Sometimes the 0-0-4
   777         // table actually points to MS symbol data and thus should be parsed as
   778         // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
   779         // recovered in ots_cmap_serialise().
   780         if (!ParseFormat4(file, 3, 1, data + subtable_headers[i].offset,
   781                       subtable_headers[i].length, num_glyphs)) {
   782           return OTS_FAILURE_MSG("Failed to parse format 4 cmap subtable %d", i);
   783         }
   784       } else if ((subtable_headers[i].encoding == 3) &&
   785                  (subtable_headers[i].format == 4)) {
   786         // parse and output the 0-3-4 table as 0-3-4 table.
   787         if (!ParseFormat4(file, 0, 3, data + subtable_headers[i].offset,
   788                       subtable_headers[i].length, num_glyphs)) {
   789           return OTS_FAILURE_MSG("Failed to parse format 4 cmap subtable %d", i);
   790         }
   791       } else if ((subtable_headers[i].encoding == 3) &&
   792                  (subtable_headers[i].format == 12)) {
   793         // parse and output the 0-3-12 table as 3-10-12 table.
   794         if (!Parse31012(file, data + subtable_headers[i].offset,
   795                         subtable_headers[i].length, num_glyphs)) {
   796           return OTS_FAILURE_MSG("Failed to parse format 12 cmap subtable %d", i);
   797         }
   798       } else if ((subtable_headers[i].encoding == 5) &&
   799                  (subtable_headers[i].format == 14)) {
   800         if (!Parse0514(file, data + subtable_headers[i].offset,
   801                        subtable_headers[i].length, num_glyphs)) {
   802           return OTS_FAILURE_MSG("Failed to parse format 14 cmap subtable %d", i);
   803         }
   804       }
   805     } else if (subtable_headers[i].platform == 1) {
   806       // Mac platform
   808       if ((subtable_headers[i].encoding == 0) &&
   809           (subtable_headers[i].format == 0)) {
   810         // parse and output the 1-0-0 table.
   811         if (!Parse100(file, data + subtable_headers[i].offset,
   812                       subtable_headers[i].length)) {
   813           return OTS_FAILURE();
   814         }
   815       }
   816     } else if (subtable_headers[i].platform == 3) {
   817       // MS platform
   819       switch (subtable_headers[i].encoding) {
   820         case 0:
   821         case 1:
   822           if (subtable_headers[i].format == 4) {
   823             // parse 3-0-4 or 3-1-4 table.
   824             if (!ParseFormat4(file, subtable_headers[i].platform,
   825                           subtable_headers[i].encoding,
   826                           data + subtable_headers[i].offset,
   827                           subtable_headers[i].length, num_glyphs)) {
   828               return OTS_FAILURE();
   829             }
   830           }
   831           break;
   832         case 10:
   833           if (subtable_headers[i].format == 12) {
   834             file->cmap->subtable_3_10_12.clear();
   835             if (!Parse31012(file, data + subtable_headers[i].offset,
   836                             subtable_headers[i].length, num_glyphs)) {
   837               return OTS_FAILURE();
   838             }
   839           } else if (subtable_headers[i].format == 13) {
   840             file->cmap->subtable_3_10_13.clear();
   841             if (!Parse31013(file, data + subtable_headers[i].offset,
   842                             subtable_headers[i].length, num_glyphs)) {
   843               return OTS_FAILURE();
   844             }
   845           }
   846           break;
   847       }
   848     }
   849   }
   851   return true;
   852 }
   854 bool ots_cmap_should_serialise(OpenTypeFile *file) {
   855   return file->cmap != NULL;
   856 }
   858 bool ots_cmap_serialise(OTSStream *out, OpenTypeFile *file) {
   859   const bool have_034 = file->cmap->subtable_0_3_4_data != NULL;
   860   const bool have_0514 = file->cmap->subtable_0_5_14.size() != 0;
   861   const bool have_100 = file->cmap->subtable_1_0_0.size() != 0;
   862   const bool have_304 = file->cmap->subtable_3_0_4_data != NULL;
   863   // MS Symbol and MS Unicode tables should not co-exist.
   864   // See the comment above in 0-0-4 parser.
   865   const bool have_314 = (!have_304) && file->cmap->subtable_3_1_4_data;
   866   const bool have_31012 = file->cmap->subtable_3_10_12.size() != 0;
   867   const bool have_31013 = file->cmap->subtable_3_10_13.size() != 0;
   868   const unsigned num_subtables = static_cast<unsigned>(have_034) +
   869                                  static_cast<unsigned>(have_0514) +
   870                                  static_cast<unsigned>(have_100) +
   871                                  static_cast<unsigned>(have_304) +
   872                                  static_cast<unsigned>(have_314) +
   873                                  static_cast<unsigned>(have_31012) +
   874                                  static_cast<unsigned>(have_31013);
   875   const off_t table_start = out->Tell();
   877   // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
   878   // (e.g., old fonts for Mac). We don't support them.
   879   if (!have_304 && !have_314 && !have_034) {
   880     return OTS_FAILURE();
   881   }
   883   if (!out->WriteU16(0) ||
   884       !out->WriteU16(num_subtables)) {
   885     return OTS_FAILURE();
   886   }
   888   const off_t record_offset = out->Tell();
   889   if (!out->Pad(num_subtables * 8)) {
   890     return OTS_FAILURE();
   891   }
   893   const off_t offset_034 = out->Tell();
   894   if (have_034) {
   895     if (!out->Write(file->cmap->subtable_0_3_4_data,
   896                     file->cmap->subtable_0_3_4_length)) {
   897       return OTS_FAILURE();
   898     }
   899   }
   901   const off_t offset_0514 = out->Tell();
   902   if (have_0514) {
   903     const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records
   904         = file->cmap->subtable_0_5_14;
   905     const unsigned num_records = records.size();
   906     if (!out->WriteU16(14) ||
   907         !out->WriteU32(file->cmap->subtable_0_5_14_length) ||
   908         !out->WriteU32(num_records)) {
   909       return OTS_FAILURE();
   910     }
   911     for (unsigned i = 0; i < num_records; ++i) {
   912       if (!out->WriteU24(records[i].var_selector) ||
   913           !out->WriteU32(records[i].default_offset) ||
   914           !out->WriteU32(records[i].non_default_offset)) {
   915         return OTS_FAILURE();
   916       }
   917     }
   918     for (unsigned i = 0; i < num_records; ++i) {
   919       if (records[i].default_offset) {
   920         const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges
   921             = records[i].ranges;
   922         const unsigned num_ranges = ranges.size();
   923         if (!out->Seek(records[i].default_offset + offset_0514) ||
   924             !out->WriteU32(num_ranges)) {
   925           return OTS_FAILURE();
   926         }
   927         for (unsigned j = 0; j < num_ranges; ++j) {
   928           if (!out->WriteU24(ranges[j].unicode_value) ||
   929               !out->WriteU8(ranges[j].additional_count)) {
   930             return OTS_FAILURE();
   931           }
   932         }
   933       }
   934       if (records[i].non_default_offset) {
   935         const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings
   936             = records[i].mappings;
   937         const unsigned num_mappings = mappings.size();
   938         if (!out->Seek(records[i].non_default_offset + offset_0514) ||
   939             !out->WriteU32(num_mappings)) {
   940           return OTS_FAILURE();
   941         }
   942         for (unsigned j = 0; j < num_mappings; ++j) {
   943           if (!out->WriteU24(mappings[j].unicode_value) ||
   944               !out->WriteU16(mappings[j].glyph_id)) {
   945             return OTS_FAILURE();
   946           }
   947         }
   948       }
   949     }
   950   }
   952   const off_t offset_100 = out->Tell();
   953   if (have_100) {
   954     if (!out->WriteU16(0) ||  // format
   955         !out->WriteU16(6 + kFormat0ArraySize) ||  // length
   956         !out->WriteU16(0)) {  // language
   957       return OTS_FAILURE();
   958     }
   959     if (!out->Write(&(file->cmap->subtable_1_0_0[0]), kFormat0ArraySize)) {
   960       return OTS_FAILURE();
   961     }
   962   }
   964   const off_t offset_304 = out->Tell();
   965   if (have_304) {
   966     if (!out->Write(file->cmap->subtable_3_0_4_data,
   967                     file->cmap->subtable_3_0_4_length)) {
   968       return OTS_FAILURE();
   969     }
   970   }
   972   const off_t offset_314 = out->Tell();
   973   if (have_314) {
   974     if (!out->Write(file->cmap->subtable_3_1_4_data,
   975                     file->cmap->subtable_3_1_4_length)) {
   976       return OTS_FAILURE();
   977     }
   978   }
   980   const off_t offset_31012 = out->Tell();
   981   if (have_31012) {
   982     std::vector<OpenTypeCMAPSubtableRange> &groups
   983         = file->cmap->subtable_3_10_12;
   984     const unsigned num_groups = groups.size();
   985     if (!out->WriteU16(12) ||
   986         !out->WriteU16(0) ||
   987         !out->WriteU32(num_groups * 12 + 16) ||
   988         !out->WriteU32(0) ||
   989         !out->WriteU32(num_groups)) {
   990       return OTS_FAILURE();
   991     }
   993     for (unsigned i = 0; i < num_groups; ++i) {
   994       if (!out->WriteU32(groups[i].start_range) ||
   995           !out->WriteU32(groups[i].end_range) ||
   996           !out->WriteU32(groups[i].start_glyph_id)) {
   997         return OTS_FAILURE();
   998       }
   999     }
  1002   const off_t offset_31013 = out->Tell();
  1003   if (have_31013) {
  1004     std::vector<OpenTypeCMAPSubtableRange> &groups
  1005         = file->cmap->subtable_3_10_13;
  1006     const unsigned num_groups = groups.size();
  1007     if (!out->WriteU16(13) ||
  1008         !out->WriteU16(0) ||
  1009         !out->WriteU32(num_groups * 12 + 14) ||
  1010         !out->WriteU32(0) ||
  1011         !out->WriteU32(num_groups)) {
  1012       return OTS_FAILURE();
  1015     for (unsigned i = 0; i < num_groups; ++i) {
  1016       if (!out->WriteU32(groups[i].start_range) ||
  1017           !out->WriteU32(groups[i].end_range) ||
  1018           !out->WriteU32(groups[i].start_glyph_id)) {
  1019         return OTS_FAILURE();
  1024   const off_t table_end = out->Tell();
  1025   // We might have hanging bytes from the above's checksum which the OTSStream
  1026   // then merges into the table of offsets.
  1027   OTSStream::ChecksumState saved_checksum = out->SaveChecksumState();
  1028   out->ResetChecksum();
  1030   // Now seek back and write the table of offsets
  1031   if (!out->Seek(record_offset)) {
  1032     return OTS_FAILURE();
  1035   if (have_034) {
  1036     if (!out->WriteU16(0) ||
  1037         !out->WriteU16(3) ||
  1038         !out->WriteU32(offset_034 - table_start)) {
  1039       return OTS_FAILURE();
  1043   if (have_0514) {
  1044     if (!out->WriteU16(0) ||
  1045         !out->WriteU16(5) ||
  1046         !out->WriteU32(offset_0514 - table_start)) {
  1047       return OTS_FAILURE();
  1051   if (have_100) {
  1052     if (!out->WriteU16(1) ||
  1053         !out->WriteU16(0) ||
  1054         !out->WriteU32(offset_100 - table_start)) {
  1055       return OTS_FAILURE();
  1059   if (have_304) {
  1060     if (!out->WriteU16(3) ||
  1061         !out->WriteU16(0) ||
  1062         !out->WriteU32(offset_304 - table_start)) {
  1063       return OTS_FAILURE();
  1067   if (have_314) {
  1068     if (!out->WriteU16(3) ||
  1069         !out->WriteU16(1) ||
  1070         !out->WriteU32(offset_314 - table_start)) {
  1071       return OTS_FAILURE();
  1075   if (have_31012) {
  1076     if (!out->WriteU16(3) ||
  1077         !out->WriteU16(10) ||
  1078         !out->WriteU32(offset_31012 - table_start)) {
  1079       return OTS_FAILURE();
  1083   if (have_31013) {
  1084     if (!out->WriteU16(3) ||
  1085         !out->WriteU16(10) ||
  1086         !out->WriteU32(offset_31013 - table_start)) {
  1087       return OTS_FAILURE();
  1091   if (!out->Seek(table_end)) {
  1092     return OTS_FAILURE();
  1094   out->RestoreChecksum(saved_checksum);
  1096   return true;
  1099 void ots_cmap_free(OpenTypeFile *file) {
  1100   delete file->cmap;
  1103 }  // namespace ots

mercurial