Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | #!/usr/bin/python |
michael@0 | 2 | |
michael@0 | 3 | import sys |
michael@0 | 4 | |
# The script takes exactly three input files; anything else gets the usage
# line on stderr and a non-zero exit status.
if len (sys.argv) != 4:
    sys.stderr.write ("usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt\n")
    sys.exit (1)
michael@0 | 8 | |
# Open the three input files named on the command line (per the usage
# message: IndicSyllabicCategory.txt, IndicMatraCategory.txt, Blocks.txt).
# open() is used instead of the Python-2-only file() builtin, and the
# handles are closed once parsing is finished instead of being leaked.
files = [open (x) for x in sys.argv[1:]]

try:
    # The first two lines of every file are kept verbatim; they are echoed
    # into the banner of the generated table.
    headers = [[f.readline () for i in range (2)] for f in files]

    # blocks: block name -> (first, last) code point, from the third file.
    # data[i]: code point -> property value found in input file i.
    # values[i]: property value -> number of code points carrying it.
    blocks = {}
    data = [{} for f in files]
    values = [{} for f in files]
    for i, f in enumerate (files):
        for line in f:

            # Drop a trailing '#' comment, if any.
            j = line.find ('#')
            if j >= 0:
                line = line[:j]

            # Records look like "XXXX[..YYYY] ; Value"; a line without any
            # ';' (blank or comment-only) carries no record.
            fields = [x.strip () for x in line.split (';')]
            if len (fields) == 1:
                continue

            # The first field is one hex code point or a "start..end" range.
            uu = fields[0].split ('..')
            start = int (uu[0], 16)
            if len (uu) == 1:
                end = start
            else:
                end = int (uu[1], 16)

            t = fields[1]

            for u in range (start, end + 1):
                data[i][u] = t
                # Tallied per code point; the generated #define comments
                # report this number as "chars".
                values[i][t] = values[i].get (t, 0) + 1

            # Index 2 is the third input file: remember each block's extent.
            if i == 2:
                blocks[t] = (start, end)
finally:
    for f in files:
        f.close ()
michael@0 | 42 | |
# Collapse the three per-file maps into a single one:
#   code point -> [value from file 0, value from file 1, value from file 2]
# Slots with no record keep the matching entry of `defaults`.
defaults = ('Other', 'Not_Applicable', 'No_Block')

# Each default value is counted once so that it is present in `values`.
for idx, fallback in enumerate (defaults):
    values[idx][fallback] = values[idx].get (fallback, 0) + 1

merged = {}
for idx, mapping in enumerate (data):
    for cp, val in mapping.items ():
        if cp not in merged:
            # A code point known only to the third file is irrelevant:
            # it has no category from either of the first two files.
            if idx == 2:
                continue
            merged[cp] = list (defaults)
        merged[cp][idx] = val
data = merged
del merged
num = len (data)

# Pull the outliers NO-BREAK SPACE and DOTTED CIRCLE out of the table data;
# they are kept aside in `singles`.
singles = {}
for u in (0x00A0, 0x25CC):
    singles[u] = data.pop (u)
michael@0 | 64 | |
# Banner of the generated file: how it was produced, the header lines of
# the input files it was produced from, and the private include.
banner_lines = [
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " * ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt",
    " *",
    " * on files with these headers:",
    " *",
]
banner_lines.extend (" * %s" % (l.strip()) for h in headers for l in h)
banner_lines.append (" */")
banner_lines.append ('#include "hb-ot-shape-complex-indic-private.hh"')
sys.stdout.write ("\n".join (banner_lines) + "\n")
del banner_lines
michael@0 | 79 | |
michael@0 | 80 | |
# Short aliases for property values.  The entries below are fixed by hand;
# every other value gets an alias derived from its capital letters.
short = [{
    "Bindu": 'Bi',
    "Visarga": 'Vs',
    "Vowel": 'Vo',
    "Vowel_Dependent": 'M',
    "Other": 'x',
},{
    "Not_Applicable": 'x',
}]

# Aliases already taken, per property; seeded with the hand-picked ones so
# that a derived alias cannot silently collide with them.
all_shorts = [list (short[i].values ()) for i in range (2)]

what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]
for i in range (2):
    for v in sorted (values[i]):
        if v in short[i]:
            s = short[i][v]
        else:
            # Derived alias: the upper-case ASCII letters of the value name
            # once every "_And_" has been collapsed to "_".
            s = ''.join (c for c in v.replace ('_And_', '_') if 'A' <= c <= 'Z')
            if s in all_shorts[i]:
                raise Exception ("Duplicate short value alias", v, s)
            all_shorts[i].append (s)
            short[i][v] = s
        # Padding in front of the trailing comment; the count shrinks as
        # the names get longer (floor division, never a float).
        pad = ' ' * ((48-1 - len (what[i]) - 1 - len (v)) // 8)
        print ("#define %s_%s %s_%s %s/* %3d chars; %s */" % (what_short[i], s, what[i], v.upper (), pad, values[i][v], v))

print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)")
michael@0 | 122 | |
michael@0 | 123 | |
michael@0 | 124 | |
# Running totals over every printed block: table slots emitted, and slots
# that correspond to a code point actually present in the data.
total = 0
used = 0
def print_block (block, start, end, data):
    """Write the table entries for the code points start..end (inclusive).

    block  -- label printed in the comment that precedes the entries.
    start  -- first code point of the block.
    end    -- last code point of the block (inclusive).
    data   -- code point -> sequence whose items 0 and 1 are the two
              category values; code points missing from it are written
              with the module-level `defaults`.

    Each entry is written as "_(S,M)," using the aliases in the
    module-level `short` tables.  The module-level counters `total` and
    `used` are advanced by the number of slots written and by the number
    of those slots that were present in `data`.
    """
    global total, used

    # Entries are written without a trailing newline, so break the line
    # (and leave a blank one) before the block comment; otherwise the
    # comment would be glued to the last entry of whatever was written
    # before this block.
    sys.stdout.write ("\n\n")
    print (" /* %s (%04X..%04X) */" % (block, start, end))
    num = 0
    for u in range (start, end+1):
        if u % 8 == 0:
            # New row of eight entries: start a fresh line and label it
            # with the code point of its first slot.
            sys.stdout.write ("\n")
            sys.stdout.write (" /* %04X */" % u)
        if u in data:
            num += 1
        d = data.get (u, defaults)
        sys.stdout.write ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))

    total += end - start + 1
    used += num
michael@0 | 144 | |
# Emit the table itself.  Code points are visited in ascending order and
# printed one block at a time.  Blocks separated by a small gap are joined
# by a FILLER run; a larger gap closes the current contiguous range and
# opens a new one, whose position inside the array is published as an
# indic_offset_0x.... macro.
uu = sorted (data)

last = -1       # last code point already covered by the table
num = 0
offset = 0      # number of table slots emitted in all closed ranges
starts = []     # first code point of each contiguous range
ends = []       # one past the last code point of each contiguous range
print ("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {")
for u in uu:
    # Already emitted as part of an earlier block.
    if u <= last:
        continue
    block = data[u][2]
    (start, end) = blocks[block]

    if start != last + 1:
        if start - last <= 33:
            # Small gap: pad with filler so the range stays contiguous.
            print_block ("FILLER", last+1, start-1, data)
            last = start-1
        else:
            # Large gap: close the previous range (if there is one) ...
            if last >= 0:
                ends.append (last + 1)
                offset += ends[-1] - starts[-1]
            # ... and open a new one.  print_block leaves its last row
            # unterminated, and a preprocessor directive has to start its
            # own line, so break the line before emitting the macro.
            sys.stdout.write ("\n\n")
            print ("#define indic_offset_0x%04x %d" % (start, offset))
            starts.append (start)

    print_block (block, start, end, data)
    last = end

# Close the final range.
ends.append (last + 1)
offset += ends[-1] - starts[-1]

# Same as above: terminate the last row before the directive.
sys.stdout.write ("\n\n")
print ("#define indic_offset_total %d" % offset)

# Share of emitted slots that hold real data, in percent.
occupancy = used * 100. / total
print ("}; /* Table occupancy: %d%% */" % occupancy)
michael@0 | 183 | |
# Emit the C lookup function: one range test per contiguous table range,
# then the code points that were kept out of the table, then the default.
print ("INDIC_TABLE_ELEMENT_TYPE")
print ("hb_indic_get_categories (hb_codepoint_t u)")
print ("{")
for (start,end) in zip (starts, ends):
    offset_macro = "indic_offset_0x%04x" % start
    # `ends` stores exclusive bounds (last code point + 1) while the
    # emitted test is inclusive, so the upper limit is end - 1.  Using
    # `end` itself would send the code point just past the range into the
    # table: the first slot of the next range, or one slot past the end of
    # the array for the final range.
    print (" if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end - 1, start, offset_macro))
# Sorted so the generated text does not depend on dict iteration order.
for u,d in sorted (singles.items ()):
    print (" if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
print (" return _(x,x);")
print ("}")
michael@0 | 194 | |
# Retire every helper macro defined for the table, then close the
# generated section.
undef_lines = ["#undef _"]
undef_lines.extend ("#undef %s_%s" % (what_short[i], short[i][v])
                    for i in range (2)
                    for v in sorted (values[i]))
undef_lines.append ("/* == End of generated table == */")
sys.stdout.write ("\n".join (undef_lines) + "\n")
del undef_lines

# Maintain at least 30% occupancy in the table.
if occupancy < 30:
    raise Exception ("Table too sparse, please investigate: ", occupancy)