#!/usr/bin/python
"""Generate the C lookup table mapping code points to Indic categories.

Usage:

    ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt

The three inputs are semicolon-separated Unicode data files.  Each data
line has the form ``XXXX ; Value`` or ``XXXX..YYYY ; Value``; everything
after a ``#`` is a comment.  The generated C source is written to stdout.

The script runs unchanged on Python 2 and Python 3.

NOTE(review): the whitespace inside the emitted string literals is kept
exactly as found in the copy of the script this was derived from; that copy
had lost its indentation, so runs of spaces or tabs inside literals may have
been collapsed as well -- confirm against a pristine copy before relying on
the exact column layout of the output.
"""

import sys

USAGE = ("usage: ./gen-indic-table.py IndicSyllabicCategory.txt "
         "IndicMatraCategory.txt Blocks.txt")

# Per-file fallback values: (syllabic category, matra category, block).
defaults = ('Other', 'Not_Applicable', 'No_Block')

# Hand-picked short aliases for [syllabic, matra] values.  Values missing
# here get an alias derived from their capital letters; main() adds those
# to these dicts while it runs, so main() is meant to run once per process.
short = [{
    "Bindu": 'Bi',
    "Visarga": 'Vs',
    "Vowel": 'Vo',
    "Vowel_Dependent": 'M',
    "Other": 'x',
}, {
    "Not_Applicable": 'x',
}]

what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]

# Running totals maintained by print_block(): number of table slots emitted
# and how many of them correspond to code points present in the data.
total = 0
used = 0


def _parse_files(paths):
    """Read the input files.

    Returns ``(headers, blocks, data, values)`` where, for file index i:
    ``headers[i]`` is the list of its first two lines, ``data[i]`` maps a
    code point to its value, and ``values[i]`` maps a value to the number
    of code points carrying it.  ``blocks`` maps each value of the third
    file to the ``(start, end)`` of the range it was declared with.
    """
    headers = []
    blocks = {}
    data = [{} for _ in paths]
    values = [{} for _ in paths]
    for i, path in enumerate(paths):
        # `with` guarantees the file is closed even if parsing fails.
        with open(path) as f:
            headers.append([f.readline() for _ in range(2)])
            for line in f:
                hash_pos = line.find('#')
                if hash_pos >= 0:
                    line = line[:hash_pos]

                fields = [x.strip() for x in line.split(';')]
                if len(fields) == 1:
                    # Blank or comment-only line.
                    continue

                bounds = fields[0].split('..')
                start = int(bounds[0], 16)
                end = start if len(bounds) == 1 else int(bounds[1], 16)
                t = fields[1]

                for u in range(start, end + 1):
                    data[i][u] = t
                # Counted per code point, matching the "chars" label that
                # is printed next to this number.
                values[i][t] = values[i].get(t, 0) + end - start + 1

                if i == 2:
                    blocks[t] = (start, end)
    return headers, blocks, data, values


def _merge(data):
    """Merge the per-file dicts into ``{code point: [syllabic, matra, block]}``.

    Block names (file index 2) are attached only to code points that one
    of the first two files already mentions.
    """
    combined = {}
    for i, d in enumerate(data):
        for u, v in d.items():
            if u not in combined:
                if i == 2:
                    continue
                combined[u] = list(defaults)
            combined[u][i] = v
    return combined


def _print_preamble(headers):
    """Print the opening comment (with the input file headers) and #include."""
    print("/* == Start of generated table == */")
    print("/*")
    print(" * The following table is generated by running:")
    print(" *")
    print(" * ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt")
    print(" *")
    print(" * on files with these headers:")
    print(" *")
    for h in headers:
        for l in h:
            print(" * %s" % (l.strip()))
    print(" */")
    print("")
    print('#include "hb-ot-shape-complex-indic-private.hh"')
    print("")


def _print_defines(values):
    """Print one ``#define`` per category value plus the ``_(S,M)`` macro.

    Values without a hand-picked alias get one made of their capital
    letters (ignoring ``_And_``); the alias is recorded in ``short``.
    Raises Exception if a derived alias collides with an existing one.
    """
    all_shorts = [list(short[i].values()) for i in range(2)]
    for i in range(2):
        print("")
        for v in sorted(values[i]):
            if v in short[i]:
                s = short[i][v]
            else:
                v_no_and = v.replace('_And_', '_')
                s = ''.join([c for c in v_no_and if 'A' <= c <= 'Z'])
                if s in all_shorts[i]:
                    raise Exception("Duplicate short value alias", v, s)
                all_shorts[i].append(s)
                short[i][v] = s
            # `//` keeps the floor division the Python 2 `/` performed here.
            padding = ' ' * ((48 - 1 - len(what[i]) - 1 - len(v)) // 8)
            print("#define %s_%s %s_%s %s/* %3d chars; %s */" %
                  (what_short[i], s, what[i], v.upper(), padding,
                   values[i][v], v))
    print("")
    print("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)")
    print("")
    print("")


def print_block(block, start, end, data):
    """Print the table entries for code points ``start..end`` inclusive.

    ``block`` is only used as the label in the heading comment.  Code
    points missing from ``data`` are emitted with the default categories.
    Updates the module-level ``total`` and ``used`` counters.
    """
    global total, used
    print("")
    print("")
    print(" /* %s (%04X..%04X) */" % (block, start, end))
    num = 0
    for u in range(start, end + 1):
        if u % 8 == 0:
            # Start a new row of eight entries, labelled with its address.
            print("")
            sys.stdout.write(" /* %04X */" % u)
        if u in data:
            num += 1
        d = data.get(u, defaults)
        sys.stdout.write(
            "%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))

    total += end - start + 1
    used += num


def _print_table(data, blocks):
    """Print the ``indic_table`` entries block by block.

    Gaps of at most 32 code points between consecutive blocks are filled
    with default entries; larger gaps start a new contiguous range with
    its own ``indic_offset_0x....`` define.

    Returns ``(starts, ends, offset)``: the first code point of each
    range, the matching exclusive upper bounds, and the total number of
    table entries.
    """
    last = -1
    offset = 0
    starts = []
    ends = []
    print("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {")
    for u in sorted(data):
        if u <= last:
            # Already covered by a block that has been printed.
            continue
        block = data[u][2]
        (start, end) = blocks[block]

        if start != last + 1:
            if start - last <= 33:
                print_block("FILLER", last + 1, start - 1, data)
                last = start - 1
            else:
                if last >= 0:
                    ends.append(last + 1)
                    offset += ends[-1] - starts[-1]
                print("")
                print("")
                print("#define indic_offset_0x%04x %d" % (start, offset))
                starts.append(start)

        print_block(block, start, end, data)
        last = end
    ends.append(last + 1)
    offset += ends[-1] - starts[-1]
    return starts, ends, offset


def _print_lookup_function(starts, ends, singles):
    """Print the C function ``hb_indic_get_categories``."""
    print("INDIC_TABLE_ELEMENT_TYPE")
    print("hb_indic_get_categories (hb_codepoint_t u)")
    print("{")
    for (start, end) in zip(starts, ends):
        # `end` is exclusive (one past the last code point of the range),
        # so the inclusive upper bound of the check is end - 1; comparing
        # against `end` itself would index one entry past the range.
        print(" if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" %
              (start, end - 1, start, "indic_offset_0x%04x" % start))
    for u, d in sorted(singles.items()):
        print(" if (unlikely (u == 0x%04X)) return _(%s,%s);" %
              (u, short[0][d[0]], short[1][d[1]]))
    print(" return _(x,x);")
    print("}")


def _print_undefs(values):
    """Print the ``#undef`` lines for every macro defined earlier."""
    print("#undef _")
    for i in range(2):
        print("")
        for v in sorted(values[i]):
            print("#undef %s_%s" % (what_short[i], short[i][v]))


def main(argv):
    """Run the generator on ``argv`` (program name plus three file paths).

    Prints a usage message to stderr and exits with status 1 when the
    argument count is wrong.  Raises Exception when fewer than 30% of the
    emitted table slots correspond to code points present in the data.
    """
    if len(argv) != 4:
        sys.stderr.write(USAGE + "\n")
        sys.exit(1)

    headers, blocks, data, values = _parse_files(argv[1:])

    # Make sure every default value is known even if no line mentions it.
    for i, v in enumerate(defaults):
        values[i][v] = values[i].get(v, 0) + 1
    data = _merge(data)

    # Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
    singles = {}
    for u in [0x00A0, 0x25CC]:
        singles[u] = data.pop(u)

    _print_preamble(headers)
    _print_defines(values)
    starts, ends, offset = _print_table(data, blocks)
    print("")
    print("")
    print("#define indic_offset_total %d" % offset)
    print("")
    occupancy = used * 100. / total
    print("}; /* Table occupancy: %d%% */" % occupancy)
    print("")
    _print_lookup_function(starts, ends, singles)
    print("")
    _print_undefs(values)
    print("")
    print("/* == End of generated table == */")

    # Maintain at least 30% occupancy in the table
    if occupancy < 30:
        raise Exception("Table too sparse, please investigate: ", occupancy)


if __name__ == "__main__":
    main(sys.argv)