gfx/harfbuzz/src/gen-indic-table.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/harfbuzz/src/gen-indic-table.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,208 @@
     1.4 +#!/usr/bin/python
     1.5 +
     1.6 +import sys
     1.7 +
     1.8 +if len (sys.argv) != 4:
     1.9 +	print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
    1.10 +	sys.exit (1)
    1.11 +
    1.12 +files = [file (x) for x in sys.argv[1:]]
    1.13 +
    1.14 +headers = [[f.readline () for i in range (2)] for f in files]
    1.15 +
    1.16 +blocks = {}
    1.17 +data = [{} for f in files]
    1.18 +values = [{} for f in files]
    1.19 +for i, f in enumerate (files):
    1.20 +	for line in f:
    1.21 +
    1.22 +		j = line.find ('#')
    1.23 +		if j >= 0:
    1.24 +			line = line[:j]
    1.25 +
    1.26 +		fields = [x.strip () for x in line.split (';')]
    1.27 +		if len (fields) == 1:
    1.28 +			continue
    1.29 +
    1.30 +		uu = fields[0].split ('..')
    1.31 +		start = int (uu[0], 16)
    1.32 +		if len (uu) == 1:
    1.33 +			end = start
    1.34 +		else:
    1.35 +			end = int (uu[1], 16)
    1.36 +
    1.37 +		t = fields[1]
    1.38 +
    1.39 +		for u in range (start, end + 1):
    1.40 +			data[i][u] = t
    1.41 +		values[i][t] = values[i].get (t, 0) + 1
    1.42 +
    1.43 +		if i == 2:
    1.44 +			blocks[t] = (start, end)
    1.45 +
    1.46 +# Merge data into one dict:
    1.47 +defaults = ('Other', 'Not_Applicable', 'No_Block')
    1.48 +for i,v in enumerate (defaults):
    1.49 +	values[i][v] = values[i].get (v, 0) + 1
    1.50 +combined = {}
    1.51 +for i,d in enumerate (data):
    1.52 +	for u,v in d.items ():
    1.53 +		if i == 2 and not u in combined:
    1.54 +			continue
    1.55 +		if not u in combined:
    1.56 +			combined[u] = list (defaults)
    1.57 +		combined[u][i] = v
    1.58 +data = combined
    1.59 +del combined
    1.60 +num = len (data)
    1.61 +
    1.62 +# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
    1.63 +singles = {}
    1.64 +for u in [0x00A0, 0x25CC]:
    1.65 +	singles[u] = data[u]
    1.66 +	del data[u]
    1.67 +
    1.68 +print "/* == Start of generated table == */"
    1.69 +print "/*"
    1.70 +print " * The following table is generated by running:"
    1.71 +print " *"
    1.72 +print " *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
    1.73 +print " *"
    1.74 +print " * on files with these headers:"
    1.75 +print " *"
    1.76 +for h in headers:
    1.77 +	for l in h:
    1.78 +		print " * %s" % (l.strip())
    1.79 +print " */"
    1.80 +print
    1.81 +print '#include "hb-ot-shape-complex-indic-private.hh"'
    1.82 +print
    1.83 +
    1.84 +# Shorten values
    1.85 +short = [{
    1.86 +	"Bindu":		'Bi',
    1.87 +	"Visarga":		'Vs',
    1.88 +	"Vowel":		'Vo',
    1.89 +	"Vowel_Dependent":	'M',
    1.90 +	"Other":		'x',
    1.91 +},{
    1.92 +	"Not_Applicable":	'x',
    1.93 +}]
    1.94 +all_shorts = [[],[]]
    1.95 +
    1.96 +# Add some of the values, to make them more readable, and to avoid duplicates
    1.97 +
    1.98 +
    1.99 +for i in range (2):
   1.100 +	for v,s in short[i].items ():
   1.101 +		all_shorts[i].append (s)
   1.102 +
   1.103 +what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
   1.104 +what_short = ["ISC", "IMC"]
   1.105 +for i in range (2):
   1.106 +	print
   1.107 +	vv = values[i].keys ()
   1.108 +	vv.sort ()
   1.109 +	for v in vv:
   1.110 +		v_no_and = v.replace ('_And_', '_')
   1.111 +		if v in short[i]:
   1.112 +			s = short[i][v]
   1.113 +		else:
   1.114 +			s = ''.join ([c for c in v_no_and if ord ('A') <= ord (c) <= ord ('Z')])
   1.115 +			if s in all_shorts[i]:
   1.116 +				raise Exception ("Duplicate short value alias", v, s)
   1.117 +			all_shorts[i].append (s)
   1.118 +			short[i][v] = s
   1.119 +		print "#define %s_%s	%s_%s	%s/* %3d chars; %s */" % \
   1.120 +			(what_short[i], s, what[i], v.upper (), \
   1.121 +			'	'* ((48-1 - len (what[i]) - 1 - len (v)) / 8), \
   1.122 +			values[i][v], v)
   1.123 +print
   1.124 +print "#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)"
   1.125 +print
   1.126 +print
   1.127 +
   1.128 +total = 0
   1.129 +used = 0
   1.130 +def print_block (block, start, end, data):
   1.131 +	print
   1.132 +	print
   1.133 +	print "  /* %s  (%04X..%04X) */" % (block, start, end)
   1.134 +	num = 0
   1.135 +	for u in range (start, end+1):
   1.136 +		if u % 8 == 0:
   1.137 +			print
   1.138 +			print "  /* %04X */" % u,
   1.139 +		if u in data:
   1.140 +			num += 1
   1.141 +		d = data.get (u, defaults)
   1.142 +		sys.stdout.write ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))
   1.143 +
   1.144 +	global total, used
   1.145 +	total += end - start + 1
   1.146 +	used += num
   1.147 +
   1.148 +uu = data.keys ()
   1.149 +uu.sort ()
   1.150 +
   1.151 +last = -1
   1.152 +num = 0
   1.153 +offset = 0
   1.154 +starts = []
   1.155 +ends = []
   1.156 +print "static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {"
   1.157 +for u in uu:
   1.158 +	if u <= last:
   1.159 +		continue
   1.160 +	block = data[u][2]
   1.161 +	(start, end) = blocks[block]
   1.162 +
   1.163 +	if start != last + 1:
   1.164 +		if start - last <= 33:
   1.165 +			print_block ("FILLER", last+1, start-1, data)
   1.166 +			last = start-1
   1.167 +		else:
   1.168 +			if last >= 0:
   1.169 +				ends.append (last + 1)
   1.170 +				offset += ends[-1] - starts[-1]
   1.171 +			print
   1.172 +			print
   1.173 +			print "#define indic_offset_0x%04x %d" % (start, offset)
   1.174 +			starts.append (start)
   1.175 +
   1.176 +	print_block (block, start, end, data)
   1.177 +	last = end
   1.178 +ends.append (last + 1)
   1.179 +offset += ends[-1] - starts[-1]
   1.180 +print
   1.181 +print
   1.182 +print "#define indic_offset_total %d" % offset
   1.183 +print
   1.184 +occupancy = used * 100. / total
   1.185 +print "}; /* Table occupancy: %d%% */" % occupancy
   1.186 +print
   1.187 +print "INDIC_TABLE_ELEMENT_TYPE"
   1.188 +print "hb_indic_get_categories (hb_codepoint_t u)"
   1.189 +print "{"
   1.190 +for (start,end) in zip (starts, ends):
   1.191 +	offset = "indic_offset_0x%04x" % start
   1.192 +	print "  if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end, start, offset)
   1.193 +for u,d in singles.items ():
   1.194 +	print "  if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])
   1.195 +print "  return _(x,x);"
   1.196 +print "}"
   1.197 +print
   1.198 +print "#undef _"
   1.199 +for i in range (2):
   1.200 +	print
   1.201 +	vv = values[i].keys ()
   1.202 +	vv.sort ()
   1.203 +	for v in vv:
   1.204 +		print "#undef %s_%s" % \
   1.205 +			(what_short[i], short[i][v])
   1.206 +print
   1.207 +print "/* == End of generated table == */"
   1.208 +
   1.209 +# Maintain at least 30% occupancy in the table */
   1.210 +if occupancy < 30:
   1.211 +	raise Exception ("Table too sparse, please investigate: ", occupancy)

mercurial