gfx/harfbuzz/src/gen-arabic-table.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/harfbuzz/src/gen-arabic-table.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,197 @@
     1.4 +#!/usr/bin/python
     1.5 +
     1.6 +import sys
     1.7 +import os.path
     1.8 +
     1.9 +if len (sys.argv) != 3:
    1.10 +	print >>sys.stderr, "usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt"
    1.11 +	sys.exit (1)
    1.12 +
    1.13 +files = [file (x) for x in sys.argv[1:]]
    1.14 +
    1.15 +headers = [[files[0].readline (), files[0].readline ()]]
    1.16 +headers.append (["UnicodeData.txt does not have a header."])
    1.17 +while files[0].readline ().find ('##################') < 0:
    1.18 +	pass
    1.19 +
    1.20 +
    1.21 +def print_joining_table(f):
    1.22 +
    1.23 +	print
    1.24 +	print "static const uint8_t joining_table[] ="
    1.25 +	print "{"
    1.26 +
    1.27 +	min_u = 0x110000
    1.28 +	max_u = 0
    1.29 +	num = 0
    1.30 +	last = -1
    1.31 +	block = ''
    1.32 +	for line in f:
    1.33 +
    1.34 +		if line[0] == '#':
    1.35 +			if line.find (" characters"):
    1.36 +				block = line[2:].strip ()
    1.37 +			continue
    1.38 +
    1.39 +		fields = [x.strip () for x in line.split (';')]
    1.40 +		if len (fields) == 1:
    1.41 +			continue
    1.42 +
    1.43 +		u = int (fields[0], 16)
    1.44 +		if u == 0x200C or u == 0x200D:
    1.45 +			continue
    1.46 +		if u < last:
    1.47 +			raise Exception ("Input data character not sorted", u)
    1.48 +		min_u = min (min_u, u)
    1.49 +		max_u = max (max_u, u)
    1.50 +		num += 1
    1.51 +
    1.52 +		if block:
    1.53 +			print "\n  /* %s */\n" % block
    1.54 +			block = ''
    1.55 +
    1.56 +		if last != -1:
    1.57 +			last += 1
    1.58 +			while last < u:
    1.59 +				print "  JOINING_TYPE_X, /* %04X */" % last
    1.60 +				last += 1
    1.61 +		else:
    1.62 +			last = u
    1.63 +
    1.64 +		if fields[3] in ["ALAPH", "DALATH RISH"]:
    1.65 +			value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
    1.66 +		else:
    1.67 +			value = "JOINING_TYPE_" + fields[2]
    1.68 +		print "  %s, /* %s */" % (value, '; '.join(fields))
    1.69 +
    1.70 +	print
    1.71 +	print "};"
    1.72 +	print
    1.73 +	print "#define JOINING_TABLE_FIRST	0x%04X" % min_u
    1.74 +	print "#define JOINING_TABLE_LAST	0x%04X" % max_u
    1.75 +	print
    1.76 +
    1.77 +	occupancy = num * 100 / (max_u - min_u + 1)
    1.78 +	# Maintain at least 40% occupancy in the table */
    1.79 +	if occupancy < 40:
    1.80 +		raise Exception ("Table too sparse, please investigate: ", occupancy)
    1.81 +
    1.82 +def print_shaping_table(f):
    1.83 +
    1.84 +	shapes = {}
    1.85 +	ligatures = {}
    1.86 +	names = {}
    1.87 +	for line in f:
    1.88 +
    1.89 +		fields = [x.strip () for x in line.split (';')]
    1.90 +		if fields[5][0:1] != '<':
    1.91 +			continue
    1.92 +
    1.93 +		items = fields[5].split (' ')
    1.94 +		shape, items = items[0][1:-1], tuple (int (x, 16) for x in items[1:])
    1.95 +
    1.96 +		if not shape in ['initial', 'medial', 'isolated', 'final']:
    1.97 +			continue
    1.98 +
    1.99 +		c = int (fields[0], 16)
   1.100 +		if len (items) != 1:
   1.101 +			# We only care about lam-alef ligatures
   1.102 +			if len (items) != 2 or items[0] != 0x0644 or items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]:
   1.103 +				continue
   1.104 +
   1.105 +			# Save ligature
   1.106 +			names[c] = fields[1]
   1.107 +			if items not in ligatures:
   1.108 +				ligatures[items] = {}
   1.109 +			ligatures[items][shape] = c
   1.110 +			pass
   1.111 +		else:
   1.112 +			# Save shape
   1.113 +			if items[0] not in names:
   1.114 +				names[items[0]] = fields[1]
   1.115 +			else:
   1.116 +				names[items[0]] = os.path.commonprefix ([names[items[0]], fields[1]]).strip ()
   1.117 +			if items[0] not in shapes:
   1.118 +				shapes[items[0]] = {}
   1.119 +			shapes[items[0]][shape] = c
   1.120 +
   1.121 +	print
   1.122 +	print "static const uint16_t shaping_table[][4] ="
   1.123 +	print "{"
   1.124 +
   1.125 +	keys = shapes.keys ()
   1.126 +	min_u, max_u = min (keys), max (keys)
   1.127 +	for u in range (min_u, max_u + 1):
   1.128 +		s = [shapes[u][shape] if u in shapes and shape in shapes[u] else 0
   1.129 +		     for shape in  ['initial', 'medial', 'final', 'isolated']]
   1.130 +		value = ', '.join ("0x%04X" % c for c in s)
   1.131 +		print "  {%s}, /* U+%04X %s */" % (value, u, names[u] if u in names else "")
   1.132 +
   1.133 +	print "};"
   1.134 +	print
   1.135 +	print "#define SHAPING_TABLE_FIRST	0x%04X" % min_u
   1.136 +	print "#define SHAPING_TABLE_LAST	0x%04X" % max_u
   1.137 +	print
   1.138 +
   1.139 +	ligas = {}
   1.140 +	for pair in ligatures.keys ():
   1.141 +		for shape in ligatures[pair]:
   1.142 +			c = ligatures[pair][shape]
   1.143 +			if shape == 'isolated':
   1.144 +				liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
   1.145 +			elif shape == 'final':
   1.146 +				liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
   1.147 +			else:
   1.148 +				raise Exception ("Unexpected shape", shape)
   1.149 +			if liga[0] not in ligas:
   1.150 +				ligas[liga[0]] = []
   1.151 +			ligas[liga[0]].append ((liga[1], c))
   1.152 +	max_i = max (len (ligas[l]) for l in ligas)
   1.153 +	print
   1.154 +	print "static const struct ligature_set_t {"
   1.155 +	print " uint16_t first;"
   1.156 +	print " struct ligature_pairs_t {"
   1.157 +	print "   uint16_t second;"
   1.158 +	print "   uint16_t ligature;"
   1.159 +	print " } ligatures[%d];" % max_i
   1.160 +	print "} ligature_table[] ="
   1.161 +	print "{"
   1.162 +	keys = ligas.keys ()
   1.163 +	keys.sort ()
   1.164 +	for first in keys:
   1.165 +
   1.166 +		print "  { 0x%04X, {" % (first)
   1.167 +		for liga in ligas[first]:
   1.168 +			print "    { 0x%04X, 0x%04X }, /* %s */" % (liga[0], liga[1], names[liga[1]])
   1.169 +		print "  }},"
   1.170 +
   1.171 +	print "};"
   1.172 +	print
   1.173 +
   1.174 +
   1.175 +
   1.176 +print "/* == Start of generated table == */"
   1.177 +print "/*"
   1.178 +print " * The following table is generated by running:"
   1.179 +print " *"
   1.180 +print " *   ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt"
   1.181 +print " *"
   1.182 +print " * on files with these headers:"
   1.183 +print " *"
   1.184 +for h in headers:
   1.185 +	for l in h:
   1.186 +		print " * %s" % (l.strip())
   1.187 +print " */"
   1.188 +print
   1.189 +print "#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH"
   1.190 +print "#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH"
   1.191 +print
   1.192 +
   1.193 +print_joining_table (files[0])
   1.194 +print_shaping_table (files[1])
   1.195 +
   1.196 +print
   1.197 +print "#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */"
   1.198 +print
   1.199 +print "/* == End of generated table == */"
   1.200 +

mercurial