#!/usr/bin/python
"""Generate C lookup tables for Arabic shaping from Unicode data files.

Usage:

    ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt

The generated text is written to standard output.  It consists of three C
tables (``joining_table``, ``shaping_table`` and ``ligature_table``) wrapped
in the ``HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH`` include guard.

The script deliberately avoids version-specific constructs (``print``
statements, ``file()``, in-place sorting of ``dict.keys()``) so that the
same source runs unchanged on Python 2.6+ and Python 3.
"""

import os.path
import sys

# Line of '#' characters that ends the ArabicShaping.txt file header.
_HEADER_SEPARATOR = '##################'

# Column order of the four contextual forms in each shaping_table row.
_SHAPE_ORDER = ['initial', 'medial', 'final', 'isolated']


def _emit(text=""):
    """Write *text* plus a newline to stdout (what ``print text`` did)."""
    sys.stdout.write(text + "\n")


def _read_headers(f):
    """Consume the ArabicShaping.txt header and return it.

    The first two lines are kept so they can be quoted in the generated
    file.  Everything up to and including the separator line is skipped,
    leaving *f* positioned at the start of the data section.

    Raises:
        Exception: if the end of the file is reached without finding the
            separator.  ``readline()`` returns ``''`` forever at EOF, so a
            plain "loop until found" would never terminate.
    """
    header = [f.readline(), f.readline()]
    while True:
        line = f.readline()
        if not line:
            raise Exception("ArabicShaping.txt header separator not found")
        if _HEADER_SEPARATOR in line:
            break
    return [header]


def print_joining_table(f):
    """Print the ``joining_table`` C array built from ArabicShaping.txt data.

    Args:
        f: iterable of text lines, positioned after the file header.  Data
            lines are ``;``-separated: code point (hex), name, joining
            type, joining group.

    The table is dense: every code point between the first and the last
    listed one gets an entry, gaps being filled with ``JOINING_TYPE_X``.
    U+200C and U+200D are skipped.

    Raises:
        Exception: if code points are not in ascending order, or if fewer
            than 40% of the table slots correspond to listed code points.
    """
    _emit()
    _emit("static const uint8_t joining_table[] =")
    _emit("{")

    min_u = 0x110000
    max_u = 0
    num = 0
    last = -1
    block = ''
    for line in f:

        if line.startswith('#'):
            # Remember the most recent comment line; it is printed as a
            # section heading in front of the next data line.  The original
            # guarded this with `if line.find (" characters"):`, which is
            # always true here (find() returns -1 on a miss and can never
            # return 0 for a line starting with '#'), so every comment line
            # qualifies.  Kept that way so the generated output is unchanged.
            block = line[2:].strip()
            continue

        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            # Blank line, or anything else without a ';' separator.
            continue

        u = int(fields[0], 16)
        if u == 0x200C or u == 0x200D:
            continue
        if u < last:
            raise Exception("Input data character not sorted", u)
        min_u = min(min_u, u)
        max_u = max(max_u, u)
        num += 1

        if block:
            _emit("\n /* %s */\n" % block)
            block = ''

        if last != -1:
            # Pad the gap between the previous entry and this one.
            last += 1
            while last < u:
                _emit(" JOINING_TYPE_X, /* %04X */" % last)
                last += 1
        else:
            last = u

        if fields[3] in ["ALAPH", "DALATH RISH"]:
            # These two joining groups get a group-based value instead of
            # the joining type.
            value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
        else:
            value = "JOINING_TYPE_" + fields[2]
        _emit(" %s, /* %s */" % (value, '; '.join(fields)))

    _emit()
    _emit("};")
    _emit()
    _emit("#define JOINING_TABLE_FIRST 0x%04X" % min_u)
    _emit("#define JOINING_TABLE_LAST 0x%04X" % max_u)
    _emit()

    # Floor division keeps the integer percentage on Python 3 as well.
    occupancy = num * 100 // (max_u - min_u + 1)
    # Maintain at least 40% occupancy in the table
    if occupancy < 40:
        raise Exception("Table too sparse, please investigate: ", occupancy)


def print_shaping_table(f):
    """Print ``shaping_table`` and ``ligature_table`` from UnicodeData.txt.

    Args:
        f: iterable of UnicodeData.txt lines.  Field 0 is the code point,
            field 1 the name and field 5 the decomposition mapping; only
            mappings tagged ``<initial>``, ``<medial>``, ``<final>`` or
            ``<isolated>`` are used.

    Single-character mappings become rows of ``shaping_table`` (one row per
    code point between the lowest and highest base character, zero-filled
    where a form is missing).  Two-character mappings are kept only for
    U+0644 followed by U+0622, U+0623, U+0625 or U+0627 and become
    ``ligature_table`` entries keyed by the presentation forms of the two
    components.

    Raises:
        Exception: if no shaping forms or no ligatures are found, or if a
            ligature has a shape other than ``isolated`` or ``final``.
        KeyError: if a component form needed to key a ligature is missing
            from the input.
    """
    shapes = {}
    ligatures = {}
    names = {}
    for line in f:

        fields = [x.strip() for x in line.split(';')]
        if len(fields) < 6:
            # Blank or truncated line: there is no decomposition field.
            continue
        if fields[5][0:1] != '<':
            continue

        items = fields[5].split(' ')
        shape, items = items[0][1:-1], tuple(int(x, 16) for x in items[1:])

        if shape not in _SHAPE_ORDER:
            continue

        c = int(fields[0], 16)
        if len(items) != 1:
            # We only care about lam-alef ligatures
            if (len(items) != 2 or items[0] != 0x0644 or
                    items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]):
                continue

            # Save ligature
            names[c] = fields[1]
            ligatures.setdefault(items, {})[shape] = c
        else:
            # Save shape.  The base character is labelled with the common
            # prefix of the names of all of its presentation forms.
            base = items[0]
            if base not in names:
                names[base] = fields[1]
            else:
                names[base] = os.path.commonprefix(
                    [names[base], fields[1]]).strip()
            shapes.setdefault(base, {})[shape] = c

    if not shapes:
        raise Exception("No contextual shaping forms found in input")

    _emit()
    _emit("static const uint16_t shaping_table[][4] =")
    _emit("{")

    min_u, max_u = min(shapes), max(shapes)
    for u in range(min_u, max_u + 1):
        forms = shapes.get(u, {})
        value = ', '.join("0x%04X" % forms.get(shape, 0)
                          for shape in _SHAPE_ORDER)
        _emit(" {%s}, /* U+%04X %s */" % (value, u, names.get(u, "")))

    _emit("};")
    _emit()
    _emit("#define SHAPING_TABLE_FIRST 0x%04X" % min_u)
    _emit("#define SHAPING_TABLE_LAST 0x%04X" % max_u)
    _emit()

    # Re-key each ligature by the presentation forms of its components.
    ligas = {}
    for pair in ligatures:
        for shape in ligatures[pair]:
            c = ligatures[pair][shape]
            if shape == 'isolated':
                liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
            elif shape == 'final':
                liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
            else:
                raise Exception("Unexpected shape", shape)
            ligas.setdefault(liga[0], []).append((liga[1], c))

    if not ligas:
        raise Exception("No lam-alef ligatures found in input")

    # Every set is declared with the size of the largest one.
    max_i = max(len(ligas[l]) for l in ligas)
    _emit()
    _emit("static const struct ligature_set_t {")
    _emit(" uint16_t first;")
    _emit(" struct ligature_pairs_t {")
    _emit(" uint16_t second;")
    _emit(" uint16_t ligature;")
    _emit(" } ligatures[%d];" % max_i)
    _emit("} ligature_table[] =")
    _emit("{")
    for first in sorted(ligas):

        _emit(" { 0x%04X, {" % (first))
        for liga in ligas[first]:
            _emit(" { 0x%04X, 0x%04X }, /* %s */"
                  % (liga[0], liga[1], names[liga[1]]))
        _emit(" }},")

    _emit("};")
    _emit()


def main(argv=None):
    """Run the generator; return the process exit status.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv``.  Exactly two file arguments are expected:
            ArabicShaping.txt followed by UnicodeData.txt.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        sys.stderr.write(
            "usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt\n")
        return 1

    # Both inputs are opened before anything is printed, so a missing file
    # fails early; the `with` blocks make sure they get closed again.
    with open(argv[1]) as shaping_file:
        with open(argv[2]) as unicode_data_file:
            headers = _read_headers(shaping_file)
            headers.append(["UnicodeData.txt does not have a header."])

            _emit("/* == Start of generated table == */")
            _emit("/*")
            _emit(" * The following table is generated by running:")
            _emit(" *")
            _emit(" * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt")
            _emit(" *")
            _emit(" * on files with these headers:")
            _emit(" *")
            for h in headers:
                for l in h:
                    _emit(" * %s" % (l.strip()))
            _emit(" */")
            _emit()
            _emit("#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
            _emit("#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
            _emit()

            print_joining_table(shaping_file)
            print_shaping_table(unicode_data_file)

            _emit()
            _emit("#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */")
            _emit()
            _emit("/* == End of generated table == */")
    return 0


if __name__ == "__main__":
    sys.exit(main())