Fri, 16 Jan 2015 18:13:44 +0100
Integrate suggestion from review to improve consistency with existing code.
michael@0 | 1 | #!/usr/bin/python |
michael@0 | 2 | |
michael@0 | 3 | import sys |
michael@0 | 4 | import os.path |
michael@0 | 5 | |
# Command-line handling: exactly two input files are expected, in this order.
if len (sys.argv) != 3:
    # sys.stderr.write works identically on Python 2 and Python 3.
    sys.stderr.write ("usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt\n")
    sys.exit (1)

# open() replaces the Python 2-only file() builtin; the handles are kept
# open on purpose because the table printers below consume them later.
files = [open (x) for x in sys.argv[1:]]

# The first two lines of the first file are echoed into the generated
# output as a provenance banner; the second file gets a fixed placeholder.
headers = [[files[0].readline (), files[0].readline ()]]
headers.append (["UnicodeData.txt does not have a header."])

# Skip the rest of the first file's preamble, up to and including the line
# of '#' characters.  readline() returns '' at end of file, so a missing
# separator must be reported explicitly instead of looping forever.
while True:
    line = files[0].readline ()
    if not line:
        raise Exception ("Header separator line not found", sys.argv[1])
    if '##################' in line:
        break
michael@0 | 16 | |
michael@0 | 17 | |
def print_joining_table(f):
    """Print the C ``joining_table`` array built from ArabicShaping.txt data.

    ``f`` is an iterable of text lines.  Lines starting with '#' are
    comments, lines without any ';' are skipped, and every other line is
    read as ';'-separated fields: code point (hex), name, joining type and
    joining group.  One array entry is printed for each code point from the
    first to the last one seen; code points absent from the input are
    filled with JOINING_TYPE_X.  U+200C and U+200D are ignored.

    Comment lines containing " characters" (any letter case) are remembered
    as a section heading and printed as a C comment before the next entry.

    Raises:
        Exception: if code points are not strictly increasing, or if fewer
            than 40% of the table entries come from the input.
    """
    print ("static const uint8_t joining_table[] =")
    print ("{")

    min_u = 0x110000
    max_u = 0
    num = 0
    last = -1
    block = ''
    for line in f:

        if line.startswith ('#'):
            # str.find() returns -1 (truthy) on a miss, so it cannot be used
            # as a boolean; test for the substring directly.  Matching is
            # case-insensitive so "# Arabic Characters" headings qualify.
            if " characters" in line.lower ():
                block = line[2:].strip ()
            continue

        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            # Blank line or any other line without a ';' separator.
            continue

        u = int (fields[0], 16)
        if u == 0x200C or u == 0x200D:
            continue
        # A repeated code point would add an extra entry and shift every
        # later one, so duplicates are rejected along with unsorted input.
        if u <= last:
            raise Exception ("Input data character not sorted", u)
        min_u = min (min_u, u)
        max_u = max (max_u, u)
        num += 1

        if block:
            print ("\n /* %s */\n" % block)
            block = ''

        if last != -1:
            # Pad the gap between the previous code point and this one.
            last += 1
            while last < u:
                print (" JOINING_TYPE_X, /* %04X */" % last)
                last += 1
        else:
            last = u

        # These two joining groups get their own value instead of the
        # plain joining type.
        if fields[3] in ["ALAPH", "DALATH RISH"]:
            value = "JOINING_GROUP_" + fields[3].replace (' ', '_')
        else:
            value = "JOINING_TYPE_" + fields[2]
        print (" %s, /* %s */" % (value, '; '.join (fields)))

    print ("};")

    print ("#define JOINING_TABLE_FIRST 0x%04X" % min_u)
    print ("#define JOINING_TABLE_LAST 0x%04X" % max_u)

    # Floor division keeps the integer percentage on both Python 2 and 3.
    occupancy = num * 100 // (max_u - min_u + 1)
    # Maintain at least 40% occupancy in the table
    if occupancy < 40:
        raise Exception ("Table too sparse, please investigate: ", occupancy)
michael@0 | 78 | |
def print_shaping_table(f):
    """Print the C shaping and lam-alef ligature tables from UnicodeData.txt.

    ``f`` is an iterable of ';'-separated text lines.  Only records whose
    decomposition field (index 5) is tagged <initial>, <medial>, <final> or
    <isolated> are used:

    * a single-code-point decomposition records the presentation form of
      that base character for the given shape;
    * a two-code-point decomposition is kept only when it is U+0644
      followed by U+0622, U+0623, U+0625 or U+0627.

    Lines with fewer than six fields (for example blank lines) are skipped.

    Raises:
        ValueError: from min()/max() when no shaping record or no ligature
            was found in the input.
        KeyError: when a ligature needs a shape of one of its components
            that the input did not provide.
        Exception: when a ligature has a shape other than isolated/final.
    """
    shapes = {}
    ligatures = {}
    names = {}
    for line in f:

        fields = [x.strip () for x in line.split (';')]
        if len (fields) < 6:
            # Blank or truncated line: nothing to look at.
            continue
        if fields[5][0:1] != '<':
            continue

        items = fields[5].split (' ')
        # "<tag> XXXX YYYY" -> tag without the angle brackets + code points.
        shape, items = items[0][1:-1], tuple (int (x, 16) for x in items[1:])

        if shape not in ['initial', 'medial', 'isolated', 'final']:
            continue

        c = int (fields[0], 16)
        if len (items) != 1:
            # We only care about lam-alef ligatures
            if len (items) != 2 or items[0] != 0x0644 or items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]:
                continue

            # Save ligature
            names[c] = fields[1]
            if items not in ligatures:
                ligatures[items] = {}
            ligatures[items][shape] = c
        else:
            # Save shape.  The base character's name is taken as the common
            # prefix of the names of all its presentation forms.
            base = items[0]
            if base not in names:
                names[base] = fields[1]
            else:
                names[base] = os.path.commonprefix ([names[base], fields[1]]).strip ()
            if base not in shapes:
                shapes[base] = {}
            shapes[base][shape] = c

    print ("static const uint16_t shaping_table[][4] =")
    print ("{")

    min_u, max_u = min (shapes), max (shapes)
    for u in range (min_u, max_u + 1):
        forms = shapes.get (u, {})
        # Column order of the generated table; a missing form is 0.
        s = [forms.get (shape, 0)
             for shape in ['initial', 'medial', 'final', 'isolated']]
        value = ', '.join ("0x%04X" % c for c in s)
        print (" {%s}, /* U+%04X %s */" % (value, u, names.get (u, "")))

    print ("};")

    print ("#define SHAPING_TABLE_FIRST 0x%04X" % min_u)
    print ("#define SHAPING_TABLE_LAST 0x%04X" % max_u)

    # Re-key the ligatures by the presentation forms of their components:
    # first component's form -> list of (second component's form, ligature).
    ligas = {}
    for pair in ligatures.keys ():
        for shape in ligatures[pair]:
            c = ligatures[pair][shape]
            if shape == 'isolated':
                liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
            elif shape == 'final':
                liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
            else:
                raise Exception ("Unexpected shape", shape)
            if liga[0] not in ligas:
                ligas[liga[0]] = []
            ligas[liga[0]].append ((liga[1], c))
    max_i = max (len (ligas[l]) for l in ligas)

    print ("static const struct ligature_set_t {")
    print (" uint16_t first;")
    print (" struct ligature_pairs_t {")
    print (" uint16_t second;")
    print (" uint16_t ligature;")
    print (" } ligatures[%d];" % max_i)
    print ("} ligature_table[] =")
    print ("{")
    # sorted() works on both Python 2 lists and Python 3 dict views.
    for first in sorted (ligas):

        print (" { 0x%04X, {" % (first))
        for liga in ligas[first]:
            print (" { 0x%04X, 0x%04X }, /* %s */" % (liga[0], liga[1], names[liga[1]]))
        print (" }},")

    print ("};")
michael@0 | 169 | |
michael@0 | 170 | |
michael@0 | 171 | |
michael@0 | 172 | |
# Banner: records how the table was generated and echoes the header lines
# captured from the input files.
for banner_line in (
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt",
    " *",
    " * on files with these headers:",
    " *",
):
    print (banner_line)
for header_line in (text for group in headers for text in group):
    print (" * %s" % (header_line.strip()))
print (" */")

# Include guard opening for the generated header.
for guard_line in (
    "#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH",
    "#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH",
):
    print (guard_line)

# First input feeds the joining table, second input the shaping table.
joining_input, shaping_input = files[0], files[1]
print_joining_table (joining_input)
print_shaping_table (shaping_input)

# Include guard closing and end marker.
for trailer_line in (
    "#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */",
    "/* == End of generated table == */",
):
    print (trailer_line)
michael@0 | 197 |