1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/js/src/jskwgen.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,425 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99: 1.6 + * This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include <assert.h> 1.11 +#include <ctype.h> 1.12 +#include <stdarg.h> 1.13 +#include <stdio.h> 1.14 +#include <stdlib.h> 1.15 +#include <string.h> 1.16 + 1.17 +#include "vm/Keywords.h" 1.18 + 1.19 +static const char * const keyword_list[] = { 1.20 +#define KEYWORD_STRING(keyword, name, type, version) #keyword, 1.21 + FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_STRING) 1.22 +#undef KEYWORD_STRING 1.23 +}; 1.24 + 1.25 +struct gen_opt { 1.26 + FILE *output; /* output file for generated source */ 1.27 + unsigned use_if_threshold; /* max number of choices to generate 1.28 + "if" selector instead of "switch" */ 1.29 + unsigned char_tail_test_threshold; /* max number of unprocessed columns 1.30 + to use inlined char compare 1.31 + for remaining chars and not generic 1.32 + string compare code */ 1.33 + unsigned indent_level; /* current source identation level */ 1.34 +}; 1.35 + 1.36 +static unsigned column_to_compare; 1.37 + 1.38 +static int 1.39 +length_comparator(const void *a, const void *b) 1.40 +{ 1.41 + const char *str1 = keyword_list[*(unsigned *)a]; 1.42 + const char *str2 = keyword_list[*(unsigned *)b]; 1.43 + return (int)strlen(str1) - (int)strlen(str2); 1.44 +} 1.45 + 1.46 +static int 1.47 +column_comparator(const void *a, const void *b) 1.48 +{ 1.49 + const char *str1 = keyword_list[*(unsigned *)a]; 1.50 + const char *str2 = keyword_list[*(unsigned *)b]; 1.51 + return (int)str1[column_to_compare] - (int)str2[column_to_compare]; 1.52 +} 1.53 + 1.54 +static unsigned 1.55 +count_different_lengths(unsigned indexes[], unsigned nelem) 1.56 +{ 1.57 + unsigned nlength, current_length, i, l; 1.58 + 1.59 + current_length = 0; 1.60 + nlength = 0; 1.61 + for (i = 0; i != nelem; ++i) { 1.62 + l = (unsigned)strlen(keyword_list[indexes[i]]); 1.63 + assert(l != 0); 1.64 + if (current_length != l) { 1.65 + ++nlength; 1.66 + current_length = l; 1.67 + } 1.68 + } 1.69 + return nlength; 1.70 +} 1.71 + 1.72 +static void 1.73 +find_char_span_and_count(unsigned indexes[], unsigned nelem, unsigned column, 1.74 + unsigned *span_result, unsigned *count_result) 1.75 +{ 1.76 + unsigned i, count; 1.77 + unsigned char c, prev, minc, maxc; 1.78 + 1.79 + assert(nelem != 0); 1.80 + minc = maxc = prev = (unsigned char)keyword_list[indexes[0]][column]; 1.81 + count = 1; 1.82 + for (i = 1; i != nelem; ++i) { 1.83 + c = (unsigned char)keyword_list[indexes[i]][column]; 1.84 + if (prev != c) { 1.85 + prev = c; 1.86 + ++count; 1.87 + if (minc > c) { 1.88 + minc = c; 1.89 + } else if (maxc < c) { 1.90 + maxc = c; 1.91 + } 1.92 + } 1.93 + } 1.94 + 1.95 + *span_result = maxc - minc + 1; 1.96 + *count_result = count; 1.97 +} 1.98 + 1.99 +static unsigned 1.100 +find_optimal_switch_column(struct gen_opt *opt, 1.101 + unsigned indexes[], unsigned nelem, 1.102 + unsigned columns[], unsigned unprocessed_columns, 1.103 + int *use_if_result) 1.104 +{ 1.105 + unsigned i; 1.106 + unsigned span, min_span, min_span_index; 1.107 + unsigned nchar, min_nchar, min_nchar_index; 1.108 + 1.109 + assert(unprocessed_columns != 0); 1.110 + i = 0; 1.111 + min_nchar = min_span = (unsigned)-1; 1.112 + min_nchar_index = min_span_index = 0; 1.113 + do { 1.114 + column_to_compare = columns[i]; 1.115 + qsort(indexes, nelem, sizeof(indexes[0]), column_comparator); 1.116 + find_char_span_and_count(indexes, nelem, column_to_compare, 1.117 + &span, &nchar); 1.118 + assert(span != 0); 1.119 + if (span == 1) { 1.120 + assert(nchar == 1); 1.121 + *use_if_result = 1; 1.122 + return 1; 1.123 + } 1.124 + assert(nchar != 1); 1.125 + if (min_span > span) { 1.126 + min_span = span; 1.127 + min_span_index = i; 1.128 + } 1.129 + if (min_nchar > nchar) { 1.130 + min_nchar = nchar; 1.131 + min_nchar_index = i; 1.132 + } 1.133 + } while (++i != unprocessed_columns); 1.134 + 1.135 + if (min_nchar <= opt->use_if_threshold) { 1.136 + *use_if_result = 1; 1.137 + i = min_nchar_index; 1.138 + } else { 1.139 + *use_if_result = 0; 1.140 + i = min_span_index; 1.141 + } 1.142 + 1.143 + /* 1.144 + * Restore order corresponding to i if it was destroyed by 1.145 + * subsequent sort. 1.146 + */ 1.147 + if (i != unprocessed_columns - 1) { 1.148 + column_to_compare = columns[i]; 1.149 + qsort(indexes, nelem, sizeof(indexes[0]), column_comparator); 1.150 + } 1.151 + 1.152 + return i; 1.153 +} 1.154 + 1.155 + 1.156 +static void 1.157 +p(struct gen_opt *opt, const char *format, ...) 1.158 +{ 1.159 + va_list ap; 1.160 + 1.161 + va_start(ap, format); 1.162 + vfprintf(opt->output, format, ap); 1.163 + va_end(ap); 1.164 +} 1.165 + 1.166 +/* Size for '\xxx' where xxx is octal escape */ 1.167 +#define MIN_QUOTED_CHAR_BUFFER 7 1.168 + 1.169 +static char * 1.170 +qchar(char c, char *quoted_buffer) 1.171 +{ 1.172 + char *s; 1.173 + 1.174 + s = quoted_buffer; 1.175 + *s++ = '\''; 1.176 + switch (c) { 1.177 + case '\n': c = 'n'; goto one_char_escape; 1.178 + case '\r': c = 'r'; goto one_char_escape; 1.179 + case '\t': c = 't'; goto one_char_escape; 1.180 + case '\f': c = 't'; goto one_char_escape; 1.181 + case '\0': c = '0'; goto one_char_escape; 1.182 + case '\'': goto one_char_escape; 1.183 + one_char_escape: 1.184 + *s++ = '\\'; 1.185 + break; 1.186 + default: 1.187 + if (!isprint(c)) { 1.188 + *s++ = '\\'; 1.189 + *s++ = (char)('0' + (0x3 & (((unsigned char)c) >> 6))); 1.190 + *s++ = (char)('0' + (0x7 & (((unsigned char)c) >> 3))); 1.191 + c = (char)('0' + (0x7 & ((unsigned char)c))); 1.192 + } 1.193 + } 1.194 + *s++ = c; 1.195 + *s++ = '\''; 1.196 + *s = '\0'; 1.197 + assert(s + 1 <= quoted_buffer + MIN_QUOTED_CHAR_BUFFER); 1.198 + return quoted_buffer; 1.199 +} 1.200 + 1.201 +static void 1.202 +nl(struct gen_opt *opt) 1.203 +{ 1.204 + putc('\n', opt->output); 1.205 +} 1.206 + 1.207 +static void 1.208 +indent(struct gen_opt *opt) 1.209 +{ 1.210 + unsigned n = opt->indent_level; 1.211 + while (n != 0) { 1.212 + --n; 1.213 + fputs(" ", opt->output); 1.214 + } 1.215 +} 1.216 + 1.217 +static void 1.218 +line(struct gen_opt *opt, const char *format, ...) 1.219 +{ 1.220 + va_list ap; 1.221 + 1.222 + indent(opt); 1.223 + va_start(ap, format); 1.224 + vfprintf(opt->output, format, ap); 1.225 + va_end(ap); 1.226 + nl(opt); 1.227 +} 1.228 + 1.229 +static void 1.230 +generate_letter_switch_r(struct gen_opt *opt, 1.231 + unsigned indexes[], unsigned nelem, 1.232 + unsigned columns[], unsigned unprocessed_columns) 1.233 +{ 1.234 + char qbuf[MIN_QUOTED_CHAR_BUFFER]; 1.235 + 1.236 + assert(nelem != 0); 1.237 + if (nelem == 1) { 1.238 + unsigned kw_index = indexes[0]; 1.239 + const char *keyword = keyword_list[kw_index]; 1.240 + 1.241 + if (unprocessed_columns == 0) { 1.242 + line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword); 1.243 + } else if (unprocessed_columns > opt->char_tail_test_threshold) { 1.244 + line(opt, "JSKW_TEST_GUESS(%u) /* %s */", kw_index, keyword); 1.245 + } else { 1.246 + unsigned i, column; 1.247 + 1.248 + indent(opt); p(opt, "if ("); 1.249 + for (i = 0; i != unprocessed_columns; ++i) { 1.250 + column = columns[i]; 1.251 + qchar(keyword[column], qbuf); 1.252 + p(opt, "%sJSKW_AT(%u)==%s", (i == 0) ? "" : " && ", 1.253 + column, qbuf); 1.254 + } 1.255 + p(opt, ") {"); nl(opt); 1.256 + ++opt->indent_level; 1.257 + line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword); 1.258 + --opt->indent_level; 1.259 + line(opt, "}"); 1.260 + line(opt, "JSKW_NO_MATCH()"); 1.261 + } 1.262 + } else { 1.263 + unsigned optimal_column_index, optimal_column; 1.264 + unsigned i; 1.265 + int use_if; 1.266 + char current; 1.267 + 1.268 + assert(unprocessed_columns != 0); 1.269 + optimal_column_index = find_optimal_switch_column(opt, indexes, nelem, 1.270 + columns, 1.271 + unprocessed_columns, 1.272 + &use_if); 1.273 + optimal_column = columns[optimal_column_index]; 1.274 + columns[optimal_column_index] = columns[unprocessed_columns - 1]; 1.275 + 1.276 + if (!use_if) 1.277 + line(opt, "switch (JSKW_AT(%u)) {", optimal_column); 1.278 + 1.279 + current = keyword_list[indexes[0]][optimal_column]; 1.280 + for (i = 0; i != nelem;) { 1.281 + unsigned same_char_begin = i; 1.282 + char next = current; 1.283 + 1.284 + for (++i; i != nelem; ++i) { 1.285 + next = keyword_list[indexes[i]][optimal_column]; 1.286 + if (next != current) 1.287 + break; 1.288 + } 1.289 + qchar(current, qbuf); 1.290 + if (use_if) { 1.291 + line(opt, "if (JSKW_AT(%u) == %s) {", optimal_column, qbuf); 1.292 + } else { 1.293 + line(opt, " case %s:", qbuf); 1.294 + } 1.295 + ++opt->indent_level; 1.296 + generate_letter_switch_r(opt, indexes + same_char_begin, 1.297 + i - same_char_begin, 1.298 + columns, unprocessed_columns - 1); 1.299 + --opt->indent_level; 1.300 + if (use_if) { 1.301 + line(opt, "}"); 1.302 + } 1.303 + current = next; 1.304 + } 1.305 + 1.306 + if (!use_if) { 1.307 + line(opt, "}"); 1.308 + } 1.309 + 1.310 + columns[optimal_column_index] = optimal_column; 1.311 + 1.312 + line(opt, "JSKW_NO_MATCH()"); 1.313 + } 1.314 +} 1.315 + 1.316 +static void 1.317 +generate_letter_switch(struct gen_opt *opt, 1.318 + unsigned indexes[], unsigned nelem, 1.319 + unsigned current_length) 1.320 +{ 1.321 + unsigned *columns; 1.322 + unsigned i; 1.323 + 1.324 + columns = (unsigned *) malloc(sizeof(columns[0]) * current_length); 1.325 + if (!columns) { 1.326 + perror("malloc"); 1.327 + exit(EXIT_FAILURE); 1.328 + } 1.329 + for (i = 0; i != current_length; ++i) { 1.330 + columns[i] = i; 1.331 + } 1.332 + generate_letter_switch_r(opt, indexes, nelem, columns, current_length); 1.333 + free(columns); 1.334 +} 1.335 + 1.336 + 1.337 +static void 1.338 +generate_switch(struct gen_opt *opt) 1.339 +{ 1.340 + unsigned *indexes; 1.341 + unsigned nlength; 1.342 + unsigned i, current; 1.343 + int use_if; 1.344 + unsigned nelem; 1.345 + 1.346 + nelem = sizeof(keyword_list)/sizeof(keyword_list[0]); 1.347 + 1.348 + line(opt, "/*"); 1.349 + line(opt, " * Generating switch for the list of %u entries:", nelem); 1.350 + for (i = 0; i != nelem; ++i) { 1.351 + line(opt, " * %s", keyword_list[i]); 1.352 + } 1.353 + line(opt, " */"); 1.354 + 1.355 + indexes = (unsigned *) malloc(sizeof(indexes[0]) * nelem); 1.356 + if (!indexes) { 1.357 + perror("malloc"); 1.358 + exit(EXIT_FAILURE); 1.359 + } 1.360 + for (i = 0; i != nelem; ++i) 1.361 + indexes[i] = i; 1.362 + qsort(indexes, nelem, sizeof(indexes[i]), length_comparator); 1.363 + nlength = count_different_lengths(indexes, nelem); 1.364 + 1.365 + use_if = (nlength <= opt->use_if_threshold); 1.366 + 1.367 + if (!use_if) 1.368 + line(opt, "switch (JSKW_LENGTH()) {"); 1.369 + 1.370 + current = (unsigned)strlen(keyword_list[indexes[0]]); 1.371 + for (i = 0; i != nelem;) { 1.372 + unsigned same_length_begin = i; 1.373 + unsigned next = current; 1.374 + 1.375 + for (++i; i != nelem; ++i) { 1.376 + next = (unsigned)strlen(keyword_list[indexes[i]]); 1.377 + if (next != current) 1.378 + break; 1.379 + } 1.380 + if (use_if) { 1.381 + line(opt, "if (JSKW_LENGTH() == %u) {", current); 1.382 + } else { 1.383 + line(opt, " case %u:", current); 1.384 + } 1.385 + ++opt->indent_level; 1.386 + generate_letter_switch(opt, indexes + same_length_begin, 1.387 + i - same_length_begin, 1.388 + current); 1.389 + --opt->indent_level; 1.390 + if (use_if) { 1.391 + line(opt, "}"); 1.392 + } 1.393 + current = next; 1.394 + } 1.395 + if (!use_if) 1.396 + line(opt, "}"); 1.397 + line(opt, "JSKW_NO_MATCH()"); 1.398 + free(indexes); 1.399 +} 1.400 + 1.401 +int main(int argc, char **argv) 1.402 +{ 1.403 + struct gen_opt opt; 1.404 + 1.405 + if (argc < 2) { 1.406 + opt.output = stdout; 1.407 + } else { 1.408 + opt.output = fopen(argv[1], "w"); 1.409 + if (!opt.output) { 1.410 + perror("fopen"); 1.411 + exit(EXIT_FAILURE); 1.412 + } 1.413 + } 1.414 + opt.indent_level = 1; 1.415 + opt.use_if_threshold = 3; 1.416 + opt.char_tail_test_threshold = 4; 1.417 + 1.418 + generate_switch(&opt); 1.419 + 1.420 + if (opt.output != stdout) { 1.421 + if (fclose(opt.output)) { 1.422 + perror("fclose"); 1.423 + exit(EXIT_FAILURE); 1.424 + } 1.425 + } 1.426 + 1.427 + return EXIT_SUCCESS; 1.428 +}