js/src/jskwgen.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/jskwgen.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,425 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99:
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include <assert.h>
    1.11 +#include <ctype.h>
    1.12 +#include <stdarg.h>
    1.13 +#include <stdio.h>
    1.14 +#include <stdlib.h>
    1.15 +#include <string.h>
    1.16 +
    1.17 +#include "vm/Keywords.h"
    1.18 +
    1.19 +static const char * const keyword_list[] = {
    1.20 +#define KEYWORD_STRING(keyword, name, type, version) #keyword,
    1.21 +    FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_STRING)
    1.22 +#undef KEYWORD_STRING
    1.23 +};
    1.24 +
    1.25 +struct gen_opt {
    1.26 +    FILE *output;                       /* output file for generated source */
    1.27 +    unsigned use_if_threshold;          /* max number of choices to generate
    1.28 +                                           "if" selector instead of "switch" */
    1.29 +    unsigned char_tail_test_threshold;  /* max number of unprocessed columns
    1.30 +                                           to use inlined char compare
    1.31 +                                           for remaining chars and not generic
    1.32 +                                           string compare code */
    1.33 +    unsigned indent_level;              /* current source identation level */
    1.34 +};
    1.35 +
    1.36 +static unsigned column_to_compare;
    1.37 +
    1.38 +static int
    1.39 +length_comparator(const void *a, const void *b)
    1.40 +{
    1.41 +    const char *str1 = keyword_list[*(unsigned *)a];
    1.42 +    const char *str2 = keyword_list[*(unsigned *)b];
    1.43 +    return (int)strlen(str1) - (int)strlen(str2);
    1.44 +}
    1.45 +
    1.46 +static int
    1.47 +column_comparator(const void *a, const void *b)
    1.48 +{
    1.49 +    const char *str1 = keyword_list[*(unsigned *)a];
    1.50 +    const char *str2 = keyword_list[*(unsigned *)b];
    1.51 +    return (int)str1[column_to_compare] - (int)str2[column_to_compare];
    1.52 +}
    1.53 +
    1.54 +static unsigned
    1.55 +count_different_lengths(unsigned indexes[], unsigned nelem)
    1.56 +{
    1.57 +    unsigned nlength, current_length, i, l;
    1.58 +
    1.59 +    current_length = 0;
    1.60 +    nlength = 0;
    1.61 +    for (i = 0; i != nelem; ++i) {
    1.62 +        l = (unsigned)strlen(keyword_list[indexes[i]]);
    1.63 +        assert(l != 0);
    1.64 +        if (current_length != l) {
    1.65 +            ++nlength;
    1.66 +            current_length = l;
    1.67 +        }
    1.68 +    }
    1.69 +    return nlength;
    1.70 +}
    1.71 +
    1.72 +static void
    1.73 +find_char_span_and_count(unsigned indexes[], unsigned nelem, unsigned column,
    1.74 +                         unsigned *span_result, unsigned *count_result)
    1.75 +{
    1.76 +    unsigned i, count;
    1.77 +    unsigned char c, prev, minc, maxc;
    1.78 +
    1.79 +    assert(nelem != 0);
    1.80 +    minc = maxc = prev = (unsigned char)keyword_list[indexes[0]][column];
    1.81 +    count = 1;
    1.82 +    for (i = 1; i != nelem; ++i) {
    1.83 +        c = (unsigned char)keyword_list[indexes[i]][column];
    1.84 +        if (prev != c) {
    1.85 +            prev = c;
    1.86 +            ++count;
    1.87 +            if (minc > c) {
    1.88 +                minc = c;
    1.89 +            } else if (maxc < c) {
    1.90 +                maxc = c;
    1.91 +            }
    1.92 +        }
    1.93 +    }
    1.94 +
    1.95 +    *span_result = maxc - minc + 1;
    1.96 +    *count_result = count;
    1.97 +}
    1.98 +
    1.99 +static unsigned
   1.100 +find_optimal_switch_column(struct gen_opt *opt,
   1.101 +                           unsigned indexes[], unsigned nelem,
   1.102 +                           unsigned columns[], unsigned unprocessed_columns,
   1.103 +                           int *use_if_result)
   1.104 +{
   1.105 +    unsigned i;
   1.106 +    unsigned span, min_span, min_span_index;
   1.107 +    unsigned nchar, min_nchar, min_nchar_index;
   1.108 +
   1.109 +    assert(unprocessed_columns != 0);
   1.110 +    i = 0;
   1.111 +    min_nchar = min_span = (unsigned)-1;
   1.112 +    min_nchar_index = min_span_index = 0;
   1.113 +    do {
   1.114 +        column_to_compare = columns[i];
   1.115 +        qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
   1.116 +        find_char_span_and_count(indexes, nelem, column_to_compare,
   1.117 +                                 &span, &nchar);
   1.118 +        assert(span != 0);
   1.119 +        if (span == 1) {
   1.120 +            assert(nchar == 1);
   1.121 +            *use_if_result = 1;
   1.122 +            return 1;
   1.123 +        }
   1.124 +        assert(nchar != 1);
   1.125 +        if (min_span > span) {
   1.126 +            min_span = span;
   1.127 +            min_span_index = i;
   1.128 +        }
   1.129 +        if (min_nchar > nchar) {
   1.130 +            min_nchar = nchar;
   1.131 +            min_nchar_index = i;
   1.132 +        }
   1.133 +    } while (++i != unprocessed_columns);
   1.134 +
   1.135 +    if (min_nchar <= opt->use_if_threshold) {
   1.136 +        *use_if_result = 1;
   1.137 +        i = min_nchar_index;
   1.138 +    } else {
   1.139 +        *use_if_result = 0;
   1.140 +        i = min_span_index;
   1.141 +    }
   1.142 +
   1.143 +    /*
   1.144 +     * Restore order corresponding to i if it was destroyed by
   1.145 +     * subsequent sort.
   1.146 +     */
   1.147 +    if (i != unprocessed_columns - 1) {
   1.148 +        column_to_compare = columns[i];
   1.149 +        qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
   1.150 +    }
   1.151 +
   1.152 +    return i;
   1.153 +}
   1.154 +
   1.155 +
   1.156 +static void
   1.157 +p(struct gen_opt *opt, const char *format, ...)
   1.158 +{
   1.159 +    va_list ap;
   1.160 +
   1.161 +    va_start(ap, format);
   1.162 +    vfprintf(opt->output, format, ap);
   1.163 +    va_end(ap);
   1.164 +}
   1.165 +
   1.166 +/* Size for '\xxx' where xxx is octal escape */
   1.167 +#define MIN_QUOTED_CHAR_BUFFER 7
   1.168 +
   1.169 +static char *
   1.170 +qchar(char c, char *quoted_buffer)
   1.171 +{
   1.172 +    char *s;
   1.173 +
   1.174 +    s = quoted_buffer;
   1.175 +    *s++ = '\'';
   1.176 +    switch (c) {
   1.177 +      case '\n': c = 'n'; goto one_char_escape;
   1.178 +      case '\r': c = 'r'; goto one_char_escape;
   1.179 +      case '\t': c = 't'; goto one_char_escape;
   1.180 +      case '\f': c = 't'; goto one_char_escape;
   1.181 +      case '\0': c = '0'; goto one_char_escape;
   1.182 +      case '\'': goto one_char_escape;
   1.183 +      one_char_escape:
   1.184 +        *s++ = '\\';
   1.185 +        break;
   1.186 +      default:
   1.187 +        if (!isprint(c)) {
   1.188 +            *s++ = '\\';
   1.189 +            *s++ = (char)('0' + (0x3 & (((unsigned char)c) >> 6)));
   1.190 +            *s++ = (char)('0' + (0x7 & (((unsigned char)c) >> 3)));
   1.191 +            c = (char)('0' + (0x7 & ((unsigned char)c)));
   1.192 +        }
   1.193 +    }
   1.194 +    *s++ = c;
   1.195 +    *s++ = '\'';
   1.196 +    *s = '\0';
   1.197 +    assert(s + 1 <= quoted_buffer + MIN_QUOTED_CHAR_BUFFER);
   1.198 +    return quoted_buffer;
   1.199 +}
   1.200 +
   1.201 +static void
   1.202 +nl(struct gen_opt *opt)
   1.203 +{
   1.204 +    putc('\n', opt->output);
   1.205 +}
   1.206 +
   1.207 +static void
   1.208 +indent(struct gen_opt *opt)
   1.209 +{
   1.210 +    unsigned n = opt->indent_level;
   1.211 +    while (n != 0) {
   1.212 +        --n;
   1.213 +        fputs("    ", opt->output);
   1.214 +    }
   1.215 +}
   1.216 +
   1.217 +static void
   1.218 +line(struct gen_opt *opt, const char *format, ...)
   1.219 +{
   1.220 +    va_list ap;
   1.221 +
   1.222 +    indent(opt);
   1.223 +    va_start(ap, format);
   1.224 +    vfprintf(opt->output, format, ap);
   1.225 +    va_end(ap);
   1.226 +    nl(opt);
   1.227 +}
   1.228 +
   1.229 +static void
   1.230 +generate_letter_switch_r(struct gen_opt *opt,
   1.231 +                         unsigned indexes[], unsigned nelem,
   1.232 +                         unsigned columns[], unsigned unprocessed_columns)
   1.233 +{
   1.234 +    char qbuf[MIN_QUOTED_CHAR_BUFFER];
   1.235 +
   1.236 +    assert(nelem != 0);
   1.237 +    if (nelem == 1) {
   1.238 +        unsigned kw_index = indexes[0];
   1.239 +        const char *keyword = keyword_list[kw_index];
   1.240 +
   1.241 +        if (unprocessed_columns == 0) {
   1.242 +            line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
   1.243 +        } else if (unprocessed_columns > opt->char_tail_test_threshold) {
   1.244 +            line(opt, "JSKW_TEST_GUESS(%u) /* %s */", kw_index, keyword);
   1.245 +        } else {
   1.246 +            unsigned i, column;
   1.247 +
   1.248 +            indent(opt); p(opt, "if (");
   1.249 +            for (i = 0; i != unprocessed_columns; ++i) {
   1.250 +                column = columns[i];
   1.251 +                qchar(keyword[column], qbuf);
   1.252 +                p(opt, "%sJSKW_AT(%u)==%s", (i == 0) ? "" : " && ",
   1.253 +                  column, qbuf);
   1.254 +            }
   1.255 +            p(opt, ") {"); nl(opt);
   1.256 +            ++opt->indent_level;
   1.257 +            line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
   1.258 +            --opt->indent_level;
   1.259 +            line(opt, "}");
   1.260 +            line(opt, "JSKW_NO_MATCH()");
   1.261 +        }
   1.262 +    } else {
   1.263 +        unsigned optimal_column_index, optimal_column;
   1.264 +        unsigned i;
   1.265 +        int use_if;
   1.266 +        char current;
   1.267 +
   1.268 +        assert(unprocessed_columns != 0);
   1.269 +        optimal_column_index = find_optimal_switch_column(opt, indexes, nelem,
   1.270 +                                                          columns,
   1.271 +                                                          unprocessed_columns,
   1.272 +                                                          &use_if);
   1.273 +        optimal_column = columns[optimal_column_index];
   1.274 +        columns[optimal_column_index] = columns[unprocessed_columns - 1];
   1.275 +
   1.276 +        if (!use_if)
   1.277 +            line(opt, "switch (JSKW_AT(%u)) {", optimal_column);
   1.278 +
   1.279 +        current = keyword_list[indexes[0]][optimal_column];
   1.280 +        for (i = 0; i != nelem;) {
   1.281 +            unsigned same_char_begin = i;
   1.282 +            char next = current;
   1.283 +
   1.284 +            for (++i; i != nelem; ++i) {
   1.285 +                next = keyword_list[indexes[i]][optimal_column];
   1.286 +                if (next != current)
   1.287 +                    break;
   1.288 +            }
   1.289 +            qchar(current, qbuf);
   1.290 +            if (use_if) {
   1.291 +                line(opt, "if (JSKW_AT(%u) == %s) {", optimal_column, qbuf);
   1.292 +            } else {
   1.293 +                line(opt, "  case %s:", qbuf);
   1.294 +            }
   1.295 +            ++opt->indent_level;
   1.296 +            generate_letter_switch_r(opt, indexes + same_char_begin,
   1.297 +                                     i - same_char_begin,
   1.298 +                                     columns, unprocessed_columns - 1);
   1.299 +            --opt->indent_level;
   1.300 +            if (use_if) {
   1.301 +                line(opt, "}");
   1.302 +            }
   1.303 +            current = next;
   1.304 +        }
   1.305 +
   1.306 +        if (!use_if) {
   1.307 +            line(opt, "}");
   1.308 +        }
   1.309 +
   1.310 +        columns[optimal_column_index] = optimal_column;
   1.311 +
   1.312 +        line(opt, "JSKW_NO_MATCH()");
   1.313 +    }
   1.314 +}
   1.315 +
   1.316 +static void
   1.317 +generate_letter_switch(struct gen_opt *opt,
   1.318 +                       unsigned indexes[], unsigned nelem,
   1.319 +                       unsigned current_length)
   1.320 +{
   1.321 +    unsigned *columns;
   1.322 +    unsigned i;
   1.323 +
   1.324 +    columns = (unsigned *) malloc(sizeof(columns[0]) * current_length);
   1.325 +    if (!columns) {
   1.326 +        perror("malloc");
   1.327 +        exit(EXIT_FAILURE);
   1.328 +    }
   1.329 +    for (i = 0; i != current_length; ++i) {
   1.330 +        columns[i] = i;
   1.331 +    }
   1.332 +    generate_letter_switch_r(opt, indexes, nelem, columns, current_length);
   1.333 +    free(columns);
   1.334 +}
   1.335 +
   1.336 +
   1.337 +static void
   1.338 +generate_switch(struct gen_opt *opt)
   1.339 +{
   1.340 +    unsigned *indexes;
   1.341 +    unsigned nlength;
   1.342 +    unsigned i, current;
   1.343 +    int use_if;
   1.344 +    unsigned nelem;
   1.345 +
   1.346 +    nelem = sizeof(keyword_list)/sizeof(keyword_list[0]);
   1.347 +
   1.348 +    line(opt, "/*");
   1.349 +    line(opt, " * Generating switch for the list of %u entries:", nelem);
   1.350 +    for (i = 0; i != nelem; ++i) {
   1.351 +        line(opt, " * %s", keyword_list[i]);
   1.352 +    }
   1.353 +    line(opt, " */");
   1.354 +
   1.355 +    indexes = (unsigned *) malloc(sizeof(indexes[0]) * nelem);
   1.356 +    if (!indexes) {
   1.357 +        perror("malloc");
   1.358 +        exit(EXIT_FAILURE);
   1.359 +    }
   1.360 +    for (i = 0; i != nelem; ++i)
   1.361 +        indexes[i] = i;
   1.362 +    qsort(indexes, nelem, sizeof(indexes[i]), length_comparator);
   1.363 +    nlength = count_different_lengths(indexes, nelem);
   1.364 +
   1.365 +    use_if = (nlength <= opt->use_if_threshold);
   1.366 +
   1.367 +    if (!use_if)
   1.368 +        line(opt, "switch (JSKW_LENGTH()) {");
   1.369 +
   1.370 +    current = (unsigned)strlen(keyword_list[indexes[0]]);
   1.371 +    for (i = 0; i != nelem;) {
   1.372 +        unsigned same_length_begin = i;
   1.373 +        unsigned next = current;
   1.374 +
   1.375 +        for (++i; i != nelem; ++i) {
   1.376 +            next = (unsigned)strlen(keyword_list[indexes[i]]);
   1.377 +            if (next != current)
   1.378 +                break;
   1.379 +        }
   1.380 +        if (use_if) {
   1.381 +            line(opt, "if (JSKW_LENGTH() == %u) {", current);
   1.382 +        } else {
   1.383 +            line(opt, "  case %u:", current);
   1.384 +        }
   1.385 +        ++opt->indent_level;
   1.386 +        generate_letter_switch(opt, indexes + same_length_begin,
   1.387 +                               i - same_length_begin,
   1.388 +                               current);
   1.389 +        --opt->indent_level;
   1.390 +        if (use_if) {
   1.391 +            line(opt, "}");
   1.392 +        }
   1.393 +        current = next;
   1.394 +    }
   1.395 +    if (!use_if)
   1.396 +        line(opt, "}");
   1.397 +    line(opt, "JSKW_NO_MATCH()");
   1.398 +    free(indexes);
   1.399 +}
   1.400 +
   1.401 +int main(int argc, char **argv)
   1.402 +{
   1.403 +    struct gen_opt opt;
   1.404 +
   1.405 +    if (argc < 2) {
   1.406 +        opt.output = stdout;
   1.407 +    } else {
   1.408 +        opt.output = fopen(argv[1], "w");
   1.409 +        if (!opt.output) {
   1.410 +            perror("fopen");
   1.411 +            exit(EXIT_FAILURE);
   1.412 +        }
   1.413 +    }
   1.414 +    opt.indent_level = 1;
   1.415 +    opt.use_if_threshold = 3;
   1.416 +    opt.char_tail_test_threshold = 4;
   1.417 +
   1.418 +    generate_switch(&opt);
   1.419 +
   1.420 +    if (opt.output != stdout) {
   1.421 +        if (fclose(opt.output)) {
   1.422 +            perror("fclose");
   1.423 +            exit(EXIT_FAILURE);
   1.424 +        }
   1.425 +    }
   1.426 +
   1.427 +    return EXIT_SUCCESS;
   1.428 +}

mercurial