xpcom/io/nsWildCard.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xpcom/io/nsWildCard.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,439 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* vim:set ts=4 sts=4 sw=4 cin et: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +/* *
    1.11 + *
    1.12 + *
    1.13 + * nsWildCard.cpp: shell-like wildcard match routines
    1.14 + *
    1.15 + * See nsIZipReader.findEntries documentation in nsIZipReader.idl for
    1.16 + * a description of the syntax supported by the routines in this file.
    1.17 + *
    1.18 + * Rob McCool
    1.19 + *
    1.20 + */
    1.21 +
    1.22 +#include "nsWildCard.h"
    1.23 +#include "nsXPCOM.h"
    1.24 +#include "nsCRTGlue.h"
    1.25 +#include "nsCharTraits.h"
    1.26 +
    1.27 +/* -------------------- ASCII-specific character methods ------------------- */
    1.28 +
    1.29 +typedef int static_assert_character_code_arrangement['a' > 'A' ? 1 : -1];
    1.30 +
    1.31 +template<class T>
    1.32 +static int
    1.33 +alpha(T c)
    1.34 +{
    1.35 +    return ('a' <= c && c <= 'z') ||
    1.36 +           ('A' <= c && c <= 'Z');
    1.37 +}
    1.38 +
    1.39 +template<class T>
    1.40 +static int
    1.41 +alphanumeric(T c)
    1.42 +{
    1.43 +    return ('0' <= c && c <= '9') || ::alpha(c);
    1.44 +}
    1.45 +
    1.46 +template<class T>
    1.47 +static int
    1.48 +lower(T c)
    1.49 +{
    1.50 +    return ('A' <= c && c <= 'Z') ? c + ('a' - 'A') : c;
    1.51 +}
    1.52 +
    1.53 +template<class T>
    1.54 +static int
    1.55 +upper(T c)
    1.56 +{
    1.57 +    return ('a' <= c && c <= 'z') ? c - ('a' - 'A') : c;
    1.58 +}
    1.59 +
    1.60 +/* ----------------------------- _valid_subexp ---------------------------- */
    1.61 +
    1.62 +template<class T>
    1.63 +static int
    1.64 +_valid_subexp(const T *expr, T stop1, T stop2)
    1.65 +{
    1.66 +    int x;
    1.67 +    int nsc = 0;     /* Number of special characters */
    1.68 +    int np;          /* Number of pipe characters in union */
    1.69 +    int tld = 0;     /* Number of tilde characters */
    1.70 + 
    1.71 +    for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) {
    1.72 +        switch(expr[x]) {
    1.73 +        case '~':
    1.74 +            if(tld)                 /* at most one exclusion */
    1.75 +                return INVALID_SXP;
    1.76 +            if (stop1)              /* no exclusions within unions */
    1.77 +                return INVALID_SXP;
    1.78 +            if (!expr[x+1])          /* exclusion cannot be last character */
    1.79 +                return INVALID_SXP;
    1.80 +            if (!x)                 /* exclusion cannot be first character */
    1.81 +                return INVALID_SXP;
    1.82 +            ++tld;
    1.83 +            /* fall through */
    1.84 +        case '*':
    1.85 +        case '?':
    1.86 +        case '$':
    1.87 +            ++nsc;
    1.88 +            break;
    1.89 +        case '[':
    1.90 +            ++nsc;
    1.91 +            if((!expr[++x]) || (expr[x] == ']'))
    1.92 +                return INVALID_SXP;
    1.93 +            for(; expr[x] && (expr[x] != ']'); ++x) {
    1.94 +                if(expr[x] == '\\' && !expr[++x])
    1.95 +                    return INVALID_SXP;
    1.96 +            }
    1.97 +            if(!expr[x])
    1.98 +                return INVALID_SXP;
    1.99 +            break;
   1.100 +        case '(':
   1.101 +            ++nsc;
   1.102 +            if (stop1)                  /* no nested unions */
   1.103 +                return INVALID_SXP;
   1.104 +            np = -1;
   1.105 +            do {
   1.106 +                int t = ::_valid_subexp(&expr[++x], T(')'), T('|'));
   1.107 +                if(t == 0 || t == INVALID_SXP)
   1.108 +                    return INVALID_SXP;
   1.109 +                x+=t;
   1.110 +                if(!expr[x])
   1.111 +                    return INVALID_SXP;
   1.112 +                ++np;
   1.113 +            } while (expr[x] == '|' );
   1.114 +            if(np < 1)  /* must be at least one pipe */
   1.115 +                return INVALID_SXP;
   1.116 +            break;
   1.117 +        case ')':
   1.118 +        case ']':
   1.119 +        case '|':
   1.120 +            return INVALID_SXP;
   1.121 +        case '\\':
   1.122 +            ++nsc;
   1.123 +            if(!expr[++x])
   1.124 +                return INVALID_SXP;
   1.125 +            break;
   1.126 +        default:
   1.127 +            break;
   1.128 +        }
   1.129 +    }
   1.130 +    if((!stop1) && (!nsc)) /* must be at least one special character */
   1.131 +         return NON_SXP;
   1.132 +    return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP);
   1.133 +}
   1.134 +
   1.135 +
   1.136 +template<class T>
   1.137 +int
   1.138 +NS_WildCardValid_(const T *expr)
   1.139 +{
   1.140 +    int x = ::_valid_subexp(expr, T('\0'), T('\0'));
   1.141 +    return (x < 0 ? x : VALID_SXP);
   1.142 +}
   1.143 +
   1.144 +int
   1.145 +NS_WildCardValid(const char *expr)
   1.146 +{
   1.147 +    return NS_WildCardValid_(expr);
   1.148 +}
   1.149 +
   1.150 +int
   1.151 +NS_WildCardValid(const char16_t *expr)
   1.152 +{
   1.153 +    return NS_WildCardValid_(expr);
   1.154 +}
   1.155 +
   1.156 +/* ----------------------------- _shexp_match ----------------------------- */
   1.157 +
   1.158 +
   1.159 +#define MATCH 0
   1.160 +#define NOMATCH 1
   1.161 +#define ABORTED -1
   1.162 +
   1.163 +template<class T>
   1.164 +static int
   1.165 +_shexp_match(const T *str, const T *expr, bool case_insensitive, unsigned int level);
   1.166 +
   1.167 +/**
   1.168 + * Count characters until we reach a NUL character or either of the
   1.169 + * two delimiter characters, stop1 or stop2.  If we encounter a bracketed
   1.170 + * expression, look only for NUL or ']' inside it.  Do not look for stop1
   1.171 + * or stop2 inside it. Return ABORTED if bracketed expression is unterminated.
   1.172 + * Handle all escaping.
   1.173 + * Return index in input string of first stop found, or ABORTED if not found.
   1.174 + * If "dest" is non-nullptr, copy counted characters to it and null terminate.
   1.175 + */
   1.176 +template<class T>
   1.177 +static int
   1.178 +_scan_and_copy(const T *expr, T stop1, T stop2, T *dest)
   1.179 +{
   1.180 +    int sx;     /* source index */
   1.181 +    T cc;
   1.182 +
   1.183 +    for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) {
   1.184 +        if (cc == '\\') {
   1.185 +            if (!expr[++sx])
   1.186 +                return ABORTED; /* should be impossible */
   1.187 +        }
   1.188 +        else if (cc == '[') {
   1.189 +            while ((cc = expr[++sx]) && cc != ']') {
   1.190 +                if(cc == '\\' && !expr[++sx])
   1.191 +                    return ABORTED;
   1.192 +            }
   1.193 +            if (!cc)
   1.194 +                return ABORTED; /* should be impossible */
   1.195 +        }
   1.196 +    }
   1.197 +    if (dest && sx) {
   1.198 +        /* Copy all but the closing delimiter. */
   1.199 +        memcpy(dest, expr, sx * sizeof(T));
   1.200 +        dest[sx] = 0;
   1.201 +    }
   1.202 +    return cc ? sx : ABORTED; /* index of closing delimiter */
   1.203 +}
   1.204 +
   1.205 +/* On input, expr[0] is the opening parenthesis of a union.
   1.206 + * See if any of the alternatives in the union matches as a pattern.
   1.207 + * The strategy is to take each of the alternatives, in turn, and append
   1.208 + * the rest of the expression (after the closing ')' that marks the end of
   1.209 + * this union) to that alternative, and then see if the resultant expression
   1.210 + * matches the input string.  Repeat this until some alternative matches,
   1.211 + * or we have an abort.
   1.212 + */
   1.213 +template<class T>
   1.214 +static int
   1.215 +_handle_union(const T *str, const T *expr, bool case_insensitive,
   1.216 +              unsigned int level)
   1.217 +{
   1.218 +    int sx;              /* source index */
   1.219 +    int cp;              /* source index of closing parenthesis */
   1.220 +    int count;
   1.221 +    int ret   = NOMATCH;
   1.222 +    T *e2;
   1.223 +
   1.224 +    /* Find the closing parenthesis that ends this union in the expression */
   1.225 +    cp = ::_scan_and_copy(expr, T(')'), T('\0'), static_cast<T*>(nullptr));
   1.226 +    if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */
   1.227 +        return ABORTED;
   1.228 +    ++cp;                /* now index of char after closing parenthesis */
   1.229 +    e2 = (T *) NS_Alloc((1 + nsCharTraits<T>::length(expr)) * sizeof(T));
   1.230 +    if (!e2)
   1.231 +        return ABORTED;
   1.232 +    for (sx = 1; ; ++sx) {
   1.233 +        /* Here, expr[sx] is one character past the preceding '(' or '|'. */
   1.234 +        /* Copy everything up to the next delimiter to e2 */
   1.235 +        count = ::_scan_and_copy(expr + sx, T(')'), T('|'), e2);
   1.236 +        if (count == ABORTED || !count) {
   1.237 +            ret = ABORTED;
   1.238 +            break;
   1.239 +        }
   1.240 +        sx += count;
   1.241 +        /* Append everything after closing parenthesis to e2. This is safe. */
   1.242 +        nsCharTraits<T>::copy(e2 + count, expr + cp, nsCharTraits<T>::length(expr + cp) + 1);
   1.243 +        ret = ::_shexp_match(str, e2, case_insensitive, level + 1);
   1.244 +        if (ret != NOMATCH || !expr[sx] || expr[sx] == ')')
   1.245 +            break;
   1.246 +    }
   1.247 +    NS_Free(e2);
   1.248 +    if (sx < 2)
   1.249 +        ret = ABORTED;
   1.250 +    return ret;
   1.251 +}
   1.252 +
   1.253 +/* returns 1 if val is in range from start..end, case insensitive. */
   1.254 +static int
   1.255 +_is_char_in_range(unsigned char start, unsigned char end, unsigned char val)
   1.256 +{
   1.257 +    char map[256];
   1.258 +    memset(map, 0, sizeof map);
   1.259 +    while (start <= end)
   1.260 +        map[lower(start++)] = 1;
   1.261 +    return map[lower(val)];
   1.262 +}
   1.263 +
   1.264 +template<class T>
   1.265 +static int
   1.266 +_shexp_match(const T *str, const T *expr, bool case_insensitive,
   1.267 +             unsigned int level)
   1.268 +{
   1.269 +    int x;   /* input string index */
   1.270 +    int y;   /* expression index */
   1.271 +    int ret,neg;
   1.272 +
   1.273 +    if (level > 20)      /* Don't let the stack get too deep. */
   1.274 +        return ABORTED;
   1.275 +    for(x = 0, y = 0; expr[y]; ++y, ++x) {
   1.276 +        if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) {
   1.277 +            return NOMATCH;
   1.278 +        }
   1.279 +        switch(expr[y]) {
   1.280 +        case '$':
   1.281 +            if(str[x])
   1.282 +                return NOMATCH;
   1.283 +            --x;                 /* we don't want loop to increment x */
   1.284 +            break;
   1.285 +        case '*':
   1.286 +            while(expr[++y] == '*'){}
   1.287 +            if(!expr[y])
   1.288 +                return MATCH;
   1.289 +            while(str[x]) {
   1.290 +                ret = ::_shexp_match(&str[x++], &expr[y], case_insensitive,
   1.291 +                                     level + 1);
   1.292 +                switch(ret) {
   1.293 +                case NOMATCH:
   1.294 +                    continue;
   1.295 +                case ABORTED:
   1.296 +                    return ABORTED;
   1.297 +                default:
   1.298 +                    return MATCH;
   1.299 +                }
   1.300 +            }
   1.301 +            if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x]))
   1.302 +                return MATCH;
   1.303 +            else
   1.304 +                return NOMATCH;
   1.305 +        case '[': {
   1.306 +            T start, end = 0;
   1.307 +            int i;
   1.308 +            neg = ((expr[++y] == '^') && (expr[y+1] != ']'));
   1.309 +            if (neg)
   1.310 +                ++y;
   1.311 +            i = y;
   1.312 +            start = expr[i++];
   1.313 +            if (start == '\\')
   1.314 +                start = expr[i++];
   1.315 +            if (::alphanumeric(start) && expr[i++] == '-') {
   1.316 +                end = expr[i++];
   1.317 +                if (end == '\\')
   1.318 +                    end = expr[i++];
   1.319 +            }
   1.320 +            if (::alphanumeric(end) && expr[i] == ']') {
   1.321 +                /* This is a range form: a-b */
   1.322 +                T val = str[x];
   1.323 +                if (end < start) { /* swap them */
   1.324 +                    T tmp = end;
   1.325 +                    end = start;
   1.326 +                    start = tmp;
   1.327 +                }
   1.328 +                if (case_insensitive && ::alpha(val)) {
   1.329 +                    val = ::_is_char_in_range((unsigned char) start,
   1.330 +                                              (unsigned char) end,
   1.331 +                                              (unsigned char) val);
   1.332 +                    if (neg == val)
   1.333 +                        return NOMATCH;
   1.334 +                }
   1.335 +                else if (neg != ((val < start) || (val > end))) {
   1.336 +                    return NOMATCH;
   1.337 +                }
   1.338 +                y = i;
   1.339 +            }
   1.340 +            else {
   1.341 +                /* Not range form */
   1.342 +                int matched = 0;
   1.343 +                for (; expr[y] != ']'; y++) {
   1.344 +                    if (expr[y] == '\\')
   1.345 +                        ++y;
   1.346 +                    if(case_insensitive)
   1.347 +                        matched |= (::upper(str[x]) == ::upper(expr[y]));
   1.348 +                    else
   1.349 +                        matched |= (str[x] == expr[y]);
   1.350 +                }
   1.351 +                if (neg == matched)
   1.352 +                    return NOMATCH;
   1.353 +            }
   1.354 +        }
   1.355 +        break;
   1.356 +        case '(':
   1.357 +            if (!expr[y+1])
   1.358 +                return ABORTED;
   1.359 +            return ::_handle_union(&str[x], &expr[y], case_insensitive, level + 1);
   1.360 +        case '?':
   1.361 +            break;
   1.362 +        case ')':
   1.363 +        case ']':
   1.364 +        case '|':
   1.365 +            return ABORTED;
   1.366 +        case '\\':
   1.367 +            ++y;
   1.368 +            /* fall through */
   1.369 +        default:
   1.370 +            if(case_insensitive) {
   1.371 +                if(::upper(str[x]) != ::upper(expr[y]))
   1.372 +                    return NOMATCH;
   1.373 +            }
   1.374 +            else {
   1.375 +                if(str[x] != expr[y])
   1.376 +                    return NOMATCH;
   1.377 +            }
   1.378 +            break;
   1.379 +        }
   1.380 +    }
   1.381 +    return (str[x] ? NOMATCH : MATCH);
   1.382 +}
   1.383 +
   1.384 +
   1.385 +template<class T>
   1.386 +static int
   1.387 +ns_WildCardMatch(const T *str, const T *xp, bool case_insensitive)
   1.388 +{
   1.389 +    T *expr = nullptr;
   1.390 +    int x, ret = MATCH;
   1.391 +
   1.392 +    if (!nsCharTraits<T>::find(xp, nsCharTraits<T>::length(xp), T('~')))
   1.393 +        return ::_shexp_match(str, xp, case_insensitive, 0);
   1.394 +
   1.395 +    expr = (T *) NS_Alloc((nsCharTraits<T>::length(xp) + 1) * sizeof(T));
   1.396 +    if(!expr)
   1.397 +        return NOMATCH;
   1.398 +    memcpy(expr, xp, (nsCharTraits<T>::length(xp) + 1) * sizeof(T));
   1.399 +
   1.400 +    x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(nullptr));
   1.401 +    if (x != ABORTED && expr[x] == '~') {
   1.402 +        expr[x++] = '\0';
   1.403 +        ret = ::_shexp_match(str, &expr[x], case_insensitive, 0);
   1.404 +        switch (ret) {
   1.405 +        case NOMATCH: ret = MATCH;   break;
   1.406 +        case MATCH:   ret = NOMATCH; break;
   1.407 +        default:                     break;
   1.408 +        }
   1.409 +    }
   1.410 +    if (ret == MATCH)
   1.411 +        ret = ::_shexp_match(str, expr, case_insensitive, 0);
   1.412 +
   1.413 +    NS_Free(expr);
   1.414 +    return ret;
   1.415 +}
   1.416 +
   1.417 +template<class T>
   1.418 +int
   1.419 +NS_WildCardMatch_(const T *str, const T *expr, bool case_insensitive)
   1.420 +{
   1.421 +    int is_valid = NS_WildCardValid(expr);
   1.422 +    switch(is_valid) {
   1.423 +        case INVALID_SXP:
   1.424 +            return -1;
   1.425 +        default:
   1.426 +            return ::ns_WildCardMatch(str, expr, case_insensitive);
   1.427 +    }
   1.428 +}
   1.429 +
   1.430 +int
   1.431 +NS_WildCardMatch(const char *str, const char *xp,
   1.432 +                 bool case_insensitive)
   1.433 +{
   1.434 +    return NS_WildCardMatch_(str, xp, case_insensitive);
   1.435 +}
   1.436 +
   1.437 +int
   1.438 +NS_WildCardMatch(const char16_t *str, const char16_t *xp,
   1.439 +                 bool case_insensitive)
   1.440 +{
   1.441 +    return NS_WildCardMatch_(str, xp, case_insensitive);
   1.442 +}

mercurial