1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/io/nsWildCard.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,439 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* vim:set ts=4 sts=4 sw=4 cin et: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +/* * 1.11 + * 1.12 + * 1.13 + * nsWildCard.cpp: shell-like wildcard match routines 1.14 + * 1.15 + * See nsIZipReader.findEntries documentation in nsIZipReader.idl for 1.16 + * a description of the syntax supported by the routines in this file. 1.17 + * 1.18 + * Rob McCool 1.19 + * 1.20 + */ 1.21 + 1.22 +#include "nsWildCard.h" 1.23 +#include "nsXPCOM.h" 1.24 +#include "nsCRTGlue.h" 1.25 +#include "nsCharTraits.h" 1.26 + 1.27 +/* -------------------- ASCII-specific character methods ------------------- */ 1.28 + 1.29 +typedef int static_assert_character_code_arrangement['a' > 'A' ? 1 : -1]; 1.30 + 1.31 +template<class T> 1.32 +static int 1.33 +alpha(T c) 1.34 +{ 1.35 + return ('a' <= c && c <= 'z') || 1.36 + ('A' <= c && c <= 'Z'); 1.37 +} 1.38 + 1.39 +template<class T> 1.40 +static int 1.41 +alphanumeric(T c) 1.42 +{ 1.43 + return ('0' <= c && c <= '9') || ::alpha(c); 1.44 +} 1.45 + 1.46 +template<class T> 1.47 +static int 1.48 +lower(T c) 1.49 +{ 1.50 + return ('A' <= c && c <= 'Z') ? c + ('a' - 'A') : c; 1.51 +} 1.52 + 1.53 +template<class T> 1.54 +static int 1.55 +upper(T c) 1.56 +{ 1.57 + return ('a' <= c && c <= 'z') ? c - ('a' - 'A') : c; 1.58 +} 1.59 + 1.60 +/* ----------------------------- _valid_subexp ---------------------------- */ 1.61 + 1.62 +template<class T> 1.63 +static int 1.64 +_valid_subexp(const T *expr, T stop1, T stop2) 1.65 +{ 1.66 + int x; 1.67 + int nsc = 0; /* Number of special characters */ 1.68 + int np; /* Number of pipe characters in union */ 1.69 + int tld = 0; /* Number of tilde characters */ 1.70 + 1.71 + for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) { 1.72 + switch(expr[x]) { 1.73 + case '~': 1.74 + if(tld) /* at most one exclusion */ 1.75 + return INVALID_SXP; 1.76 + if (stop1) /* no exclusions within unions */ 1.77 + return INVALID_SXP; 1.78 + if (!expr[x+1]) /* exclusion cannot be last character */ 1.79 + return INVALID_SXP; 1.80 + if (!x) /* exclusion cannot be first character */ 1.81 + return INVALID_SXP; 1.82 + ++tld; 1.83 + /* fall through */ 1.84 + case '*': 1.85 + case '?': 1.86 + case '$': 1.87 + ++nsc; 1.88 + break; 1.89 + case '[': 1.90 + ++nsc; 1.91 + if((!expr[++x]) || (expr[x] == ']')) 1.92 + return INVALID_SXP; 1.93 + for(; expr[x] && (expr[x] != ']'); ++x) { 1.94 + if(expr[x] == '\\' && !expr[++x]) 1.95 + return INVALID_SXP; 1.96 + } 1.97 + if(!expr[x]) 1.98 + return INVALID_SXP; 1.99 + break; 1.100 + case '(': 1.101 + ++nsc; 1.102 + if (stop1) /* no nested unions */ 1.103 + return INVALID_SXP; 1.104 + np = -1; 1.105 + do { 1.106 + int t = ::_valid_subexp(&expr[++x], T(')'), T('|')); 1.107 + if(t == 0 || t == INVALID_SXP) 1.108 + return INVALID_SXP; 1.109 + x+=t; 1.110 + if(!expr[x]) 1.111 + return INVALID_SXP; 1.112 + ++np; 1.113 + } while (expr[x] == '|' ); 1.114 + if(np < 1) /* must be at least one pipe */ 1.115 + return INVALID_SXP; 1.116 + break; 1.117 + case ')': 1.118 + case ']': 1.119 + case '|': 1.120 + return INVALID_SXP; 1.121 + case '\\': 1.122 + ++nsc; 1.123 + if(!expr[++x]) 1.124 + return INVALID_SXP; 1.125 + break; 1.126 + default: 1.127 + break; 1.128 + } 1.129 + } 1.130 + if((!stop1) && (!nsc)) /* must be at least one special character */ 1.131 + return NON_SXP; 1.132 + return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP); 1.133 +} 1.134 + 1.135 + 1.136 +template<class T> 1.137 +int 1.138 +NS_WildCardValid_(const T *expr) 1.139 +{ 1.140 + int x = ::_valid_subexp(expr, T('\0'), T('\0')); 1.141 + return (x < 0 ? x : VALID_SXP); 1.142 +} 1.143 + 1.144 +int 1.145 +NS_WildCardValid(const char *expr) 1.146 +{ 1.147 + return NS_WildCardValid_(expr); 1.148 +} 1.149 + 1.150 +int 1.151 +NS_WildCardValid(const char16_t *expr) 1.152 +{ 1.153 + return NS_WildCardValid_(expr); 1.154 +} 1.155 + 1.156 +/* ----------------------------- _shexp_match ----------------------------- */ 1.157 + 1.158 + 1.159 +#define MATCH 0 1.160 +#define NOMATCH 1 1.161 +#define ABORTED -1 1.162 + 1.163 +template<class T> 1.164 +static int 1.165 +_shexp_match(const T *str, const T *expr, bool case_insensitive, unsigned int level); 1.166 + 1.167 +/** 1.168 + * Count characters until we reach a NUL character or either of the 1.169 + * two delimiter characters, stop1 or stop2. If we encounter a bracketed 1.170 + * expression, look only for NUL or ']' inside it. Do not look for stop1 1.171 + * or stop2 inside it. Return ABORTED if bracketed expression is unterminated. 1.172 + * Handle all escaping. 1.173 + * Return index in input string of first stop found, or ABORTED if not found. 1.174 + * If "dest" is non-nullptr, copy counted characters to it and null terminate. 1.175 + */ 1.176 +template<class T> 1.177 +static int 1.178 +_scan_and_copy(const T *expr, T stop1, T stop2, T *dest) 1.179 +{ 1.180 + int sx; /* source index */ 1.181 + T cc; 1.182 + 1.183 + for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) { 1.184 + if (cc == '\\') { 1.185 + if (!expr[++sx]) 1.186 + return ABORTED; /* should be impossible */ 1.187 + } 1.188 + else if (cc == '[') { 1.189 + while ((cc = expr[++sx]) && cc != ']') { 1.190 + if(cc == '\\' && !expr[++sx]) 1.191 + return ABORTED; 1.192 + } 1.193 + if (!cc) 1.194 + return ABORTED; /* should be impossible */ 1.195 + } 1.196 + } 1.197 + if (dest && sx) { 1.198 + /* Copy all but the closing delimiter. */ 1.199 + memcpy(dest, expr, sx * sizeof(T)); 1.200 + dest[sx] = 0; 1.201 + } 1.202 + return cc ? sx : ABORTED; /* index of closing delimiter */ 1.203 +} 1.204 + 1.205 +/* On input, expr[0] is the opening parenthesis of a union. 1.206 + * See if any of the alternatives in the union matches as a pattern. 1.207 + * The strategy is to take each of the alternatives, in turn, and append 1.208 + * the rest of the expression (after the closing ')' that marks the end of 1.209 + * this union) to that alternative, and then see if the resultant expression 1.210 + * matches the input string. Repeat this until some alternative matches, 1.211 + * or we have an abort. 1.212 + */ 1.213 +template<class T> 1.214 +static int 1.215 +_handle_union(const T *str, const T *expr, bool case_insensitive, 1.216 + unsigned int level) 1.217 +{ 1.218 + int sx; /* source index */ 1.219 + int cp; /* source index of closing parenthesis */ 1.220 + int count; 1.221 + int ret = NOMATCH; 1.222 + T *e2; 1.223 + 1.224 + /* Find the closing parenthesis that ends this union in the expression */ 1.225 + cp = ::_scan_and_copy(expr, T(')'), T('\0'), static_cast<T*>(nullptr)); 1.226 + if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */ 1.227 + return ABORTED; 1.228 + ++cp; /* now index of char after closing parenthesis */ 1.229 + e2 = (T *) NS_Alloc((1 + nsCharTraits<T>::length(expr)) * sizeof(T)); 1.230 + if (!e2) 1.231 + return ABORTED; 1.232 + for (sx = 1; ; ++sx) { 1.233 + /* Here, expr[sx] is one character past the preceding '(' or '|'. */ 1.234 + /* Copy everything up to the next delimiter to e2 */ 1.235 + count = ::_scan_and_copy(expr + sx, T(')'), T('|'), e2); 1.236 + if (count == ABORTED || !count) { 1.237 + ret = ABORTED; 1.238 + break; 1.239 + } 1.240 + sx += count; 1.241 + /* Append everything after closing parenthesis to e2. This is safe. */ 1.242 + nsCharTraits<T>::copy(e2 + count, expr + cp, nsCharTraits<T>::length(expr + cp) + 1); 1.243 + ret = ::_shexp_match(str, e2, case_insensitive, level + 1); 1.244 + if (ret != NOMATCH || !expr[sx] || expr[sx] == ')') 1.245 + break; 1.246 + } 1.247 + NS_Free(e2); 1.248 + if (sx < 2) 1.249 + ret = ABORTED; 1.250 + return ret; 1.251 +} 1.252 + 1.253 +/* returns 1 if val is in range from start..end, case insensitive. */ 1.254 +static int 1.255 +_is_char_in_range(unsigned char start, unsigned char end, unsigned char val) 1.256 +{ 1.257 + char map[256]; 1.258 + memset(map, 0, sizeof map); 1.259 + while (start <= end) 1.260 + map[lower(start++)] = 1; 1.261 + return map[lower(val)]; 1.262 +} 1.263 + 1.264 +template<class T> 1.265 +static int 1.266 +_shexp_match(const T *str, const T *expr, bool case_insensitive, 1.267 + unsigned int level) 1.268 +{ 1.269 + int x; /* input string index */ 1.270 + int y; /* expression index */ 1.271 + int ret,neg; 1.272 + 1.273 + if (level > 20) /* Don't let the stack get too deep. */ 1.274 + return ABORTED; 1.275 + for(x = 0, y = 0; expr[y]; ++y, ++x) { 1.276 + if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) { 1.277 + return NOMATCH; 1.278 + } 1.279 + switch(expr[y]) { 1.280 + case '$': 1.281 + if(str[x]) 1.282 + return NOMATCH; 1.283 + --x; /* we don't want loop to increment x */ 1.284 + break; 1.285 + case '*': 1.286 + while(expr[++y] == '*'){} 1.287 + if(!expr[y]) 1.288 + return MATCH; 1.289 + while(str[x]) { 1.290 + ret = ::_shexp_match(&str[x++], &expr[y], case_insensitive, 1.291 + level + 1); 1.292 + switch(ret) { 1.293 + case NOMATCH: 1.294 + continue; 1.295 + case ABORTED: 1.296 + return ABORTED; 1.297 + default: 1.298 + return MATCH; 1.299 + } 1.300 + } 1.301 + if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x])) 1.302 + return MATCH; 1.303 + else 1.304 + return NOMATCH; 1.305 + case '[': { 1.306 + T start, end = 0; 1.307 + int i; 1.308 + neg = ((expr[++y] == '^') && (expr[y+1] != ']')); 1.309 + if (neg) 1.310 + ++y; 1.311 + i = y; 1.312 + start = expr[i++]; 1.313 + if (start == '\\') 1.314 + start = expr[i++]; 1.315 + if (::alphanumeric(start) && expr[i++] == '-') { 1.316 + end = expr[i++]; 1.317 + if (end == '\\') 1.318 + end = expr[i++]; 1.319 + } 1.320 + if (::alphanumeric(end) && expr[i] == ']') { 1.321 + /* This is a range form: a-b */ 1.322 + T val = str[x]; 1.323 + if (end < start) { /* swap them */ 1.324 + T tmp = end; 1.325 + end = start; 1.326 + start = tmp; 1.327 + } 1.328 + if (case_insensitive && ::alpha(val)) { 1.329 + val = ::_is_char_in_range((unsigned char) start, 1.330 + (unsigned char) end, 1.331 + (unsigned char) val); 1.332 + if (neg == val) 1.333 + return NOMATCH; 1.334 + } 1.335 + else if (neg != ((val < start) || (val > end))) { 1.336 + return NOMATCH; 1.337 + } 1.338 + y = i; 1.339 + } 1.340 + else { 1.341 + /* Not range form */ 1.342 + int matched = 0; 1.343 + for (; expr[y] != ']'; y++) { 1.344 + if (expr[y] == '\\') 1.345 + ++y; 1.346 + if(case_insensitive) 1.347 + matched |= (::upper(str[x]) == ::upper(expr[y])); 1.348 + else 1.349 + matched |= (str[x] == expr[y]); 1.350 + } 1.351 + if (neg == matched) 1.352 + return NOMATCH; 1.353 + } 1.354 + } 1.355 + break; 1.356 + case '(': 1.357 + if (!expr[y+1]) 1.358 + return ABORTED; 1.359 + return ::_handle_union(&str[x], &expr[y], case_insensitive, level + 1); 1.360 + case '?': 1.361 + break; 1.362 + case ')': 1.363 + case ']': 1.364 + case '|': 1.365 + return ABORTED; 1.366 + case '\\': 1.367 + ++y; 1.368 + /* fall through */ 1.369 + default: 1.370 + if(case_insensitive) { 1.371 + if(::upper(str[x]) != ::upper(expr[y])) 1.372 + return NOMATCH; 1.373 + } 1.374 + else { 1.375 + if(str[x] != expr[y]) 1.376 + return NOMATCH; 1.377 + } 1.378 + break; 1.379 + } 1.380 + } 1.381 + return (str[x] ? NOMATCH : MATCH); 1.382 +} 1.383 + 1.384 + 1.385 +template<class T> 1.386 +static int 1.387 +ns_WildCardMatch(const T *str, const T *xp, bool case_insensitive) 1.388 +{ 1.389 + T *expr = nullptr; 1.390 + int x, ret = MATCH; 1.391 + 1.392 + if (!nsCharTraits<T>::find(xp, nsCharTraits<T>::length(xp), T('~'))) 1.393 + return ::_shexp_match(str, xp, case_insensitive, 0); 1.394 + 1.395 + expr = (T *) NS_Alloc((nsCharTraits<T>::length(xp) + 1) * sizeof(T)); 1.396 + if(!expr) 1.397 + return NOMATCH; 1.398 + memcpy(expr, xp, (nsCharTraits<T>::length(xp) + 1) * sizeof(T)); 1.399 + 1.400 + x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(nullptr)); 1.401 + if (x != ABORTED && expr[x] == '~') { 1.402 + expr[x++] = '\0'; 1.403 + ret = ::_shexp_match(str, &expr[x], case_insensitive, 0); 1.404 + switch (ret) { 1.405 + case NOMATCH: ret = MATCH; break; 1.406 + case MATCH: ret = NOMATCH; break; 1.407 + default: break; 1.408 + } 1.409 + } 1.410 + if (ret == MATCH) 1.411 + ret = ::_shexp_match(str, expr, case_insensitive, 0); 1.412 + 1.413 + NS_Free(expr); 1.414 + return ret; 1.415 +} 1.416 + 1.417 +template<class T> 1.418 +int 1.419 +NS_WildCardMatch_(const T *str, const T *expr, bool case_insensitive) 1.420 +{ 1.421 + int is_valid = NS_WildCardValid(expr); 1.422 + switch(is_valid) { 1.423 + case INVALID_SXP: 1.424 + return -1; 1.425 + default: 1.426 + return ::ns_WildCardMatch(str, expr, case_insensitive); 1.427 + } 1.428 +} 1.429 + 1.430 +int 1.431 +NS_WildCardMatch(const char *str, const char *xp, 1.432 + bool case_insensitive) 1.433 +{ 1.434 + return NS_WildCardMatch_(str, xp, case_insensitive); 1.435 +} 1.436 + 1.437 +int 1.438 +NS_WildCardMatch(const char16_t *str, const char16_t *xp, 1.439 + bool case_insensitive) 1.440 +{ 1.441 + return NS_WildCardMatch_(str, xp, case_insensitive); 1.442 +}