Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* vim:set ts=4 sts=4 sw=4 cin et: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* *
8 *
9 *
10 * nsWildCard.cpp: shell-like wildcard match routines
11 *
12 * See nsIZipReader.findEntries documentation in nsIZipReader.idl for
13 * a description of the syntax supported by the routines in this file.
14 *
15 * Rob McCool
16 *
17 */
19 #include "nsWildCard.h"
20 #include "nsXPCOM.h"
21 #include "nsCRTGlue.h"
22 #include "nsCharTraits.h"
24 /* -------------------- ASCII-specific character methods ------------------- */
26 typedef int static_assert_character_code_arrangement['a' > 'A' ? 1 : -1];
28 template<class T>
29 static int
30 alpha(T c)
31 {
32 return ('a' <= c && c <= 'z') ||
33 ('A' <= c && c <= 'Z');
34 }
36 template<class T>
37 static int
38 alphanumeric(T c)
39 {
40 return ('0' <= c && c <= '9') || ::alpha(c);
41 }
43 template<class T>
44 static int
45 lower(T c)
46 {
47 return ('A' <= c && c <= 'Z') ? c + ('a' - 'A') : c;
48 }
50 template<class T>
51 static int
52 upper(T c)
53 {
54 return ('a' <= c && c <= 'z') ? c - ('a' - 'A') : c;
55 }
57 /* ----------------------------- _valid_subexp ---------------------------- */
59 template<class T>
60 static int
61 _valid_subexp(const T *expr, T stop1, T stop2)
62 {
63 int x;
64 int nsc = 0; /* Number of special characters */
65 int np; /* Number of pipe characters in union */
66 int tld = 0; /* Number of tilde characters */
68 for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) {
69 switch(expr[x]) {
70 case '~':
71 if(tld) /* at most one exclusion */
72 return INVALID_SXP;
73 if (stop1) /* no exclusions within unions */
74 return INVALID_SXP;
75 if (!expr[x+1]) /* exclusion cannot be last character */
76 return INVALID_SXP;
77 if (!x) /* exclusion cannot be first character */
78 return INVALID_SXP;
79 ++tld;
80 /* fall through */
81 case '*':
82 case '?':
83 case '$':
84 ++nsc;
85 break;
86 case '[':
87 ++nsc;
88 if((!expr[++x]) || (expr[x] == ']'))
89 return INVALID_SXP;
90 for(; expr[x] && (expr[x] != ']'); ++x) {
91 if(expr[x] == '\\' && !expr[++x])
92 return INVALID_SXP;
93 }
94 if(!expr[x])
95 return INVALID_SXP;
96 break;
97 case '(':
98 ++nsc;
99 if (stop1) /* no nested unions */
100 return INVALID_SXP;
101 np = -1;
102 do {
103 int t = ::_valid_subexp(&expr[++x], T(')'), T('|'));
104 if(t == 0 || t == INVALID_SXP)
105 return INVALID_SXP;
106 x+=t;
107 if(!expr[x])
108 return INVALID_SXP;
109 ++np;
110 } while (expr[x] == '|' );
111 if(np < 1) /* must be at least one pipe */
112 return INVALID_SXP;
113 break;
114 case ')':
115 case ']':
116 case '|':
117 return INVALID_SXP;
118 case '\\':
119 ++nsc;
120 if(!expr[++x])
121 return INVALID_SXP;
122 break;
123 default:
124 break;
125 }
126 }
127 if((!stop1) && (!nsc)) /* must be at least one special character */
128 return NON_SXP;
129 return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP);
130 }
133 template<class T>
134 int
135 NS_WildCardValid_(const T *expr)
136 {
137 int x = ::_valid_subexp(expr, T('\0'), T('\0'));
138 return (x < 0 ? x : VALID_SXP);
139 }
141 int
142 NS_WildCardValid(const char *expr)
143 {
144 return NS_WildCardValid_(expr);
145 }
147 int
148 NS_WildCardValid(const char16_t *expr)
149 {
150 return NS_WildCardValid_(expr);
151 }
153 /* ----------------------------- _shexp_match ----------------------------- */
156 #define MATCH 0
157 #define NOMATCH 1
158 #define ABORTED -1
160 template<class T>
161 static int
162 _shexp_match(const T *str, const T *expr, bool case_insensitive, unsigned int level);
164 /**
165 * Count characters until we reach a NUL character or either of the
166 * two delimiter characters, stop1 or stop2. If we encounter a bracketed
167 * expression, look only for NUL or ']' inside it. Do not look for stop1
168 * or stop2 inside it. Return ABORTED if bracketed expression is unterminated.
169 * Handle all escaping.
170 * Return index in input string of first stop found, or ABORTED if not found.
171 * If "dest" is non-nullptr, copy counted characters to it and null terminate.
172 */
173 template<class T>
174 static int
175 _scan_and_copy(const T *expr, T stop1, T stop2, T *dest)
176 {
177 int sx; /* source index */
178 T cc;
180 for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) {
181 if (cc == '\\') {
182 if (!expr[++sx])
183 return ABORTED; /* should be impossible */
184 }
185 else if (cc == '[') {
186 while ((cc = expr[++sx]) && cc != ']') {
187 if(cc == '\\' && !expr[++sx])
188 return ABORTED;
189 }
190 if (!cc)
191 return ABORTED; /* should be impossible */
192 }
193 }
194 if (dest && sx) {
195 /* Copy all but the closing delimiter. */
196 memcpy(dest, expr, sx * sizeof(T));
197 dest[sx] = 0;
198 }
199 return cc ? sx : ABORTED; /* index of closing delimiter */
200 }
202 /* On input, expr[0] is the opening parenthesis of a union.
203 * See if any of the alternatives in the union matches as a pattern.
204 * The strategy is to take each of the alternatives, in turn, and append
205 * the rest of the expression (after the closing ')' that marks the end of
206 * this union) to that alternative, and then see if the resultant expression
207 * matches the input string. Repeat this until some alternative matches,
208 * or we have an abort.
209 */
210 template<class T>
211 static int
212 _handle_union(const T *str, const T *expr, bool case_insensitive,
213 unsigned int level)
214 {
215 int sx; /* source index */
216 int cp; /* source index of closing parenthesis */
217 int count;
218 int ret = NOMATCH;
219 T *e2;
221 /* Find the closing parenthesis that ends this union in the expression */
222 cp = ::_scan_and_copy(expr, T(')'), T('\0'), static_cast<T*>(nullptr));
223 if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */
224 return ABORTED;
225 ++cp; /* now index of char after closing parenthesis */
226 e2 = (T *) NS_Alloc((1 + nsCharTraits<T>::length(expr)) * sizeof(T));
227 if (!e2)
228 return ABORTED;
229 for (sx = 1; ; ++sx) {
230 /* Here, expr[sx] is one character past the preceding '(' or '|'. */
231 /* Copy everything up to the next delimiter to e2 */
232 count = ::_scan_and_copy(expr + sx, T(')'), T('|'), e2);
233 if (count == ABORTED || !count) {
234 ret = ABORTED;
235 break;
236 }
237 sx += count;
238 /* Append everything after closing parenthesis to e2. This is safe. */
239 nsCharTraits<T>::copy(e2 + count, expr + cp, nsCharTraits<T>::length(expr + cp) + 1);
240 ret = ::_shexp_match(str, e2, case_insensitive, level + 1);
241 if (ret != NOMATCH || !expr[sx] || expr[sx] == ')')
242 break;
243 }
244 NS_Free(e2);
245 if (sx < 2)
246 ret = ABORTED;
247 return ret;
248 }
250 /* returns 1 if val is in range from start..end, case insensitive. */
251 static int
252 _is_char_in_range(unsigned char start, unsigned char end, unsigned char val)
253 {
254 char map[256];
255 memset(map, 0, sizeof map);
256 while (start <= end)
257 map[lower(start++)] = 1;
258 return map[lower(val)];
259 }
261 template<class T>
262 static int
263 _shexp_match(const T *str, const T *expr, bool case_insensitive,
264 unsigned int level)
265 {
266 int x; /* input string index */
267 int y; /* expression index */
268 int ret,neg;
270 if (level > 20) /* Don't let the stack get too deep. */
271 return ABORTED;
272 for(x = 0, y = 0; expr[y]; ++y, ++x) {
273 if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) {
274 return NOMATCH;
275 }
276 switch(expr[y]) {
277 case '$':
278 if(str[x])
279 return NOMATCH;
280 --x; /* we don't want loop to increment x */
281 break;
282 case '*':
283 while(expr[++y] == '*'){}
284 if(!expr[y])
285 return MATCH;
286 while(str[x]) {
287 ret = ::_shexp_match(&str[x++], &expr[y], case_insensitive,
288 level + 1);
289 switch(ret) {
290 case NOMATCH:
291 continue;
292 case ABORTED:
293 return ABORTED;
294 default:
295 return MATCH;
296 }
297 }
298 if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x]))
299 return MATCH;
300 else
301 return NOMATCH;
302 case '[': {
303 T start, end = 0;
304 int i;
305 neg = ((expr[++y] == '^') && (expr[y+1] != ']'));
306 if (neg)
307 ++y;
308 i = y;
309 start = expr[i++];
310 if (start == '\\')
311 start = expr[i++];
312 if (::alphanumeric(start) && expr[i++] == '-') {
313 end = expr[i++];
314 if (end == '\\')
315 end = expr[i++];
316 }
317 if (::alphanumeric(end) && expr[i] == ']') {
318 /* This is a range form: a-b */
319 T val = str[x];
320 if (end < start) { /* swap them */
321 T tmp = end;
322 end = start;
323 start = tmp;
324 }
325 if (case_insensitive && ::alpha(val)) {
326 val = ::_is_char_in_range((unsigned char) start,
327 (unsigned char) end,
328 (unsigned char) val);
329 if (neg == val)
330 return NOMATCH;
331 }
332 else if (neg != ((val < start) || (val > end))) {
333 return NOMATCH;
334 }
335 y = i;
336 }
337 else {
338 /* Not range form */
339 int matched = 0;
340 for (; expr[y] != ']'; y++) {
341 if (expr[y] == '\\')
342 ++y;
343 if(case_insensitive)
344 matched |= (::upper(str[x]) == ::upper(expr[y]));
345 else
346 matched |= (str[x] == expr[y]);
347 }
348 if (neg == matched)
349 return NOMATCH;
350 }
351 }
352 break;
353 case '(':
354 if (!expr[y+1])
355 return ABORTED;
356 return ::_handle_union(&str[x], &expr[y], case_insensitive, level + 1);
357 case '?':
358 break;
359 case ')':
360 case ']':
361 case '|':
362 return ABORTED;
363 case '\\':
364 ++y;
365 /* fall through */
366 default:
367 if(case_insensitive) {
368 if(::upper(str[x]) != ::upper(expr[y]))
369 return NOMATCH;
370 }
371 else {
372 if(str[x] != expr[y])
373 return NOMATCH;
374 }
375 break;
376 }
377 }
378 return (str[x] ? NOMATCH : MATCH);
379 }
382 template<class T>
383 static int
384 ns_WildCardMatch(const T *str, const T *xp, bool case_insensitive)
385 {
386 T *expr = nullptr;
387 int x, ret = MATCH;
389 if (!nsCharTraits<T>::find(xp, nsCharTraits<T>::length(xp), T('~')))
390 return ::_shexp_match(str, xp, case_insensitive, 0);
392 expr = (T *) NS_Alloc((nsCharTraits<T>::length(xp) + 1) * sizeof(T));
393 if(!expr)
394 return NOMATCH;
395 memcpy(expr, xp, (nsCharTraits<T>::length(xp) + 1) * sizeof(T));
397 x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(nullptr));
398 if (x != ABORTED && expr[x] == '~') {
399 expr[x++] = '\0';
400 ret = ::_shexp_match(str, &expr[x], case_insensitive, 0);
401 switch (ret) {
402 case NOMATCH: ret = MATCH; break;
403 case MATCH: ret = NOMATCH; break;
404 default: break;
405 }
406 }
407 if (ret == MATCH)
408 ret = ::_shexp_match(str, expr, case_insensitive, 0);
410 NS_Free(expr);
411 return ret;
412 }
414 template<class T>
415 int
416 NS_WildCardMatch_(const T *str, const T *expr, bool case_insensitive)
417 {
418 int is_valid = NS_WildCardValid(expr);
419 switch(is_valid) {
420 case INVALID_SXP:
421 return -1;
422 default:
423 return ::ns_WildCardMatch(str, expr, case_insensitive);
424 }
425 }
427 int
428 NS_WildCardMatch(const char *str, const char *xp,
429 bool case_insensitive)
430 {
431 return NS_WildCardMatch_(str, xp, case_insensitive);
432 }
434 int
435 NS_WildCardMatch(const char16_t *str, const char16_t *xp,
436 bool case_insensitive)
437 {
438 return NS_WildCardMatch_(str, xp, case_insensitive);
439 }