|
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 /* vim:set ts=4 sts=4 sw=4 cin et: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 /* * |
|
8 * |
|
9 * |
|
10 * nsWildCard.cpp: shell-like wildcard match routines |
|
11 * |
|
12 * See nsIZipReader.findEntries documentation in nsIZipReader.idl for |
|
13 * a description of the syntax supported by the routines in this file. |
|
14 * |
|
15 * Rob McCool |
|
16 * |
|
17 */ |
|
18 |
|
19 #include "nsWildCard.h" |
|
20 #include "nsXPCOM.h" |
|
21 #include "nsCRTGlue.h" |
|
22 #include "nsCharTraits.h" |
|
23 |
|
24 /* -------------------- ASCII-specific character methods ------------------- */ |
|
25 |
|
26 typedef int static_assert_character_code_arrangement['a' > 'A' ? 1 : -1]; |
|
27 |
|
28 template<class T> |
|
29 static int |
|
30 alpha(T c) |
|
31 { |
|
32 return ('a' <= c && c <= 'z') || |
|
33 ('A' <= c && c <= 'Z'); |
|
34 } |
|
35 |
|
36 template<class T> |
|
37 static int |
|
38 alphanumeric(T c) |
|
39 { |
|
40 return ('0' <= c && c <= '9') || ::alpha(c); |
|
41 } |
|
42 |
|
43 template<class T> |
|
44 static int |
|
45 lower(T c) |
|
46 { |
|
47 return ('A' <= c && c <= 'Z') ? c + ('a' - 'A') : c; |
|
48 } |
|
49 |
|
50 template<class T> |
|
51 static int |
|
52 upper(T c) |
|
53 { |
|
54 return ('a' <= c && c <= 'z') ? c - ('a' - 'A') : c; |
|
55 } |
|
56 |
|
57 /* ----------------------------- _valid_subexp ---------------------------- */ |
|
58 |
|
59 template<class T> |
|
60 static int |
|
61 _valid_subexp(const T *expr, T stop1, T stop2) |
|
62 { |
|
63 int x; |
|
64 int nsc = 0; /* Number of special characters */ |
|
65 int np; /* Number of pipe characters in union */ |
|
66 int tld = 0; /* Number of tilde characters */ |
|
67 |
|
68 for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) { |
|
69 switch(expr[x]) { |
|
70 case '~': |
|
71 if(tld) /* at most one exclusion */ |
|
72 return INVALID_SXP; |
|
73 if (stop1) /* no exclusions within unions */ |
|
74 return INVALID_SXP; |
|
75 if (!expr[x+1]) /* exclusion cannot be last character */ |
|
76 return INVALID_SXP; |
|
77 if (!x) /* exclusion cannot be first character */ |
|
78 return INVALID_SXP; |
|
79 ++tld; |
|
80 /* fall through */ |
|
81 case '*': |
|
82 case '?': |
|
83 case '$': |
|
84 ++nsc; |
|
85 break; |
|
86 case '[': |
|
87 ++nsc; |
|
88 if((!expr[++x]) || (expr[x] == ']')) |
|
89 return INVALID_SXP; |
|
90 for(; expr[x] && (expr[x] != ']'); ++x) { |
|
91 if(expr[x] == '\\' && !expr[++x]) |
|
92 return INVALID_SXP; |
|
93 } |
|
94 if(!expr[x]) |
|
95 return INVALID_SXP; |
|
96 break; |
|
97 case '(': |
|
98 ++nsc; |
|
99 if (stop1) /* no nested unions */ |
|
100 return INVALID_SXP; |
|
101 np = -1; |
|
102 do { |
|
103 int t = ::_valid_subexp(&expr[++x], T(')'), T('|')); |
|
104 if(t == 0 || t == INVALID_SXP) |
|
105 return INVALID_SXP; |
|
106 x+=t; |
|
107 if(!expr[x]) |
|
108 return INVALID_SXP; |
|
109 ++np; |
|
110 } while (expr[x] == '|' ); |
|
111 if(np < 1) /* must be at least one pipe */ |
|
112 return INVALID_SXP; |
|
113 break; |
|
114 case ')': |
|
115 case ']': |
|
116 case '|': |
|
117 return INVALID_SXP; |
|
118 case '\\': |
|
119 ++nsc; |
|
120 if(!expr[++x]) |
|
121 return INVALID_SXP; |
|
122 break; |
|
123 default: |
|
124 break; |
|
125 } |
|
126 } |
|
127 if((!stop1) && (!nsc)) /* must be at least one special character */ |
|
128 return NON_SXP; |
|
129 return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP); |
|
130 } |
|
131 |
|
132 |
|
133 template<class T> |
|
134 int |
|
135 NS_WildCardValid_(const T *expr) |
|
136 { |
|
137 int x = ::_valid_subexp(expr, T('\0'), T('\0')); |
|
138 return (x < 0 ? x : VALID_SXP); |
|
139 } |
|
140 |
|
141 int |
|
142 NS_WildCardValid(const char *expr) |
|
143 { |
|
144 return NS_WildCardValid_(expr); |
|
145 } |
|
146 |
|
147 int |
|
148 NS_WildCardValid(const char16_t *expr) |
|
149 { |
|
150 return NS_WildCardValid_(expr); |
|
151 } |
|
152 |
|
153 /* ----------------------------- _shexp_match ----------------------------- */ |
|
154 |
|
155 |
|
156 #define MATCH 0 |
|
157 #define NOMATCH 1 |
|
158 #define ABORTED -1 |
|
159 |
|
160 template<class T> |
|
161 static int |
|
162 _shexp_match(const T *str, const T *expr, bool case_insensitive, unsigned int level); |
|
163 |
|
164 /** |
|
165 * Count characters until we reach a NUL character or either of the |
|
166 * two delimiter characters, stop1 or stop2. If we encounter a bracketed |
|
167 * expression, look only for NUL or ']' inside it. Do not look for stop1 |
|
168 * or stop2 inside it. Return ABORTED if bracketed expression is unterminated. |
|
169 * Handle all escaping. |
|
170 * Return index in input string of first stop found, or ABORTED if not found. |
|
171 * If "dest" is non-nullptr, copy counted characters to it and null terminate. |
|
172 */ |
|
173 template<class T> |
|
174 static int |
|
175 _scan_and_copy(const T *expr, T stop1, T stop2, T *dest) |
|
176 { |
|
177 int sx; /* source index */ |
|
178 T cc; |
|
179 |
|
180 for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) { |
|
181 if (cc == '\\') { |
|
182 if (!expr[++sx]) |
|
183 return ABORTED; /* should be impossible */ |
|
184 } |
|
185 else if (cc == '[') { |
|
186 while ((cc = expr[++sx]) && cc != ']') { |
|
187 if(cc == '\\' && !expr[++sx]) |
|
188 return ABORTED; |
|
189 } |
|
190 if (!cc) |
|
191 return ABORTED; /* should be impossible */ |
|
192 } |
|
193 } |
|
194 if (dest && sx) { |
|
195 /* Copy all but the closing delimiter. */ |
|
196 memcpy(dest, expr, sx * sizeof(T)); |
|
197 dest[sx] = 0; |
|
198 } |
|
199 return cc ? sx : ABORTED; /* index of closing delimiter */ |
|
200 } |
|
201 |
|
202 /* On input, expr[0] is the opening parenthesis of a union. |
|
203 * See if any of the alternatives in the union matches as a pattern. |
|
204 * The strategy is to take each of the alternatives, in turn, and append |
|
205 * the rest of the expression (after the closing ')' that marks the end of |
|
206 * this union) to that alternative, and then see if the resultant expression |
|
207 * matches the input string. Repeat this until some alternative matches, |
|
208 * or we have an abort. |
|
209 */ |
|
210 template<class T> |
|
211 static int |
|
212 _handle_union(const T *str, const T *expr, bool case_insensitive, |
|
213 unsigned int level) |
|
214 { |
|
215 int sx; /* source index */ |
|
216 int cp; /* source index of closing parenthesis */ |
|
217 int count; |
|
218 int ret = NOMATCH; |
|
219 T *e2; |
|
220 |
|
221 /* Find the closing parenthesis that ends this union in the expression */ |
|
222 cp = ::_scan_and_copy(expr, T(')'), T('\0'), static_cast<T*>(nullptr)); |
|
223 if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */ |
|
224 return ABORTED; |
|
225 ++cp; /* now index of char after closing parenthesis */ |
|
226 e2 = (T *) NS_Alloc((1 + nsCharTraits<T>::length(expr)) * sizeof(T)); |
|
227 if (!e2) |
|
228 return ABORTED; |
|
229 for (sx = 1; ; ++sx) { |
|
230 /* Here, expr[sx] is one character past the preceding '(' or '|'. */ |
|
231 /* Copy everything up to the next delimiter to e2 */ |
|
232 count = ::_scan_and_copy(expr + sx, T(')'), T('|'), e2); |
|
233 if (count == ABORTED || !count) { |
|
234 ret = ABORTED; |
|
235 break; |
|
236 } |
|
237 sx += count; |
|
238 /* Append everything after closing parenthesis to e2. This is safe. */ |
|
239 nsCharTraits<T>::copy(e2 + count, expr + cp, nsCharTraits<T>::length(expr + cp) + 1); |
|
240 ret = ::_shexp_match(str, e2, case_insensitive, level + 1); |
|
241 if (ret != NOMATCH || !expr[sx] || expr[sx] == ')') |
|
242 break; |
|
243 } |
|
244 NS_Free(e2); |
|
245 if (sx < 2) |
|
246 ret = ABORTED; |
|
247 return ret; |
|
248 } |
|
249 |
|
250 /* returns 1 if val is in range from start..end, case insensitive. */ |
|
251 static int |
|
252 _is_char_in_range(unsigned char start, unsigned char end, unsigned char val) |
|
253 { |
|
254 char map[256]; |
|
255 memset(map, 0, sizeof map); |
|
256 while (start <= end) |
|
257 map[lower(start++)] = 1; |
|
258 return map[lower(val)]; |
|
259 } |
|
260 |
|
261 template<class T> |
|
262 static int |
|
263 _shexp_match(const T *str, const T *expr, bool case_insensitive, |
|
264 unsigned int level) |
|
265 { |
|
266 int x; /* input string index */ |
|
267 int y; /* expression index */ |
|
268 int ret,neg; |
|
269 |
|
270 if (level > 20) /* Don't let the stack get too deep. */ |
|
271 return ABORTED; |
|
272 for(x = 0, y = 0; expr[y]; ++y, ++x) { |
|
273 if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) { |
|
274 return NOMATCH; |
|
275 } |
|
276 switch(expr[y]) { |
|
277 case '$': |
|
278 if(str[x]) |
|
279 return NOMATCH; |
|
280 --x; /* we don't want loop to increment x */ |
|
281 break; |
|
282 case '*': |
|
283 while(expr[++y] == '*'){} |
|
284 if(!expr[y]) |
|
285 return MATCH; |
|
286 while(str[x]) { |
|
287 ret = ::_shexp_match(&str[x++], &expr[y], case_insensitive, |
|
288 level + 1); |
|
289 switch(ret) { |
|
290 case NOMATCH: |
|
291 continue; |
|
292 case ABORTED: |
|
293 return ABORTED; |
|
294 default: |
|
295 return MATCH; |
|
296 } |
|
297 } |
|
298 if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x])) |
|
299 return MATCH; |
|
300 else |
|
301 return NOMATCH; |
|
302 case '[': { |
|
303 T start, end = 0; |
|
304 int i; |
|
305 neg = ((expr[++y] == '^') && (expr[y+1] != ']')); |
|
306 if (neg) |
|
307 ++y; |
|
308 i = y; |
|
309 start = expr[i++]; |
|
310 if (start == '\\') |
|
311 start = expr[i++]; |
|
312 if (::alphanumeric(start) && expr[i++] == '-') { |
|
313 end = expr[i++]; |
|
314 if (end == '\\') |
|
315 end = expr[i++]; |
|
316 } |
|
317 if (::alphanumeric(end) && expr[i] == ']') { |
|
318 /* This is a range form: a-b */ |
|
319 T val = str[x]; |
|
320 if (end < start) { /* swap them */ |
|
321 T tmp = end; |
|
322 end = start; |
|
323 start = tmp; |
|
324 } |
|
325 if (case_insensitive && ::alpha(val)) { |
|
326 val = ::_is_char_in_range((unsigned char) start, |
|
327 (unsigned char) end, |
|
328 (unsigned char) val); |
|
329 if (neg == val) |
|
330 return NOMATCH; |
|
331 } |
|
332 else if (neg != ((val < start) || (val > end))) { |
|
333 return NOMATCH; |
|
334 } |
|
335 y = i; |
|
336 } |
|
337 else { |
|
338 /* Not range form */ |
|
339 int matched = 0; |
|
340 for (; expr[y] != ']'; y++) { |
|
341 if (expr[y] == '\\') |
|
342 ++y; |
|
343 if(case_insensitive) |
|
344 matched |= (::upper(str[x]) == ::upper(expr[y])); |
|
345 else |
|
346 matched |= (str[x] == expr[y]); |
|
347 } |
|
348 if (neg == matched) |
|
349 return NOMATCH; |
|
350 } |
|
351 } |
|
352 break; |
|
353 case '(': |
|
354 if (!expr[y+1]) |
|
355 return ABORTED; |
|
356 return ::_handle_union(&str[x], &expr[y], case_insensitive, level + 1); |
|
357 case '?': |
|
358 break; |
|
359 case ')': |
|
360 case ']': |
|
361 case '|': |
|
362 return ABORTED; |
|
363 case '\\': |
|
364 ++y; |
|
365 /* fall through */ |
|
366 default: |
|
367 if(case_insensitive) { |
|
368 if(::upper(str[x]) != ::upper(expr[y])) |
|
369 return NOMATCH; |
|
370 } |
|
371 else { |
|
372 if(str[x] != expr[y]) |
|
373 return NOMATCH; |
|
374 } |
|
375 break; |
|
376 } |
|
377 } |
|
378 return (str[x] ? NOMATCH : MATCH); |
|
379 } |
|
380 |
|
381 |
|
382 template<class T> |
|
383 static int |
|
384 ns_WildCardMatch(const T *str, const T *xp, bool case_insensitive) |
|
385 { |
|
386 T *expr = nullptr; |
|
387 int x, ret = MATCH; |
|
388 |
|
389 if (!nsCharTraits<T>::find(xp, nsCharTraits<T>::length(xp), T('~'))) |
|
390 return ::_shexp_match(str, xp, case_insensitive, 0); |
|
391 |
|
392 expr = (T *) NS_Alloc((nsCharTraits<T>::length(xp) + 1) * sizeof(T)); |
|
393 if(!expr) |
|
394 return NOMATCH; |
|
395 memcpy(expr, xp, (nsCharTraits<T>::length(xp) + 1) * sizeof(T)); |
|
396 |
|
397 x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(nullptr)); |
|
398 if (x != ABORTED && expr[x] == '~') { |
|
399 expr[x++] = '\0'; |
|
400 ret = ::_shexp_match(str, &expr[x], case_insensitive, 0); |
|
401 switch (ret) { |
|
402 case NOMATCH: ret = MATCH; break; |
|
403 case MATCH: ret = NOMATCH; break; |
|
404 default: break; |
|
405 } |
|
406 } |
|
407 if (ret == MATCH) |
|
408 ret = ::_shexp_match(str, expr, case_insensitive, 0); |
|
409 |
|
410 NS_Free(expr); |
|
411 return ret; |
|
412 } |
|
413 |
|
414 template<class T> |
|
415 int |
|
416 NS_WildCardMatch_(const T *str, const T *expr, bool case_insensitive) |
|
417 { |
|
418 int is_valid = NS_WildCardValid(expr); |
|
419 switch(is_valid) { |
|
420 case INVALID_SXP: |
|
421 return -1; |
|
422 default: |
|
423 return ::ns_WildCardMatch(str, expr, case_insensitive); |
|
424 } |
|
425 } |
|
426 |
|
427 int |
|
428 NS_WildCardMatch(const char *str, const char *xp, |
|
429 bool case_insensitive) |
|
430 { |
|
431 return NS_WildCardMatch_(str, xp, case_insensitive); |
|
432 } |
|
433 |
|
434 int |
|
435 NS_WildCardMatch(const char16_t *str, const char16_t *xp, |
|
436 bool case_insensitive) |
|
437 { |
|
438 return NS_WildCardMatch_(str, xp, case_insensitive); |
|
439 } |