|
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 /* vim:set ts=4 sw=4 sts=4 et cindent: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "mozilla/RangedPtr.h" |
|
8 |
|
9 #include "nsURLHelper.h" |
|
10 #include "nsIFile.h" |
|
11 #include "nsIURLParser.h" |
|
12 #include "nsCOMPtr.h" |
|
13 #include "nsCRT.h" |
|
14 #include "nsNetCID.h" |
|
15 #include "prnetdb.h" |
|
16 |
|
17 using namespace mozilla; |
|
18 |
|
19 //---------------------------------------------------------------------------- |
|
20 // Init/Shutdown |
|
21 //---------------------------------------------------------------------------- |
|
22 |
|
23 static bool gInitialized = false; |
|
24 static nsIURLParser *gNoAuthURLParser = nullptr; |
|
25 static nsIURLParser *gAuthURLParser = nullptr; |
|
26 static nsIURLParser *gStdURLParser = nullptr; |
|
27 |
|
28 static void |
|
29 InitGlobals() |
|
30 { |
|
31 nsCOMPtr<nsIURLParser> parser; |
|
32 |
|
33 parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID); |
|
34 NS_ASSERTION(parser, "failed getting 'noauth' url parser"); |
|
35 if (parser) { |
|
36 gNoAuthURLParser = parser.get(); |
|
37 NS_ADDREF(gNoAuthURLParser); |
|
38 } |
|
39 |
|
40 parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID); |
|
41 NS_ASSERTION(parser, "failed getting 'auth' url parser"); |
|
42 if (parser) { |
|
43 gAuthURLParser = parser.get(); |
|
44 NS_ADDREF(gAuthURLParser); |
|
45 } |
|
46 |
|
47 parser = do_GetService(NS_STDURLPARSER_CONTRACTID); |
|
48 NS_ASSERTION(parser, "failed getting 'std' url parser"); |
|
49 if (parser) { |
|
50 gStdURLParser = parser.get(); |
|
51 NS_ADDREF(gStdURLParser); |
|
52 } |
|
53 |
|
54 gInitialized = true; |
|
55 } |
|
56 |
|
57 void |
|
58 net_ShutdownURLHelper() |
|
59 { |
|
60 if (gInitialized) { |
|
61 NS_IF_RELEASE(gNoAuthURLParser); |
|
62 NS_IF_RELEASE(gAuthURLParser); |
|
63 NS_IF_RELEASE(gStdURLParser); |
|
64 gInitialized = false; |
|
65 } |
|
66 } |
|
67 |
|
68 //---------------------------------------------------------------------------- |
|
69 // nsIURLParser getters |
|
70 //---------------------------------------------------------------------------- |
|
71 |
|
72 nsIURLParser * |
|
73 net_GetAuthURLParser() |
|
74 { |
|
75 if (!gInitialized) |
|
76 InitGlobals(); |
|
77 return gAuthURLParser; |
|
78 } |
|
79 |
|
80 nsIURLParser * |
|
81 net_GetNoAuthURLParser() |
|
82 { |
|
83 if (!gInitialized) |
|
84 InitGlobals(); |
|
85 return gNoAuthURLParser; |
|
86 } |
|
87 |
|
88 nsIURLParser * |
|
89 net_GetStdURLParser() |
|
90 { |
|
91 if (!gInitialized) |
|
92 InitGlobals(); |
|
93 return gStdURLParser; |
|
94 } |
|
95 |
|
96 //--------------------------------------------------------------------------- |
|
97 // GetFileFromURLSpec implementations |
|
98 //--------------------------------------------------------------------------- |
|
99 nsresult |
|
100 net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result) |
|
101 { |
|
102 nsAutoCString escPath; |
|
103 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); |
|
104 if (NS_FAILED(rv)) |
|
105 return rv; |
|
106 |
|
107 if (escPath.Last() != '/') { |
|
108 escPath += '/'; |
|
109 } |
|
110 |
|
111 result = escPath; |
|
112 return NS_OK; |
|
113 } |
|
114 |
|
115 nsresult |
|
116 net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result) |
|
117 { |
|
118 nsAutoCString escPath; |
|
119 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); |
|
120 if (NS_FAILED(rv)) |
|
121 return rv; |
|
122 |
|
123 // if this file references a directory, then we need to ensure that the |
|
124 // URL ends with a slash. this is important since it affects the rules |
|
125 // for relative URL resolution when this URL is used as a base URL. |
|
126 // if the file does not exist, then we make no assumption about its type, |
|
127 // and simply leave the URL unmodified. |
|
128 if (escPath.Last() != '/') { |
|
129 bool dir; |
|
130 rv = aFile->IsDirectory(&dir); |
|
131 if (NS_SUCCEEDED(rv) && dir) |
|
132 escPath += '/'; |
|
133 } |
|
134 |
|
135 result = escPath; |
|
136 return NS_OK; |
|
137 } |
|
138 |
|
139 //---------------------------------------------------------------------------- |
|
140 // file:// URL parsing |
|
141 //---------------------------------------------------------------------------- |
|
142 |
|
143 nsresult |
|
144 net_ParseFileURL(const nsACString &inURL, |
|
145 nsACString &outDirectory, |
|
146 nsACString &outFileBaseName, |
|
147 nsACString &outFileExtension) |
|
148 { |
|
149 nsresult rv; |
|
150 |
|
151 outDirectory.Truncate(); |
|
152 outFileBaseName.Truncate(); |
|
153 outFileExtension.Truncate(); |
|
154 |
|
155 const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL); |
|
156 const char *url = flatURL.get(); |
|
157 |
|
158 uint32_t schemeBeg, schemeEnd; |
|
159 rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr); |
|
160 if (NS_FAILED(rv)) return rv; |
|
161 |
|
162 if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) { |
|
163 NS_ERROR("must be a file:// url"); |
|
164 return NS_ERROR_UNEXPECTED; |
|
165 } |
|
166 |
|
167 nsIURLParser *parser = net_GetNoAuthURLParser(); |
|
168 NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED); |
|
169 |
|
170 uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos; |
|
171 int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen; |
|
172 |
|
173 // invoke the parser to extract the URL path |
|
174 rv = parser->ParseURL(url, flatURL.Length(), |
|
175 nullptr, nullptr, // don't care about scheme |
|
176 nullptr, nullptr, // don't care about authority |
|
177 &pathPos, &pathLen); |
|
178 if (NS_FAILED(rv)) return rv; |
|
179 |
|
180 // invoke the parser to extract filepath from the path |
|
181 rv = parser->ParsePath(url + pathPos, pathLen, |
|
182 &filepathPos, &filepathLen, |
|
183 nullptr, nullptr, // don't care about query |
|
184 nullptr, nullptr); // don't care about ref |
|
185 if (NS_FAILED(rv)) return rv; |
|
186 |
|
187 filepathPos += pathPos; |
|
188 |
|
189 // invoke the parser to extract the directory and filename from filepath |
|
190 rv = parser->ParseFilePath(url + filepathPos, filepathLen, |
|
191 &directoryPos, &directoryLen, |
|
192 &basenamePos, &basenameLen, |
|
193 &extensionPos, &extensionLen); |
|
194 if (NS_FAILED(rv)) return rv; |
|
195 |
|
196 if (directoryLen > 0) |
|
197 outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen); |
|
198 if (basenameLen > 0) |
|
199 outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen); |
|
200 if (extensionLen > 0) |
|
201 outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen); |
|
202 // since we are using a no-auth url parser, there will never be a host |
|
203 // XXX not strictly true... file://localhost/foo/bar.html is a valid URL |
|
204 |
|
205 return NS_OK; |
|
206 } |
|
207 |
|
208 //---------------------------------------------------------------------------- |
|
209 // path manipulation functions |
|
210 //---------------------------------------------------------------------------- |
|
211 |
|
212 // Replace all /./ with a / while resolving URLs |
|
213 // But only till #? |
|
214 void |
|
215 net_CoalesceDirs(netCoalesceFlags flags, char* path) |
|
216 { |
|
217 /* Stolen from the old netlib's mkparse.c. |
|
218 * |
|
219 * modifies a url of the form /foo/../foo1 -> /foo1 |
|
220 * and /foo/./foo1 -> /foo/foo1 |
|
221 * and /foo/foo1/.. -> /foo/ |
|
222 */ |
|
223 char *fwdPtr = path; |
|
224 char *urlPtr = path; |
|
225 char *lastslash = path; |
|
226 uint32_t traversal = 0; |
|
227 uint32_t special_ftp_len = 0; |
|
228 |
|
229 /* Remember if this url is a special ftp one: */ |
|
230 if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) |
|
231 { |
|
232 /* some schemes (for example ftp) have the speciality that |
|
233 the path can begin // or /%2F to mark the root of the |
|
234 servers filesystem, a simple / only marks the root relative |
|
235 to the user loging in. We remember the length of the marker */ |
|
236 if (nsCRT::strncasecmp(path,"/%2F",4) == 0) |
|
237 special_ftp_len = 4; |
|
238 else if (nsCRT::strncmp(path,"//",2) == 0 ) |
|
239 special_ftp_len = 2; |
|
240 } |
|
241 |
|
242 /* find the last slash before # or ? */ |
|
243 for(; (*fwdPtr != '\0') && |
|
244 (*fwdPtr != '?') && |
|
245 (*fwdPtr != '#'); ++fwdPtr) |
|
246 { |
|
247 } |
|
248 |
|
249 /* found nothing, but go back one only */ |
|
250 /* if there is something to go back to */ |
|
251 if (fwdPtr != path && *fwdPtr == '\0') |
|
252 { |
|
253 --fwdPtr; |
|
254 } |
|
255 |
|
256 /* search the slash */ |
|
257 for(; (fwdPtr != path) && |
|
258 (*fwdPtr != '/'); --fwdPtr) |
|
259 { |
|
260 } |
|
261 lastslash = fwdPtr; |
|
262 fwdPtr = path; |
|
263 |
|
264 /* replace all %2E or %2e with . in the path */ |
|
265 /* but stop at lastchar if non null */ |
|
266 for(; (*fwdPtr != '\0') && |
|
267 (*fwdPtr != '?') && |
|
268 (*fwdPtr != '#') && |
|
269 (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr) |
|
270 { |
|
271 if (*fwdPtr == '%' && *(fwdPtr+1) == '2' && |
|
272 (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e')) |
|
273 { |
|
274 *urlPtr++ = '.'; |
|
275 ++fwdPtr; |
|
276 ++fwdPtr; |
|
277 } |
|
278 else |
|
279 { |
|
280 *urlPtr++ = *fwdPtr; |
|
281 } |
|
282 } |
|
283 // Copy remaining stuff past the #?; |
|
284 for (; *fwdPtr != '\0'; ++fwdPtr) |
|
285 { |
|
286 *urlPtr++ = *fwdPtr; |
|
287 } |
|
288 *urlPtr = '\0'; // terminate the url |
|
289 |
|
290 // start again, this time for real |
|
291 fwdPtr = path; |
|
292 urlPtr = path; |
|
293 |
|
294 for(; (*fwdPtr != '\0') && |
|
295 (*fwdPtr != '?') && |
|
296 (*fwdPtr != '#'); ++fwdPtr) |
|
297 { |
|
298 if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' ) |
|
299 { |
|
300 // remove . followed by slash |
|
301 ++fwdPtr; |
|
302 } |
|
303 else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' && |
|
304 (*(fwdPtr+3) == '/' || |
|
305 *(fwdPtr+3) == '\0' || // This will take care of |
|
306 *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag |
|
307 *(fwdPtr+3) == '#')) |
|
308 { |
|
309 // remove foo/.. |
|
310 // reverse the urlPtr to the previous slash if possible |
|
311 // if url does not allow relative root then drop .. above root |
|
312 // otherwise retain them in the path |
|
313 if(traversal > 0 || !(flags & |
|
314 NET_COALESCE_ALLOW_RELATIVE_ROOT)) |
|
315 { |
|
316 if (urlPtr != path) |
|
317 urlPtr--; // we must be going back at least by one |
|
318 for(;*urlPtr != '/' && urlPtr != path; urlPtr--) |
|
319 ; // null body |
|
320 --traversal; // count back |
|
321 // forward the fwdPtr past the ../ |
|
322 fwdPtr += 2; |
|
323 // if we have reached the beginning of the path |
|
324 // while searching for the previous / and we remember |
|
325 // that it is an url that begins with /%2F then |
|
326 // advance urlPtr again by 3 chars because /%2F already |
|
327 // marks the root of the path |
|
328 if (urlPtr == path && special_ftp_len > 3) |
|
329 { |
|
330 ++urlPtr; |
|
331 ++urlPtr; |
|
332 ++urlPtr; |
|
333 } |
|
334 // special case if we have reached the end |
|
335 // to preserve the last / |
|
336 if (*fwdPtr == '.' && *(fwdPtr+1) == '\0') |
|
337 ++urlPtr; |
|
338 } |
|
339 else |
|
340 { |
|
341 // there are to much /.. in this path, just copy them instead. |
|
342 // forward the urlPtr past the /.. and copying it |
|
343 |
|
344 // However if we remember it is an url that starts with |
|
345 // /%2F and urlPtr just points at the "F" of "/%2F" then do |
|
346 // not overwrite it with the /, just copy .. and move forward |
|
347 // urlPtr. |
|
348 if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1) |
|
349 ++urlPtr; |
|
350 else |
|
351 *urlPtr++ = *fwdPtr; |
|
352 ++fwdPtr; |
|
353 *urlPtr++ = *fwdPtr; |
|
354 ++fwdPtr; |
|
355 *urlPtr++ = *fwdPtr; |
|
356 } |
|
357 } |
|
358 else |
|
359 { |
|
360 // count the hierachie, but only if we do not have reached |
|
361 // the root of some special urls with a special root marker |
|
362 if (*fwdPtr == '/' && *(fwdPtr+1) != '.' && |
|
363 (special_ftp_len != 2 || *(fwdPtr+1) != '/')) |
|
364 traversal++; |
|
365 // copy the url incrementaly |
|
366 *urlPtr++ = *fwdPtr; |
|
367 } |
|
368 } |
|
369 |
|
370 /* |
|
371 * Now lets remove trailing . case |
|
372 * /foo/foo1/. -> /foo/foo1/ |
|
373 */ |
|
374 |
|
375 if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/')) |
|
376 urlPtr--; |
|
377 |
|
378 // Copy remaining stuff past the #?; |
|
379 for (; *fwdPtr != '\0'; ++fwdPtr) |
|
380 { |
|
381 *urlPtr++ = *fwdPtr; |
|
382 } |
|
383 *urlPtr = '\0'; // terminate the url |
|
384 } |
|
385 |
|
386 nsresult |
|
387 net_ResolveRelativePath(const nsACString &relativePath, |
|
388 const nsACString &basePath, |
|
389 nsACString &result) |
|
390 { |
|
391 nsAutoCString name; |
|
392 nsAutoCString path(basePath); |
|
393 bool needsDelim = false; |
|
394 |
|
395 if ( !path.IsEmpty() ) { |
|
396 char16_t last = path.Last(); |
|
397 needsDelim = !(last == '/'); |
|
398 } |
|
399 |
|
400 nsACString::const_iterator beg, end; |
|
401 relativePath.BeginReading(beg); |
|
402 relativePath.EndReading(end); |
|
403 |
|
404 bool stop = false; |
|
405 char c; |
|
406 for (; !stop; ++beg) { |
|
407 c = (beg == end) ? '\0' : *beg; |
|
408 //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get()); |
|
409 switch (c) { |
|
410 case '\0': |
|
411 case '#': |
|
412 case '?': |
|
413 stop = true; |
|
414 // fall through... |
|
415 case '/': |
|
416 // delimiter found |
|
417 if (name.EqualsLiteral("..")) { |
|
418 // pop path |
|
419 // If we already have the delim at end, then |
|
420 // skip over that when searching for next one to the left |
|
421 int32_t offset = path.Length() - (needsDelim ? 1 : 2); |
|
422 // First check for errors |
|
423 if (offset < 0 ) |
|
424 return NS_ERROR_MALFORMED_URI; |
|
425 int32_t pos = path.RFind("/", false, offset); |
|
426 if (pos >= 0) |
|
427 path.Truncate(pos + 1); |
|
428 else |
|
429 path.Truncate(); |
|
430 } |
|
431 else if (name.IsEmpty() || name.EqualsLiteral(".")) { |
|
432 // do nothing |
|
433 } |
|
434 else { |
|
435 // append name to path |
|
436 if (needsDelim) |
|
437 path += '/'; |
|
438 path += name; |
|
439 needsDelim = true; |
|
440 } |
|
441 name.Truncate(); |
|
442 break; |
|
443 |
|
444 default: |
|
445 // append char to name |
|
446 name += c; |
|
447 } |
|
448 } |
|
449 // append anything left on relativePath (e.g. #..., ;..., ?...) |
|
450 if (c != '\0') |
|
451 path += Substring(--beg, end); |
|
452 |
|
453 result = path; |
|
454 return NS_OK; |
|
455 } |
|
456 |
|
457 //---------------------------------------------------------------------------- |
|
458 // scheme fu |
|
459 //---------------------------------------------------------------------------- |
|
460 |
|
461 /* Extract URI-Scheme if possible */ |
|
462 nsresult |
|
463 net_ExtractURLScheme(const nsACString &inURI, |
|
464 uint32_t *startPos, |
|
465 uint32_t *endPos, |
|
466 nsACString *scheme) |
|
467 { |
|
468 // search for something up to a colon, and call it the scheme |
|
469 const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI); |
|
470 const char* uri_start = flatURI.get(); |
|
471 const char* uri = uri_start; |
|
472 |
|
473 if (!uri) |
|
474 return NS_ERROR_MALFORMED_URI; |
|
475 |
|
476 // skip leading white space |
|
477 while (nsCRT::IsAsciiSpace(*uri)) |
|
478 uri++; |
|
479 |
|
480 uint32_t start = uri - uri_start; |
|
481 if (startPos) { |
|
482 *startPos = start; |
|
483 } |
|
484 |
|
485 uint32_t length = 0; |
|
486 char c; |
|
487 while ((c = *uri++) != '\0') { |
|
488 // First char must be Alpha |
|
489 if (length == 0 && nsCRT::IsAsciiAlpha(c)) { |
|
490 length++; |
|
491 } |
|
492 // Next chars can be alpha + digit + some special chars |
|
493 else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || |
|
494 nsCRT::IsAsciiDigit(c) || c == '+' || |
|
495 c == '.' || c == '-')) { |
|
496 length++; |
|
497 } |
|
498 // stop if colon reached but not as first char |
|
499 else if (c == ':' && length > 0) { |
|
500 if (endPos) { |
|
501 *endPos = start + length; |
|
502 } |
|
503 |
|
504 if (scheme) |
|
505 scheme->Assign(Substring(inURI, start, length)); |
|
506 return NS_OK; |
|
507 } |
|
508 else |
|
509 break; |
|
510 } |
|
511 return NS_ERROR_MALFORMED_URI; |
|
512 } |
|
513 |
|
514 bool |
|
515 net_IsValidScheme(const char *scheme, uint32_t schemeLen) |
|
516 { |
|
517 // first char must be alpha |
|
518 if (!nsCRT::IsAsciiAlpha(*scheme)) |
|
519 return false; |
|
520 |
|
521 // nsCStrings may have embedded nulls -- reject those too |
|
522 for (; schemeLen; ++scheme, --schemeLen) { |
|
523 if (!(nsCRT::IsAsciiAlpha(*scheme) || |
|
524 nsCRT::IsAsciiDigit(*scheme) || |
|
525 *scheme == '+' || |
|
526 *scheme == '.' || |
|
527 *scheme == '-')) |
|
528 return false; |
|
529 } |
|
530 |
|
531 return true; |
|
532 } |
|
533 |
|
534 bool |
|
535 net_FilterURIString(const char *str, nsACString& result) |
|
536 { |
|
537 NS_PRECONDITION(str, "Must have a non-null string!"); |
|
538 bool writing = false; |
|
539 result.Truncate(); |
|
540 const char *p = str; |
|
541 |
|
542 // Remove leading spaces, tabs, CR, LF if any. |
|
543 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') { |
|
544 writing = true; |
|
545 str = p + 1; |
|
546 p++; |
|
547 } |
|
548 |
|
549 // Don't strip from the scheme, because other code assumes everything |
|
550 // up to the ':' is the scheme, and it's bad not to have it match. |
|
551 // If there's no ':', strip. |
|
552 bool found_colon = false; |
|
553 const char *first = nullptr; |
|
554 while (*p) { |
|
555 switch (*p) { |
|
556 case '\t': |
|
557 case '\r': |
|
558 case '\n': |
|
559 if (found_colon) { |
|
560 writing = true; |
|
561 // append chars up to but not including *p |
|
562 if (p > str) |
|
563 result.Append(str, p - str); |
|
564 str = p + 1; |
|
565 } else { |
|
566 // remember where the first \t\r\n was in case we find no scheme |
|
567 if (!first) |
|
568 first = p; |
|
569 } |
|
570 break; |
|
571 |
|
572 case ':': |
|
573 found_colon = true; |
|
574 break; |
|
575 |
|
576 case '/': |
|
577 case '@': |
|
578 if (!found_colon) { |
|
579 // colon also has to precede / or @ to be a scheme |
|
580 found_colon = true; // not really, but means ok to strip |
|
581 if (first) { |
|
582 // go back and replace |
|
583 p = first; |
|
584 continue; // process *p again |
|
585 } |
|
586 } |
|
587 break; |
|
588 |
|
589 default: |
|
590 break; |
|
591 } |
|
592 p++; |
|
593 |
|
594 // At end, if there was no scheme, and we hit a control char, fix |
|
595 // it up now. |
|
596 if (!*p && first != nullptr && !found_colon) { |
|
597 // TRICKY - to avoid duplicating code, we reset the loop back |
|
598 // to the point we found something to do |
|
599 p = first; |
|
600 // This also stops us from looping after we finish |
|
601 found_colon = true; // so we'll replace \t\r\n |
|
602 } |
|
603 } |
|
604 |
|
605 // Remove trailing spaces if any |
|
606 while (((p-1) >= str) && (*(p-1) == ' ')) { |
|
607 writing = true; |
|
608 p--; |
|
609 } |
|
610 |
|
611 if (writing && p > str) |
|
612 result.Append(str, p - str); |
|
613 |
|
614 return writing; |
|
615 } |
|
616 |
|
#if defined(XP_WIN)
// Replaces every backslash in |aURL| with a forward slash, appending the
// converted text to |aResultBuf| (the buffer is not cleared first).
//
// @return true if at least one backslash was found; when false is returned
//         nothing has been appended to |aResultBuf|.
bool
net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
{
    nsACString::const_iterator beginIter, endIter;
    aURL.BeginReading(beginIter);
    aURL.EndReading(endIter);

    const char *const stop = endIter.get();
    const char *runStart = beginIter.get();  // start of the text not yet copied
    const char *cur = runStart;
    bool writing = false;

    while (cur != stop) {
        if (*cur == '\\') {
            writing = true;
            // flush the run before the backslash, then emit the slash
            if (cur > runStart) {
                aResultBuf.Append(runStart, cur - runStart);
            }
            aResultBuf += '/';
            runStart = cur + 1;
        }
        ++cur;
    }

    // Copy the tail only when something was actually replaced.
    if (writing && cur > runStart) {
        aResultBuf.Append(runStart, cur - runStart);
    }

    return writing;
}
#endif
|
646 |
|
647 //---------------------------------------------------------------------------- |
|
648 // miscellaneous (i.e., stuff that should really be elsewhere) |
|
649 //---------------------------------------------------------------------------- |
|
650 |
|
// Converts |c| to lowercase in place when it is an ASCII uppercase letter;
// every other character is left unchanged.
static inline
void ToLower(char &c)
{
    if (c >= 'A' && c <= 'Z') {
        c = char(c + ('a' - 'A'));
    }
}
|
657 |
|
658 void |
|
659 net_ToLowerCase(char *str, uint32_t length) |
|
660 { |
|
661 for (char *end = str + length; str < end; ++str) |
|
662 ToLower(*str); |
|
663 } |
|
664 |
|
665 void |
|
666 net_ToLowerCase(char *str) |
|
667 { |
|
668 for (; *str; ++str) |
|
669 ToLower(*str); |
|
670 } |
|
671 |
|
// Scans [iter, stop) for the first character that occurs in |set|.  The
// scan also ends at a NUL character inside the range.
//
// @return a pointer to the first character found in |set|, or to the place
//         where the scan ended (|stop| or the NUL) if there is none.
char *
net_FindCharInSet(const char *iter, const char *stop, const char *set)
{
    while (iter != stop && *iter != '\0') {
        // look the current character up in the set
        const char *s = set;
        while (*s != '\0' && *s != *iter) {
            ++s;
        }
        if (*s != '\0') {
            return (char *) iter;
        }
        ++iter;
    }
    return (char *) iter;
}
|
683 |
|
// Scans [iter, stop) for the first character that does NOT occur in |set|.
//
// @param iter  start of the range.
// @param stop  one past the end of the range.
// @param set   NUL-terminated list of characters to skip.
// @return a pointer to the first character not in |set|, or |stop| when
//         every character of the range is in |set| or the range is empty.
char *
net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
{
    // The range is tested before every read.  In particular an empty range
    // (iter == stop) is never dereferenced: reading *iter there would look
    // at a byte outside the range and, if that byte happened to be in the
    // set, the scan would run on past |stop|.
    while (iter != stop) {
        const char *s = set;
        while (*s != '\0' && *s != *iter) {
            ++s;
        }
        if (*s == '\0') {
            break;  // *iter is not in the set
        }
        ++iter;
    }
    return (char *) iter;
}
|
697 |
|
// Backwards counterpart of net_FindCharNotInSet().  Note the argument
// order: |stop| is the beginning of the range and |iter| is one past its
// last character.  The scan starts at iter - 1 and moves towards the
// beginning for as long as the characters occur in |set|.
//
// @return a pointer to the last character of the range that is not in
//         |set|, or stop - 1 when all of them are (or the range is empty).
char *
net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
{
    const char *cur = iter - 1;
    const char *limit = stop - 1;

    while (cur != limit) {
        // look the current character up in the set
        const char *s = set;
        while (*s != '\0' && *s != *cur) {
            ++s;
        }
        if (*s == '\0') {
            break;  // *cur is not in the set
        }
        --cur;
    }
    return (char *) cur;
}
|
717 |
|
// Characters treated as linear whitespace (space and horizontal tab) when
// trimming media-type tokens.
#define HTTP_LWS " \t"
|
719 |
|
720 // Return the index of the closing quote of the string, if any |
|
721 static uint32_t |
|
722 net_FindStringEnd(const nsCString& flatStr, |
|
723 uint32_t stringStart, |
|
724 char stringDelim) |
|
725 { |
|
726 NS_ASSERTION(stringStart < flatStr.Length() && |
|
727 flatStr.CharAt(stringStart) == stringDelim && |
|
728 (stringDelim == '"' || stringDelim == '\''), |
|
729 "Invalid stringStart"); |
|
730 |
|
731 const char set[] = { stringDelim, '\\', '\0' }; |
|
732 do { |
|
733 // stringStart points to either the start quote or the last |
|
734 // escaped char (the char following a '\\') |
|
735 |
|
736 // Write to searchStart here, so that when we get back to the |
|
737 // top of the loop right outside this one we search from the |
|
738 // right place. |
|
739 uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1); |
|
740 if (stringEnd == uint32_t(kNotFound)) |
|
741 return flatStr.Length(); |
|
742 |
|
743 if (flatStr.CharAt(stringEnd) == '\\') { |
|
744 // Hit a backslash-escaped char. Need to skip over it. |
|
745 stringStart = stringEnd + 1; |
|
746 if (stringStart == flatStr.Length()) |
|
747 return stringStart; |
|
748 |
|
749 // Go back to looking for the next escape or the string end |
|
750 continue; |
|
751 } |
|
752 |
|
753 return stringEnd; |
|
754 |
|
755 } while (true); |
|
756 |
|
757 NS_NOTREACHED("How did we get here?"); |
|
758 return flatStr.Length(); |
|
759 } |
|
760 |
|
761 |
|
762 static uint32_t |
|
763 net_FindMediaDelimiter(const nsCString& flatStr, |
|
764 uint32_t searchStart, |
|
765 char delimiter) |
|
766 { |
|
767 do { |
|
768 // searchStart points to the spot from which we should start looking |
|
769 // for the delimiter. |
|
770 const char delimStr[] = { delimiter, '"', '\0' }; |
|
771 uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart); |
|
772 if (curDelimPos == uint32_t(kNotFound)) |
|
773 return flatStr.Length(); |
|
774 |
|
775 char ch = flatStr.CharAt(curDelimPos); |
|
776 if (ch == delimiter) { |
|
777 // Found delimiter |
|
778 return curDelimPos; |
|
779 } |
|
780 |
|
781 // We hit the start of a quoted string. Look for its end. |
|
782 searchStart = net_FindStringEnd(flatStr, curDelimPos, ch); |
|
783 if (searchStart == flatStr.Length()) |
|
784 return searchStart; |
|
785 |
|
786 ++searchStart; |
|
787 |
|
788 // searchStart now points to the first char after the end of the |
|
789 // string, so just go back to the top of the loop and look for |
|
790 // |delimiter| again. |
|
791 } while (true); |
|
792 |
|
793 NS_NOTREACHED("How did we get here?"); |
|
794 return flatStr.Length(); |
|
795 } |
|
796 |
|
797 // aOffset should be added to aCharsetStart and aCharsetEnd if this |
|
798 // function sets them. |
|
799 static void |
|
800 net_ParseMediaType(const nsACString &aMediaTypeStr, |
|
801 nsACString &aContentType, |
|
802 nsACString &aContentCharset, |
|
803 int32_t aOffset, |
|
804 bool *aHadCharset, |
|
805 int32_t *aCharsetStart, |
|
806 int32_t *aCharsetEnd) |
|
807 { |
|
808 const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr); |
|
809 const char* start = flatStr.get(); |
|
810 const char* end = start + flatStr.Length(); |
|
811 |
|
812 // Trim LWS leading and trailing whitespace from type. We include '(' in |
|
813 // the trailing trim set to catch media-type comments, which are not at all |
|
814 // standard, but may occur in rare cases. |
|
815 const char* type = net_FindCharNotInSet(start, end, HTTP_LWS); |
|
816 const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";("); |
|
817 |
|
818 const char* charset = ""; |
|
819 const char* charsetEnd = charset; |
|
820 int32_t charsetParamStart = 0; |
|
821 int32_t charsetParamEnd = 0; |
|
822 |
|
823 // Iterate over parameters |
|
824 bool typeHasCharset = false; |
|
825 uint32_t paramStart = flatStr.FindChar(';', typeEnd - start); |
|
826 if (paramStart != uint32_t(kNotFound)) { |
|
827 // We have parameters. Iterate over them. |
|
828 uint32_t curParamStart = paramStart + 1; |
|
829 do { |
|
830 uint32_t curParamEnd = |
|
831 net_FindMediaDelimiter(flatStr, curParamStart, ';'); |
|
832 |
|
833 const char* paramName = net_FindCharNotInSet(start + curParamStart, |
|
834 start + curParamEnd, |
|
835 HTTP_LWS); |
|
836 static const char charsetStr[] = "charset="; |
|
837 if (PL_strncasecmp(paramName, charsetStr, |
|
838 sizeof(charsetStr) - 1) == 0) { |
|
839 charset = paramName + sizeof(charsetStr) - 1; |
|
840 charsetEnd = start + curParamEnd; |
|
841 typeHasCharset = true; |
|
842 charsetParamStart = curParamStart - 1; |
|
843 charsetParamEnd = curParamEnd; |
|
844 } |
|
845 |
|
846 curParamStart = curParamEnd + 1; |
|
847 } while (curParamStart < flatStr.Length()); |
|
848 } |
|
849 |
|
850 bool charsetNeedsQuotedStringUnescaping = false; |
|
851 if (typeHasCharset) { |
|
852 // Trim LWS leading and trailing whitespace from charset. We include |
|
853 // '(' in the trailing trim set to catch media-type comments, which are |
|
854 // not at all standard, but may occur in rare cases. |
|
855 charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS); |
|
856 if (*charset == '"') { |
|
857 charsetNeedsQuotedStringUnescaping = true; |
|
858 charsetEnd = |
|
859 start + net_FindStringEnd(flatStr, charset - start, *charset); |
|
860 charset++; |
|
861 NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing"); |
|
862 } else { |
|
863 charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";("); |
|
864 } |
|
865 } |
|
866 |
|
867 // if the server sent "*/*", it is meaningless, so do not store it. |
|
868 // also, if type is the same as aContentType, then just update the |
|
869 // charset. however, if charset is empty and aContentType hasn't |
|
870 // changed, then don't wipe-out an existing aContentCharset. We |
|
871 // also want to reject a mime-type if it does not include a slash. |
|
872 // some servers give junk after the charset parameter, which may |
|
873 // include a comma, so this check makes us a bit more tolerant. |
|
874 |
|
875 if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 && |
|
876 memchr(type, '/', typeEnd - type) != nullptr) { |
|
877 // Common case here is that aContentType is empty |
|
878 bool eq = !aContentType.IsEmpty() && |
|
879 aContentType.Equals(Substring(type, typeEnd), |
|
880 nsCaseInsensitiveCStringComparator()); |
|
881 if (!eq) { |
|
882 aContentType.Assign(type, typeEnd - type); |
|
883 ToLowerCase(aContentType); |
|
884 } |
|
885 |
|
886 if ((!eq && *aHadCharset) || typeHasCharset) { |
|
887 *aHadCharset = true; |
|
888 if (charsetNeedsQuotedStringUnescaping) { |
|
889 // parameters using the "quoted-string" syntax need |
|
890 // backslash-escapes to be unescaped (see RFC 2616 Section 2.2) |
|
891 aContentCharset.Truncate(); |
|
892 for (const char *c = charset; c != charsetEnd; c++) { |
|
893 if (*c == '\\' && c + 1 != charsetEnd) { |
|
894 // eat escape |
|
895 c++; |
|
896 } |
|
897 aContentCharset.Append(*c); |
|
898 } |
|
899 } |
|
900 else { |
|
901 aContentCharset.Assign(charset, charsetEnd - charset); |
|
902 } |
|
903 if (typeHasCharset) { |
|
904 *aCharsetStart = charsetParamStart + aOffset; |
|
905 *aCharsetEnd = charsetParamEnd + aOffset; |
|
906 } |
|
907 } |
|
908 // Only set a new charset position if this is a different type |
|
909 // from the last one we had and it doesn't already have a |
|
910 // charset param. If this is the same type, we probably want |
|
911 // to leave the charset position on its first occurrence. |
|
912 if (!eq && !typeHasCharset) { |
|
913 int32_t charsetStart = int32_t(paramStart); |
|
914 if (charsetStart == kNotFound) |
|
915 charsetStart = flatStr.Length(); |
|
916 |
|
917 *aCharsetEnd = *aCharsetStart = charsetStart + aOffset; |
|
918 } |
|
919 } |
|
920 } |
|
921 |
|
922 #undef HTTP_LWS |
|
923 |
|
924 void |
|
925 net_ParseContentType(const nsACString &aHeaderStr, |
|
926 nsACString &aContentType, |
|
927 nsACString &aContentCharset, |
|
928 bool *aHadCharset) |
|
929 { |
|
930 int32_t dummy1, dummy2; |
|
931 net_ParseContentType(aHeaderStr, aContentType, aContentCharset, |
|
932 aHadCharset, &dummy1, &dummy2); |
|
933 } |
|
934 |
|
935 void |
|
936 net_ParseContentType(const nsACString &aHeaderStr, |
|
937 nsACString &aContentType, |
|
938 nsACString &aContentCharset, |
|
939 bool *aHadCharset, |
|
940 int32_t *aCharsetStart, |
|
941 int32_t *aCharsetEnd) |
|
942 { |
|
943 // |
|
944 // Augmented BNF (from RFC 2616 section 3.7): |
|
945 // |
|
946 // header-value = media-type *( LWS "," LWS media-type ) |
|
947 // media-type = type "/" subtype *( LWS ";" LWS parameter ) |
|
948 // type = token |
|
949 // subtype = token |
|
950 // parameter = attribute "=" value |
|
951 // attribute = token |
|
952 // value = token | quoted-string |
|
953 // |
|
954 // |
|
955 // Examples: |
|
956 // |
|
957 // text/html |
|
958 // text/html, text/html |
|
959 // text/html,text/html; charset=ISO-8859-1 |
|
960 // text/html,text/html; charset="ISO-8859-1" |
|
961 // text/html;charset=ISO-8859-1, text/html |
|
962 // text/html;charset='ISO-8859-1', text/html |
|
963 // application/octet-stream |
|
964 // |
|
965 |
|
966 *aHadCharset = false; |
|
967 const nsCString& flatStr = PromiseFlatCString(aHeaderStr); |
|
968 |
|
969 // iterate over media-types. Note that ',' characters can happen |
|
970 // inside quoted strings, so we need to watch out for that. |
|
971 uint32_t curTypeStart = 0; |
|
972 do { |
|
973 // curTypeStart points to the start of the current media-type. We want |
|
974 // to look for its end. |
|
975 uint32_t curTypeEnd = |
|
976 net_FindMediaDelimiter(flatStr, curTypeStart, ','); |
|
977 |
|
978 // At this point curTypeEnd points to the spot where the media-type |
|
979 // starting at curTypeEnd ends. Time to parse that! |
|
980 net_ParseMediaType(Substring(flatStr, curTypeStart, |
|
981 curTypeEnd - curTypeStart), |
|
982 aContentType, aContentCharset, curTypeStart, |
|
983 aHadCharset, aCharsetStart, aCharsetEnd); |
|
984 |
|
985 // And let's move on to the next media-type |
|
986 curTypeStart = curTypeEnd + 1; |
|
987 } while (curTypeStart < flatStr.Length()); |
|
988 } |
|
989 |
|
990 bool |
|
991 net_IsValidHostName(const nsCSubstring &host) |
|
992 { |
|
993 const char *end = host.EndReading(); |
|
994 // Use explicit whitelists to select which characters we are |
|
995 // willing to send to lower-level DNS logic. This is more |
|
996 // self-documenting, and can also be slightly faster than the |
|
997 // blacklist approach, since DNS names are the common case, and |
|
998 // the commonest characters will tend to be near the start of |
|
999 // the list. |
|
1000 |
|
1001 // Whitelist for DNS names (RFC 1035) with extra characters added |
|
1002 // for pragmatic reasons "$+_" |
|
1003 // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2 |
|
1004 if (net_FindCharNotInSet(host.BeginReading(), end, |
|
1005 "abcdefghijklmnopqrstuvwxyz" |
|
1006 ".-0123456789" |
|
1007 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end) |
|
1008 return true; |
|
1009 |
|
1010 // Might be a valid IPv6 link-local address containing a percent sign |
|
1011 nsAutoCString strhost(host); |
|
1012 PRNetAddr addr; |
|
1013 return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS; |
|
1014 } |
|
1015 |
|
1016 bool |
|
1017 net_IsValidIPv4Addr(const char *addr, int32_t addrLen) |
|
1018 { |
|
1019 RangedPtr<const char> p(addr, addrLen); |
|
1020 |
|
1021 int32_t octet = -1; // means no digit yet |
|
1022 int32_t dotCount = 0; // number of dots in the address |
|
1023 |
|
1024 for (; addrLen; ++p, --addrLen) { |
|
1025 if (*p == '.') { |
|
1026 dotCount++; |
|
1027 if (octet == -1) { |
|
1028 // invalid octet |
|
1029 return false; |
|
1030 } |
|
1031 octet = -1; |
|
1032 } else if (*p >= '0' && *p <='9') { |
|
1033 if (octet == 0) { |
|
1034 // leading 0 is not allowed |
|
1035 return false; |
|
1036 } else if (octet == -1) { |
|
1037 octet = *p - '0'; |
|
1038 } else { |
|
1039 octet *= 10; |
|
1040 octet += *p - '0'; |
|
1041 if (octet > 255) |
|
1042 return false; |
|
1043 } |
|
1044 } else { |
|
1045 // invalid character |
|
1046 return false; |
|
1047 } |
|
1048 } |
|
1049 |
|
1050 return (dotCount == 3 && octet != -1); |
|
1051 } |
|
1052 |
|
1053 bool |
|
1054 net_IsValidIPv6Addr(const char *addr, int32_t addrLen) |
|
1055 { |
|
1056 RangedPtr<const char> p(addr, addrLen); |
|
1057 |
|
1058 int32_t digits = 0; // number of digits in current block |
|
1059 int32_t colons = 0; // number of colons in a row during parsing |
|
1060 int32_t blocks = 0; // number of hexadecimal blocks |
|
1061 bool haveZeros = false; // true if double colon is present in the address |
|
1062 |
|
1063 for (; addrLen; ++p, --addrLen) { |
|
1064 if (*p == ':') { |
|
1065 if (colons == 0) { |
|
1066 if (digits != 0) { |
|
1067 digits = 0; |
|
1068 blocks++; |
|
1069 } |
|
1070 } else if (colons == 1) { |
|
1071 if (haveZeros) |
|
1072 return false; // only one occurrence is allowed |
|
1073 haveZeros = true; |
|
1074 } else { |
|
1075 // too many colons in a row |
|
1076 return false; |
|
1077 } |
|
1078 colons++; |
|
1079 } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || |
|
1080 (*p >= 'A' && *p <= 'F')) { |
|
1081 if (colons == 1 && blocks == 0) // starts with a single colon |
|
1082 return false; |
|
1083 if (digits == 4) // too many digits |
|
1084 return false; |
|
1085 colons = 0; |
|
1086 digits++; |
|
1087 } else if (*p == '.') { |
|
1088 // check valid IPv4 from the beginning of the last block |
|
1089 if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits)) |
|
1090 return false; |
|
1091 return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6); |
|
1092 } else { |
|
1093 // invalid character |
|
1094 return false; |
|
1095 } |
|
1096 } |
|
1097 |
|
1098 if (colons == 1) // ends with a single colon |
|
1099 return false; |
|
1100 |
|
1101 if (digits) // there is a block at the end |
|
1102 blocks++; |
|
1103 |
|
1104 return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8); |
|
1105 } |