Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* vim:set ts=4 sw=4 sts=4 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "mozilla/RangedPtr.h"
9 #include "nsURLHelper.h"
10 #include "nsIFile.h"
11 #include "nsIURLParser.h"
12 #include "nsCOMPtr.h"
13 #include "nsCRT.h"
14 #include "nsNetCID.h"
15 #include "prnetdb.h"
17 using namespace mozilla;
19 //----------------------------------------------------------------------------
20 // Init/Shutdown
21 //----------------------------------------------------------------------------
// Set to true at the end of InitGlobals() and reset to false by
// net_ShutdownURLHelper().
23 static bool gInitialized = false;
// Cached URL parser services.  InitGlobals() stores an AddRef'd pointer in
// each of these when the corresponding service is available, and
// net_ShutdownURLHelper() releases them again.
24 static nsIURLParser *gNoAuthURLParser = nullptr;
25 static nsIURLParser *gAuthURLParser = nullptr;
26 static nsIURLParser *gStdURLParser = nullptr;
28 static void
29 InitGlobals()
30 {
31 nsCOMPtr<nsIURLParser> parser;
33 parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
34 NS_ASSERTION(parser, "failed getting 'noauth' url parser");
35 if (parser) {
36 gNoAuthURLParser = parser.get();
37 NS_ADDREF(gNoAuthURLParser);
38 }
40 parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
41 NS_ASSERTION(parser, "failed getting 'auth' url parser");
42 if (parser) {
43 gAuthURLParser = parser.get();
44 NS_ADDREF(gAuthURLParser);
45 }
47 parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
48 NS_ASSERTION(parser, "failed getting 'std' url parser");
49 if (parser) {
50 gStdURLParser = parser.get();
51 NS_ADDREF(gStdURLParser);
52 }
54 gInitialized = true;
55 }
57 void
58 net_ShutdownURLHelper()
59 {
60 if (gInitialized) {
61 NS_IF_RELEASE(gNoAuthURLParser);
62 NS_IF_RELEASE(gAuthURLParser);
63 NS_IF_RELEASE(gStdURLParser);
64 gInitialized = false;
65 }
66 }
68 //----------------------------------------------------------------------------
69 // nsIURLParser getters
70 //----------------------------------------------------------------------------
72 nsIURLParser *
73 net_GetAuthURLParser()
74 {
75 if (!gInitialized)
76 InitGlobals();
77 return gAuthURLParser;
78 }
80 nsIURLParser *
81 net_GetNoAuthURLParser()
82 {
83 if (!gInitialized)
84 InitGlobals();
85 return gNoAuthURLParser;
86 }
88 nsIURLParser *
89 net_GetStdURLParser()
90 {
91 if (!gInitialized)
92 InitGlobals();
93 return gStdURLParser;
94 }
96 //---------------------------------------------------------------------------
97 // GetFileFromURLSpec implementations
98 //---------------------------------------------------------------------------
99 nsresult
100 net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result)
101 {
102 nsAutoCString escPath;
103 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
104 if (NS_FAILED(rv))
105 return rv;
107 if (escPath.Last() != '/') {
108 escPath += '/';
109 }
111 result = escPath;
112 return NS_OK;
113 }
115 nsresult
116 net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result)
117 {
118 nsAutoCString escPath;
119 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
120 if (NS_FAILED(rv))
121 return rv;
123 // if this file references a directory, then we need to ensure that the
124 // URL ends with a slash. this is important since it affects the rules
125 // for relative URL resolution when this URL is used as a base URL.
126 // if the file does not exist, then we make no assumption about its type,
127 // and simply leave the URL unmodified.
128 if (escPath.Last() != '/') {
129 bool dir;
130 rv = aFile->IsDirectory(&dir);
131 if (NS_SUCCEEDED(rv) && dir)
132 escPath += '/';
133 }
135 result = escPath;
136 return NS_OK;
137 }
139 //----------------------------------------------------------------------------
140 // file:// URL parsing
141 //----------------------------------------------------------------------------
143 nsresult
144 net_ParseFileURL(const nsACString &inURL,
145 nsACString &outDirectory,
146 nsACString &outFileBaseName,
147 nsACString &outFileExtension)
148 {
149 nsresult rv;
151 outDirectory.Truncate();
152 outFileBaseName.Truncate();
153 outFileExtension.Truncate();
155 const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
156 const char *url = flatURL.get();
158 uint32_t schemeBeg, schemeEnd;
159 rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
160 if (NS_FAILED(rv)) return rv;
162 if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
163 NS_ERROR("must be a file:// url");
164 return NS_ERROR_UNEXPECTED;
165 }
167 nsIURLParser *parser = net_GetNoAuthURLParser();
168 NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
170 uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
171 int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
173 // invoke the parser to extract the URL path
174 rv = parser->ParseURL(url, flatURL.Length(),
175 nullptr, nullptr, // don't care about scheme
176 nullptr, nullptr, // don't care about authority
177 &pathPos, &pathLen);
178 if (NS_FAILED(rv)) return rv;
180 // invoke the parser to extract filepath from the path
181 rv = parser->ParsePath(url + pathPos, pathLen,
182 &filepathPos, &filepathLen,
183 nullptr, nullptr, // don't care about query
184 nullptr, nullptr); // don't care about ref
185 if (NS_FAILED(rv)) return rv;
187 filepathPos += pathPos;
189 // invoke the parser to extract the directory and filename from filepath
190 rv = parser->ParseFilePath(url + filepathPos, filepathLen,
191 &directoryPos, &directoryLen,
192 &basenamePos, &basenameLen,
193 &extensionPos, &extensionLen);
194 if (NS_FAILED(rv)) return rv;
196 if (directoryLen > 0)
197 outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
198 if (basenameLen > 0)
199 outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
200 if (extensionLen > 0)
201 outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
202 // since we are using a no-auth url parser, there will never be a host
203 // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
205 return NS_OK;
206 }
208 //----------------------------------------------------------------------------
209 // path manipulation functions
210 //----------------------------------------------------------------------------
212 // Replace all /./ with a / while resolving URLs
213 // But only till #?
//
// Rewrites the NUL-terminated string |path| in place.  Only the part before
// the first '?' or '#' is normalized; everything from that character on is
// copied through unchanged.
//
// flags:
//   NET_COALESCE_DOUBLE_SLASH_IS_ROOT - a leading "//" or "/%2F" (any case)
//       is remembered as a root marker and given special treatment below.
//   NET_COALESCE_ALLOW_RELATIVE_ROOT  - when set, a "/.." that has no counted
//       directory level left to remove is copied to the output instead of
//       being dropped.
214 void
215 net_CoalesceDirs(netCoalesceFlags flags, char* path)
216 {
217 /* Stolen from the old netlib's mkparse.c.
218 *
219 * modifies a url of the form /foo/../foo1 -> /foo1
220 * and /foo/./foo1 -> /foo/foo1
221 * and /foo/foo1/.. -> /foo/
222 */
// fwdPtr is the read cursor and urlPtr the write cursor; both walk the same
// buffer, and every pass below resets them to |path|.
223 char *fwdPtr = path;
224 char *urlPtr = path;
225 char *lastslash = path;
// traversal counts the directory levels copied so far that a later "/.."
// is allowed to remove.
226 uint32_t traversal = 0;
227 uint32_t special_ftp_len = 0;
229 /* Remember if this url is a special ftp one: */
230 if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)
231 {
232 /* some schemes (for example ftp) have the speciality that
233 the path can begin // or /%2F to mark the root of the
234 servers filesystem, a simple / only marks the root relative
235 to the user loging in. We remember the length of the marker */
236 if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
237 special_ftp_len = 4;
238 else if (nsCRT::strncmp(path,"//",2) == 0 )
239 special_ftp_len = 2;
240 }
242 /* find the last slash before # or ? */
// First advance fwdPtr to the terminating NUL or to the first '?' / '#' ...
243 for(; (*fwdPtr != '\0') &&
244 (*fwdPtr != '?') &&
245 (*fwdPtr != '#'); ++fwdPtr)
246 {
247 }
249 /* found nothing, but go back one only */
250 /* if there is something to go back to */
251 if (fwdPtr != path && *fwdPtr == '\0')
252 {
253 --fwdPtr;
254 }
256 /* search the slash */
// ... then walk backwards to the nearest '/'.  If there is none, the scan
// stops at |path| itself.
257 for(; (fwdPtr != path) &&
258 (*fwdPtr != '/'); --fwdPtr)
259 {
260 }
261 lastslash = fwdPtr;
262 fwdPtr = path;
264 /* replace all %2E or %2e with . in the path */
265 /* but stop at lastchar if non null */
// Pass 1: copy the path onto itself, collapsing each "%2E"/"%2e" into a
// single '.'.  The loop ends at NUL, '?' or '#', or when fwdPtr reaches
// lastslash while *lastslash is not NUL.
266 for(; (*fwdPtr != '\0') &&
267 (*fwdPtr != '?') &&
268 (*fwdPtr != '#') &&
269 (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
270 {
271 if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&
272 (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
273 {
274 *urlPtr++ = '.';
275 ++fwdPtr;
276 ++fwdPtr;
277 }
278 else
279 {
280 *urlPtr++ = *fwdPtr;
281 }
282 }
283 // Copy remaining stuff past the #?;
284 for (; *fwdPtr != '\0'; ++fwdPtr)
285 {
286 *urlPtr++ = *fwdPtr;
287 }
288 *urlPtr = '\0'; // terminate the url
290 // start again, this time for real
// Pass 2: resolve "/./" and "/../" segments, again only up to NUL/'?'/'#'.
291 fwdPtr = path;
292 urlPtr = path;
294 for(; (*fwdPtr != '\0') &&
295 (*fwdPtr != '?') &&
296 (*fwdPtr != '#'); ++fwdPtr)
297 {
298 if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
299 {
300 // remove . followed by slash
// Skipping one char here plus the loop's own ++fwdPtr leaves fwdPtr on the
// second '/', so "/./" contributes a single '/' to the output.
301 ++fwdPtr;
302 }
303 else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&
304 (*(fwdPtr+3) == '/' ||
305 *(fwdPtr+3) == '\0' || // This will take care of
306 *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag
307 *(fwdPtr+3) == '#'))
308 {
309 // remove foo/..
310 // reverse the urlPtr to the previous slash if possible
311 // if url does not allow relative root then drop .. above root
312 // otherwise retain them in the path
313 if(traversal > 0 || !(flags &
314 NET_COALESCE_ALLOW_RELATIVE_ROOT))
315 {
316 if (urlPtr != path)
317 urlPtr--; // we must be going back at least by one
318 for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
319 ; // null body
// NOTE(review): traversal is unsigned.  This branch can be entered with
// traversal == 0 when NET_COALESCE_ALLOW_RELATIVE_ROOT is not set, in which
// case the decrement wraps around.  With that flag clear the condition above
// is true regardless of traversal, so the wrapped value does not change
// which branch later iterations take.
320 --traversal; // count back
321 // forward the fwdPtr past the ../
322 fwdPtr += 2;
323 // if we have reached the beginning of the path
324 // while searching for the previous / and we remember
325 // that it is an url that begins with /%2F then
326 // advance urlPtr again by 3 chars because /%2F already
327 // marks the root of the path
328 if (urlPtr == path && special_ftp_len > 3)
329 {
330 ++urlPtr;
331 ++urlPtr;
332 ++urlPtr;
333 }
334 // special case if we have reached the end
335 // to preserve the last /
336 if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
337 ++urlPtr;
338 }
339 else
340 {
341 // there are to much /.. in this path, just copy them instead.
342 // forward the urlPtr past the /.. and copying it
344 // However if we remember it is an url that starts with
345 // /%2F and urlPtr just points at the "F" of "/%2F" then do
346 // not overwrite it with the /, just copy .. and move forward
347 // urlPtr.
348 if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
349 ++urlPtr;
350 else
351 *urlPtr++ = *fwdPtr;
352 ++fwdPtr;
353 *urlPtr++ = *fwdPtr;
354 ++fwdPtr;
355 *urlPtr++ = *fwdPtr;
356 }
357 }
358 else
359 {
360 // count the hierachie, but only if we do not have reached
361 // the root of some special urls with a special root marker
362 if (*fwdPtr == '/' && *(fwdPtr+1) != '.' &&
363 (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
364 traversal++;
365 // copy the url incrementaly
366 *urlPtr++ = *fwdPtr;
367 }
368 }
370 /*
371 * Now lets remove trailing . case
372 * /foo/foo1/. -> /foo/foo1/
373 */
375 if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
376 urlPtr--;
378 // Copy remaining stuff past the #?;
379 for (; *fwdPtr != '\0'; ++fwdPtr)
380 {
381 *urlPtr++ = *fwdPtr;
382 }
383 *urlPtr = '\0'; // terminate the url
384 }
// Resolves |relativePath| against |basePath| and stores the outcome in
// |result|.
//
// The relative path is consumed one character at a time.  Characters are
// collected into |name| until a '/' or a terminator (end of input, '#' or
// '?') is seen, at which point the collected segment is applied to |path|:
//   ".."       removes the last segment of |path|
//   "" or "."  is ignored
//   otherwise  the segment is appended, preceded by '/' when needed
//
// Returns NS_ERROR_MALFORMED_URI when a ".." is applied to a |path| that is
// too short to pop from (see the offset check below), NS_OK otherwise.
// |result| is only written on success.
386 nsresult
387 net_ResolveRelativePath(const nsACString &relativePath,
388 const nsACString &basePath,
389 nsACString &result)
390 {
391 nsAutoCString name;
392 nsAutoCString path(basePath);
// needsDelim is true while |path| is non-empty and does not end in '/'.
393 bool needsDelim = false;
395 if ( !path.IsEmpty() ) {
396 char16_t last = path.Last();
397 needsDelim = !(last == '/');
398 }
400 nsACString::const_iterator beg, end;
401 relativePath.BeginReading(beg);
402 relativePath.EndReading(end);
404 bool stop = false;
405 char c;
// The end of input is presented to the switch as a '\0' character.
406 for (; !stop; ++beg) {
407 c = (beg == end) ? '\0' : *beg;
408 //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
409 switch (c) {
410 case '\0':
411 case '#':
412 case '?':
413 stop = true;
414 // fall through...
415 case '/':
416 // delimiter found
417 if (name.EqualsLiteral("..")) {
418 // pop path
419 // If we already have the delim at end, then
420 // skip over that when searching for next one to the left
421 int32_t offset = path.Length() - (needsDelim ? 1 : 2);
422 // First check for errors
423 if (offset < 0 )
424 return NS_ERROR_MALFORMED_URI;
425 int32_t pos = path.RFind("/", false, offset);
426 if (pos >= 0)
427 path.Truncate(pos + 1);
428 else
429 path.Truncate();
430 }
431 else if (name.IsEmpty() || name.EqualsLiteral(".")) {
432 // do nothing
433 }
434 else {
435 // append name to path
436 if (needsDelim)
437 path += '/';
438 path += name;
439 needsDelim = true;
440 }
441 name.Truncate();
442 break;
444 default:
445 // append char to name
446 name += c;
447 }
448 }
449 // append anything left on relativePath (e.g. #..., ;..., ?...)
// The for loop's ++beg still runs after the terminating character was
// handled, so beg is one past it here; --beg steps back so the '#' or '?'
// itself is included in the appended tail.
450 if (c != '\0')
451 path += Substring(--beg, end);
453 result = path;
454 return NS_OK;
455 }
457 //----------------------------------------------------------------------------
458 // scheme fu
459 //----------------------------------------------------------------------------
461 /* Extract URI-Scheme if possible */
462 nsresult
463 net_ExtractURLScheme(const nsACString &inURI,
464 uint32_t *startPos,
465 uint32_t *endPos,
466 nsACString *scheme)
467 {
468 // search for something up to a colon, and call it the scheme
469 const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
470 const char* uri_start = flatURI.get();
471 const char* uri = uri_start;
473 if (!uri)
474 return NS_ERROR_MALFORMED_URI;
476 // skip leading white space
477 while (nsCRT::IsAsciiSpace(*uri))
478 uri++;
480 uint32_t start = uri - uri_start;
481 if (startPos) {
482 *startPos = start;
483 }
485 uint32_t length = 0;
486 char c;
487 while ((c = *uri++) != '\0') {
488 // First char must be Alpha
489 if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
490 length++;
491 }
492 // Next chars can be alpha + digit + some special chars
493 else if (length > 0 && (nsCRT::IsAsciiAlpha(c) ||
494 nsCRT::IsAsciiDigit(c) || c == '+' ||
495 c == '.' || c == '-')) {
496 length++;
497 }
498 // stop if colon reached but not as first char
499 else if (c == ':' && length > 0) {
500 if (endPos) {
501 *endPos = start + length;
502 }
504 if (scheme)
505 scheme->Assign(Substring(inURI, start, length));
506 return NS_OK;
507 }
508 else
509 break;
510 }
511 return NS_ERROR_MALFORMED_URI;
512 }
514 bool
515 net_IsValidScheme(const char *scheme, uint32_t schemeLen)
516 {
517 // first char must be alpha
518 if (!nsCRT::IsAsciiAlpha(*scheme))
519 return false;
521 // nsCStrings may have embedded nulls -- reject those too
522 for (; schemeLen; ++scheme, --schemeLen) {
523 if (!(nsCRT::IsAsciiAlpha(*scheme) ||
524 nsCRT::IsAsciiDigit(*scheme) ||
525 *scheme == '+' ||
526 *scheme == '.' ||
527 *scheme == '-'))
528 return false;
529 }
531 return true;
532 }
// Strips unwanted whitespace from the URI string |str|.
//
// |result| is truncated first.  Leading spaces, tabs, CRs and LFs and
// trailing spaces are dropped.  Tabs, CRs and LFs inside the string are
// removed too, except those that appear before a ':' that is reached before
// any '/' or '@' (see the comment on the scheme below).
//
// Returns true if something had to be stripped; in that case |result| holds
// the filtered text (which may be empty).  Returns false if nothing needed
// stripping, in which case |result| is left empty.
534 bool
535 net_FilterURIString(const char *str, nsACString& result)
536 {
537 NS_PRECONDITION(str, "Must have a non-null string!");
// writing becomes true as soon as anything is stripped.  |str| always marks
// the start of the text that has not yet been appended to |result|.
538 bool writing = false;
539 result.Truncate();
540 const char *p = str;
542 // Remove leading spaces, tabs, CR, LF if any.
543 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
544 writing = true;
545 str = p + 1;
546 p++;
547 }
549 // Don't strip from the scheme, because other code assumes everything
550 // up to the ':' is the scheme, and it's bad not to have it match.
551 // If there's no ':', strip.
552 bool found_colon = false;
// first remembers the earliest tab/CR/LF seen while found_colon was still
// false, so the scan can be rewound to it later.
553 const char *first = nullptr;
554 while (*p) {
555 switch (*p) {
556 case '\t':
557 case '\r':
558 case '\n':
559 if (found_colon) {
560 writing = true;
561 // append chars up to but not including *p
562 if (p > str)
563 result.Append(str, p - str);
564 str = p + 1;
565 } else {
566 // remember where the first \t\r\n was in case we find no scheme
567 if (!first)
568 first = p;
569 }
570 break;
572 case ':':
573 found_colon = true;
574 break;
576 case '/':
577 case '@':
578 if (!found_colon) {
579 // colon also has to precede / or @ to be a scheme
580 found_colon = true; // not really, but means ok to strip
581 if (first) {
582 // go back and replace
583 p = first;
584 continue; // process *p again
585 }
586 }
587 break;
589 default:
590 break;
591 }
592 p++;
594 // At end, if there was no scheme, and we hit a control char, fix
595 // it up now.
596 if (!*p && first != nullptr && !found_colon) {
597 // TRICKY - to avoid duplicating code, we reset the loop back
598 // to the point we found something to do
599 p = first;
600 // This also stops us from looping after we finish
601 found_colon = true; // so we'll replace \t\r\n
602 }
603 }
605 // Remove trailing spaces if any
// Only ' ' is trimmed here, and never further back than |str|.
606 while (((p-1) >= str) && (*(p-1) == ' ')) {
607 writing = true;
608 p--;
609 }
// Flush the final pending run, but only if something was stripped at all.
611 if (writing && p > str)
612 result.Append(str, p - str);
614 return writing;
615 }
617 #if defined(XP_WIN)
618 bool
619 net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
620 {
621 bool writing = false;
623 nsACString::const_iterator beginIter, endIter;
624 aURL.BeginReading(beginIter);
625 aURL.EndReading(endIter);
627 const char *s, *begin = beginIter.get();
629 for (s = begin; s != endIter.get(); ++s)
630 {
631 if (*s == '\\')
632 {
633 writing = true;
634 if (s > begin)
635 aResultBuf.Append(begin, s - begin);
636 aResultBuf += '/';
637 begin = s + 1;
638 }
639 }
640 if (writing && s > begin)
641 aResultBuf.Append(begin, s - begin);
643 return writing;
644 }
645 #endif
647 //----------------------------------------------------------------------------
648 // miscellaneous (i.e., stuff that should really be elsewhere)
649 //----------------------------------------------------------------------------
// Lower-cases |c| in place if it is an ASCII upper-case letter; every other
// value is left unchanged.
static inline
void ToLower(char &c)
{
    const bool isUpper = (c >= 'A' && c <= 'Z');
    if (isUpper) {
        c = static_cast<char>(c + ('a' - 'A'));
    }
}
658 void
659 net_ToLowerCase(char *str, uint32_t length)
660 {
661 for (char *end = str + length; str < end; ++str)
662 ToLower(*str);
663 }
665 void
666 net_ToLowerCase(char *str)
667 {
668 for (; *str; ++str)
669 ToLower(*str);
670 }
// Scans [iter, stop) for the first character that appears in the
// NUL-terminated string |set|.  The scan also ends early at a NUL character
// in the input.
//
// Returns a pointer to the matching character, or to the position where the
// scan ended (|stop| or the NUL) when there is no match.
char *
net_FindCharInSet(const char *iter, const char *stop, const char *set)
{
    const char *cur = iter;
    while (cur != stop && *cur != '\0') {
        // Walk the set until we hit the current character or run out.
        const char *member = set;
        while (*member != '\0' && *member != *cur) {
            ++member;
        }
        if (*member != '\0') {
            break;              // *cur is in the set
        }
        ++cur;
    }
    return (char *) cur;
}
// Scans [iter, stop) for the first character that does NOT appear in the
// NUL-terminated string |set|.
//
// Returns a pointer to that character, or |stop| if every character in the
// range is in the set.  A NUL character in the input is never considered
// part of the set, so the scan also ends there.
//
// The range is checked before every read: an empty range (iter == stop)
// yields |stop| without dereferencing it, and the scan can never run past
// |stop|.
char *
net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
{
    for (; iter != stop; ++iter) {
        // Walk the set until we hit the current character or run out.
        const char *s = set;
        while (*s && *s != *iter) {
            ++s;
        }
        if (!*s) {
            break;              // *iter is not in the set
        }
    }
    return (char *) iter;
}
// Scans the range [stop, iter) backwards, starting at the character just
// before |iter|, for the first character that does NOT appear in the
// NUL-terminated string |set|.  Note the argument order: |stop| is the low
// end of the range and |iter| the high end.
//
// Returns a pointer to that character, or |stop - 1| if the range is empty
// or consists only of characters from the set.
char *
net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
{
    const char *cur = iter - 1;
    const char *const limit = stop - 1;

    while (cur != limit) {
        // Walk the set until we hit the current character or run out.
        const char *member = set;
        while (*member != '\0' && *member != *cur) {
            ++member;
        }
        if (*member == '\0') {
            break;              // *cur is not in the set
        }
        --cur;
    }
    return (char *) cur;
}
718 #define HTTP_LWS " \t"
720 // Return the index of the closing quote of the string, if any
721 static uint32_t
722 net_FindStringEnd(const nsCString& flatStr,
723 uint32_t stringStart,
724 char stringDelim)
725 {
726 NS_ASSERTION(stringStart < flatStr.Length() &&
727 flatStr.CharAt(stringStart) == stringDelim &&
728 (stringDelim == '"' || stringDelim == '\''),
729 "Invalid stringStart");
731 const char set[] = { stringDelim, '\\', '\0' };
732 do {
733 // stringStart points to either the start quote or the last
734 // escaped char (the char following a '\\')
736 // Write to searchStart here, so that when we get back to the
737 // top of the loop right outside this one we search from the
738 // right place.
739 uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
740 if (stringEnd == uint32_t(kNotFound))
741 return flatStr.Length();
743 if (flatStr.CharAt(stringEnd) == '\\') {
744 // Hit a backslash-escaped char. Need to skip over it.
745 stringStart = stringEnd + 1;
746 if (stringStart == flatStr.Length())
747 return stringStart;
749 // Go back to looking for the next escape or the string end
750 continue;
751 }
753 return stringEnd;
755 } while (true);
757 NS_NOTREACHED("How did we get here?");
758 return flatStr.Length();
759 }
762 static uint32_t
763 net_FindMediaDelimiter(const nsCString& flatStr,
764 uint32_t searchStart,
765 char delimiter)
766 {
767 do {
768 // searchStart points to the spot from which we should start looking
769 // for the delimiter.
770 const char delimStr[] = { delimiter, '"', '\0' };
771 uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
772 if (curDelimPos == uint32_t(kNotFound))
773 return flatStr.Length();
775 char ch = flatStr.CharAt(curDelimPos);
776 if (ch == delimiter) {
777 // Found delimiter
778 return curDelimPos;
779 }
781 // We hit the start of a quoted string. Look for its end.
782 searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
783 if (searchStart == flatStr.Length())
784 return searchStart;
786 ++searchStart;
788 // searchStart now points to the first char after the end of the
789 // string, so just go back to the top of the loop and look for
790 // |delimiter| again.
791 } while (true);
793 NS_NOTREACHED("How did we get here?");
794 return flatStr.Length();
795 }
797 // aOffset should be added to aCharsetStart and aCharsetEnd if this
798 // function sets them.
//
// Parses one media-type ("type/subtype" plus optional ";"-separated
// parameters) out of |aMediaTypeStr| and merges it into the in/out
// arguments:
//   aContentType    - replaced (lower-cased) when the parsed type is
//                     accepted and differs case-insensitively from the
//                     current value.
//   aContentCharset - replaced when a "charset=" parameter was found, or
//                     when the type changed and *aHadCharset was already
//                     true.
//   aHadCharset     - set to true whenever aContentCharset is replaced.
//   aCharsetStart / aCharsetEnd
//                   - offsets (with aOffset added) of the charset parameter,
//                     or of the spot where one would go; see the end of the
//                     function.
// A type is accepted only if it is non-empty, contains a '/', and is not
// matched by the "*/*" comparison below.
799 static void
800 net_ParseMediaType(const nsACString &aMediaTypeStr,
801 nsACString &aContentType,
802 nsACString &aContentCharset,
803 int32_t aOffset,
804 bool *aHadCharset,
805 int32_t *aCharsetStart,
806 int32_t *aCharsetEnd)
807 {
808 const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
809 const char* start = flatStr.get();
810 const char* end = start + flatStr.Length();
812 // Trim LWS leading and trailing whitespace from type. We include '(' in
813 // the trailing trim set to catch media-type comments, which are not at all
814 // standard, but may occur in rare cases.
815 const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
816 const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
// [charset, charsetEnd) delimits the charset value; it starts out as an
// empty range.  charsetParamStart/End are offsets into flatStr.
818 const char* charset = "";
819 const char* charsetEnd = charset;
820 int32_t charsetParamStart = 0;
821 int32_t charsetParamEnd = 0;
823 // Iterate over parameters
824 bool typeHasCharset = false;
825 uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
826 if (paramStart != uint32_t(kNotFound)) {
827 // We have parameters. Iterate over them.
828 uint32_t curParamStart = paramStart + 1;
829 do {
830 uint32_t curParamEnd =
831 net_FindMediaDelimiter(flatStr, curParamStart, ';');
833 const char* paramName = net_FindCharNotInSet(start + curParamStart,
834 start + curParamEnd,
835 HTTP_LWS);
836 static const char charsetStr[] = "charset=";
// Every "charset=" parameter overwrites the previous one, so the last one
// in the media-type wins.
837 if (PL_strncasecmp(paramName, charsetStr,
838 sizeof(charsetStr) - 1) == 0) {
839 charset = paramName + sizeof(charsetStr) - 1;
840 charsetEnd = start + curParamEnd;
841 typeHasCharset = true;
// curParamStart - 1 is the index of the ';' that introduces this parameter.
842 charsetParamStart = curParamStart - 1;
843 charsetParamEnd = curParamEnd;
844 }
846 curParamStart = curParamEnd + 1;
847 } while (curParamStart < flatStr.Length());
848 }
850 bool charsetNeedsQuotedStringUnescaping = false;
851 if (typeHasCharset) {
852 // Trim LWS leading and trailing whitespace from charset. We include
853 // '(' in the trailing trim set to catch media-type comments, which are
854 // not at all standard, but may occur in rare cases.
855 charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
856 if (*charset == '"') {
857 charsetNeedsQuotedStringUnescaping = true;
858 charsetEnd =
859 start + net_FindStringEnd(flatStr, charset - start, *charset);
// Step past the opening quote; charsetEnd points at the closing quote (or
// at the end of the string if the quote is never closed).
860 charset++;
861 NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
862 } else {
863 charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
864 }
865 }
867 // if the server sent "*/*", it is meaningless, so do not store it.
868 // also, if type is the same as aContentType, then just update the
869 // charset. however, if charset is empty and aContentType hasn't
870 // changed, then don't wipe-out an existing aContentCharset. We
871 // also want to reject a mime-type if it does not include a slash.
872 // some servers give junk after the charset parameter, which may
873 // include a comma, so this check makes us a bit more tolerant.
// NOTE(review): the strncmp below compares only (typeEnd - type) bytes, so a
// type that is a proper prefix of "*/*" also compares equal; such a type is
// rejected here in any case, either by this comparison or by the '/' check.
875 if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&
876 memchr(type, '/', typeEnd - type) != nullptr) {
877 // Common case here is that aContentType is empty
878 bool eq = !aContentType.IsEmpty() &&
879 aContentType.Equals(Substring(type, typeEnd),
880 nsCaseInsensitiveCStringComparator());
881 if (!eq) {
882 aContentType.Assign(type, typeEnd - type);
883 ToLowerCase(aContentType);
884 }
886 if ((!eq && *aHadCharset) || typeHasCharset) {
887 *aHadCharset = true;
888 if (charsetNeedsQuotedStringUnescaping) {
889 // parameters using the "quoted-string" syntax need
890 // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
891 aContentCharset.Truncate();
892 for (const char *c = charset; c != charsetEnd; c++) {
893 if (*c == '\\' && c + 1 != charsetEnd) {
894 // eat escape
895 c++;
896 }
897 aContentCharset.Append(*c);
898 }
899 }
900 else {
901 aContentCharset.Assign(charset, charsetEnd - charset);
902 }
903 if (typeHasCharset) {
904 *aCharsetStart = charsetParamStart + aOffset;
905 *aCharsetEnd = charsetParamEnd + aOffset;
906 }
907 }
908 // Only set a new charset position if this is a different type
909 // from the last one we had and it doesn't already have a
910 // charset param. If this is the same type, we probably want
911 // to leave the charset position on its first occurrence.
// In that case both offsets are set to the same position: the first ';'
// after the type, or the end of the media-type string if there is none.
912 if (!eq && !typeHasCharset) {
913 int32_t charsetStart = int32_t(paramStart);
914 if (charsetStart == kNotFound)
915 charsetStart = flatStr.Length();
917 *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
918 }
919 }
920 }
922 #undef HTTP_LWS
924 void
925 net_ParseContentType(const nsACString &aHeaderStr,
926 nsACString &aContentType,
927 nsACString &aContentCharset,
928 bool *aHadCharset)
929 {
930 int32_t dummy1, dummy2;
931 net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
932 aHadCharset, &dummy1, &dummy2);
933 }
935 void
936 net_ParseContentType(const nsACString &aHeaderStr,
937 nsACString &aContentType,
938 nsACString &aContentCharset,
939 bool *aHadCharset,
940 int32_t *aCharsetStart,
941 int32_t *aCharsetEnd)
942 {
943 //
944 // Augmented BNF (from RFC 2616 section 3.7):
945 //
946 // header-value = media-type *( LWS "," LWS media-type )
947 // media-type = type "/" subtype *( LWS ";" LWS parameter )
948 // type = token
949 // subtype = token
950 // parameter = attribute "=" value
951 // attribute = token
952 // value = token | quoted-string
953 //
954 //
955 // Examples:
956 //
957 // text/html
958 // text/html, text/html
959 // text/html,text/html; charset=ISO-8859-1
960 // text/html,text/html; charset="ISO-8859-1"
961 // text/html;charset=ISO-8859-1, text/html
962 // text/html;charset='ISO-8859-1', text/html
963 // application/octet-stream
964 //
966 *aHadCharset = false;
967 const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
969 // iterate over media-types. Note that ',' characters can happen
970 // inside quoted strings, so we need to watch out for that.
971 uint32_t curTypeStart = 0;
972 do {
973 // curTypeStart points to the start of the current media-type. We want
974 // to look for its end.
975 uint32_t curTypeEnd =
976 net_FindMediaDelimiter(flatStr, curTypeStart, ',');
978 // At this point curTypeEnd points to the spot where the media-type
979 // starting at curTypeEnd ends. Time to parse that!
980 net_ParseMediaType(Substring(flatStr, curTypeStart,
981 curTypeEnd - curTypeStart),
982 aContentType, aContentCharset, curTypeStart,
983 aHadCharset, aCharsetStart, aCharsetEnd);
985 // And let's move on to the next media-type
986 curTypeStart = curTypeEnd + 1;
987 } while (curTypeStart < flatStr.Length());
988 }
990 bool
991 net_IsValidHostName(const nsCSubstring &host)
992 {
993 const char *end = host.EndReading();
994 // Use explicit whitelists to select which characters we are
995 // willing to send to lower-level DNS logic. This is more
996 // self-documenting, and can also be slightly faster than the
997 // blacklist approach, since DNS names are the common case, and
998 // the commonest characters will tend to be near the start of
999 // the list.
1001 // Whitelist for DNS names (RFC 1035) with extra characters added
1002 // for pragmatic reasons "$+_"
1003 // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
1004 if (net_FindCharNotInSet(host.BeginReading(), end,
1005 "abcdefghijklmnopqrstuvwxyz"
1006 ".-0123456789"
1007 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
1008 return true;
1010 // Might be a valid IPv6 link-local address containing a percent sign
1011 nsAutoCString strhost(host);
1012 PRNetAddr addr;
1013 return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;
1014 }
1016 bool
1017 net_IsValidIPv4Addr(const char *addr, int32_t addrLen)
1018 {
1019 RangedPtr<const char> p(addr, addrLen);
1021 int32_t octet = -1; // means no digit yet
1022 int32_t dotCount = 0; // number of dots in the address
1024 for (; addrLen; ++p, --addrLen) {
1025 if (*p == '.') {
1026 dotCount++;
1027 if (octet == -1) {
1028 // invalid octet
1029 return false;
1030 }
1031 octet = -1;
1032 } else if (*p >= '0' && *p <='9') {
1033 if (octet == 0) {
1034 // leading 0 is not allowed
1035 return false;
1036 } else if (octet == -1) {
1037 octet = *p - '0';
1038 } else {
1039 octet *= 10;
1040 octet += *p - '0';
1041 if (octet > 255)
1042 return false;
1043 }
1044 } else {
1045 // invalid character
1046 return false;
1047 }
1048 }
1050 return (dotCount == 3 && octet != -1);
1051 }
1053 bool
1054 net_IsValidIPv6Addr(const char *addr, int32_t addrLen)
1055 {
1056 RangedPtr<const char> p(addr, addrLen);
1058 int32_t digits = 0; // number of digits in current block
1059 int32_t colons = 0; // number of colons in a row during parsing
1060 int32_t blocks = 0; // number of hexadecimal blocks
1061 bool haveZeros = false; // true if double colon is present in the address
1063 for (; addrLen; ++p, --addrLen) {
1064 if (*p == ':') {
1065 if (colons == 0) {
1066 if (digits != 0) {
1067 digits = 0;
1068 blocks++;
1069 }
1070 } else if (colons == 1) {
1071 if (haveZeros)
1072 return false; // only one occurrence is allowed
1073 haveZeros = true;
1074 } else {
1075 // too many colons in a row
1076 return false;
1077 }
1078 colons++;
1079 } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') ||
1080 (*p >= 'A' && *p <= 'F')) {
1081 if (colons == 1 && blocks == 0) // starts with a single colon
1082 return false;
1083 if (digits == 4) // too many digits
1084 return false;
1085 colons = 0;
1086 digits++;
1087 } else if (*p == '.') {
1088 // check valid IPv4 from the beginning of the last block
1089 if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits))
1090 return false;
1091 return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6);
1092 } else {
1093 // invalid character
1094 return false;
1095 }
1096 }
1098 if (colons == 1) // ends with a single colon
1099 return false;
1101 if (digits) // there is a block at the end
1102 blocks++;
1104 return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8);
1105 }