netwerk/base/src/nsURLParsers.cpp

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:91401a94b568
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #include <string.h>
7
8 #include "mozilla/RangedPtr.h"
9
10 #include "nsURLParsers.h"
11 #include "nsURLHelper.h"
12 #include "nsString.h"
13 #include "nsCRT.h"
14
15 using namespace mozilla;
16
17 //----------------------------------------------------------------------------
18
19 static uint32_t
20 CountConsecutiveSlashes(const char *str, int32_t len)
21 {
22 RangedPtr<const char> p(str, len);
23 uint32_t count = 0;
24 while (len-- && *p++ == '/') ++count;
25 return count;
26 }
27
28 //----------------------------------------------------------------------------
29 // nsBaseURLParser implementation
30 //----------------------------------------------------------------------------
31
// Both concrete parser classes name nsIURLParser as their interface here.
// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; it presumably
// expands to the nsISupports (AddRef/Release/QueryInterface) boilerplate --
// confirm against the XPCOM headers.
NS_IMPL_ISUPPORTS(nsAuthURLParser, nsIURLParser)
NS_IMPL_ISUPPORTS(nsNoAuthURLParser, nsIURLParser)
34
// SET_RESULT(component, pos, len)
//   Writes |pos| (as uint32_t) to *<component>Pos and |len| (as int32_t) to
//   *<component>Len. Each out-pointer is written only when it is non-null,
//   so callers may pass null for results they do not need. Throughout this
//   file a length of -1 is used to report "component not present".
#define SET_RESULT(component, pos, len) \
    PR_BEGIN_MACRO \
        if (component ## Pos) \
           *component ## Pos = uint32_t(pos); \
        if (component ## Len) \
           *component ## Len = int32_t(len); \
    PR_END_MACRO

// OFFSET_RESULT(component, offset)
//   Adds |offset| to *<component>Pos when that out-pointer is non-null. Used
//   to turn a position relative to a sub-string into a position relative to
//   the enclosing string.
#define OFFSET_RESULT(component, offset) \
    PR_BEGIN_MACRO \
        if (component ## Pos) \
           *component ## Pos += offset; \
    PR_END_MACRO
48
49 NS_IMETHODIMP
50 nsBaseURLParser::ParseURL(const char *spec, int32_t specLen,
51 uint32_t *schemePos, int32_t *schemeLen,
52 uint32_t *authorityPos, int32_t *authorityLen,
53 uint32_t *pathPos, int32_t *pathLen)
54 {
55 NS_PRECONDITION(spec, "null pointer");
56
57 if (specLen < 0)
58 specLen = strlen(spec);
59
60 const char *stop = nullptr;
61 const char *colon = nullptr;
62 const char *slash = nullptr;
63 const char *p;
64 uint32_t offset = 0;
65 int32_t len = specLen;
66 for (p = spec; len && *p && !colon && !slash; ++p, --len) {
67 // skip leading whitespace
68 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
69 spec++;
70 specLen--;
71 offset++;
72 continue;
73 }
74 switch (*p) {
75 case ':':
76 if (!colon)
77 colon = p;
78 break;
79 case '/': // start of filepath
80 case '?': // start of query
81 case '#': // start of ref
82 if (!slash)
83 slash = p;
84 break;
85 case '@': // username@hostname
86 case '[': // start of IPv6 address literal
87 if (!stop)
88 stop = p;
89 break;
90 }
91 }
92 // disregard the first colon if it follows an '@' or a '['
93 if (colon && stop && colon > stop)
94 colon = nullptr;
95
96 // if the spec only contained whitespace ...
97 if (specLen == 0) {
98 SET_RESULT(scheme, 0, -1);
99 SET_RESULT(authority, 0, 0);
100 SET_RESULT(path, 0, 0);
101 return NS_OK;
102 }
103
104 // ignore trailing whitespace and control characters
105 for (p = spec + specLen - 1; ((unsigned char) *p <= ' ') && (p != spec); --p)
106 ;
107
108 specLen = p - spec + 1;
109
110 if (colon && (colon < slash || !slash)) {
111 //
112 // spec = <scheme>:/<the-rest>
113 //
114 // or
115 //
116 // spec = <scheme>:<authority>
117 // spec = <scheme>:<path-no-slashes>
118 //
119 if (!net_IsValidScheme(spec, colon - spec) || (*(colon+1) == ':')) {
120 NS_WARNING("malformed uri");
121 return NS_ERROR_MALFORMED_URI;
122 }
123 SET_RESULT(scheme, offset, colon - spec);
124 if (authorityLen || pathLen) {
125 uint32_t schemeLen = colon + 1 - spec;
126 offset += schemeLen;
127 ParseAfterScheme(colon + 1, specLen - schemeLen,
128 authorityPos, authorityLen,
129 pathPos, pathLen);
130 OFFSET_RESULT(authority, offset);
131 OFFSET_RESULT(path, offset);
132 }
133 }
134 else {
135 //
136 // spec = <authority-no-port-or-password>/<path>
137 // spec = <path>
138 //
139 // or
140 //
141 // spec = <authority-no-port-or-password>/<path-with-colon>
142 // spec = <path-with-colon>
143 //
144 // or
145 //
146 // spec = <authority-no-port-or-password>
147 // spec = <path-no-slashes-or-colon>
148 //
149 SET_RESULT(scheme, 0, -1);
150 if (authorityLen || pathLen) {
151 ParseAfterScheme(spec, specLen,
152 authorityPos, authorityLen,
153 pathPos, pathLen);
154 OFFSET_RESULT(authority, offset);
155 OFFSET_RESULT(path, offset);
156 }
157 }
158 return NS_OK;
159 }
160
161 NS_IMETHODIMP
162 nsBaseURLParser::ParseAuthority(const char *auth, int32_t authLen,
163 uint32_t *usernamePos, int32_t *usernameLen,
164 uint32_t *passwordPos, int32_t *passwordLen,
165 uint32_t *hostnamePos, int32_t *hostnameLen,
166 int32_t *port)
167 {
168 NS_PRECONDITION(auth, "null pointer");
169
170 if (authLen < 0)
171 authLen = strlen(auth);
172
173 SET_RESULT(username, 0, -1);
174 SET_RESULT(password, 0, -1);
175 SET_RESULT(hostname, 0, authLen);
176 if (port)
177 *port = -1;
178 return NS_OK;
179 }
180
181 NS_IMETHODIMP
182 nsBaseURLParser::ParseUserInfo(const char *userinfo, int32_t userinfoLen,
183 uint32_t *usernamePos, int32_t *usernameLen,
184 uint32_t *passwordPos, int32_t *passwordLen)
185 {
186 SET_RESULT(username, 0, -1);
187 SET_RESULT(password, 0, -1);
188 return NS_OK;
189 }
190
191 NS_IMETHODIMP
192 nsBaseURLParser::ParseServerInfo(const char *serverinfo, int32_t serverinfoLen,
193 uint32_t *hostnamePos, int32_t *hostnameLen,
194 int32_t *port)
195 {
196 SET_RESULT(hostname, 0, -1);
197 if (port)
198 *port = -1;
199 return NS_OK;
200 }
201
202 NS_IMETHODIMP
203 nsBaseURLParser::ParsePath(const char *path, int32_t pathLen,
204 uint32_t *filepathPos, int32_t *filepathLen,
205 uint32_t *queryPos, int32_t *queryLen,
206 uint32_t *refPos, int32_t *refLen)
207 {
208 NS_PRECONDITION(path, "null pointer");
209
210 if (pathLen < 0)
211 pathLen = strlen(path);
212
213 // path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref>
214
215 // XXX PL_strnpbrk would be nice, but it's buggy
216
217 // search for first occurrence of either ? or #
218 const char *query_beg = 0, *query_end = 0;
219 const char *ref_beg = 0;
220 const char *p = 0;
221 for (p = path; p < path + pathLen; ++p) {
222 // only match the query string if it precedes the reference fragment
223 if (!ref_beg && !query_beg && *p == '?')
224 query_beg = p + 1;
225 else if (*p == '#') {
226 ref_beg = p + 1;
227 if (query_beg)
228 query_end = p;
229 break;
230 }
231 }
232
233 if (query_beg) {
234 if (query_end)
235 SET_RESULT(query, query_beg - path, query_end - query_beg);
236 else
237 SET_RESULT(query, query_beg - path, pathLen - (query_beg - path));
238 }
239 else
240 SET_RESULT(query, 0, -1);
241
242 if (ref_beg)
243 SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path));
244 else
245 SET_RESULT(ref, 0, -1);
246
247 const char *end;
248 if (query_beg)
249 end = query_beg - 1;
250 else if (ref_beg)
251 end = ref_beg - 1;
252 else
253 end = path + pathLen;
254
255 // an empty file path is no file path
256 if (end != path)
257 SET_RESULT(filepath, 0, end - path);
258 else
259 SET_RESULT(filepath, 0, -1);
260 return NS_OK;
261 }
262
263 NS_IMETHODIMP
264 nsBaseURLParser::ParseFilePath(const char *filepath, int32_t filepathLen,
265 uint32_t *directoryPos, int32_t *directoryLen,
266 uint32_t *basenamePos, int32_t *basenameLen,
267 uint32_t *extensionPos, int32_t *extensionLen)
268 {
269 NS_PRECONDITION(filepath, "null pointer");
270
271 if (filepathLen < 0)
272 filepathLen = strlen(filepath);
273
274 if (filepathLen == 0) {
275 SET_RESULT(directory, 0, -1);
276 SET_RESULT(basename, 0, 0); // assume a zero length file basename
277 SET_RESULT(extension, 0, -1);
278 return NS_OK;
279 }
280
281 const char *p;
282 const char *end = filepath + filepathLen;
283
284 // search backwards for filename
285 for (p = end - 1; *p != '/' && p > filepath; --p)
286 ;
287 if (*p == '/') {
288 // catch /.. and /.
289 if ((p+1 < end && *(p+1) == '.') &&
290 (p+2 == end || (*(p+2) == '.' && p+3 == end)))
291 p = end - 1;
292 // filepath = <directory><filename>.<extension>
293 SET_RESULT(directory, 0, p - filepath + 1);
294 ParseFileName(p + 1, end - (p + 1),
295 basenamePos, basenameLen,
296 extensionPos, extensionLen);
297 OFFSET_RESULT(basename, p + 1 - filepath);
298 OFFSET_RESULT(extension, p + 1 - filepath);
299 }
300 else {
301 // filepath = <filename>.<extension>
302 SET_RESULT(directory, 0, -1);
303 ParseFileName(filepath, filepathLen,
304 basenamePos, basenameLen,
305 extensionPos, extensionLen);
306 }
307 return NS_OK;
308 }
309
310 nsresult
311 nsBaseURLParser::ParseFileName(const char *filename, int32_t filenameLen,
312 uint32_t *basenamePos, int32_t *basenameLen,
313 uint32_t *extensionPos, int32_t *extensionLen)
314 {
315 NS_PRECONDITION(filename, "null pointer");
316
317 if (filenameLen < 0)
318 filenameLen = strlen(filename);
319
320 // no extension if filename ends with a '.'
321 if (filename[filenameLen-1] != '.') {
322 // ignore '.' at the beginning
323 for (const char *p = filename + filenameLen - 1; p > filename; --p) {
324 if (*p == '.') {
325 // filename = <basename.extension>
326 SET_RESULT(basename, 0, p - filename);
327 SET_RESULT(extension, p + 1 - filename, filenameLen - (p - filename + 1));
328 return NS_OK;
329 }
330 }
331 }
332 // filename = <basename>
333 SET_RESULT(basename, 0, filenameLen);
334 SET_RESULT(extension, 0, -1);
335 return NS_OK;
336 }
337
338 //----------------------------------------------------------------------------
339 // nsNoAuthURLParser implementation
340 //----------------------------------------------------------------------------
341
342 NS_IMETHODIMP
343 nsNoAuthURLParser::ParseAuthority(const char *auth, int32_t authLen,
344 uint32_t *usernamePos, int32_t *usernameLen,
345 uint32_t *passwordPos, int32_t *passwordLen,
346 uint32_t *hostnamePos, int32_t *hostnameLen,
347 int32_t *port)
348 {
349 NS_NOTREACHED("Shouldn't parse auth in a NoAuthURL!");
350 return NS_ERROR_UNEXPECTED;
351 }
352
353 void
354 nsNoAuthURLParser::ParseAfterScheme(const char *spec, int32_t specLen,
355 uint32_t *authPos, int32_t *authLen,
356 uint32_t *pathPos, int32_t *pathLen)
357 {
358 NS_PRECONDITION(specLen >= 0, "unexpected");
359
360 // everything is the path
361 uint32_t pos = 0;
362 switch (CountConsecutiveSlashes(spec, specLen)) {
363 case 0:
364 case 1:
365 break;
366 case 2:
367 {
368 const char *p = nullptr;
369 if (specLen > 2) {
370 // looks like there is an authority section
371 #if defined(XP_WIN)
372 // if the authority looks like a drive number then we
373 // really want to treat it as part of the path
374 // [a-zA-Z][:|]{/\}
375 // i.e one of: c: c:\foo c:/foo c| c|\foo c|/foo
376 if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') &&
377 nsCRT::IsAsciiAlpha(spec[2]) &&
378 ((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) {
379 pos = 1;
380 break;
381 }
382 #endif
383 // Ignore apparent authority; path is everything after it
384 for (p = spec + 2; p < spec + specLen; ++p) {
385 if (*p == '/' || *p == '?' || *p == '#')
386 break;
387 }
388 }
389 SET_RESULT(auth, 0, -1);
390 if (p && p != spec+specLen)
391 SET_RESULT(path, p - spec, specLen - (p - spec));
392 else
393 SET_RESULT(path, 0, -1);
394 return;
395 }
396 default:
397 pos = 2;
398 break;
399 }
400 SET_RESULT(auth, pos, 0);
401 SET_RESULT(path, pos, specLen - pos);
402 }
403
#if defined(XP_WIN)
// Windows-only override. A file path that is nothing but a drive specifier
// (a letter followed by ':' or '|', optionally preceded by one '/') is
// reported entirely as the directory, with basename and extension absent.
// Every other path is handled by nsBaseURLParser::ParseFilePath. A negative
// |filepathLen| means |filepath| is null-terminated.
NS_IMETHODIMP
nsNoAuthURLParser::ParseFilePath(const char *filepath, int32_t filepathLen,
                                 uint32_t *directoryPos, int32_t *directoryLen,
                                 uint32_t *basenamePos, int32_t *basenameLen,
                                 uint32_t *extensionPos, int32_t *extensionLen)
{
    NS_PRECONDITION(filepath, "null pointer");

    if (filepathLen < 0)
        filepathLen = strlen(filepath);

    // look for a filepath consisting of only a drive number, which may or
    // may not have a leading slash.
    if (filepathLen == 2 || filepathLen == 3) {
        const char *drive = filepath;
        if (*drive == '/')
            ++drive;

        const bool twoBytesLeft = (filepath + filepathLen - drive) == 2;
        if (twoBytesLeft &&
            (drive[1] == ':' || drive[1] == '|') &&
            nsCRT::IsAsciiAlpha(*drive)) {
            // filepath = <drive-number>:
            SET_RESULT(directory, 0, filepathLen);
            SET_RESULT(basename, 0, -1);
            SET_RESULT(extension, 0, -1);
            return NS_OK;
        }
    }

    // otherwise fallback on common implementation
    return nsBaseURLParser::ParseFilePath(filepath, filepathLen,
                                          directoryPos, directoryLen,
                                          basenamePos, basenameLen,
                                          extensionPos, extensionLen);
}
#endif
439
440 //----------------------------------------------------------------------------
441 // nsAuthURLParser implementation
442 //----------------------------------------------------------------------------
443
444 NS_IMETHODIMP
445 nsAuthURLParser::ParseAuthority(const char *auth, int32_t authLen,
446 uint32_t *usernamePos, int32_t *usernameLen,
447 uint32_t *passwordPos, int32_t *passwordLen,
448 uint32_t *hostnamePos, int32_t *hostnameLen,
449 int32_t *port)
450 {
451 nsresult rv;
452
453 NS_PRECONDITION(auth, "null pointer");
454
455 if (authLen < 0)
456 authLen = strlen(auth);
457
458 if (authLen == 0) {
459 SET_RESULT(username, 0, -1);
460 SET_RESULT(password, 0, -1);
461 SET_RESULT(hostname, 0, 0);
462 if (port)
463 *port = -1;
464 return NS_OK;
465 }
466
467 // search backwards for @
468 const char *p = auth + authLen - 1;
469 for (; (*p != '@') && (p > auth); --p) {
470 continue;
471 }
472 if ( *p == '@' ) {
473 // auth = <user-info@server-info>
474 rv = ParseUserInfo(auth, p - auth,
475 usernamePos, usernameLen,
476 passwordPos, passwordLen);
477 if (NS_FAILED(rv)) return rv;
478 rv = ParseServerInfo(p + 1, authLen - (p - auth + 1),
479 hostnamePos, hostnameLen,
480 port);
481 if (NS_FAILED(rv)) return rv;
482 OFFSET_RESULT(hostname, p + 1 - auth);
483 }
484 else {
485 // auth = <server-info>
486 SET_RESULT(username, 0, -1);
487 SET_RESULT(password, 0, -1);
488 rv = ParseServerInfo(auth, authLen,
489 hostnamePos, hostnameLen,
490 port);
491 if (NS_FAILED(rv)) return rv;
492 }
493 return NS_OK;
494 }
495
496 NS_IMETHODIMP
497 nsAuthURLParser::ParseUserInfo(const char *userinfo, int32_t userinfoLen,
498 uint32_t *usernamePos, int32_t *usernameLen,
499 uint32_t *passwordPos, int32_t *passwordLen)
500 {
501 NS_PRECONDITION(userinfo, "null pointer");
502
503 if (userinfoLen < 0)
504 userinfoLen = strlen(userinfo);
505
506 if (userinfoLen == 0) {
507 SET_RESULT(username, 0, -1);
508 SET_RESULT(password, 0, -1);
509 return NS_OK;
510 }
511
512 const char *p = (const char *) memchr(userinfo, ':', userinfoLen);
513 if (p) {
514 // userinfo = <username:password>
515 if (p == userinfo) {
516 // must have a username!
517 return NS_ERROR_MALFORMED_URI;
518 }
519 SET_RESULT(username, 0, p - userinfo);
520 SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1));
521 }
522 else {
523 // userinfo = <username>
524 SET_RESULT(username, 0, userinfoLen);
525 SET_RESULT(password, 0, -1);
526 }
527 return NS_OK;
528 }
529
530 NS_IMETHODIMP
531 nsAuthURLParser::ParseServerInfo(const char *serverinfo, int32_t serverinfoLen,
532 uint32_t *hostnamePos, int32_t *hostnameLen,
533 int32_t *port)
534 {
535 NS_PRECONDITION(serverinfo, "null pointer");
536
537 if (serverinfoLen < 0)
538 serverinfoLen = strlen(serverinfo);
539
540 if (serverinfoLen == 0) {
541 SET_RESULT(hostname, 0, 0);
542 if (port)
543 *port = -1;
544 return NS_OK;
545 }
546
547 // search backwards for a ':' but stop on ']' (IPv6 address literal
548 // delimiter). check for illegal characters in the hostname.
549 const char *p = serverinfo + serverinfoLen - 1;
550 const char *colon = nullptr, *bracket = nullptr;
551 for (; p > serverinfo; --p) {
552 switch (*p) {
553 case ']':
554 bracket = p;
555 break;
556 case ':':
557 if (bracket == nullptr)
558 colon = p;
559 break;
560 case ' ':
561 // hostname must not contain a space
562 NS_WARNING("malformed hostname");
563 return NS_ERROR_MALFORMED_URI;
564 }
565 }
566
567 if (colon) {
568 // serverinfo = <hostname:port>
569 SET_RESULT(hostname, 0, colon - serverinfo);
570 if (port) {
571 // XXX unfortunately ToInteger is not defined for substrings
572 nsAutoCString buf(colon+1, serverinfoLen - (colon + 1 - serverinfo));
573 if (buf.Length() == 0) {
574 *port = -1;
575 }
576 else {
577 const char* nondigit = NS_strspnp("0123456789", buf.get());
578 if (nondigit && *nondigit)
579 return NS_ERROR_MALFORMED_URI;
580
581 nsresult err;
582 *port = buf.ToInteger(&err);
583 if (NS_FAILED(err) || *port < 0)
584 return NS_ERROR_MALFORMED_URI;
585 }
586 }
587 }
588 else {
589 // serverinfo = <hostname>
590 SET_RESULT(hostname, 0, serverinfoLen);
591 if (port)
592 *port = -1;
593 }
594
595 // In case of IPv6 address check its validity
596 if (*hostnameLen > 1 && *(serverinfo + *hostnamePos) == '[' &&
597 *(serverinfo + *hostnamePos + *hostnameLen - 1) == ']' &&
598 !net_IsValidIPv6Addr(serverinfo + *hostnamePos + 1, *hostnameLen - 2))
599 return NS_ERROR_MALFORMED_URI;
600
601 return NS_OK;
602 }
603
604 void
605 nsAuthURLParser::ParseAfterScheme(const char *spec, int32_t specLen,
606 uint32_t *authPos, int32_t *authLen,
607 uint32_t *pathPos, int32_t *pathLen)
608 {
609 NS_PRECONDITION(specLen >= 0, "unexpected");
610
611 uint32_t nslash = CountConsecutiveSlashes(spec, specLen);
612
613 // search for the end of the authority section
614 const char *end = spec + specLen;
615 const char *p;
616 for (p = spec + nslash; p < end; ++p) {
617 if (*p == '/' || *p == '?' || *p == '#')
618 break;
619 }
620 if (p < end) {
621 // spec = [/]<auth><path>
622 SET_RESULT(auth, nslash, p - (spec + nslash));
623 SET_RESULT(path, p - spec, specLen - (p - spec));
624 }
625 else {
626 // spec = [/]<auth>
627 SET_RESULT(auth, nslash, specLen - nslash);
628 SET_RESULT(path, 0, -1);
629 }
630 }
631
632 //----------------------------------------------------------------------------
633 // nsStdURLParser implementation
634 //----------------------------------------------------------------------------
635
636 void
637 nsStdURLParser::ParseAfterScheme(const char *spec, int32_t specLen,
638 uint32_t *authPos, int32_t *authLen,
639 uint32_t *pathPos, int32_t *pathLen)
640 {
641 NS_PRECONDITION(specLen >= 0, "unexpected");
642
643 uint32_t nslash = CountConsecutiveSlashes(spec, specLen);
644
645 // search for the end of the authority section
646 const char *end = spec + specLen;
647 const char *p;
648 for (p = spec + nslash; p < end; ++p) {
649 if (strchr("/?#;", *p))
650 break;
651 }
652 switch (nslash) {
653 case 0:
654 case 2:
655 if (p < end) {
656 // spec = (//)<auth><path>
657 SET_RESULT(auth, nslash, p - (spec + nslash));
658 SET_RESULT(path, p - spec, specLen - (p - spec));
659 }
660 else {
661 // spec = (//)<auth>
662 SET_RESULT(auth, nslash, specLen - nslash);
663 SET_RESULT(path, 0, -1);
664 }
665 break;
666 case 1:
667 // spec = /<path>
668 SET_RESULT(auth, 0, -1);
669 SET_RESULT(path, 0, specLen);
670 break;
671 default:
672 // spec = ///[/]<path>
673 SET_RESULT(auth, 2, 0);
674 SET_RESULT(path, 2, specLen - 2);
675 }
676 }

mercurial