nsprpub/tools/httpget.c

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 /*
     8  * Author: Wan-Teh Chang
     9  *
    10  * Given an HTTP URL, httpget uses the GET method to fetch the file.
    11  * The fetched file is written to stdout by default, or can be
    12  * saved in an output file.
    13  *
    14  * This is a single-threaded program.
    15  */
    17 #include "prio.h"
    18 #include "prnetdb.h"
    19 #include "prlog.h"
    20 #include "prerror.h"
    21 #include "prprf.h"
    22 #include "prinit.h"
    24 #include <stdio.h>
    25 #include <string.h>
    26 #include <stdlib.h>  /* for atoi */
    28 #define FCOPY_BUFFER_SIZE (16 * 1024)
    29 #define INPUT_BUFFER_SIZE 1024
    30 #define LINE_SIZE 512
    31 #define HOST_SIZE 256
    32 #define PORT_SIZE 32
    33 #define PATH_SIZE 512
    35 /*
    36  * A buffer for storing the excess input data for ReadLine.
    37  * The data in the buffer starts from (including) the element pointed to
    38  * by inputHead, and ends just before (not including) the element pointed
    39  * to by inputTail.  The buffer is empty if inputHead == inputTail.
    40  */
    42 static char inputBuf[INPUT_BUFFER_SIZE];
    43 /*
    44  * inputBufEnd points just past the end of inputBuf
    45  */
    46 static char *inputBufEnd = inputBuf + sizeof(inputBuf);
    47 static char *inputHead = inputBuf;
    48 static char *inputTail = inputBuf;
    50 static PRBool endOfStream = PR_FALSE;
    52 /*
    53  * ReadLine --
    54  *
    55  * Read in a line of text, terminated by CRLF or LF, from fd into buf.
    56  * The terminating CRLF or LF is included (always as '\n').  The text
    57  * in buf is terminated by a null byte.  The excess bytes are stored in
    58  * inputBuf for use in the next ReadLine call or FetchFile call.
    59  * Returns the number of bytes in buf.  0 means end of stream.  Returns
    60  * -1 if read fails.
    61  */
    63 PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
    64 {
    65     char *dst = buf;
    66     char *bufEnd = buf + bufSize;  /* just past the end of buf */
    67     PRBool lineFound = PR_FALSE;
    68     char *crPtr = NULL;  /* points to the CR ('\r') character */
    69     PRInt32 nRead;
    71 loop:
    72     PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
    73 	    && inputTail <= inputBufEnd);
    74     while (lineFound == PR_FALSE && inputHead != inputTail
    75 	    && dst < bufEnd - 1) {
    76 	if (*inputHead == '\r') {
    77 	    crPtr = dst;
    78 	} else if (*inputHead == '\n') {
    79 	    lineFound = PR_TRUE;
    80 	    if (crPtr == dst - 1) {
    81 		dst--; 
    82 	    }
    83 	}
    84 	*(dst++) = *(inputHead++);
    85     }
    86     if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) {
    87 	*dst = '\0';
    88 	return dst - buf;
    89     }
    91     /*
    92      * The input buffer should be empty now
    93      */
    94     PR_ASSERT(inputHead == inputTail);
    96     nRead = PR_Read(fd, inputBuf, sizeof(inputBuf));
    97     if (nRead == -1) {
    98 	*dst = '\0';
    99 	return -1;
   100     } else if (nRead == 0) {
   101 	endOfStream = PR_TRUE;
   102 	*dst = '\0';
   103 	return dst - buf;
   104     }
   105     inputHead = inputBuf;
   106     inputTail = inputBuf + nRead;
   107     goto loop;
   108 }
   110 PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
   111 {
   112     PRInt32 nBytes = inputTail - inputHead;
   114     if (nBytes == 0) {
   115 	if (endOfStream) {
   116 	    return -1;
   117 	} else {
   118 	    return 0;
   119 	}
   120     }
   121     if ((PRInt32) bufSize < nBytes) {
   122 	nBytes = bufSize;
   123     }
   124     memcpy(buf, inputHead, nBytes);
   125     inputHead += nBytes;
   126     return nBytes;
   127 }
   129 PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
   130 {
   131     char buf[FCOPY_BUFFER_SIZE];
   132     PRInt32 nBytes;
   134     while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) {
   135 	if (PR_Write(out, buf, nBytes) != nBytes) {
   136             fprintf(stderr, "httpget: cannot write to file\n");
   137 	    return PR_FAILURE;
   138 	}
   139     }
   140     if (nBytes < 0) {
   141 	/* Input buffer is empty and end of stream */
   142 	return PR_SUCCESS;
   143     }
   144     while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) {
   145 	if (PR_Write(out, buf, nBytes) != nBytes) {
   146 	    fprintf(stderr, "httpget: cannot write to file\n");
   147 	    return PR_FAILURE;
   148         }
   149     }
   150     if (nBytes < 0) {
   151 	fprintf(stderr, "httpget: cannot read from socket\n");
   152 	return PR_FAILURE;
   153     }
   154     return PR_SUCCESS;
   155 }
   157 PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
   158 {
   159     PRInt32 nBytes;
   160     PRFileMap *outfMap;
   161     void *addr;
   162     char *start;
   163     PRUint32 rem;
   164     PRUint32 bytesToRead;
   165     PRStatus rv;
   166     PRInt64 sz64;
   168     LL_UI2L(sz64, size);
   169     outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
   170     PR_ASSERT(outfMap);
   171     addr = PR_MemMap(outfMap, LL_ZERO, size);
   172     if (addr == NULL) {
   173 	fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
   174 		PR_GetOSError());
   176 	PR_CloseFileMap(outfMap);
   177 	return PR_FAILURE;
   178     }
   179     start = (char *) addr;
   180     rem = size;
   181     while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
   182 	start += nBytes;
   183 	rem -= nBytes;
   184     }
   185     if (nBytes < 0) {
   186 	/* Input buffer is empty and end of stream */
   187 	return PR_SUCCESS;
   188     }
   189     bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
   190     while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) {
   191 	start += nBytes;
   192 	rem -= nBytes;
   193         bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
   194     }
   195     if (nBytes < 0) {
   196 	fprintf(stderr, "httpget: cannot read from socket\n");
   197 	return PR_FAILURE;
   198     }
   199     rv = PR_MemUnmap(addr, size);
   200     PR_ASSERT(rv == PR_SUCCESS);
   201     rv = PR_CloseFileMap(outfMap);
   202     PR_ASSERT(rv == PR_SUCCESS);
   203     return PR_SUCCESS;
   204 }
   206 PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
   207     char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
   208 {
   209     char *start, *end;
   210     char *dst;
   211     char *hostEnd;
   212     char *portEnd;
   213     char *pathEnd;
   215     if (strncmp(url, "http", 4)) {
   216 	fprintf(stderr, "httpget: the protocol must be http\n");
   217 	return PR_FAILURE;
   218     }
   219     if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
   220 	fprintf(stderr, "httpget: malformed URL: %s\n", url);
   221 	return PR_FAILURE;
   222     }
   224     start = end = url + 7;
   225     dst = host;
   226     hostEnd = host + hostSize;
   227     while (*end && *end != ':' && *end != '/') {
   228 	if (dst == hostEnd - 1) {
   229 	    fprintf(stderr, "httpget: host name too long\n");
   230 	    return PR_FAILURE;
   231 	}
   232 	*(dst++) = *(end++);
   233     }
   234     *dst = '\0';
   236     if (*end == '\0') {
   237 	PR_snprintf(port, portSize, "%d", 80);
   238 	PR_snprintf(path, pathSize, "%s", "/");
   239 	return PR_SUCCESS;
   240     }
   242     if (*end == ':') {
   243 	end++;
   244 	dst = port;
   245 	portEnd = port + portSize;
   246 	while (*end && *end != '/') {
   247 	    if (dst == portEnd - 1) {
   248 		fprintf(stderr, "httpget: port number too long\n");
   249 		return PR_FAILURE;
   250 	    }
   251 	    *(dst++) = *(end++);
   252         }
   253 	*dst = '\0';
   254 	if (*end == '\0') {
   255 	    PR_snprintf(path, pathSize, "%s", "/");
   256 	    return PR_SUCCESS;
   257         }
   258     } else {
   259 	PR_snprintf(port, portSize, "%d", 80);
   260     }
   262     dst = path;
   263     pathEnd = path + pathSize;
   264     while (*end) {
   265 	if (dst == pathEnd - 1) {
   266 	    fprintf(stderr, "httpget: file pathname too long\n");
   267 	    return PR_FAILURE;
   268 	}
   269 	*(dst++) = *(end++);
   270     }
   271     *dst = '\0';
   272     return PR_SUCCESS;
   273 }
   275 void PrintUsage(void) {
   276     fprintf(stderr, "usage: httpget url\n"
   277 		    "       httpget -o outputfile url\n"
   278 		    "       httpget url -o outputfile\n");
   279 }
   281 int main(int argc, char **argv)
   282 {
   283     PRHostEnt hostentry;
   284     char buf[PR_NETDB_BUF_SIZE];
   285     PRNetAddr addr;
   286     PRFileDesc *socket = NULL, *file = NULL;
   287     PRIntn cmdSize;
   288     char host[HOST_SIZE];
   289     char port[PORT_SIZE];
   290     char path[PATH_SIZE];
   291     char line[LINE_SIZE];
   292     int exitStatus = 0;
   293     PRBool endOfHeader = PR_FALSE;
   294     char *url;
   295     char *fileName = NULL;
   296     PRUint32 fileSize;
   298     if (argc != 2 && argc != 4) {
   299 	PrintUsage();
   300 	exit(1);
   301     }
   303     if (argc == 2) {
   304 	/*
   305 	 * case 1: httpget url
   306 	 */
   307 	url = argv[1];
   308     } else {
   309 	if (strcmp(argv[1], "-o") == 0) {
   310 	    /*
   311 	     * case 2: httpget -o outputfile url
   312 	     */
   313 	    fileName = argv[2];
   314 	    url = argv[3];
   315         } else {
   316 	    /*
   317 	     * case 3: httpget url -o outputfile
   318 	     */
   319 	    url = argv[1];
   320 	    if (strcmp(argv[2], "-o") != 0) {
   321 		PrintUsage();
   322 		exit(1);
   323             }
   324 	    fileName = argv[3];
   325 	}
   326     }
   328     if (ParseURL(url, host, sizeof(host), port, sizeof(port),
   329 	    path, sizeof(path)) == PR_FAILURE) {
   330 	exit(1);
   331     }
   333     if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
   334 	    == PR_FAILURE) {
   335         fprintf(stderr, "httpget: unknown host name: %s\n", host);
   336 	exit(1);
   337     }
   339     addr.inet.family = PR_AF_INET;
   340     addr.inet.port = PR_htons((short) atoi(port));
   341     addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);
   343     socket = PR_NewTCPSocket();
   344     if (socket == NULL) {
   345 	fprintf(stderr, "httpget: cannot create new tcp socket\n");
   346 	exit(1);
   347     }
   349     if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
   350 	fprintf(stderr, "httpget: cannot connect to http server\n");
   351 	exitStatus = 1;
   352 	goto done;
   353     }
   355     if (fileName == NULL) {
   356 	file = PR_STDOUT;
   357     } else {
   358         file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
   359 		00777);
   360         if (file == NULL) {
   361 	    fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
   362 		    fileName, PR_GetError(), PR_GetOSError());
   363 	    exitStatus = 1;
   364 	    goto done;
   365 	}
   366     }
   368     cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
   369     PR_ASSERT(cmdSize == (PRIntn) strlen("GET  HTTP/1.0\r\n\r\n")
   370             + (PRIntn) strlen(path));
   371     if (PR_Write(socket, buf, cmdSize) != cmdSize) {
   372 	fprintf(stderr, "httpget: cannot write to http server\n");
   373 	exitStatus = 1;
   374 	goto done;
   375     }
   377     if (ReadLine(socket, line, sizeof(line)) <= 0) {
   378 	fprintf(stderr, "httpget: cannot read line from http server\n");
   379 	exitStatus = 1;
   380 	goto done;
   381     }
   383     /* HTTP response: 200 == OK */
   384     if (strstr(line, "200") == NULL) {
   385 	fprintf(stderr, "httpget: %s\n", line);
   386 	exitStatus = 1;
   387 	goto done;
   388     }
   390     while (ReadLine(socket, line, sizeof(line)) > 0) {
   391 	if (line[0] == '\n') {
   392 	    endOfHeader = PR_TRUE;
   393 	    break;
   394 	}
   395 	if (strncmp(line, "Content-Length", 14) == 0
   396 		|| strncmp(line, "Content-length", 14) == 0) {
   397 	    char *p = line + 14;
   399 	    while (*p == ' ' || *p == '\t') {
   400 		p++;
   401 	    }
   402 	    if (*p != ':') {
   403 		continue;
   404             }
   405 	    p++;
   406 	    while (*p == ' ' || *p == '\t') {
   407 		p++;
   408 	    }
   409 	    fileSize = 0;
   410 	    while ('0' <= *p && *p <= '9') {
   411 		fileSize = 10 * fileSize + (*p - '0');
   412 		p++;
   413             }
   414 	}
   415     }
   416     if (endOfHeader == PR_FALSE) {
   417 	fprintf(stderr, "httpget: cannot read line from http server\n");
   418 	exitStatus = 1;
   419 	goto done;
   420     }
   422     if (fileName == NULL || fileSize == 0) {
   423         FetchFile(socket, file);
   424     } else {
   425 	FastFetchFile(socket, file, fileSize);
   426     }
   428 done:
   429     if (socket) PR_Close(socket);
   430     if (file) PR_Close(file);
   431     PR_Cleanup();
   432     return exitStatus;
   433 }

mercurial