nsprpub/tools/httpget.c

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6
michael@0 7 /*
michael@0 8 * Author: Wan-Teh Chang
michael@0 9 *
michael@0 10 * Given an HTTP URL, httpget uses the GET method to fetch the file.
michael@0 11 * The fetched file is written to stdout by default, or can be
michael@0 12 * saved in an output file.
michael@0 13 *
michael@0 14 * This is a single-threaded program.
michael@0 15 */
michael@0 16
michael@0 17 #include "prio.h"
michael@0 18 #include "prnetdb.h"
michael@0 19 #include "prlog.h"
michael@0 20 #include "prerror.h"
michael@0 21 #include "prprf.h"
michael@0 22 #include "prinit.h"
michael@0 23
michael@0 24 #include <stdio.h>
michael@0 25 #include <string.h>
michael@0 26 #include <stdlib.h> /* for atoi */
michael@0 27
michael@0 28 #define FCOPY_BUFFER_SIZE (16 * 1024)
michael@0 29 #define INPUT_BUFFER_SIZE 1024
michael@0 30 #define LINE_SIZE 512
michael@0 31 #define HOST_SIZE 256
michael@0 32 #define PORT_SIZE 32
michael@0 33 #define PATH_SIZE 512
michael@0 34
michael@0 35 /*
michael@0 36 * A buffer for storing the excess input data for ReadLine.
michael@0 37 * The data in the buffer starts from (including) the element pointed to
michael@0 38 * by inputHead, and ends just before (not including) the element pointed
michael@0 39 * to by inputTail. The buffer is empty if inputHead == inputTail.
michael@0 40 */
michael@0 41
michael@0 42 static char inputBuf[INPUT_BUFFER_SIZE]; /* refilled by ReadLine via PR_Read */
michael@0 43 /*
michael@0 44 * inputBufEnd points just past the end of inputBuf
michael@0 45 */
michael@0 46 static char *inputBufEnd = inputBuf + sizeof(inputBuf);
michael@0 47 static char *inputHead = inputBuf; /* next buffered byte not yet consumed */
michael@0 48 static char *inputTail = inputBuf; /* one past the last buffered byte */
michael@0 49
michael@0 50 static PRBool endOfStream = PR_FALSE; /* set by ReadLine once PR_Read returns 0 */
michael@0 51
michael@0 52 /*
michael@0 53 * ReadLine --
michael@0 54 *
michael@0 55 * Read in a line of text, terminated by CRLF or LF, from fd into buf.
michael@0 56 * The terminating CRLF or LF is included (always as '\n'). The text
michael@0 57 * in buf is terminated by a null byte. The excess bytes are stored in
michael@0 58 * inputBuf for use in the next ReadLine call or FetchFile call.
michael@0 59 * Returns the number of bytes in buf. 0 means end of stream. Returns
michael@0 60 * -1 if read fails.
michael@0 61 */
michael@0 62
michael@0 63 PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
michael@0 64 {
michael@0 65 char *dst = buf;
michael@0 66 char *bufEnd = buf + bufSize; /* just past the end of buf */
michael@0 67 PRBool lineFound = PR_FALSE;
michael@0 68 char *crPtr = NULL; /* points to the CR ('\r') character */
michael@0 69 PRInt32 nRead;
michael@0 70
michael@0 71 loop:
michael@0 72 PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
michael@0 73 && inputTail <= inputBufEnd);
michael@0 74 while (lineFound == PR_FALSE && inputHead != inputTail
michael@0 75 && dst < bufEnd - 1) {
michael@0 76 if (*inputHead == '\r') {
michael@0 77 crPtr = dst;
michael@0 78 } else if (*inputHead == '\n') {
michael@0 79 lineFound = PR_TRUE;
michael@0 80 if (crPtr == dst - 1) {
michael@0 81 dst--;
michael@0 82 }
michael@0 83 }
michael@0 84 *(dst++) = *(inputHead++);
michael@0 85 }
michael@0 86 if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) {
michael@0 87 *dst = '\0';
michael@0 88 return dst - buf;
michael@0 89 }
michael@0 90
michael@0 91 /*
michael@0 92 * The input buffer should be empty now
michael@0 93 */
michael@0 94 PR_ASSERT(inputHead == inputTail);
michael@0 95
michael@0 96 nRead = PR_Read(fd, inputBuf, sizeof(inputBuf));
michael@0 97 if (nRead == -1) {
michael@0 98 *dst = '\0';
michael@0 99 return -1;
michael@0 100 } else if (nRead == 0) {
michael@0 101 endOfStream = PR_TRUE;
michael@0 102 *dst = '\0';
michael@0 103 return dst - buf;
michael@0 104 }
michael@0 105 inputHead = inputBuf;
michael@0 106 inputTail = inputBuf + nRead;
michael@0 107 goto loop;
michael@0 108 }
michael@0 109
michael@0 110 PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
michael@0 111 {
michael@0 112 PRInt32 nBytes = inputTail - inputHead;
michael@0 113
michael@0 114 if (nBytes == 0) {
michael@0 115 if (endOfStream) {
michael@0 116 return -1;
michael@0 117 } else {
michael@0 118 return 0;
michael@0 119 }
michael@0 120 }
michael@0 121 if ((PRInt32) bufSize < nBytes) {
michael@0 122 nBytes = bufSize;
michael@0 123 }
michael@0 124 memcpy(buf, inputHead, nBytes);
michael@0 125 inputHead += nBytes;
michael@0 126 return nBytes;
michael@0 127 }
michael@0 128
michael@0 129 PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
michael@0 130 {
michael@0 131 char buf[FCOPY_BUFFER_SIZE];
michael@0 132 PRInt32 nBytes;
michael@0 133
michael@0 134 while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) {
michael@0 135 if (PR_Write(out, buf, nBytes) != nBytes) {
michael@0 136 fprintf(stderr, "httpget: cannot write to file\n");
michael@0 137 return PR_FAILURE;
michael@0 138 }
michael@0 139 }
michael@0 140 if (nBytes < 0) {
michael@0 141 /* Input buffer is empty and end of stream */
michael@0 142 return PR_SUCCESS;
michael@0 143 }
michael@0 144 while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) {
michael@0 145 if (PR_Write(out, buf, nBytes) != nBytes) {
michael@0 146 fprintf(stderr, "httpget: cannot write to file\n");
michael@0 147 return PR_FAILURE;
michael@0 148 }
michael@0 149 }
michael@0 150 if (nBytes < 0) {
michael@0 151 fprintf(stderr, "httpget: cannot read from socket\n");
michael@0 152 return PR_FAILURE;
michael@0 153 }
michael@0 154 return PR_SUCCESS;
michael@0 155 }
michael@0 156
michael@0 157 PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
michael@0 158 {
michael@0 159 PRInt32 nBytes;
michael@0 160 PRFileMap *outfMap;
michael@0 161 void *addr;
michael@0 162 char *start;
michael@0 163 PRUint32 rem;
michael@0 164 PRUint32 bytesToRead;
michael@0 165 PRStatus rv;
michael@0 166 PRInt64 sz64;
michael@0 167
michael@0 168 LL_UI2L(sz64, size);
michael@0 169 outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
michael@0 170 PR_ASSERT(outfMap);
michael@0 171 addr = PR_MemMap(outfMap, LL_ZERO, size);
michael@0 172 if (addr == NULL) {
michael@0 173 fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
michael@0 174 PR_GetOSError());
michael@0 175
michael@0 176 PR_CloseFileMap(outfMap);
michael@0 177 return PR_FAILURE;
michael@0 178 }
michael@0 179 start = (char *) addr;
michael@0 180 rem = size;
michael@0 181 while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
michael@0 182 start += nBytes;
michael@0 183 rem -= nBytes;
michael@0 184 }
michael@0 185 if (nBytes < 0) {
michael@0 186 /* Input buffer is empty and end of stream */
michael@0 187 return PR_SUCCESS;
michael@0 188 }
michael@0 189 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
michael@0 190 while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) {
michael@0 191 start += nBytes;
michael@0 192 rem -= nBytes;
michael@0 193 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
michael@0 194 }
michael@0 195 if (nBytes < 0) {
michael@0 196 fprintf(stderr, "httpget: cannot read from socket\n");
michael@0 197 return PR_FAILURE;
michael@0 198 }
michael@0 199 rv = PR_MemUnmap(addr, size);
michael@0 200 PR_ASSERT(rv == PR_SUCCESS);
michael@0 201 rv = PR_CloseFileMap(outfMap);
michael@0 202 PR_ASSERT(rv == PR_SUCCESS);
michael@0 203 return PR_SUCCESS;
michael@0 204 }
michael@0 205
michael@0 206 PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
michael@0 207 char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
michael@0 208 {
michael@0 209 char *start, *end;
michael@0 210 char *dst;
michael@0 211 char *hostEnd;
michael@0 212 char *portEnd;
michael@0 213 char *pathEnd;
michael@0 214
michael@0 215 if (strncmp(url, "http", 4)) {
michael@0 216 fprintf(stderr, "httpget: the protocol must be http\n");
michael@0 217 return PR_FAILURE;
michael@0 218 }
michael@0 219 if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
michael@0 220 fprintf(stderr, "httpget: malformed URL: %s\n", url);
michael@0 221 return PR_FAILURE;
michael@0 222 }
michael@0 223
michael@0 224 start = end = url + 7;
michael@0 225 dst = host;
michael@0 226 hostEnd = host + hostSize;
michael@0 227 while (*end && *end != ':' && *end != '/') {
michael@0 228 if (dst == hostEnd - 1) {
michael@0 229 fprintf(stderr, "httpget: host name too long\n");
michael@0 230 return PR_FAILURE;
michael@0 231 }
michael@0 232 *(dst++) = *(end++);
michael@0 233 }
michael@0 234 *dst = '\0';
michael@0 235
michael@0 236 if (*end == '\0') {
michael@0 237 PR_snprintf(port, portSize, "%d", 80);
michael@0 238 PR_snprintf(path, pathSize, "%s", "/");
michael@0 239 return PR_SUCCESS;
michael@0 240 }
michael@0 241
michael@0 242 if (*end == ':') {
michael@0 243 end++;
michael@0 244 dst = port;
michael@0 245 portEnd = port + portSize;
michael@0 246 while (*end && *end != '/') {
michael@0 247 if (dst == portEnd - 1) {
michael@0 248 fprintf(stderr, "httpget: port number too long\n");
michael@0 249 return PR_FAILURE;
michael@0 250 }
michael@0 251 *(dst++) = *(end++);
michael@0 252 }
michael@0 253 *dst = '\0';
michael@0 254 if (*end == '\0') {
michael@0 255 PR_snprintf(path, pathSize, "%s", "/");
michael@0 256 return PR_SUCCESS;
michael@0 257 }
michael@0 258 } else {
michael@0 259 PR_snprintf(port, portSize, "%d", 80);
michael@0 260 }
michael@0 261
michael@0 262 dst = path;
michael@0 263 pathEnd = path + pathSize;
michael@0 264 while (*end) {
michael@0 265 if (dst == pathEnd - 1) {
michael@0 266 fprintf(stderr, "httpget: file pathname too long\n");
michael@0 267 return PR_FAILURE;
michael@0 268 }
michael@0 269 *(dst++) = *(end++);
michael@0 270 }
michael@0 271 *dst = '\0';
michael@0 272 return PR_SUCCESS;
michael@0 273 }
michael@0 274
/*
 * PrintUsage --
 *
 *     Write the three accepted command-line forms to stderr.
 */
void PrintUsage(void)
{
    static const char usageText[] =
        "usage: httpget url\n"
        " httpget -o outputfile url\n"
        " httpget url -o outputfile\n";

    fprintf(stderr, "%s", usageText);
}
michael@0 280
michael@0 281 int main(int argc, char **argv)
michael@0 282 {
michael@0 283 PRHostEnt hostentry;
michael@0 284 char buf[PR_NETDB_BUF_SIZE];
michael@0 285 PRNetAddr addr;
michael@0 286 PRFileDesc *socket = NULL, *file = NULL;
michael@0 287 PRIntn cmdSize;
michael@0 288 char host[HOST_SIZE];
michael@0 289 char port[PORT_SIZE];
michael@0 290 char path[PATH_SIZE];
michael@0 291 char line[LINE_SIZE];
michael@0 292 int exitStatus = 0;
michael@0 293 PRBool endOfHeader = PR_FALSE;
michael@0 294 char *url;
michael@0 295 char *fileName = NULL;
michael@0 296 PRUint32 fileSize;
michael@0 297
michael@0 298 if (argc != 2 && argc != 4) {
michael@0 299 PrintUsage();
michael@0 300 exit(1);
michael@0 301 }
michael@0 302
michael@0 303 if (argc == 2) {
michael@0 304 /*
michael@0 305 * case 1: httpget url
michael@0 306 */
michael@0 307 url = argv[1];
michael@0 308 } else {
michael@0 309 if (strcmp(argv[1], "-o") == 0) {
michael@0 310 /*
michael@0 311 * case 2: httpget -o outputfile url
michael@0 312 */
michael@0 313 fileName = argv[2];
michael@0 314 url = argv[3];
michael@0 315 } else {
michael@0 316 /*
michael@0 317 * case 3: httpget url -o outputfile
michael@0 318 */
michael@0 319 url = argv[1];
michael@0 320 if (strcmp(argv[2], "-o") != 0) {
michael@0 321 PrintUsage();
michael@0 322 exit(1);
michael@0 323 }
michael@0 324 fileName = argv[3];
michael@0 325 }
michael@0 326 }
michael@0 327
michael@0 328 if (ParseURL(url, host, sizeof(host), port, sizeof(port),
michael@0 329 path, sizeof(path)) == PR_FAILURE) {
michael@0 330 exit(1);
michael@0 331 }
michael@0 332
michael@0 333 if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
michael@0 334 == PR_FAILURE) {
michael@0 335 fprintf(stderr, "httpget: unknown host name: %s\n", host);
michael@0 336 exit(1);
michael@0 337 }
michael@0 338
michael@0 339 addr.inet.family = PR_AF_INET;
michael@0 340 addr.inet.port = PR_htons((short) atoi(port));
michael@0 341 addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);
michael@0 342
michael@0 343 socket = PR_NewTCPSocket();
michael@0 344 if (socket == NULL) {
michael@0 345 fprintf(stderr, "httpget: cannot create new tcp socket\n");
michael@0 346 exit(1);
michael@0 347 }
michael@0 348
michael@0 349 if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
michael@0 350 fprintf(stderr, "httpget: cannot connect to http server\n");
michael@0 351 exitStatus = 1;
michael@0 352 goto done;
michael@0 353 }
michael@0 354
michael@0 355 if (fileName == NULL) {
michael@0 356 file = PR_STDOUT;
michael@0 357 } else {
michael@0 358 file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
michael@0 359 00777);
michael@0 360 if (file == NULL) {
michael@0 361 fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
michael@0 362 fileName, PR_GetError(), PR_GetOSError());
michael@0 363 exitStatus = 1;
michael@0 364 goto done;
michael@0 365 }
michael@0 366 }
michael@0 367
michael@0 368 cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
michael@0 369 PR_ASSERT(cmdSize == (PRIntn) strlen("GET HTTP/1.0\r\n\r\n")
michael@0 370 + (PRIntn) strlen(path));
michael@0 371 if (PR_Write(socket, buf, cmdSize) != cmdSize) {
michael@0 372 fprintf(stderr, "httpget: cannot write to http server\n");
michael@0 373 exitStatus = 1;
michael@0 374 goto done;
michael@0 375 }
michael@0 376
michael@0 377 if (ReadLine(socket, line, sizeof(line)) <= 0) {
michael@0 378 fprintf(stderr, "httpget: cannot read line from http server\n");
michael@0 379 exitStatus = 1;
michael@0 380 goto done;
michael@0 381 }
michael@0 382
michael@0 383 /* HTTP response: 200 == OK */
michael@0 384 if (strstr(line, "200") == NULL) {
michael@0 385 fprintf(stderr, "httpget: %s\n", line);
michael@0 386 exitStatus = 1;
michael@0 387 goto done;
michael@0 388 }
michael@0 389
michael@0 390 while (ReadLine(socket, line, sizeof(line)) > 0) {
michael@0 391 if (line[0] == '\n') {
michael@0 392 endOfHeader = PR_TRUE;
michael@0 393 break;
michael@0 394 }
michael@0 395 if (strncmp(line, "Content-Length", 14) == 0
michael@0 396 || strncmp(line, "Content-length", 14) == 0) {
michael@0 397 char *p = line + 14;
michael@0 398
michael@0 399 while (*p == ' ' || *p == '\t') {
michael@0 400 p++;
michael@0 401 }
michael@0 402 if (*p != ':') {
michael@0 403 continue;
michael@0 404 }
michael@0 405 p++;
michael@0 406 while (*p == ' ' || *p == '\t') {
michael@0 407 p++;
michael@0 408 }
michael@0 409 fileSize = 0;
michael@0 410 while ('0' <= *p && *p <= '9') {
michael@0 411 fileSize = 10 * fileSize + (*p - '0');
michael@0 412 p++;
michael@0 413 }
michael@0 414 }
michael@0 415 }
michael@0 416 if (endOfHeader == PR_FALSE) {
michael@0 417 fprintf(stderr, "httpget: cannot read line from http server\n");
michael@0 418 exitStatus = 1;
michael@0 419 goto done;
michael@0 420 }
michael@0 421
michael@0 422 if (fileName == NULL || fileSize == 0) {
michael@0 423 FetchFile(socket, file);
michael@0 424 } else {
michael@0 425 FastFetchFile(socket, file, fileSize);
michael@0 426 }
michael@0 427
michael@0 428 done:
michael@0 429 if (socket) PR_Close(socket);
michael@0 430 if (file) PR_Close(file);
michael@0 431 PR_Cleanup();
michael@0 432 return exitStatus;
michael@0 433 }

mercurial