Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | |
michael@0 | 7 | /* |
michael@0 | 8 | * Author: Wan-Teh Chang |
michael@0 | 9 | * |
michael@0 | 10 | * Given an HTTP URL, httpget uses the GET method to fetch the file. |
michael@0 | 11 | * The fetched file is written to stdout by default, or can be |
michael@0 | 12 | * saved in an output file. |
michael@0 | 13 | * |
michael@0 | 14 | * This is a single-threaded program. |
michael@0 | 15 | */ |
michael@0 | 16 | |
michael@0 | 17 | #include "prio.h" |
michael@0 | 18 | #include "prnetdb.h" |
michael@0 | 19 | #include "prlog.h" |
michael@0 | 20 | #include "prerror.h" |
michael@0 | 21 | #include "prprf.h" |
michael@0 | 22 | #include "prinit.h" |
michael@0 | 23 | |
michael@0 | 24 | #include <stdio.h> |
michael@0 | 25 | #include <string.h> |
michael@0 | 26 | #include <stdlib.h> /* for atoi */ |
michael@0 | 27 | |
/* Buffer sizes, in bytes. */
#define FCOPY_BUFFER_SIZE (16 * 1024)   /* copy chunk used when fetching the body */
#define INPUT_BUFFER_SIZE 1024          /* capacity of the shared inputBuf */
#define LINE_SIZE 512                   /* one HTTP status/header line */
#define HOST_SIZE 256                   /* host component of the URL */
#define PORT_SIZE 32                    /* port component of the URL, as text */
#define PATH_SIZE 512                   /* path component of the URL */
michael@0 | 34 | |
michael@0 | 35 | /* |
michael@0 | 36 | * A buffer for storing the excess input data for ReadLine. |
michael@0 | 37 | * The data in the buffer starts from (including) the element pointed to |
michael@0 | 38 | * by inputHead, and ends just before (not including) the element pointed |
michael@0 | 39 | * to by inputTail. The buffer is empty if inputHead == inputTail. |
michael@0 | 40 | */ |
michael@0 | 41 | |
michael@0 | 42 | static char inputBuf[INPUT_BUFFER_SIZE]; |
michael@0 | 43 | /* |
michael@0 | 44 | * inputBufEnd points just past the end of inputBuf |
michael@0 | 45 | */ |
michael@0 | 46 | static char *inputBufEnd = inputBuf + sizeof(inputBuf); |
michael@0 | 47 | static char *inputHead = inputBuf; |
michael@0 | 48 | static char *inputTail = inputBuf; |
michael@0 | 49 | |
michael@0 | 50 | static PRBool endOfStream = PR_FALSE; |
michael@0 | 51 | |
michael@0 | 52 | /* |
michael@0 | 53 | * ReadLine -- |
michael@0 | 54 | * |
michael@0 | 55 | * Read in a line of text, terminated by CRLF or LF, from fd into buf. |
michael@0 | 56 | * The terminating CRLF or LF is included (always as '\n'). The text |
michael@0 | 57 | * in buf is terminated by a null byte. The excess bytes are stored in |
michael@0 | 58 | * inputBuf for use in the next ReadLine call or FetchFile call. |
michael@0 | 59 | * Returns the number of bytes in buf. 0 means end of stream. Returns |
michael@0 | 60 | * -1 if read fails. |
michael@0 | 61 | */ |
michael@0 | 62 | |
michael@0 | 63 | PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize) |
michael@0 | 64 | { |
michael@0 | 65 | char *dst = buf; |
michael@0 | 66 | char *bufEnd = buf + bufSize; /* just past the end of buf */ |
michael@0 | 67 | PRBool lineFound = PR_FALSE; |
michael@0 | 68 | char *crPtr = NULL; /* points to the CR ('\r') character */ |
michael@0 | 69 | PRInt32 nRead; |
michael@0 | 70 | |
michael@0 | 71 | loop: |
michael@0 | 72 | PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail |
michael@0 | 73 | && inputTail <= inputBufEnd); |
michael@0 | 74 | while (lineFound == PR_FALSE && inputHead != inputTail |
michael@0 | 75 | && dst < bufEnd - 1) { |
michael@0 | 76 | if (*inputHead == '\r') { |
michael@0 | 77 | crPtr = dst; |
michael@0 | 78 | } else if (*inputHead == '\n') { |
michael@0 | 79 | lineFound = PR_TRUE; |
michael@0 | 80 | if (crPtr == dst - 1) { |
michael@0 | 81 | dst--; |
michael@0 | 82 | } |
michael@0 | 83 | } |
michael@0 | 84 | *(dst++) = *(inputHead++); |
michael@0 | 85 | } |
michael@0 | 86 | if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) { |
michael@0 | 87 | *dst = '\0'; |
michael@0 | 88 | return dst - buf; |
michael@0 | 89 | } |
michael@0 | 90 | |
michael@0 | 91 | /* |
michael@0 | 92 | * The input buffer should be empty now |
michael@0 | 93 | */ |
michael@0 | 94 | PR_ASSERT(inputHead == inputTail); |
michael@0 | 95 | |
michael@0 | 96 | nRead = PR_Read(fd, inputBuf, sizeof(inputBuf)); |
michael@0 | 97 | if (nRead == -1) { |
michael@0 | 98 | *dst = '\0'; |
michael@0 | 99 | return -1; |
michael@0 | 100 | } else if (nRead == 0) { |
michael@0 | 101 | endOfStream = PR_TRUE; |
michael@0 | 102 | *dst = '\0'; |
michael@0 | 103 | return dst - buf; |
michael@0 | 104 | } |
michael@0 | 105 | inputHead = inputBuf; |
michael@0 | 106 | inputTail = inputBuf + nRead; |
michael@0 | 107 | goto loop; |
michael@0 | 108 | } |
michael@0 | 109 | |
michael@0 | 110 | PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize) |
michael@0 | 111 | { |
michael@0 | 112 | PRInt32 nBytes = inputTail - inputHead; |
michael@0 | 113 | |
michael@0 | 114 | if (nBytes == 0) { |
michael@0 | 115 | if (endOfStream) { |
michael@0 | 116 | return -1; |
michael@0 | 117 | } else { |
michael@0 | 118 | return 0; |
michael@0 | 119 | } |
michael@0 | 120 | } |
michael@0 | 121 | if ((PRInt32) bufSize < nBytes) { |
michael@0 | 122 | nBytes = bufSize; |
michael@0 | 123 | } |
michael@0 | 124 | memcpy(buf, inputHead, nBytes); |
michael@0 | 125 | inputHead += nBytes; |
michael@0 | 126 | return nBytes; |
michael@0 | 127 | } |
michael@0 | 128 | |
michael@0 | 129 | PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out) |
michael@0 | 130 | { |
michael@0 | 131 | char buf[FCOPY_BUFFER_SIZE]; |
michael@0 | 132 | PRInt32 nBytes; |
michael@0 | 133 | |
michael@0 | 134 | while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) { |
michael@0 | 135 | if (PR_Write(out, buf, nBytes) != nBytes) { |
michael@0 | 136 | fprintf(stderr, "httpget: cannot write to file\n"); |
michael@0 | 137 | return PR_FAILURE; |
michael@0 | 138 | } |
michael@0 | 139 | } |
michael@0 | 140 | if (nBytes < 0) { |
michael@0 | 141 | /* Input buffer is empty and end of stream */ |
michael@0 | 142 | return PR_SUCCESS; |
michael@0 | 143 | } |
michael@0 | 144 | while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) { |
michael@0 | 145 | if (PR_Write(out, buf, nBytes) != nBytes) { |
michael@0 | 146 | fprintf(stderr, "httpget: cannot write to file\n"); |
michael@0 | 147 | return PR_FAILURE; |
michael@0 | 148 | } |
michael@0 | 149 | } |
michael@0 | 150 | if (nBytes < 0) { |
michael@0 | 151 | fprintf(stderr, "httpget: cannot read from socket\n"); |
michael@0 | 152 | return PR_FAILURE; |
michael@0 | 153 | } |
michael@0 | 154 | return PR_SUCCESS; |
michael@0 | 155 | } |
michael@0 | 156 | |
michael@0 | 157 | PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size) |
michael@0 | 158 | { |
michael@0 | 159 | PRInt32 nBytes; |
michael@0 | 160 | PRFileMap *outfMap; |
michael@0 | 161 | void *addr; |
michael@0 | 162 | char *start; |
michael@0 | 163 | PRUint32 rem; |
michael@0 | 164 | PRUint32 bytesToRead; |
michael@0 | 165 | PRStatus rv; |
michael@0 | 166 | PRInt64 sz64; |
michael@0 | 167 | |
michael@0 | 168 | LL_UI2L(sz64, size); |
michael@0 | 169 | outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE); |
michael@0 | 170 | PR_ASSERT(outfMap); |
michael@0 | 171 | addr = PR_MemMap(outfMap, LL_ZERO, size); |
michael@0 | 172 | if (addr == NULL) { |
michael@0 | 173 | fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(), |
michael@0 | 174 | PR_GetOSError()); |
michael@0 | 175 | |
michael@0 | 176 | PR_CloseFileMap(outfMap); |
michael@0 | 177 | return PR_FAILURE; |
michael@0 | 178 | } |
michael@0 | 179 | start = (char *) addr; |
michael@0 | 180 | rem = size; |
michael@0 | 181 | while ((nBytes = DrainInputBuffer(start, rem)) > 0) { |
michael@0 | 182 | start += nBytes; |
michael@0 | 183 | rem -= nBytes; |
michael@0 | 184 | } |
michael@0 | 185 | if (nBytes < 0) { |
michael@0 | 186 | /* Input buffer is empty and end of stream */ |
michael@0 | 187 | return PR_SUCCESS; |
michael@0 | 188 | } |
michael@0 | 189 | bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; |
michael@0 | 190 | while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) { |
michael@0 | 191 | start += nBytes; |
michael@0 | 192 | rem -= nBytes; |
michael@0 | 193 | bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; |
michael@0 | 194 | } |
michael@0 | 195 | if (nBytes < 0) { |
michael@0 | 196 | fprintf(stderr, "httpget: cannot read from socket\n"); |
michael@0 | 197 | return PR_FAILURE; |
michael@0 | 198 | } |
michael@0 | 199 | rv = PR_MemUnmap(addr, size); |
michael@0 | 200 | PR_ASSERT(rv == PR_SUCCESS); |
michael@0 | 201 | rv = PR_CloseFileMap(outfMap); |
michael@0 | 202 | PR_ASSERT(rv == PR_SUCCESS); |
michael@0 | 203 | return PR_SUCCESS; |
michael@0 | 204 | } |
michael@0 | 205 | |
michael@0 | 206 | PRStatus ParseURL(char *url, char *host, PRUint32 hostSize, |
michael@0 | 207 | char *port, PRUint32 portSize, char *path, PRUint32 pathSize) |
michael@0 | 208 | { |
michael@0 | 209 | char *start, *end; |
michael@0 | 210 | char *dst; |
michael@0 | 211 | char *hostEnd; |
michael@0 | 212 | char *portEnd; |
michael@0 | 213 | char *pathEnd; |
michael@0 | 214 | |
michael@0 | 215 | if (strncmp(url, "http", 4)) { |
michael@0 | 216 | fprintf(stderr, "httpget: the protocol must be http\n"); |
michael@0 | 217 | return PR_FAILURE; |
michael@0 | 218 | } |
michael@0 | 219 | if (strncmp(url + 4, "://", 3) || url[7] == '\0') { |
michael@0 | 220 | fprintf(stderr, "httpget: malformed URL: %s\n", url); |
michael@0 | 221 | return PR_FAILURE; |
michael@0 | 222 | } |
michael@0 | 223 | |
michael@0 | 224 | start = end = url + 7; |
michael@0 | 225 | dst = host; |
michael@0 | 226 | hostEnd = host + hostSize; |
michael@0 | 227 | while (*end && *end != ':' && *end != '/') { |
michael@0 | 228 | if (dst == hostEnd - 1) { |
michael@0 | 229 | fprintf(stderr, "httpget: host name too long\n"); |
michael@0 | 230 | return PR_FAILURE; |
michael@0 | 231 | } |
michael@0 | 232 | *(dst++) = *(end++); |
michael@0 | 233 | } |
michael@0 | 234 | *dst = '\0'; |
michael@0 | 235 | |
michael@0 | 236 | if (*end == '\0') { |
michael@0 | 237 | PR_snprintf(port, portSize, "%d", 80); |
michael@0 | 238 | PR_snprintf(path, pathSize, "%s", "/"); |
michael@0 | 239 | return PR_SUCCESS; |
michael@0 | 240 | } |
michael@0 | 241 | |
michael@0 | 242 | if (*end == ':') { |
michael@0 | 243 | end++; |
michael@0 | 244 | dst = port; |
michael@0 | 245 | portEnd = port + portSize; |
michael@0 | 246 | while (*end && *end != '/') { |
michael@0 | 247 | if (dst == portEnd - 1) { |
michael@0 | 248 | fprintf(stderr, "httpget: port number too long\n"); |
michael@0 | 249 | return PR_FAILURE; |
michael@0 | 250 | } |
michael@0 | 251 | *(dst++) = *(end++); |
michael@0 | 252 | } |
michael@0 | 253 | *dst = '\0'; |
michael@0 | 254 | if (*end == '\0') { |
michael@0 | 255 | PR_snprintf(path, pathSize, "%s", "/"); |
michael@0 | 256 | return PR_SUCCESS; |
michael@0 | 257 | } |
michael@0 | 258 | } else { |
michael@0 | 259 | PR_snprintf(port, portSize, "%d", 80); |
michael@0 | 260 | } |
michael@0 | 261 | |
michael@0 | 262 | dst = path; |
michael@0 | 263 | pathEnd = path + pathSize; |
michael@0 | 264 | while (*end) { |
michael@0 | 265 | if (dst == pathEnd - 1) { |
michael@0 | 266 | fprintf(stderr, "httpget: file pathname too long\n"); |
michael@0 | 267 | return PR_FAILURE; |
michael@0 | 268 | } |
michael@0 | 269 | *(dst++) = *(end++); |
michael@0 | 270 | } |
michael@0 | 271 | *dst = '\0'; |
michael@0 | 272 | return PR_SUCCESS; |
michael@0 | 273 | } |
michael@0 | 274 | |
/*
 * PrintUsage --
 *
 *     Write the three accepted command-line forms to stderr.
 */
void PrintUsage(void)
{
    static const char usageText[] =
        "usage: httpget url\n"
        " httpget -o outputfile url\n"
        " httpget url -o outputfile\n";

    fputs(usageText, stderr);
}
michael@0 | 280 | |
michael@0 | 281 | int main(int argc, char **argv) |
michael@0 | 282 | { |
michael@0 | 283 | PRHostEnt hostentry; |
michael@0 | 284 | char buf[PR_NETDB_BUF_SIZE]; |
michael@0 | 285 | PRNetAddr addr; |
michael@0 | 286 | PRFileDesc *socket = NULL, *file = NULL; |
michael@0 | 287 | PRIntn cmdSize; |
michael@0 | 288 | char host[HOST_SIZE]; |
michael@0 | 289 | char port[PORT_SIZE]; |
michael@0 | 290 | char path[PATH_SIZE]; |
michael@0 | 291 | char line[LINE_SIZE]; |
michael@0 | 292 | int exitStatus = 0; |
michael@0 | 293 | PRBool endOfHeader = PR_FALSE; |
michael@0 | 294 | char *url; |
michael@0 | 295 | char *fileName = NULL; |
michael@0 | 296 | PRUint32 fileSize; |
michael@0 | 297 | |
michael@0 | 298 | if (argc != 2 && argc != 4) { |
michael@0 | 299 | PrintUsage(); |
michael@0 | 300 | exit(1); |
michael@0 | 301 | } |
michael@0 | 302 | |
michael@0 | 303 | if (argc == 2) { |
michael@0 | 304 | /* |
michael@0 | 305 | * case 1: httpget url |
michael@0 | 306 | */ |
michael@0 | 307 | url = argv[1]; |
michael@0 | 308 | } else { |
michael@0 | 309 | if (strcmp(argv[1], "-o") == 0) { |
michael@0 | 310 | /* |
michael@0 | 311 | * case 2: httpget -o outputfile url |
michael@0 | 312 | */ |
michael@0 | 313 | fileName = argv[2]; |
michael@0 | 314 | url = argv[3]; |
michael@0 | 315 | } else { |
michael@0 | 316 | /* |
michael@0 | 317 | * case 3: httpget url -o outputfile |
michael@0 | 318 | */ |
michael@0 | 319 | url = argv[1]; |
michael@0 | 320 | if (strcmp(argv[2], "-o") != 0) { |
michael@0 | 321 | PrintUsage(); |
michael@0 | 322 | exit(1); |
michael@0 | 323 | } |
michael@0 | 324 | fileName = argv[3]; |
michael@0 | 325 | } |
michael@0 | 326 | } |
michael@0 | 327 | |
michael@0 | 328 | if (ParseURL(url, host, sizeof(host), port, sizeof(port), |
michael@0 | 329 | path, sizeof(path)) == PR_FAILURE) { |
michael@0 | 330 | exit(1); |
michael@0 | 331 | } |
michael@0 | 332 | |
michael@0 | 333 | if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry) |
michael@0 | 334 | == PR_FAILURE) { |
michael@0 | 335 | fprintf(stderr, "httpget: unknown host name: %s\n", host); |
michael@0 | 336 | exit(1); |
michael@0 | 337 | } |
michael@0 | 338 | |
michael@0 | 339 | addr.inet.family = PR_AF_INET; |
michael@0 | 340 | addr.inet.port = PR_htons((short) atoi(port)); |
michael@0 | 341 | addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]); |
michael@0 | 342 | |
michael@0 | 343 | socket = PR_NewTCPSocket(); |
michael@0 | 344 | if (socket == NULL) { |
michael@0 | 345 | fprintf(stderr, "httpget: cannot create new tcp socket\n"); |
michael@0 | 346 | exit(1); |
michael@0 | 347 | } |
michael@0 | 348 | |
michael@0 | 349 | if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) { |
michael@0 | 350 | fprintf(stderr, "httpget: cannot connect to http server\n"); |
michael@0 | 351 | exitStatus = 1; |
michael@0 | 352 | goto done; |
michael@0 | 353 | } |
michael@0 | 354 | |
michael@0 | 355 | if (fileName == NULL) { |
michael@0 | 356 | file = PR_STDOUT; |
michael@0 | 357 | } else { |
michael@0 | 358 | file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE, |
michael@0 | 359 | 00777); |
michael@0 | 360 | if (file == NULL) { |
michael@0 | 361 | fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n", |
michael@0 | 362 | fileName, PR_GetError(), PR_GetOSError()); |
michael@0 | 363 | exitStatus = 1; |
michael@0 | 364 | goto done; |
michael@0 | 365 | } |
michael@0 | 366 | } |
michael@0 | 367 | |
michael@0 | 368 | cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path); |
michael@0 | 369 | PR_ASSERT(cmdSize == (PRIntn) strlen("GET HTTP/1.0\r\n\r\n") |
michael@0 | 370 | + (PRIntn) strlen(path)); |
michael@0 | 371 | if (PR_Write(socket, buf, cmdSize) != cmdSize) { |
michael@0 | 372 | fprintf(stderr, "httpget: cannot write to http server\n"); |
michael@0 | 373 | exitStatus = 1; |
michael@0 | 374 | goto done; |
michael@0 | 375 | } |
michael@0 | 376 | |
michael@0 | 377 | if (ReadLine(socket, line, sizeof(line)) <= 0) { |
michael@0 | 378 | fprintf(stderr, "httpget: cannot read line from http server\n"); |
michael@0 | 379 | exitStatus = 1; |
michael@0 | 380 | goto done; |
michael@0 | 381 | } |
michael@0 | 382 | |
michael@0 | 383 | /* HTTP response: 200 == OK */ |
michael@0 | 384 | if (strstr(line, "200") == NULL) { |
michael@0 | 385 | fprintf(stderr, "httpget: %s\n", line); |
michael@0 | 386 | exitStatus = 1; |
michael@0 | 387 | goto done; |
michael@0 | 388 | } |
michael@0 | 389 | |
michael@0 | 390 | while (ReadLine(socket, line, sizeof(line)) > 0) { |
michael@0 | 391 | if (line[0] == '\n') { |
michael@0 | 392 | endOfHeader = PR_TRUE; |
michael@0 | 393 | break; |
michael@0 | 394 | } |
michael@0 | 395 | if (strncmp(line, "Content-Length", 14) == 0 |
michael@0 | 396 | || strncmp(line, "Content-length", 14) == 0) { |
michael@0 | 397 | char *p = line + 14; |
michael@0 | 398 | |
michael@0 | 399 | while (*p == ' ' || *p == '\t') { |
michael@0 | 400 | p++; |
michael@0 | 401 | } |
michael@0 | 402 | if (*p != ':') { |
michael@0 | 403 | continue; |
michael@0 | 404 | } |
michael@0 | 405 | p++; |
michael@0 | 406 | while (*p == ' ' || *p == '\t') { |
michael@0 | 407 | p++; |
michael@0 | 408 | } |
michael@0 | 409 | fileSize = 0; |
michael@0 | 410 | while ('0' <= *p && *p <= '9') { |
michael@0 | 411 | fileSize = 10 * fileSize + (*p - '0'); |
michael@0 | 412 | p++; |
michael@0 | 413 | } |
michael@0 | 414 | } |
michael@0 | 415 | } |
michael@0 | 416 | if (endOfHeader == PR_FALSE) { |
michael@0 | 417 | fprintf(stderr, "httpget: cannot read line from http server\n"); |
michael@0 | 418 | exitStatus = 1; |
michael@0 | 419 | goto done; |
michael@0 | 420 | } |
michael@0 | 421 | |
michael@0 | 422 | if (fileName == NULL || fileSize == 0) { |
michael@0 | 423 | FetchFile(socket, file); |
michael@0 | 424 | } else { |
michael@0 | 425 | FastFetchFile(socket, file, fileSize); |
michael@0 | 426 | } |
michael@0 | 427 | |
michael@0 | 428 | done: |
michael@0 | 429 | if (socket) PR_Close(socket); |
michael@0 | 430 | if (file) PR_Close(file); |
michael@0 | 431 | PR_Cleanup(); |
michael@0 | 432 | return exitStatus; |
michael@0 | 433 | } |