nsprpub/tools/httpget.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/nsprpub/tools/httpget.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,433 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +
    1.10 +/*
    1.11 + * Author: Wan-Teh Chang
    1.12 + *
    1.13 + * Given an HTTP URL, httpget uses the GET method to fetch the file.
    1.14 + * The fetched file is written to stdout by default, or can be
    1.15 + * saved in an output file.
    1.16 + *
    1.17 + * This is a single-threaded program.
    1.18 + */
    1.19 +
    1.20 +#include "prio.h"
    1.21 +#include "prnetdb.h"
    1.22 +#include "prlog.h"
    1.23 +#include "prerror.h"
    1.24 +#include "prprf.h"
    1.25 +#include "prinit.h"
    1.26 +
    1.27 +#include <stdio.h>
    1.28 +#include <string.h>
    1.29 +#include <stdlib.h>  /* for atoi */
    1.30 +
    1.31 +#define FCOPY_BUFFER_SIZE (16 * 1024)
    1.32 +#define INPUT_BUFFER_SIZE 1024
    1.33 +#define LINE_SIZE 512
    1.34 +#define HOST_SIZE 256
    1.35 +#define PORT_SIZE 32
    1.36 +#define PATH_SIZE 512
    1.37 +
/*
 * A buffer for storing the excess input data for ReadLine.
 * The data in the buffer starts from (including) the element pointed to
 * by inputHead, and ends just before (not including) the element pointed
 * to by inputTail.  The buffer is empty if inputHead == inputTail.
 *
 * ReadLine refills the buffer with PR_Read once it is empty;
 * DrainInputBuffer hands any leftover bytes to the file-copy routines.
 */

static char inputBuf[INPUT_BUFFER_SIZE];
/*
 * inputBufEnd points just past the end of inputBuf
 */
static char *inputBufEnd = inputBuf + sizeof(inputBuf);
/* inputHead: next unconsumed byte in inputBuf */
static char *inputHead = inputBuf;
/* inputTail: one past the last valid byte in inputBuf */
static char *inputTail = inputBuf;

/*
 * Set to PR_TRUE by ReadLine when PR_Read returns 0.  DrainInputBuffer
 * consults it to tell "buffer empty, more may follow" apart from
 * "buffer empty, stream finished".
 */
static PRBool endOfStream = PR_FALSE;
    1.54 +
    1.55 +/*
    1.56 + * ReadLine --
    1.57 + *
    1.58 + * Read in a line of text, terminated by CRLF or LF, from fd into buf.
    1.59 + * The terminating CRLF or LF is included (always as '\n').  The text
    1.60 + * in buf is terminated by a null byte.  The excess bytes are stored in
    1.61 + * inputBuf for use in the next ReadLine call or FetchFile call.
    1.62 + * Returns the number of bytes in buf.  0 means end of stream.  Returns
    1.63 + * -1 if read fails.
    1.64 + */
    1.65 +
    1.66 +PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
    1.67 +{
    1.68 +    char *dst = buf;
    1.69 +    char *bufEnd = buf + bufSize;  /* just past the end of buf */
    1.70 +    PRBool lineFound = PR_FALSE;
    1.71 +    char *crPtr = NULL;  /* points to the CR ('\r') character */
    1.72 +    PRInt32 nRead;
    1.73 +
    1.74 +loop:
    1.75 +    PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
    1.76 +	    && inputTail <= inputBufEnd);
    1.77 +    while (lineFound == PR_FALSE && inputHead != inputTail
    1.78 +	    && dst < bufEnd - 1) {
    1.79 +	if (*inputHead == '\r') {
    1.80 +	    crPtr = dst;
    1.81 +	} else if (*inputHead == '\n') {
    1.82 +	    lineFound = PR_TRUE;
    1.83 +	    if (crPtr == dst - 1) {
    1.84 +		dst--; 
    1.85 +	    }
    1.86 +	}
    1.87 +	*(dst++) = *(inputHead++);
    1.88 +    }
    1.89 +    if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) {
    1.90 +	*dst = '\0';
    1.91 +	return dst - buf;
    1.92 +    }
    1.93 +
    1.94 +    /*
    1.95 +     * The input buffer should be empty now
    1.96 +     */
    1.97 +    PR_ASSERT(inputHead == inputTail);
    1.98 +
    1.99 +    nRead = PR_Read(fd, inputBuf, sizeof(inputBuf));
   1.100 +    if (nRead == -1) {
   1.101 +	*dst = '\0';
   1.102 +	return -1;
   1.103 +    } else if (nRead == 0) {
   1.104 +	endOfStream = PR_TRUE;
   1.105 +	*dst = '\0';
   1.106 +	return dst - buf;
   1.107 +    }
   1.108 +    inputHead = inputBuf;
   1.109 +    inputTail = inputBuf + nRead;
   1.110 +    goto loop;
   1.111 +}
   1.112 +
   1.113 +PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
   1.114 +{
   1.115 +    PRInt32 nBytes = inputTail - inputHead;
   1.116 +
   1.117 +    if (nBytes == 0) {
   1.118 +	if (endOfStream) {
   1.119 +	    return -1;
   1.120 +	} else {
   1.121 +	    return 0;
   1.122 +	}
   1.123 +    }
   1.124 +    if ((PRInt32) bufSize < nBytes) {
   1.125 +	nBytes = bufSize;
   1.126 +    }
   1.127 +    memcpy(buf, inputHead, nBytes);
   1.128 +    inputHead += nBytes;
   1.129 +    return nBytes;
   1.130 +}
   1.131 +
   1.132 +PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
   1.133 +{
   1.134 +    char buf[FCOPY_BUFFER_SIZE];
   1.135 +    PRInt32 nBytes;
   1.136 +
   1.137 +    while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) {
   1.138 +	if (PR_Write(out, buf, nBytes) != nBytes) {
   1.139 +            fprintf(stderr, "httpget: cannot write to file\n");
   1.140 +	    return PR_FAILURE;
   1.141 +	}
   1.142 +    }
   1.143 +    if (nBytes < 0) {
   1.144 +	/* Input buffer is empty and end of stream */
   1.145 +	return PR_SUCCESS;
   1.146 +    }
   1.147 +    while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) {
   1.148 +	if (PR_Write(out, buf, nBytes) != nBytes) {
   1.149 +	    fprintf(stderr, "httpget: cannot write to file\n");
   1.150 +	    return PR_FAILURE;
   1.151 +        }
   1.152 +    }
   1.153 +    if (nBytes < 0) {
   1.154 +	fprintf(stderr, "httpget: cannot read from socket\n");
   1.155 +	return PR_FAILURE;
   1.156 +    }
   1.157 +    return PR_SUCCESS;
   1.158 +}
   1.159 +
   1.160 +PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
   1.161 +{
   1.162 +    PRInt32 nBytes;
   1.163 +    PRFileMap *outfMap;
   1.164 +    void *addr;
   1.165 +    char *start;
   1.166 +    PRUint32 rem;
   1.167 +    PRUint32 bytesToRead;
   1.168 +    PRStatus rv;
   1.169 +    PRInt64 sz64;
   1.170 +
   1.171 +    LL_UI2L(sz64, size);
   1.172 +    outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
   1.173 +    PR_ASSERT(outfMap);
   1.174 +    addr = PR_MemMap(outfMap, LL_ZERO, size);
   1.175 +    if (addr == NULL) {
   1.176 +	fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
   1.177 +		PR_GetOSError());
   1.178 +
   1.179 +	PR_CloseFileMap(outfMap);
   1.180 +	return PR_FAILURE;
   1.181 +    }
   1.182 +    start = (char *) addr;
   1.183 +    rem = size;
   1.184 +    while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
   1.185 +	start += nBytes;
   1.186 +	rem -= nBytes;
   1.187 +    }
   1.188 +    if (nBytes < 0) {
   1.189 +	/* Input buffer is empty and end of stream */
   1.190 +	return PR_SUCCESS;
   1.191 +    }
   1.192 +    bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
   1.193 +    while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) {
   1.194 +	start += nBytes;
   1.195 +	rem -= nBytes;
   1.196 +        bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
   1.197 +    }
   1.198 +    if (nBytes < 0) {
   1.199 +	fprintf(stderr, "httpget: cannot read from socket\n");
   1.200 +	return PR_FAILURE;
   1.201 +    }
   1.202 +    rv = PR_MemUnmap(addr, size);
   1.203 +    PR_ASSERT(rv == PR_SUCCESS);
   1.204 +    rv = PR_CloseFileMap(outfMap);
   1.205 +    PR_ASSERT(rv == PR_SUCCESS);
   1.206 +    return PR_SUCCESS;
   1.207 +}
   1.208 +
   1.209 +PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
   1.210 +    char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
   1.211 +{
   1.212 +    char *start, *end;
   1.213 +    char *dst;
   1.214 +    char *hostEnd;
   1.215 +    char *portEnd;
   1.216 +    char *pathEnd;
   1.217 +
   1.218 +    if (strncmp(url, "http", 4)) {
   1.219 +	fprintf(stderr, "httpget: the protocol must be http\n");
   1.220 +	return PR_FAILURE;
   1.221 +    }
   1.222 +    if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
   1.223 +	fprintf(stderr, "httpget: malformed URL: %s\n", url);
   1.224 +	return PR_FAILURE;
   1.225 +    }
   1.226 +
   1.227 +    start = end = url + 7;
   1.228 +    dst = host;
   1.229 +    hostEnd = host + hostSize;
   1.230 +    while (*end && *end != ':' && *end != '/') {
   1.231 +	if (dst == hostEnd - 1) {
   1.232 +	    fprintf(stderr, "httpget: host name too long\n");
   1.233 +	    return PR_FAILURE;
   1.234 +	}
   1.235 +	*(dst++) = *(end++);
   1.236 +    }
   1.237 +    *dst = '\0';
   1.238 +
   1.239 +    if (*end == '\0') {
   1.240 +	PR_snprintf(port, portSize, "%d", 80);
   1.241 +	PR_snprintf(path, pathSize, "%s", "/");
   1.242 +	return PR_SUCCESS;
   1.243 +    }
   1.244 +
   1.245 +    if (*end == ':') {
   1.246 +	end++;
   1.247 +	dst = port;
   1.248 +	portEnd = port + portSize;
   1.249 +	while (*end && *end != '/') {
   1.250 +	    if (dst == portEnd - 1) {
   1.251 +		fprintf(stderr, "httpget: port number too long\n");
   1.252 +		return PR_FAILURE;
   1.253 +	    }
   1.254 +	    *(dst++) = *(end++);
   1.255 +        }
   1.256 +	*dst = '\0';
   1.257 +	if (*end == '\0') {
   1.258 +	    PR_snprintf(path, pathSize, "%s", "/");
   1.259 +	    return PR_SUCCESS;
   1.260 +        }
   1.261 +    } else {
   1.262 +	PR_snprintf(port, portSize, "%d", 80);
   1.263 +    }
   1.264 +
   1.265 +    dst = path;
   1.266 +    pathEnd = path + pathSize;
   1.267 +    while (*end) {
   1.268 +	if (dst == pathEnd - 1) {
   1.269 +	    fprintf(stderr, "httpget: file pathname too long\n");
   1.270 +	    return PR_FAILURE;
   1.271 +	}
   1.272 +	*(dst++) = *(end++);
   1.273 +    }
   1.274 +    *dst = '\0';
   1.275 +    return PR_SUCCESS;
   1.276 +}
   1.277 +
   1.278 +void PrintUsage(void) {
   1.279 +    fprintf(stderr, "usage: httpget url\n"
   1.280 +		    "       httpget -o outputfile url\n"
   1.281 +		    "       httpget url -o outputfile\n");
   1.282 +}
   1.283 +
   1.284 +int main(int argc, char **argv)
   1.285 +{
   1.286 +    PRHostEnt hostentry;
   1.287 +    char buf[PR_NETDB_BUF_SIZE];
   1.288 +    PRNetAddr addr;
   1.289 +    PRFileDesc *socket = NULL, *file = NULL;
   1.290 +    PRIntn cmdSize;
   1.291 +    char host[HOST_SIZE];
   1.292 +    char port[PORT_SIZE];
   1.293 +    char path[PATH_SIZE];
   1.294 +    char line[LINE_SIZE];
   1.295 +    int exitStatus = 0;
   1.296 +    PRBool endOfHeader = PR_FALSE;
   1.297 +    char *url;
   1.298 +    char *fileName = NULL;
   1.299 +    PRUint32 fileSize;
   1.300 +
   1.301 +    if (argc != 2 && argc != 4) {
   1.302 +	PrintUsage();
   1.303 +	exit(1);
   1.304 +    }
   1.305 +
   1.306 +    if (argc == 2) {
   1.307 +	/*
   1.308 +	 * case 1: httpget url
   1.309 +	 */
   1.310 +	url = argv[1];
   1.311 +    } else {
   1.312 +	if (strcmp(argv[1], "-o") == 0) {
   1.313 +	    /*
   1.314 +	     * case 2: httpget -o outputfile url
   1.315 +	     */
   1.316 +	    fileName = argv[2];
   1.317 +	    url = argv[3];
   1.318 +        } else {
   1.319 +	    /*
   1.320 +	     * case 3: httpget url -o outputfile
   1.321 +	     */
   1.322 +	    url = argv[1];
   1.323 +	    if (strcmp(argv[2], "-o") != 0) {
   1.324 +		PrintUsage();
   1.325 +		exit(1);
   1.326 +            }
   1.327 +	    fileName = argv[3];
   1.328 +	}
   1.329 +    }
   1.330 +
   1.331 +    if (ParseURL(url, host, sizeof(host), port, sizeof(port),
   1.332 +	    path, sizeof(path)) == PR_FAILURE) {
   1.333 +	exit(1);
   1.334 +    }
   1.335 +
   1.336 +    if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
   1.337 +	    == PR_FAILURE) {
   1.338 +        fprintf(stderr, "httpget: unknown host name: %s\n", host);
   1.339 +	exit(1);
   1.340 +    }
   1.341 +
   1.342 +    addr.inet.family = PR_AF_INET;
   1.343 +    addr.inet.port = PR_htons((short) atoi(port));
   1.344 +    addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);
   1.345 +
   1.346 +    socket = PR_NewTCPSocket();
   1.347 +    if (socket == NULL) {
   1.348 +	fprintf(stderr, "httpget: cannot create new tcp socket\n");
   1.349 +	exit(1);
   1.350 +    }
   1.351 +
   1.352 +    if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
   1.353 +	fprintf(stderr, "httpget: cannot connect to http server\n");
   1.354 +	exitStatus = 1;
   1.355 +	goto done;
   1.356 +    }
   1.357 +
   1.358 +    if (fileName == NULL) {
   1.359 +	file = PR_STDOUT;
   1.360 +    } else {
   1.361 +        file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
   1.362 +		00777);
   1.363 +        if (file == NULL) {
   1.364 +	    fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
   1.365 +		    fileName, PR_GetError(), PR_GetOSError());
   1.366 +	    exitStatus = 1;
   1.367 +	    goto done;
   1.368 +	}
   1.369 +    }
   1.370 +
   1.371 +    cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
   1.372 +    PR_ASSERT(cmdSize == (PRIntn) strlen("GET  HTTP/1.0\r\n\r\n")
   1.373 +            + (PRIntn) strlen(path));
   1.374 +    if (PR_Write(socket, buf, cmdSize) != cmdSize) {
   1.375 +	fprintf(stderr, "httpget: cannot write to http server\n");
   1.376 +	exitStatus = 1;
   1.377 +	goto done;
   1.378 +    }
   1.379 +
   1.380 +    if (ReadLine(socket, line, sizeof(line)) <= 0) {
   1.381 +	fprintf(stderr, "httpget: cannot read line from http server\n");
   1.382 +	exitStatus = 1;
   1.383 +	goto done;
   1.384 +    }
   1.385 +
   1.386 +    /* HTTP response: 200 == OK */
   1.387 +    if (strstr(line, "200") == NULL) {
   1.388 +	fprintf(stderr, "httpget: %s\n", line);
   1.389 +	exitStatus = 1;
   1.390 +	goto done;
   1.391 +    }
   1.392 +
   1.393 +    while (ReadLine(socket, line, sizeof(line)) > 0) {
   1.394 +	if (line[0] == '\n') {
   1.395 +	    endOfHeader = PR_TRUE;
   1.396 +	    break;
   1.397 +	}
   1.398 +	if (strncmp(line, "Content-Length", 14) == 0
   1.399 +		|| strncmp(line, "Content-length", 14) == 0) {
   1.400 +	    char *p = line + 14;
   1.401 +
   1.402 +	    while (*p == ' ' || *p == '\t') {
   1.403 +		p++;
   1.404 +	    }
   1.405 +	    if (*p != ':') {
   1.406 +		continue;
   1.407 +            }
   1.408 +	    p++;
   1.409 +	    while (*p == ' ' || *p == '\t') {
   1.410 +		p++;
   1.411 +	    }
   1.412 +	    fileSize = 0;
   1.413 +	    while ('0' <= *p && *p <= '9') {
   1.414 +		fileSize = 10 * fileSize + (*p - '0');
   1.415 +		p++;
   1.416 +            }
   1.417 +	}
   1.418 +    }
   1.419 +    if (endOfHeader == PR_FALSE) {
   1.420 +	fprintf(stderr, "httpget: cannot read line from http server\n");
   1.421 +	exitStatus = 1;
   1.422 +	goto done;
   1.423 +    }
   1.424 +
   1.425 +    if (fileName == NULL || fileSize == 0) {
   1.426 +        FetchFile(socket, file);
   1.427 +    } else {
   1.428 +	FastFetchFile(socket, file, fileSize);
   1.429 +    }
   1.430 +
   1.431 +done:
   1.432 +    if (socket) PR_Close(socket);
   1.433 +    if (file) PR_Close(file);
   1.434 +    PR_Cleanup();
   1.435 +    return exitStatus;
   1.436 +}

mercurial