michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: michael@0: /* michael@0: * Author: Wan-Teh Chang michael@0: * michael@0: * Given an HTTP URL, httpget uses the GET method to fetch the file. michael@0: * The fetched file is written to stdout by default, or can be michael@0: * saved in an output file. michael@0: * michael@0: * This is a single-threaded program. michael@0: */ michael@0: michael@0: #include "prio.h" michael@0: #include "prnetdb.h" michael@0: #include "prlog.h" michael@0: #include "prerror.h" michael@0: #include "prprf.h" michael@0: #include "prinit.h" michael@0: michael@0: #include michael@0: #include michael@0: #include /* for atoi */ michael@0: michael@0: #define FCOPY_BUFFER_SIZE (16 * 1024) michael@0: #define INPUT_BUFFER_SIZE 1024 michael@0: #define LINE_SIZE 512 michael@0: #define HOST_SIZE 256 michael@0: #define PORT_SIZE 32 michael@0: #define PATH_SIZE 512 michael@0: michael@0: /* michael@0: * A buffer for storing the excess input data for ReadLine. michael@0: * The data in the buffer starts from (including) the element pointed to michael@0: * by inputHead, and ends just before (not including) the element pointed michael@0: * to by inputTail. The buffer is empty if inputHead == inputTail. michael@0: */ michael@0: michael@0: static char inputBuf[INPUT_BUFFER_SIZE]; michael@0: /* michael@0: * inputBufEnd points just past the end of inputBuf michael@0: */ michael@0: static char *inputBufEnd = inputBuf + sizeof(inputBuf); michael@0: static char *inputHead = inputBuf; michael@0: static char *inputTail = inputBuf; michael@0: michael@0: static PRBool endOfStream = PR_FALSE; michael@0: michael@0: /* michael@0: * ReadLine -- michael@0: * michael@0: * Read in a line of text, terminated by CRLF or LF, from fd into buf. michael@0: * The terminating CRLF or LF is included (always as '\n'). The text michael@0: * in buf is terminated by a null byte. The excess bytes are stored in michael@0: * inputBuf for use in the next ReadLine call or FetchFile call. michael@0: * Returns the number of bytes in buf. 0 means end of stream. Returns michael@0: * -1 if read fails. michael@0: */ michael@0: michael@0: PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize) michael@0: { michael@0: char *dst = buf; michael@0: char *bufEnd = buf + bufSize; /* just past the end of buf */ michael@0: PRBool lineFound = PR_FALSE; michael@0: char *crPtr = NULL; /* points to the CR ('\r') character */ michael@0: PRInt32 nRead; michael@0: michael@0: loop: michael@0: PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail michael@0: && inputTail <= inputBufEnd); michael@0: while (lineFound == PR_FALSE && inputHead != inputTail michael@0: && dst < bufEnd - 1) { michael@0: if (*inputHead == '\r') { michael@0: crPtr = dst; michael@0: } else if (*inputHead == '\n') { michael@0: lineFound = PR_TRUE; michael@0: if (crPtr == dst - 1) { michael@0: dst--; michael@0: } michael@0: } michael@0: *(dst++) = *(inputHead++); michael@0: } michael@0: if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) { michael@0: *dst = '\0'; michael@0: return dst - buf; michael@0: } michael@0: michael@0: /* michael@0: * The input buffer should be empty now michael@0: */ michael@0: PR_ASSERT(inputHead == inputTail); michael@0: michael@0: nRead = PR_Read(fd, inputBuf, sizeof(inputBuf)); michael@0: if (nRead == -1) { michael@0: *dst = '\0'; michael@0: return -1; michael@0: } else if (nRead == 0) { michael@0: endOfStream = PR_TRUE; michael@0: *dst = '\0'; michael@0: return dst - buf; michael@0: } michael@0: inputHead = inputBuf; michael@0: inputTail = inputBuf + nRead; michael@0: goto loop; michael@0: } michael@0: michael@0: PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize) michael@0: { michael@0: PRInt32 nBytes = inputTail - inputHead; michael@0: michael@0: if (nBytes == 0) { michael@0: if (endOfStream) { michael@0: return -1; michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: if ((PRInt32) bufSize < nBytes) { michael@0: nBytes = bufSize; michael@0: } michael@0: memcpy(buf, inputHead, nBytes); michael@0: inputHead += nBytes; michael@0: return nBytes; michael@0: } michael@0: michael@0: PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out) michael@0: { michael@0: char buf[FCOPY_BUFFER_SIZE]; michael@0: PRInt32 nBytes; michael@0: michael@0: while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) { michael@0: if (PR_Write(out, buf, nBytes) != nBytes) { michael@0: fprintf(stderr, "httpget: cannot write to file\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: } michael@0: if (nBytes < 0) { michael@0: /* Input buffer is empty and end of stream */ michael@0: return PR_SUCCESS; michael@0: } michael@0: while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) { michael@0: if (PR_Write(out, buf, nBytes) != nBytes) { michael@0: fprintf(stderr, "httpget: cannot write to file\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: } michael@0: if (nBytes < 0) { michael@0: fprintf(stderr, "httpget: cannot read from socket\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: return PR_SUCCESS; michael@0: } michael@0: michael@0: PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size) michael@0: { michael@0: PRInt32 nBytes; michael@0: PRFileMap *outfMap; michael@0: void *addr; michael@0: char *start; michael@0: PRUint32 rem; michael@0: PRUint32 bytesToRead; michael@0: PRStatus rv; michael@0: PRInt64 sz64; michael@0: michael@0: LL_UI2L(sz64, size); michael@0: outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE); michael@0: PR_ASSERT(outfMap); michael@0: addr = PR_MemMap(outfMap, LL_ZERO, size); michael@0: if (addr == NULL) { michael@0: fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(), michael@0: PR_GetOSError()); michael@0: michael@0: PR_CloseFileMap(outfMap); michael@0: return PR_FAILURE; michael@0: } michael@0: start = (char *) addr; michael@0: rem = size; michael@0: while ((nBytes = DrainInputBuffer(start, rem)) > 0) { michael@0: start += nBytes; michael@0: rem -= nBytes; michael@0: } michael@0: if (nBytes < 0) { michael@0: /* Input buffer is empty and end of stream */ michael@0: return PR_SUCCESS; michael@0: } michael@0: bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; michael@0: while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) { michael@0: start += nBytes; michael@0: rem -= nBytes; michael@0: bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; michael@0: } michael@0: if (nBytes < 0) { michael@0: fprintf(stderr, "httpget: cannot read from socket\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: rv = PR_MemUnmap(addr, size); michael@0: PR_ASSERT(rv == PR_SUCCESS); michael@0: rv = PR_CloseFileMap(outfMap); michael@0: PR_ASSERT(rv == PR_SUCCESS); michael@0: return PR_SUCCESS; michael@0: } michael@0: michael@0: PRStatus ParseURL(char *url, char *host, PRUint32 hostSize, michael@0: char *port, PRUint32 portSize, char *path, PRUint32 pathSize) michael@0: { michael@0: char *start, *end; michael@0: char *dst; michael@0: char *hostEnd; michael@0: char *portEnd; michael@0: char *pathEnd; michael@0: michael@0: if (strncmp(url, "http", 4)) { michael@0: fprintf(stderr, "httpget: the protocol must be http\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: if (strncmp(url + 4, "://", 3) || url[7] == '\0') { michael@0: fprintf(stderr, "httpget: malformed URL: %s\n", url); michael@0: return PR_FAILURE; michael@0: } michael@0: michael@0: start = end = url + 7; michael@0: dst = host; michael@0: hostEnd = host + hostSize; michael@0: while (*end && *end != ':' && *end != '/') { michael@0: if (dst == hostEnd - 1) { michael@0: fprintf(stderr, "httpget: host name too long\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: *(dst++) = *(end++); michael@0: } michael@0: *dst = '\0'; michael@0: michael@0: if (*end == '\0') { michael@0: PR_snprintf(port, portSize, "%d", 80); michael@0: PR_snprintf(path, pathSize, "%s", "/"); michael@0: return PR_SUCCESS; michael@0: } michael@0: michael@0: if (*end == ':') { michael@0: end++; michael@0: dst = port; michael@0: portEnd = port + portSize; michael@0: while (*end && *end != '/') { michael@0: if (dst == portEnd - 1) { michael@0: fprintf(stderr, "httpget: port number too long\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: *(dst++) = *(end++); michael@0: } michael@0: *dst = '\0'; michael@0: if (*end == '\0') { michael@0: PR_snprintf(path, pathSize, "%s", "/"); michael@0: return PR_SUCCESS; michael@0: } michael@0: } else { michael@0: PR_snprintf(port, portSize, "%d", 80); michael@0: } michael@0: michael@0: dst = path; michael@0: pathEnd = path + pathSize; michael@0: while (*end) { michael@0: if (dst == pathEnd - 1) { michael@0: fprintf(stderr, "httpget: file pathname too long\n"); michael@0: return PR_FAILURE; michael@0: } michael@0: *(dst++) = *(end++); michael@0: } michael@0: *dst = '\0'; michael@0: return PR_SUCCESS; michael@0: } michael@0: michael@0: void PrintUsage(void) { michael@0: fprintf(stderr, "usage: httpget url\n" michael@0: " httpget -o outputfile url\n" michael@0: " httpget url -o outputfile\n"); michael@0: } michael@0: michael@0: int main(int argc, char **argv) michael@0: { michael@0: PRHostEnt hostentry; michael@0: char buf[PR_NETDB_BUF_SIZE]; michael@0: PRNetAddr addr; michael@0: PRFileDesc *socket = NULL, *file = NULL; michael@0: PRIntn cmdSize; michael@0: char host[HOST_SIZE]; michael@0: char port[PORT_SIZE]; michael@0: char path[PATH_SIZE]; michael@0: char line[LINE_SIZE]; michael@0: int exitStatus = 0; michael@0: PRBool endOfHeader = PR_FALSE; michael@0: char *url; michael@0: char *fileName = NULL; michael@0: PRUint32 fileSize; michael@0: michael@0: if (argc != 2 && argc != 4) { michael@0: PrintUsage(); michael@0: exit(1); michael@0: } michael@0: michael@0: if (argc == 2) { michael@0: /* michael@0: * case 1: httpget url michael@0: */ michael@0: url = argv[1]; michael@0: } else { michael@0: if (strcmp(argv[1], "-o") == 0) { michael@0: /* michael@0: * case 2: httpget -o outputfile url michael@0: */ michael@0: fileName = argv[2]; michael@0: url = argv[3]; michael@0: } else { michael@0: /* michael@0: * case 3: httpget url -o outputfile michael@0: */ michael@0: url = argv[1]; michael@0: if (strcmp(argv[2], "-o") != 0) { michael@0: PrintUsage(); michael@0: exit(1); michael@0: } michael@0: fileName = argv[3]; michael@0: } michael@0: } michael@0: michael@0: if (ParseURL(url, host, sizeof(host), port, sizeof(port), michael@0: path, sizeof(path)) == PR_FAILURE) { michael@0: exit(1); michael@0: } michael@0: michael@0: if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry) michael@0: == PR_FAILURE) { michael@0: fprintf(stderr, "httpget: unknown host name: %s\n", host); michael@0: exit(1); michael@0: } michael@0: michael@0: addr.inet.family = PR_AF_INET; michael@0: addr.inet.port = PR_htons((short) atoi(port)); michael@0: addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]); michael@0: michael@0: socket = PR_NewTCPSocket(); michael@0: if (socket == NULL) { michael@0: fprintf(stderr, "httpget: cannot create new tcp socket\n"); michael@0: exit(1); michael@0: } michael@0: michael@0: if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) { michael@0: fprintf(stderr, "httpget: cannot connect to http server\n"); michael@0: exitStatus = 1; michael@0: goto done; michael@0: } michael@0: michael@0: if (fileName == NULL) { michael@0: file = PR_STDOUT; michael@0: } else { michael@0: file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE, michael@0: 00777); michael@0: if (file == NULL) { michael@0: fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n", michael@0: fileName, PR_GetError(), PR_GetOSError()); michael@0: exitStatus = 1; michael@0: goto done; michael@0: } michael@0: } michael@0: michael@0: cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path); michael@0: PR_ASSERT(cmdSize == (PRIntn) strlen("GET HTTP/1.0\r\n\r\n") michael@0: + (PRIntn) strlen(path)); michael@0: if (PR_Write(socket, buf, cmdSize) != cmdSize) { michael@0: fprintf(stderr, "httpget: cannot write to http server\n"); michael@0: exitStatus = 1; michael@0: goto done; michael@0: } michael@0: michael@0: if (ReadLine(socket, line, sizeof(line)) <= 0) { michael@0: fprintf(stderr, "httpget: cannot read line from http server\n"); michael@0: exitStatus = 1; michael@0: goto done; michael@0: } michael@0: michael@0: /* HTTP response: 200 == OK */ michael@0: if (strstr(line, "200") == NULL) { michael@0: fprintf(stderr, "httpget: %s\n", line); michael@0: exitStatus = 1; michael@0: goto done; michael@0: } michael@0: michael@0: while (ReadLine(socket, line, sizeof(line)) > 0) { michael@0: if (line[0] == '\n') { michael@0: endOfHeader = PR_TRUE; michael@0: break; michael@0: } michael@0: if (strncmp(line, "Content-Length", 14) == 0 michael@0: || strncmp(line, "Content-length", 14) == 0) { michael@0: char *p = line + 14; michael@0: michael@0: while (*p == ' ' || *p == '\t') { michael@0: p++; michael@0: } michael@0: if (*p != ':') { michael@0: continue; michael@0: } michael@0: p++; michael@0: while (*p == ' ' || *p == '\t') { michael@0: p++; michael@0: } michael@0: fileSize = 0; michael@0: while ('0' <= *p && *p <= '9') { michael@0: fileSize = 10 * fileSize + (*p - '0'); michael@0: p++; michael@0: } michael@0: } michael@0: } michael@0: if (endOfHeader == PR_FALSE) { michael@0: fprintf(stderr, "httpget: cannot read line from http server\n"); michael@0: exitStatus = 1; michael@0: goto done; michael@0: } michael@0: michael@0: if (fileName == NULL || fileSize == 0) { michael@0: FetchFile(socket, file); michael@0: } else { michael@0: FastFetchFile(socket, file, fileSize); michael@0: } michael@0: michael@0: done: michael@0: if (socket) PR_Close(socket); michael@0: if (file) PR_Close(file); michael@0: PR_Cleanup(); michael@0: return exitStatus; michael@0: }