|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 |
|
7 /* |
|
8 * Author: Wan-Teh Chang |
|
9 * |
|
10 * Given an HTTP URL, httpget uses the GET method to fetch the file. |
|
11 * The fetched file is written to stdout by default, or can be |
|
12 * saved in an output file. |
|
13 * |
|
14 * This is a single-threaded program. |
|
15 */ |
|
16 |
|
17 #include "prio.h" |
|
18 #include "prnetdb.h" |
|
19 #include "prlog.h" |
|
20 #include "prerror.h" |
|
21 #include "prprf.h" |
|
22 #include "prinit.h" |
|
23 |
|
24 #include <stdio.h> |
|
25 #include <string.h> |
|
26 #include <stdlib.h> /* for atoi */ |
|
27 |
|
28 #define FCOPY_BUFFER_SIZE (16 * 1024) |
|
29 #define INPUT_BUFFER_SIZE 1024 |
|
30 #define LINE_SIZE 512 |
|
31 #define HOST_SIZE 256 |
|
32 #define PORT_SIZE 32 |
|
33 #define PATH_SIZE 512 |
|
34 |
|
35 /* |
|
36 * A buffer for storing the excess input data for ReadLine. |
|
37 * The data in the buffer starts from (including) the element pointed to |
|
38 * by inputHead, and ends just before (not including) the element pointed |
|
39 * to by inputTail. The buffer is empty if inputHead == inputTail. |
|
40 */ |
|
41 |
|
42 static char inputBuf[INPUT_BUFFER_SIZE]; |
|
43 /* |
|
44 * inputBufEnd points just past the end of inputBuf |
|
45 */ |
|
46 static char *inputBufEnd = inputBuf + sizeof(inputBuf); |
|
47 static char *inputHead = inputBuf; |
|
48 static char *inputTail = inputBuf; |
|
49 |
|
50 static PRBool endOfStream = PR_FALSE; |
|
51 |
|
52 /* |
|
53 * ReadLine -- |
|
54 * |
|
55 * Read in a line of text, terminated by CRLF or LF, from fd into buf. |
|
56 * The terminating CRLF or LF is included (always as '\n'). The text |
|
57 * in buf is terminated by a null byte. The excess bytes are stored in |
|
58 * inputBuf for use in the next ReadLine call or FetchFile call. |
|
59 * Returns the number of bytes in buf. 0 means end of stream. Returns |
|
60 * -1 if read fails. |
|
61 */ |
|
62 |
|
63 PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize) |
|
64 { |
|
65 char *dst = buf; |
|
66 char *bufEnd = buf + bufSize; /* just past the end of buf */ |
|
67 PRBool lineFound = PR_FALSE; |
|
68 char *crPtr = NULL; /* points to the CR ('\r') character */ |
|
69 PRInt32 nRead; |
|
70 |
|
71 loop: |
|
72 PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail |
|
73 && inputTail <= inputBufEnd); |
|
74 while (lineFound == PR_FALSE && inputHead != inputTail |
|
75 && dst < bufEnd - 1) { |
|
76 if (*inputHead == '\r') { |
|
77 crPtr = dst; |
|
78 } else if (*inputHead == '\n') { |
|
79 lineFound = PR_TRUE; |
|
80 if (crPtr == dst - 1) { |
|
81 dst--; |
|
82 } |
|
83 } |
|
84 *(dst++) = *(inputHead++); |
|
85 } |
|
86 if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) { |
|
87 *dst = '\0'; |
|
88 return dst - buf; |
|
89 } |
|
90 |
|
91 /* |
|
92 * The input buffer should be empty now |
|
93 */ |
|
94 PR_ASSERT(inputHead == inputTail); |
|
95 |
|
96 nRead = PR_Read(fd, inputBuf, sizeof(inputBuf)); |
|
97 if (nRead == -1) { |
|
98 *dst = '\0'; |
|
99 return -1; |
|
100 } else if (nRead == 0) { |
|
101 endOfStream = PR_TRUE; |
|
102 *dst = '\0'; |
|
103 return dst - buf; |
|
104 } |
|
105 inputHead = inputBuf; |
|
106 inputTail = inputBuf + nRead; |
|
107 goto loop; |
|
108 } |
|
109 |
|
110 PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize) |
|
111 { |
|
112 PRInt32 nBytes = inputTail - inputHead; |
|
113 |
|
114 if (nBytes == 0) { |
|
115 if (endOfStream) { |
|
116 return -1; |
|
117 } else { |
|
118 return 0; |
|
119 } |
|
120 } |
|
121 if ((PRInt32) bufSize < nBytes) { |
|
122 nBytes = bufSize; |
|
123 } |
|
124 memcpy(buf, inputHead, nBytes); |
|
125 inputHead += nBytes; |
|
126 return nBytes; |
|
127 } |
|
128 |
|
129 PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out) |
|
130 { |
|
131 char buf[FCOPY_BUFFER_SIZE]; |
|
132 PRInt32 nBytes; |
|
133 |
|
134 while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) { |
|
135 if (PR_Write(out, buf, nBytes) != nBytes) { |
|
136 fprintf(stderr, "httpget: cannot write to file\n"); |
|
137 return PR_FAILURE; |
|
138 } |
|
139 } |
|
140 if (nBytes < 0) { |
|
141 /* Input buffer is empty and end of stream */ |
|
142 return PR_SUCCESS; |
|
143 } |
|
144 while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) { |
|
145 if (PR_Write(out, buf, nBytes) != nBytes) { |
|
146 fprintf(stderr, "httpget: cannot write to file\n"); |
|
147 return PR_FAILURE; |
|
148 } |
|
149 } |
|
150 if (nBytes < 0) { |
|
151 fprintf(stderr, "httpget: cannot read from socket\n"); |
|
152 return PR_FAILURE; |
|
153 } |
|
154 return PR_SUCCESS; |
|
155 } |
|
156 |
|
157 PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size) |
|
158 { |
|
159 PRInt32 nBytes; |
|
160 PRFileMap *outfMap; |
|
161 void *addr; |
|
162 char *start; |
|
163 PRUint32 rem; |
|
164 PRUint32 bytesToRead; |
|
165 PRStatus rv; |
|
166 PRInt64 sz64; |
|
167 |
|
168 LL_UI2L(sz64, size); |
|
169 outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE); |
|
170 PR_ASSERT(outfMap); |
|
171 addr = PR_MemMap(outfMap, LL_ZERO, size); |
|
172 if (addr == NULL) { |
|
173 fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(), |
|
174 PR_GetOSError()); |
|
175 |
|
176 PR_CloseFileMap(outfMap); |
|
177 return PR_FAILURE; |
|
178 } |
|
179 start = (char *) addr; |
|
180 rem = size; |
|
181 while ((nBytes = DrainInputBuffer(start, rem)) > 0) { |
|
182 start += nBytes; |
|
183 rem -= nBytes; |
|
184 } |
|
185 if (nBytes < 0) { |
|
186 /* Input buffer is empty and end of stream */ |
|
187 return PR_SUCCESS; |
|
188 } |
|
189 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; |
|
190 while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) { |
|
191 start += nBytes; |
|
192 rem -= nBytes; |
|
193 bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE; |
|
194 } |
|
195 if (nBytes < 0) { |
|
196 fprintf(stderr, "httpget: cannot read from socket\n"); |
|
197 return PR_FAILURE; |
|
198 } |
|
199 rv = PR_MemUnmap(addr, size); |
|
200 PR_ASSERT(rv == PR_SUCCESS); |
|
201 rv = PR_CloseFileMap(outfMap); |
|
202 PR_ASSERT(rv == PR_SUCCESS); |
|
203 return PR_SUCCESS; |
|
204 } |
|
205 |
|
206 PRStatus ParseURL(char *url, char *host, PRUint32 hostSize, |
|
207 char *port, PRUint32 portSize, char *path, PRUint32 pathSize) |
|
208 { |
|
209 char *start, *end; |
|
210 char *dst; |
|
211 char *hostEnd; |
|
212 char *portEnd; |
|
213 char *pathEnd; |
|
214 |
|
215 if (strncmp(url, "http", 4)) { |
|
216 fprintf(stderr, "httpget: the protocol must be http\n"); |
|
217 return PR_FAILURE; |
|
218 } |
|
219 if (strncmp(url + 4, "://", 3) || url[7] == '\0') { |
|
220 fprintf(stderr, "httpget: malformed URL: %s\n", url); |
|
221 return PR_FAILURE; |
|
222 } |
|
223 |
|
224 start = end = url + 7; |
|
225 dst = host; |
|
226 hostEnd = host + hostSize; |
|
227 while (*end && *end != ':' && *end != '/') { |
|
228 if (dst == hostEnd - 1) { |
|
229 fprintf(stderr, "httpget: host name too long\n"); |
|
230 return PR_FAILURE; |
|
231 } |
|
232 *(dst++) = *(end++); |
|
233 } |
|
234 *dst = '\0'; |
|
235 |
|
236 if (*end == '\0') { |
|
237 PR_snprintf(port, portSize, "%d", 80); |
|
238 PR_snprintf(path, pathSize, "%s", "/"); |
|
239 return PR_SUCCESS; |
|
240 } |
|
241 |
|
242 if (*end == ':') { |
|
243 end++; |
|
244 dst = port; |
|
245 portEnd = port + portSize; |
|
246 while (*end && *end != '/') { |
|
247 if (dst == portEnd - 1) { |
|
248 fprintf(stderr, "httpget: port number too long\n"); |
|
249 return PR_FAILURE; |
|
250 } |
|
251 *(dst++) = *(end++); |
|
252 } |
|
253 *dst = '\0'; |
|
254 if (*end == '\0') { |
|
255 PR_snprintf(path, pathSize, "%s", "/"); |
|
256 return PR_SUCCESS; |
|
257 } |
|
258 } else { |
|
259 PR_snprintf(port, portSize, "%d", 80); |
|
260 } |
|
261 |
|
262 dst = path; |
|
263 pathEnd = path + pathSize; |
|
264 while (*end) { |
|
265 if (dst == pathEnd - 1) { |
|
266 fprintf(stderr, "httpget: file pathname too long\n"); |
|
267 return PR_FAILURE; |
|
268 } |
|
269 *(dst++) = *(end++); |
|
270 } |
|
271 *dst = '\0'; |
|
272 return PR_SUCCESS; |
|
273 } |
|
274 |
|
/*
 * PrintUsage --
 *
 *     Write the three accepted command-line forms to stderr.
 */
void PrintUsage(void)
{
    static const char usageText[] =
        "usage: httpget url\n"
        " httpget -o outputfile url\n"
        " httpget url -o outputfile\n";

    fputs(usageText, stderr);
}
|
280 |
|
281 int main(int argc, char **argv) |
|
282 { |
|
283 PRHostEnt hostentry; |
|
284 char buf[PR_NETDB_BUF_SIZE]; |
|
285 PRNetAddr addr; |
|
286 PRFileDesc *socket = NULL, *file = NULL; |
|
287 PRIntn cmdSize; |
|
288 char host[HOST_SIZE]; |
|
289 char port[PORT_SIZE]; |
|
290 char path[PATH_SIZE]; |
|
291 char line[LINE_SIZE]; |
|
292 int exitStatus = 0; |
|
293 PRBool endOfHeader = PR_FALSE; |
|
294 char *url; |
|
295 char *fileName = NULL; |
|
296 PRUint32 fileSize; |
|
297 |
|
298 if (argc != 2 && argc != 4) { |
|
299 PrintUsage(); |
|
300 exit(1); |
|
301 } |
|
302 |
|
303 if (argc == 2) { |
|
304 /* |
|
305 * case 1: httpget url |
|
306 */ |
|
307 url = argv[1]; |
|
308 } else { |
|
309 if (strcmp(argv[1], "-o") == 0) { |
|
310 /* |
|
311 * case 2: httpget -o outputfile url |
|
312 */ |
|
313 fileName = argv[2]; |
|
314 url = argv[3]; |
|
315 } else { |
|
316 /* |
|
317 * case 3: httpget url -o outputfile |
|
318 */ |
|
319 url = argv[1]; |
|
320 if (strcmp(argv[2], "-o") != 0) { |
|
321 PrintUsage(); |
|
322 exit(1); |
|
323 } |
|
324 fileName = argv[3]; |
|
325 } |
|
326 } |
|
327 |
|
328 if (ParseURL(url, host, sizeof(host), port, sizeof(port), |
|
329 path, sizeof(path)) == PR_FAILURE) { |
|
330 exit(1); |
|
331 } |
|
332 |
|
333 if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry) |
|
334 == PR_FAILURE) { |
|
335 fprintf(stderr, "httpget: unknown host name: %s\n", host); |
|
336 exit(1); |
|
337 } |
|
338 |
|
339 addr.inet.family = PR_AF_INET; |
|
340 addr.inet.port = PR_htons((short) atoi(port)); |
|
341 addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]); |
|
342 |
|
343 socket = PR_NewTCPSocket(); |
|
344 if (socket == NULL) { |
|
345 fprintf(stderr, "httpget: cannot create new tcp socket\n"); |
|
346 exit(1); |
|
347 } |
|
348 |
|
349 if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) { |
|
350 fprintf(stderr, "httpget: cannot connect to http server\n"); |
|
351 exitStatus = 1; |
|
352 goto done; |
|
353 } |
|
354 |
|
355 if (fileName == NULL) { |
|
356 file = PR_STDOUT; |
|
357 } else { |
|
358 file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE, |
|
359 00777); |
|
360 if (file == NULL) { |
|
361 fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n", |
|
362 fileName, PR_GetError(), PR_GetOSError()); |
|
363 exitStatus = 1; |
|
364 goto done; |
|
365 } |
|
366 } |
|
367 |
|
368 cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path); |
|
369 PR_ASSERT(cmdSize == (PRIntn) strlen("GET HTTP/1.0\r\n\r\n") |
|
370 + (PRIntn) strlen(path)); |
|
371 if (PR_Write(socket, buf, cmdSize) != cmdSize) { |
|
372 fprintf(stderr, "httpget: cannot write to http server\n"); |
|
373 exitStatus = 1; |
|
374 goto done; |
|
375 } |
|
376 |
|
377 if (ReadLine(socket, line, sizeof(line)) <= 0) { |
|
378 fprintf(stderr, "httpget: cannot read line from http server\n"); |
|
379 exitStatus = 1; |
|
380 goto done; |
|
381 } |
|
382 |
|
383 /* HTTP response: 200 == OK */ |
|
384 if (strstr(line, "200") == NULL) { |
|
385 fprintf(stderr, "httpget: %s\n", line); |
|
386 exitStatus = 1; |
|
387 goto done; |
|
388 } |
|
389 |
|
390 while (ReadLine(socket, line, sizeof(line)) > 0) { |
|
391 if (line[0] == '\n') { |
|
392 endOfHeader = PR_TRUE; |
|
393 break; |
|
394 } |
|
395 if (strncmp(line, "Content-Length", 14) == 0 |
|
396 || strncmp(line, "Content-length", 14) == 0) { |
|
397 char *p = line + 14; |
|
398 |
|
399 while (*p == ' ' || *p == '\t') { |
|
400 p++; |
|
401 } |
|
402 if (*p != ':') { |
|
403 continue; |
|
404 } |
|
405 p++; |
|
406 while (*p == ' ' || *p == '\t') { |
|
407 p++; |
|
408 } |
|
409 fileSize = 0; |
|
410 while ('0' <= *p && *p <= '9') { |
|
411 fileSize = 10 * fileSize + (*p - '0'); |
|
412 p++; |
|
413 } |
|
414 } |
|
415 } |
|
416 if (endOfHeader == PR_FALSE) { |
|
417 fprintf(stderr, "httpget: cannot read line from http server\n"); |
|
418 exitStatus = 1; |
|
419 goto done; |
|
420 } |
|
421 |
|
422 if (fileName == NULL || fileSize == 0) { |
|
423 FetchFile(socket, file); |
|
424 } else { |
|
425 FastFetchFile(socket, file, fileSize); |
|
426 } |
|
427 |
|
428 done: |
|
429 if (socket) PR_Close(socket); |
|
430 if (file) PR_Close(file); |
|
431 PR_Cleanup(); |
|
432 return exitStatus; |
|
433 } |