|
1 /* gzread.c -- zlib functions for reading gzip files |
|
2 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler |
|
3 * For conditions of distribution and use, see copyright notice in zlib.h |
|
4 */ |
|
5 |
|
6 #include "gzguts.h" |
|
7 |
|
8 /* Local functions */ |
|
9 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); |
|
10 local int gz_avail OF((gz_statep)); |
|
11 local int gz_look OF((gz_statep)); |
|
12 local int gz_decomp OF((gz_statep)); |
|
13 local int gz_fetch OF((gz_statep)); |
|
14 local int gz_skip OF((gz_statep, z_off64_t)); |
|
15 |
|
16 /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from |
|
17 state->fd, and update state->eof, state->err, and state->msg as appropriate. |
|
18 This function needs to loop on read(), since read() is not guaranteed to |
|
19 read the number of bytes requested, depending on the type of descriptor. */ |
|
20 local int gz_load(state, buf, len, have) |
|
21 gz_statep state; |
|
22 unsigned char *buf; |
|
23 unsigned len; |
|
24 unsigned *have; |
|
25 { |
|
26 int ret; |
|
27 |
|
28 *have = 0; |
|
29 do { |
|
30 ret = read(state->fd, buf + *have, len - *have); |
|
31 if (ret <= 0) |
|
32 break; |
|
33 *have += ret; |
|
34 } while (*have < len); |
|
35 if (ret < 0) { |
|
36 gz_error(state, Z_ERRNO, zstrerror()); |
|
37 return -1; |
|
38 } |
|
39 if (ret == 0) |
|
40 state->eof = 1; |
|
41 return 0; |
|
42 } |
|
43 |
|
44 /* Load up input buffer and set eof flag if last data loaded -- return -1 on |
|
45 error, 0 otherwise. Note that the eof flag is set when the end of the input |
|
46 file is reached, even though there may be unused data in the buffer. Once |
|
47 that data has been used, no more attempts will be made to read the file. |
|
48 If strm->avail_in != 0, then the current data is moved to the beginning of |
|
49 the input buffer, and then the remainder of the buffer is loaded with the |
|
50 available data from the input file. */ |
|
51 local int gz_avail(state) |
|
52 gz_statep state; |
|
53 { |
|
54 unsigned got; |
|
55 z_streamp strm = &(state->strm); |
|
56 |
|
57 if (state->err != Z_OK && state->err != Z_BUF_ERROR) |
|
58 return -1; |
|
59 if (state->eof == 0) { |
|
60 if (strm->avail_in) { /* copy what's there to the start */ |
|
61 unsigned char *p = state->in; |
|
62 unsigned const char *q = strm->next_in; |
|
63 unsigned n = strm->avail_in; |
|
64 do { |
|
65 *p++ = *q++; |
|
66 } while (--n); |
|
67 } |
|
68 if (gz_load(state, state->in + strm->avail_in, |
|
69 state->size - strm->avail_in, &got) == -1) |
|
70 return -1; |
|
71 strm->avail_in += got; |
|
72 strm->next_in = state->in; |
|
73 } |
|
74 return 0; |
|
75 } |
|
76 |
|
77 /* Look for gzip header, set up for inflate or copy. state->x.have must be 0. |
|
78 If this is the first time in, allocate required memory. state->how will be |
|
79 left unchanged if there is no more input data available, will be set to COPY |
|
80 if there is no gzip header and direct copying will be performed, or it will |
|
81 be set to GZIP for decompression. If direct copying, then leftover input |
|
82 data from the input buffer will be copied to the output buffer. In that |
|
83 case, all further file reads will be directly to either the output buffer or |
|
84 a user buffer. If decompressing, the inflate state will be initialized. |
|
85 gz_look() will return 0 on success or -1 on failure. */ |
|
86 local int gz_look(state) |
|
87 gz_statep state; |
|
88 { |
|
89 z_streamp strm = &(state->strm); |
|
90 |
|
91 /* allocate read buffers and inflate memory */ |
|
92 if (state->size == 0) { |
|
93 /* allocate buffers */ |
|
94 state->in = (unsigned char *)malloc(state->want); |
|
95 state->out = (unsigned char *)malloc(state->want << 1); |
|
96 if (state->in == NULL || state->out == NULL) { |
|
97 if (state->out != NULL) |
|
98 free(state->out); |
|
99 if (state->in != NULL) |
|
100 free(state->in); |
|
101 gz_error(state, Z_MEM_ERROR, "out of memory"); |
|
102 return -1; |
|
103 } |
|
104 state->size = state->want; |
|
105 |
|
106 /* allocate inflate memory */ |
|
107 state->strm.zalloc = Z_NULL; |
|
108 state->strm.zfree = Z_NULL; |
|
109 state->strm.opaque = Z_NULL; |
|
110 state->strm.avail_in = 0; |
|
111 state->strm.next_in = Z_NULL; |
|
112 if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ |
|
113 free(state->out); |
|
114 free(state->in); |
|
115 state->size = 0; |
|
116 gz_error(state, Z_MEM_ERROR, "out of memory"); |
|
117 return -1; |
|
118 } |
|
119 } |
|
120 |
|
121 /* get at least the magic bytes in the input buffer */ |
|
122 if (strm->avail_in < 2) { |
|
123 if (gz_avail(state) == -1) |
|
124 return -1; |
|
125 if (strm->avail_in == 0) |
|
126 return 0; |
|
127 } |
|
128 |
|
129 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is |
|
130 a logical dilemma here when considering the case of a partially written |
|
131 gzip file, to wit, if a single 31 byte is written, then we cannot tell |
|
132 whether this is a single-byte file, or just a partially written gzip |
|
133 file -- for here we assume that if a gzip file is being written, then |
|
134 the header will be written in a single operation, so that reading a |
|
135 single byte is sufficient indication that it is not a gzip file) */ |
|
136 if (strm->avail_in > 1 && |
|
137 strm->next_in[0] == 31 && strm->next_in[1] == 139) { |
|
138 inflateReset(strm); |
|
139 state->how = GZIP; |
|
140 state->direct = 0; |
|
141 return 0; |
|
142 } |
|
143 |
|
144 /* no gzip header -- if we were decoding gzip before, then this is trailing |
|
145 garbage. Ignore the trailing garbage and finish. */ |
|
146 if (state->direct == 0) { |
|
147 strm->avail_in = 0; |
|
148 state->eof = 1; |
|
149 state->x.have = 0; |
|
150 return 0; |
|
151 } |
|
152 |
|
153 /* doing raw i/o, copy any leftover input to output -- this assumes that |
|
154 the output buffer is larger than the input buffer, which also assures |
|
155 space for gzungetc() */ |
|
156 state->x.next = state->out; |
|
157 if (strm->avail_in) { |
|
158 memcpy(state->x.next, strm->next_in, strm->avail_in); |
|
159 state->x.have = strm->avail_in; |
|
160 strm->avail_in = 0; |
|
161 } |
|
162 state->how = COPY; |
|
163 state->direct = 1; |
|
164 return 0; |
|
165 } |
|
166 |
|
167 /* Decompress from input to the provided next_out and avail_out in the state. |
|
168 On return, state->x.have and state->x.next point to the just decompressed |
|
169 data. If the gzip stream completes, state->how is reset to LOOK to look for |
|
170 the next gzip stream or raw data, once state->x.have is depleted. Returns 0 |
|
171 on success, -1 on failure. */ |
|
172 local int gz_decomp(state) |
|
173 gz_statep state; |
|
174 { |
|
175 int ret = Z_OK; |
|
176 unsigned had; |
|
177 z_streamp strm = &(state->strm); |
|
178 |
|
179 /* fill output buffer up to end of deflate stream */ |
|
180 had = strm->avail_out; |
|
181 do { |
|
182 /* get more input for inflate() */ |
|
183 if (strm->avail_in == 0 && gz_avail(state) == -1) |
|
184 return -1; |
|
185 if (strm->avail_in == 0) { |
|
186 gz_error(state, Z_BUF_ERROR, "unexpected end of file"); |
|
187 break; |
|
188 } |
|
189 |
|
190 /* decompress and handle errors */ |
|
191 ret = inflate(strm, Z_NO_FLUSH); |
|
192 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { |
|
193 gz_error(state, Z_STREAM_ERROR, |
|
194 "internal error: inflate stream corrupt"); |
|
195 return -1; |
|
196 } |
|
197 if (ret == Z_MEM_ERROR) { |
|
198 gz_error(state, Z_MEM_ERROR, "out of memory"); |
|
199 return -1; |
|
200 } |
|
201 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ |
|
202 gz_error(state, Z_DATA_ERROR, |
|
203 strm->msg == NULL ? "compressed data error" : strm->msg); |
|
204 return -1; |
|
205 } |
|
206 } while (strm->avail_out && ret != Z_STREAM_END); |
|
207 |
|
208 /* update available output */ |
|
209 state->x.have = had - strm->avail_out; |
|
210 state->x.next = strm->next_out - state->x.have; |
|
211 |
|
212 /* if the gzip stream completed successfully, look for another */ |
|
213 if (ret == Z_STREAM_END) |
|
214 state->how = LOOK; |
|
215 |
|
216 /* good decompression */ |
|
217 return 0; |
|
218 } |
|
219 |
|
220 /* Fetch data and put it in the output buffer. Assumes state->x.have is 0. |
|
221 Data is either copied from the input file or decompressed from the input |
|
222 file depending on state->how. If state->how is LOOK, then a gzip header is |
|
223 looked for to determine whether to copy or decompress. Returns -1 on error, |
|
224 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the |
|
225 end of the input file has been reached and all data has been processed. */ |
|
226 local int gz_fetch(state) |
|
227 gz_statep state; |
|
228 { |
|
229 z_streamp strm = &(state->strm); |
|
230 |
|
231 do { |
|
232 switch(state->how) { |
|
233 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ |
|
234 if (gz_look(state) == -1) |
|
235 return -1; |
|
236 if (state->how == LOOK) |
|
237 return 0; |
|
238 break; |
|
239 case COPY: /* -> COPY */ |
|
240 if (gz_load(state, state->out, state->size << 1, &(state->x.have)) |
|
241 == -1) |
|
242 return -1; |
|
243 state->x.next = state->out; |
|
244 return 0; |
|
245 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ |
|
246 strm->avail_out = state->size << 1; |
|
247 strm->next_out = state->out; |
|
248 if (gz_decomp(state) == -1) |
|
249 return -1; |
|
250 } |
|
251 } while (state->x.have == 0 && (!state->eof || strm->avail_in)); |
|
252 return 0; |
|
253 } |
|
254 |
|
255 /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ |
|
256 local int gz_skip(state, len) |
|
257 gz_statep state; |
|
258 z_off64_t len; |
|
259 { |
|
260 unsigned n; |
|
261 |
|
262 /* skip over len bytes or reach end-of-file, whichever comes first */ |
|
263 while (len) |
|
264 /* skip over whatever is in output buffer */ |
|
265 if (state->x.have) { |
|
266 n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? |
|
267 (unsigned)len : state->x.have; |
|
268 state->x.have -= n; |
|
269 state->x.next += n; |
|
270 state->x.pos += n; |
|
271 len -= n; |
|
272 } |
|
273 |
|
274 /* output buffer empty -- return if we're at the end of the input */ |
|
275 else if (state->eof && state->strm.avail_in == 0) |
|
276 break; |
|
277 |
|
278 /* need more data to skip -- load up output buffer */ |
|
279 else { |
|
280 /* get more output, looking for header if required */ |
|
281 if (gz_fetch(state) == -1) |
|
282 return -1; |
|
283 } |
|
284 return 0; |
|
285 } |
|
286 |
|
287 /* -- see zlib.h -- */ |
|
288 int ZEXPORT gzread(file, buf, len) |
|
289 gzFile file; |
|
290 voidp buf; |
|
291 unsigned len; |
|
292 { |
|
293 unsigned got, n; |
|
294 gz_statep state; |
|
295 z_streamp strm; |
|
296 |
|
297 /* get internal structure */ |
|
298 if (file == NULL) |
|
299 return -1; |
|
300 state = (gz_statep)file; |
|
301 strm = &(state->strm); |
|
302 |
|
303 /* check that we're reading and that there's no (serious) error */ |
|
304 if (state->mode != GZ_READ || |
|
305 (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
|
306 return -1; |
|
307 |
|
308 /* since an int is returned, make sure len fits in one, otherwise return |
|
309 with an error (this avoids the flaw in the interface) */ |
|
310 if ((int)len < 0) { |
|
311 gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); |
|
312 return -1; |
|
313 } |
|
314 |
|
315 /* if len is zero, avoid unnecessary operations */ |
|
316 if (len == 0) |
|
317 return 0; |
|
318 |
|
319 /* process a skip request */ |
|
320 if (state->seek) { |
|
321 state->seek = 0; |
|
322 if (gz_skip(state, state->skip) == -1) |
|
323 return -1; |
|
324 } |
|
325 |
|
326 /* get len bytes to buf, or less than len if at the end */ |
|
327 got = 0; |
|
328 do { |
|
329 /* first just try copying data from the output buffer */ |
|
330 if (state->x.have) { |
|
331 n = state->x.have > len ? len : state->x.have; |
|
332 memcpy(buf, state->x.next, n); |
|
333 state->x.next += n; |
|
334 state->x.have -= n; |
|
335 } |
|
336 |
|
337 /* output buffer empty -- return if we're at the end of the input */ |
|
338 else if (state->eof && strm->avail_in == 0) { |
|
339 state->past = 1; /* tried to read past end */ |
|
340 break; |
|
341 } |
|
342 |
|
343 /* need output data -- for small len or new stream load up our output |
|
344 buffer */ |
|
345 else if (state->how == LOOK || len < (state->size << 1)) { |
|
346 /* get more output, looking for header if required */ |
|
347 if (gz_fetch(state) == -1) |
|
348 return -1; |
|
349 continue; /* no progress yet -- go back to copy above */ |
|
350 /* the copy above assures that we will leave with space in the |
|
351 output buffer, allowing at least one gzungetc() to succeed */ |
|
352 } |
|
353 |
|
354 /* large len -- read directly into user buffer */ |
|
355 else if (state->how == COPY) { /* read directly */ |
|
356 if (gz_load(state, (unsigned char *)buf, len, &n) == -1) |
|
357 return -1; |
|
358 } |
|
359 |
|
360 /* large len -- decompress directly into user buffer */ |
|
361 else { /* state->how == GZIP */ |
|
362 strm->avail_out = len; |
|
363 strm->next_out = (unsigned char *)buf; |
|
364 if (gz_decomp(state) == -1) |
|
365 return -1; |
|
366 n = state->x.have; |
|
367 state->x.have = 0; |
|
368 } |
|
369 |
|
370 /* update progress */ |
|
371 len -= n; |
|
372 buf = (char *)buf + n; |
|
373 got += n; |
|
374 state->x.pos += n; |
|
375 } while (len); |
|
376 |
|
377 /* return number of bytes read into user buffer (will fit in int) */ |
|
378 return (int)got; |
|
379 } |
|
380 |
|
381 /* -- see zlib.h -- */ |
|
382 #ifdef Z_PREFIX_SET |
|
383 # undef z_gzgetc |
|
384 #else |
|
385 # undef gzgetc |
|
386 #endif |
|
387 int ZEXPORT gzgetc(file) |
|
388 gzFile file; |
|
389 { |
|
390 int ret; |
|
391 unsigned char buf[1]; |
|
392 gz_statep state; |
|
393 |
|
394 /* get internal structure */ |
|
395 if (file == NULL) |
|
396 return -1; |
|
397 state = (gz_statep)file; |
|
398 |
|
399 /* check that we're reading and that there's no (serious) error */ |
|
400 if (state->mode != GZ_READ || |
|
401 (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
|
402 return -1; |
|
403 |
|
404 /* try output buffer (no need to check for skip request) */ |
|
405 if (state->x.have) { |
|
406 state->x.have--; |
|
407 state->x.pos++; |
|
408 return *(state->x.next)++; |
|
409 } |
|
410 |
|
411 /* nothing there -- try gzread() */ |
|
412 ret = gzread(file, buf, 1); |
|
413 return ret < 1 ? -1 : buf[0]; |
|
414 } |
|
415 |
|
416 int ZEXPORT gzgetc_(file) |
|
417 gzFile file; |
|
418 { |
|
419 return gzgetc(file); |
|
420 } |
|
421 |
|
422 /* -- see zlib.h -- */ |
|
423 int ZEXPORT gzungetc(c, file) |
|
424 int c; |
|
425 gzFile file; |
|
426 { |
|
427 gz_statep state; |
|
428 |
|
429 /* get internal structure */ |
|
430 if (file == NULL) |
|
431 return -1; |
|
432 state = (gz_statep)file; |
|
433 |
|
434 /* check that we're reading and that there's no (serious) error */ |
|
435 if (state->mode != GZ_READ || |
|
436 (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
|
437 return -1; |
|
438 |
|
439 /* process a skip request */ |
|
440 if (state->seek) { |
|
441 state->seek = 0; |
|
442 if (gz_skip(state, state->skip) == -1) |
|
443 return -1; |
|
444 } |
|
445 |
|
446 /* can't push EOF */ |
|
447 if (c < 0) |
|
448 return -1; |
|
449 |
|
450 /* if output buffer empty, put byte at end (allows more pushing) */ |
|
451 if (state->x.have == 0) { |
|
452 state->x.have = 1; |
|
453 state->x.next = state->out + (state->size << 1) - 1; |
|
454 state->x.next[0] = c; |
|
455 state->x.pos--; |
|
456 state->past = 0; |
|
457 return c; |
|
458 } |
|
459 |
|
460 /* if no room, give up (must have already done a gzungetc()) */ |
|
461 if (state->x.have == (state->size << 1)) { |
|
462 gz_error(state, Z_DATA_ERROR, "out of room to push characters"); |
|
463 return -1; |
|
464 } |
|
465 |
|
466 /* slide output data if needed and insert byte before existing data */ |
|
467 if (state->x.next == state->out) { |
|
468 unsigned char *src = state->out + state->x.have; |
|
469 unsigned char *dest = state->out + (state->size << 1); |
|
470 while (src > state->out) |
|
471 *--dest = *--src; |
|
472 state->x.next = dest; |
|
473 } |
|
474 state->x.have++; |
|
475 state->x.next--; |
|
476 state->x.next[0] = c; |
|
477 state->x.pos--; |
|
478 state->past = 0; |
|
479 return c; |
|
480 } |
|
481 |
|
482 /* -- see zlib.h -- */ |
|
483 char * ZEXPORT gzgets(file, buf, len) |
|
484 gzFile file; |
|
485 char *buf; |
|
486 int len; |
|
487 { |
|
488 unsigned left, n; |
|
489 char *str; |
|
490 unsigned char *eol; |
|
491 gz_statep state; |
|
492 |
|
493 /* check parameters and get internal structure */ |
|
494 if (file == NULL || buf == NULL || len < 1) |
|
495 return NULL; |
|
496 state = (gz_statep)file; |
|
497 |
|
498 /* check that we're reading and that there's no (serious) error */ |
|
499 if (state->mode != GZ_READ || |
|
500 (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
|
501 return NULL; |
|
502 |
|
503 /* process a skip request */ |
|
504 if (state->seek) { |
|
505 state->seek = 0; |
|
506 if (gz_skip(state, state->skip) == -1) |
|
507 return NULL; |
|
508 } |
|
509 |
|
510 /* copy output bytes up to new line or len - 1, whichever comes first -- |
|
511 append a terminating zero to the string (we don't check for a zero in |
|
512 the contents, let the user worry about that) */ |
|
513 str = buf; |
|
514 left = (unsigned)len - 1; |
|
515 if (left) do { |
|
516 /* assure that something is in the output buffer */ |
|
517 if (state->x.have == 0 && gz_fetch(state) == -1) |
|
518 return NULL; /* error */ |
|
519 if (state->x.have == 0) { /* end of file */ |
|
520 state->past = 1; /* read past end */ |
|
521 break; /* return what we have */ |
|
522 } |
|
523 |
|
524 /* look for end-of-line in current output buffer */ |
|
525 n = state->x.have > left ? left : state->x.have; |
|
526 eol = (unsigned char *)memchr(state->x.next, '\n', n); |
|
527 if (eol != NULL) |
|
528 n = (unsigned)(eol - state->x.next) + 1; |
|
529 |
|
530 /* copy through end-of-line, or remainder if not found */ |
|
531 memcpy(buf, state->x.next, n); |
|
532 state->x.have -= n; |
|
533 state->x.next += n; |
|
534 state->x.pos += n; |
|
535 left -= n; |
|
536 buf += n; |
|
537 } while (left && eol == NULL); |
|
538 |
|
539 /* return terminated string, or if nothing, end of file */ |
|
540 if (buf == str) |
|
541 return NULL; |
|
542 buf[0] = 0; |
|
543 return str; |
|
544 } |
|
545 |
|
546 /* -- see zlib.h -- */ |
|
547 int ZEXPORT gzdirect(file) |
|
548 gzFile file; |
|
549 { |
|
550 gz_statep state; |
|
551 |
|
552 /* get internal structure */ |
|
553 if (file == NULL) |
|
554 return 0; |
|
555 state = (gz_statep)file; |
|
556 |
|
557 /* if the state is not known, but we can find out, then do so (this is |
|
558 mainly for right after a gzopen() or gzdopen()) */ |
|
559 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) |
|
560 (void)gz_look(state); |
|
561 |
|
562 /* return 1 if transparent, 0 if processing a gzip stream */ |
|
563 return state->direct; |
|
564 } |
|
565 |
|
566 /* -- see zlib.h -- */ |
|
567 int ZEXPORT gzclose_r(file) |
|
568 gzFile file; |
|
569 { |
|
570 int ret, err; |
|
571 gz_statep state; |
|
572 |
|
573 /* get internal structure */ |
|
574 if (file == NULL) |
|
575 return Z_STREAM_ERROR; |
|
576 state = (gz_statep)file; |
|
577 |
|
578 /* check that we're reading */ |
|
579 if (state->mode != GZ_READ) |
|
580 return Z_STREAM_ERROR; |
|
581 |
|
582 /* free memory and close file */ |
|
583 if (state->size) { |
|
584 inflateEnd(&(state->strm)); |
|
585 free(state->out); |
|
586 free(state->in); |
|
587 } |
|
588 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; |
|
589 gz_error(state, Z_OK, NULL); |
|
590 free(state->path); |
|
591 ret = close(state->fd); |
|
592 free(state); |
|
593 return ret ? Z_ERRNO : err; |
|
594 } |