michael@0: /* gzread.c -- zlib functions for reading gzip files michael@0: * Copyright (C) 2004, 2005, 2010 Mark Adler michael@0: * For conditions of distribution and use, see copyright notice in zlib.h michael@0: */ michael@0: michael@0: #include "gzguts.h" michael@0: michael@0: /* Local functions */ michael@0: local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); michael@0: local int gz_avail OF((gz_statep)); michael@0: local int gz_next4 OF((gz_statep, unsigned long *)); michael@0: local int gz_head OF((gz_statep)); michael@0: local int gz_decomp OF((gz_statep)); michael@0: local int gz_make OF((gz_statep)); michael@0: local int gz_skip OF((gz_statep, z_off64_t)); michael@0: michael@0: /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from michael@0: state->fd, and update state->eof, state->err, and state->msg as appropriate. michael@0: This function needs to loop on read(), since read() is not guaranteed to michael@0: read the number of bytes requested, depending on the type of descriptor. */ michael@0: local int gz_load(state, buf, len, have) michael@0: gz_statep state; michael@0: unsigned char *buf; michael@0: unsigned len; michael@0: unsigned *have; michael@0: { michael@0: int ret; michael@0: michael@0: *have = 0; michael@0: do { michael@0: ret = read(state->fd, buf + *have, len - *have); michael@0: if (ret <= 0) michael@0: break; michael@0: *have += ret; michael@0: } while (*have < len); michael@0: if (ret < 0) { michael@0: gz_error(state, Z_ERRNO, zstrerror()); michael@0: return -1; michael@0: } michael@0: if (ret == 0) michael@0: state->eof = 1; michael@0: return 0; michael@0: } michael@0: michael@0: /* Load up input buffer and set eof flag if last data loaded -- return -1 on michael@0: error, 0 otherwise. Note that the eof flag is set when the end of the input michael@0: file is reached, even though there may be unused data in the buffer. Once michael@0: that data has been used, no more attempts will be made to read the file. michael@0: gz_avail() assumes that strm->avail_in == 0. */ michael@0: local int gz_avail(state) michael@0: gz_statep state; michael@0: { michael@0: z_streamp strm = &(state->strm); michael@0: michael@0: if (state->err != Z_OK) michael@0: return -1; michael@0: if (state->eof == 0) { michael@0: if (gz_load(state, state->in, state->size, michael@0: (unsigned *)&(strm->avail_in)) == -1) michael@0: return -1; michael@0: strm->next_in = state->in; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /* Get next byte from input, or -1 if end or error. */ michael@0: #define NEXT() ((strm->avail_in == 0 && gz_avail(state) == -1) ? -1 : \ michael@0: (strm->avail_in == 0 ? -1 : \ michael@0: (strm->avail_in--, *(strm->next_in)++))) michael@0: michael@0: /* Get a four-byte little-endian integer and return 0 on success and the value michael@0: in *ret. Otherwise -1 is returned and *ret is not modified. */ michael@0: local int gz_next4(state, ret) michael@0: gz_statep state; michael@0: unsigned long *ret; michael@0: { michael@0: int ch; michael@0: unsigned long val; michael@0: z_streamp strm = &(state->strm); michael@0: michael@0: val = NEXT(); michael@0: val += (unsigned)NEXT() << 8; michael@0: val += (unsigned long)NEXT() << 16; michael@0: ch = NEXT(); michael@0: if (ch == -1) michael@0: return -1; michael@0: val += (unsigned long)ch << 24; michael@0: *ret = val; michael@0: return 0; michael@0: } michael@0: michael@0: /* Look for gzip header, set up for inflate or copy. state->have must be zero. michael@0: If this is the first time in, allocate required memory. state->how will be michael@0: left unchanged if there is no more input data available, will be set to COPY michael@0: if there is no gzip header and direct copying will be performed, or it will michael@0: be set to GZIP for decompression, and the gzip header will be skipped so michael@0: that the next available input data is the raw deflate stream. If direct michael@0: copying, then leftover input data from the input buffer will be copied to michael@0: the output buffer. In that case, all further file reads will be directly to michael@0: either the output buffer or a user buffer. If decompressing, the inflate michael@0: state and the check value will be initialized. gz_head() will return 0 on michael@0: success or -1 on failure. Failures may include read errors or gzip header michael@0: errors. */ michael@0: local int gz_head(state) michael@0: gz_statep state; michael@0: { michael@0: z_streamp strm = &(state->strm); michael@0: int flags; michael@0: unsigned len; michael@0: michael@0: /* allocate read buffers and inflate memory */ michael@0: if (state->size == 0) { michael@0: /* allocate buffers */ michael@0: state->in = malloc(state->want); michael@0: state->out = malloc(state->want << 1); michael@0: if (state->in == NULL || state->out == NULL) { michael@0: if (state->out != NULL) michael@0: free(state->out); michael@0: if (state->in != NULL) michael@0: free(state->in); michael@0: gz_error(state, Z_MEM_ERROR, "out of memory"); michael@0: return -1; michael@0: } michael@0: state->size = state->want; michael@0: michael@0: /* allocate inflate memory */ michael@0: state->strm.zalloc = Z_NULL; michael@0: state->strm.zfree = Z_NULL; michael@0: state->strm.opaque = Z_NULL; michael@0: state->strm.avail_in = 0; michael@0: state->strm.next_in = Z_NULL; michael@0: if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */ michael@0: free(state->out); michael@0: free(state->in); michael@0: state->size = 0; michael@0: gz_error(state, Z_MEM_ERROR, "out of memory"); michael@0: return -1; michael@0: } michael@0: } michael@0: michael@0: /* get some data in the input buffer */ michael@0: if (strm->avail_in == 0) { michael@0: if (gz_avail(state) == -1) michael@0: return -1; michael@0: if (strm->avail_in == 0) michael@0: return 0; michael@0: } michael@0: michael@0: /* look for the gzip magic header bytes 31 and 139 */ michael@0: if (strm->next_in[0] == 31) { michael@0: strm->avail_in--; michael@0: strm->next_in++; michael@0: if (strm->avail_in == 0 && gz_avail(state) == -1) michael@0: return -1; michael@0: if (strm->avail_in && strm->next_in[0] == 139) { michael@0: /* we have a gzip header, woo hoo! */ michael@0: strm->avail_in--; michael@0: strm->next_in++; michael@0: michael@0: /* skip rest of header */ michael@0: if (NEXT() != 8) { /* compression method */ michael@0: gz_error(state, Z_DATA_ERROR, "unknown compression method"); michael@0: return -1; michael@0: } michael@0: flags = NEXT(); michael@0: if (flags & 0xe0) { /* reserved flag bits */ michael@0: gz_error(state, Z_DATA_ERROR, "unknown header flags set"); michael@0: return -1; michael@0: } michael@0: NEXT(); /* modification time */ michael@0: NEXT(); michael@0: NEXT(); michael@0: NEXT(); michael@0: NEXT(); /* extra flags */ michael@0: NEXT(); /* operating system */ michael@0: if (flags & 4) { /* extra field */ michael@0: len = (unsigned)NEXT(); michael@0: len += (unsigned)NEXT() << 8; michael@0: while (len--) michael@0: if (NEXT() < 0) michael@0: break; michael@0: } michael@0: if (flags & 8) /* file name */ michael@0: while (NEXT() > 0) michael@0: ; michael@0: if (flags & 16) /* comment */ michael@0: while (NEXT() > 0) michael@0: ; michael@0: if (flags & 2) { /* header crc */ michael@0: NEXT(); michael@0: NEXT(); michael@0: } michael@0: /* an unexpected end of file is not checked for here -- it will be michael@0: noticed on the first request for uncompressed data */ michael@0: michael@0: /* set up for decompression */ michael@0: inflateReset(strm); michael@0: strm->adler = crc32(0L, Z_NULL, 0); michael@0: state->how = GZIP; michael@0: state->direct = 0; michael@0: return 0; michael@0: } michael@0: else { michael@0: /* not a gzip file -- save first byte (31) and fall to raw i/o */ michael@0: state->out[0] = 31; michael@0: state->have = 1; michael@0: } michael@0: } michael@0: michael@0: /* doing raw i/o, save start of raw data for seeking, copy any leftover michael@0: input to output -- this assumes that the output buffer is larger than michael@0: the input buffer, which also assures space for gzungetc() */ michael@0: state->raw = state->pos; michael@0: state->next = state->out; michael@0: if (strm->avail_in) { michael@0: memcpy(state->next + state->have, strm->next_in, strm->avail_in); michael@0: state->have += strm->avail_in; michael@0: strm->avail_in = 0; michael@0: } michael@0: state->how = COPY; michael@0: state->direct = 1; michael@0: return 0; michael@0: } michael@0: michael@0: /* Decompress from input to the provided next_out and avail_out in the state. michael@0: If the end of the compressed data is reached, then verify the gzip trailer michael@0: check value and length (modulo 2^32). state->have and state->next are set michael@0: to point to the just decompressed data, and the crc is updated. If the michael@0: trailer is verified, state->how is reset to LOOK to look for the next gzip michael@0: stream or raw data, once state->have is depleted. Returns 0 on success, -1 michael@0: on failure. Failures may include invalid compressed data or a failed gzip michael@0: trailer verification. */ michael@0: local int gz_decomp(state) michael@0: gz_statep state; michael@0: { michael@0: int ret; michael@0: unsigned had; michael@0: unsigned long crc, len; michael@0: z_streamp strm = &(state->strm); michael@0: michael@0: /* fill output buffer up to end of deflate stream */ michael@0: had = strm->avail_out; michael@0: do { michael@0: /* get more input for inflate() */ michael@0: if (strm->avail_in == 0 && gz_avail(state) == -1) michael@0: return -1; michael@0: if (strm->avail_in == 0) { michael@0: gz_error(state, Z_DATA_ERROR, "unexpected end of file"); michael@0: return -1; michael@0: } michael@0: michael@0: /* decompress and handle errors */ michael@0: ret = inflate(strm, Z_NO_FLUSH); michael@0: if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { michael@0: gz_error(state, Z_STREAM_ERROR, michael@0: "internal error: inflate stream corrupt"); michael@0: return -1; michael@0: } michael@0: if (ret == Z_MEM_ERROR) { michael@0: gz_error(state, Z_MEM_ERROR, "out of memory"); michael@0: return -1; michael@0: } michael@0: if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ michael@0: gz_error(state, Z_DATA_ERROR, michael@0: strm->msg == NULL ? "compressed data error" : strm->msg); michael@0: return -1; michael@0: } michael@0: } while (strm->avail_out && ret != Z_STREAM_END); michael@0: michael@0: /* update available output and crc check value */ michael@0: state->have = had - strm->avail_out; michael@0: state->next = strm->next_out - state->have; michael@0: strm->adler = crc32(strm->adler, state->next, state->have); michael@0: michael@0: /* check gzip trailer if at end of deflate stream */ michael@0: if (ret == Z_STREAM_END) { michael@0: if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) { michael@0: gz_error(state, Z_DATA_ERROR, "unexpected end of file"); michael@0: return -1; michael@0: } michael@0: if (crc != strm->adler) { michael@0: gz_error(state, Z_DATA_ERROR, "incorrect data check"); michael@0: return -1; michael@0: } michael@0: if (len != (strm->total_out & 0xffffffffL)) { michael@0: gz_error(state, Z_DATA_ERROR, "incorrect length check"); michael@0: return -1; michael@0: } michael@0: state->how = LOOK; /* ready for next stream, once have is 0 (leave michael@0: state->direct unchanged to remember how) */ michael@0: } michael@0: michael@0: /* good decompression */ michael@0: return 0; michael@0: } michael@0: michael@0: /* Make data and put in the output buffer. Assumes that state->have == 0. michael@0: Data is either copied from the input file or decompressed from the input michael@0: file depending on state->how. If state->how is LOOK, then a gzip header is michael@0: looked for (and skipped if found) to determine wither to copy or decompress. michael@0: Returns -1 on error, otherwise 0. gz_make() will leave state->have as COPY michael@0: or GZIP unless the end of the input file has been reached and all data has michael@0: been processed. */ michael@0: local int gz_make(state) michael@0: gz_statep state; michael@0: { michael@0: z_streamp strm = &(state->strm); michael@0: michael@0: if (state->how == LOOK) { /* look for gzip header */ michael@0: if (gz_head(state) == -1) michael@0: return -1; michael@0: if (state->have) /* got some data from gz_head() */ michael@0: return 0; michael@0: } michael@0: if (state->how == COPY) { /* straight copy */ michael@0: if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1) michael@0: return -1; michael@0: state->next = state->out; michael@0: } michael@0: else if (state->how == GZIP) { /* decompress */ michael@0: strm->avail_out = state->size << 1; michael@0: strm->next_out = state->out; michael@0: if (gz_decomp(state) == -1) michael@0: return -1; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ michael@0: local int gz_skip(state, len) michael@0: gz_statep state; michael@0: z_off64_t len; michael@0: { michael@0: unsigned n; michael@0: michael@0: /* skip over len bytes or reach end-of-file, whichever comes first */ michael@0: while (len) michael@0: /* skip over whatever is in output buffer */ michael@0: if (state->have) { michael@0: n = GT_OFF(state->have) || (z_off64_t)state->have > len ? michael@0: (unsigned)len : state->have; michael@0: state->have -= n; michael@0: state->next += n; michael@0: state->pos += n; michael@0: len -= n; michael@0: } michael@0: michael@0: /* output buffer empty -- return if we're at the end of the input */ michael@0: else if (state->eof && state->strm.avail_in == 0) michael@0: break; michael@0: michael@0: /* need more data to skip -- load up output buffer */ michael@0: else { michael@0: /* get more output, looking for header if required */ michael@0: if (gz_make(state) == -1) michael@0: return -1; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /* -- see zlib.h -- */ michael@0: int ZEXPORT gzread(file, buf, len) michael@0: gzFile file; michael@0: voidp buf; michael@0: unsigned len; michael@0: { michael@0: unsigned got, n; michael@0: gz_statep state; michael@0: z_streamp strm; michael@0: michael@0: /* get internal structure */ michael@0: if (file == NULL) michael@0: return -1; michael@0: state = (gz_statep)file; michael@0: strm = &(state->strm); michael@0: michael@0: /* check that we're reading and that there's no error */ michael@0: if (state->mode != GZ_READ || state->err != Z_OK) michael@0: return -1; michael@0: michael@0: /* since an int is returned, make sure len fits in one, otherwise return michael@0: with an error (this avoids the flaw in the interface) */ michael@0: if ((int)len < 0) { michael@0: gz_error(state, Z_BUF_ERROR, "requested length does not fit in int"); michael@0: return -1; michael@0: } michael@0: michael@0: /* if len is zero, avoid unnecessary operations */ michael@0: if (len == 0) michael@0: return 0; michael@0: michael@0: /* process a skip request */ michael@0: if (state->seek) { michael@0: state->seek = 0; michael@0: if (gz_skip(state, state->skip) == -1) michael@0: return -1; michael@0: } michael@0: michael@0: /* get len bytes to buf, or less than len if at the end */ michael@0: got = 0; michael@0: do { michael@0: /* first just try copying data from the output buffer */ michael@0: if (state->have) { michael@0: n = state->have > len ? len : state->have; michael@0: memcpy(buf, state->next, n); michael@0: state->next += n; michael@0: state->have -= n; michael@0: } michael@0: michael@0: /* output buffer empty -- return if we're at the end of the input */ michael@0: else if (state->eof && strm->avail_in == 0) michael@0: break; michael@0: michael@0: /* need output data -- for small len or new stream load up our output michael@0: buffer */ michael@0: else if (state->how == LOOK || len < (state->size << 1)) { michael@0: /* get more output, looking for header if required */ michael@0: if (gz_make(state) == -1) michael@0: return -1; michael@0: continue; /* no progress yet -- go back to memcpy() above */ michael@0: /* the copy above assures that we will leave with space in the michael@0: output buffer, allowing at least one gzungetc() to succeed */ michael@0: } michael@0: michael@0: /* large len -- read directly into user buffer */ michael@0: else if (state->how == COPY) { /* read directly */ michael@0: if (gz_load(state, buf, len, &n) == -1) michael@0: return -1; michael@0: } michael@0: michael@0: /* large len -- decompress directly into user buffer */ michael@0: else { /* state->how == GZIP */ michael@0: strm->avail_out = len; michael@0: strm->next_out = buf; michael@0: if (gz_decomp(state) == -1) michael@0: return -1; michael@0: n = state->have; michael@0: state->have = 0; michael@0: } michael@0: michael@0: /* update progress */ michael@0: len -= n; michael@0: buf = (char *)buf + n; michael@0: got += n; michael@0: state->pos += n; michael@0: } while (len); michael@0: michael@0: /* return number of bytes read into user buffer (will fit in int) */ michael@0: return (int)got; michael@0: } michael@0: michael@0: /* -- see zlib.h -- */ michael@0: int ZEXPORT gzgetc(file) michael@0: gzFile file; michael@0: { michael@0: int ret; michael@0: unsigned char buf[1]; michael@0: gz_statep state; michael@0: michael@0: /* get internal structure */ michael@0: if (file == NULL) michael@0: return -1; michael@0: state = (gz_statep)file; michael@0: michael@0: /* check that we're reading and that there's no error */ michael@0: if (state->mode != GZ_READ || state->err != Z_OK) michael@0: return -1; michael@0: michael@0: /* try output buffer (no need to check for skip request) */ michael@0: if (state->have) { michael@0: state->have--; michael@0: state->pos++; michael@0: return *(state->next)++; michael@0: } michael@0: michael@0: /* nothing there -- try gzread() */ michael@0: ret = gzread(file, buf, 1); michael@0: return ret < 1 ? -1 : buf[0]; michael@0: } michael@0: michael@0: /* -- see zlib.h -- */ michael@0: int ZEXPORT gzungetc(c, file) michael@0: int c; michael@0: gzFile file; michael@0: { michael@0: gz_statep state; michael@0: michael@0: /* get internal structure */ michael@0: if (file == NULL) michael@0: return -1; michael@0: state = (gz_statep)file; michael@0: michael@0: /* check that we're reading and that there's no error */ michael@0: if (state->mode != GZ_READ || state->err != Z_OK) michael@0: return -1; michael@0: michael@0: /* process a skip request */ michael@0: if (state->seek) { michael@0: state->seek = 0; michael@0: if (gz_skip(state, state->skip) == -1) michael@0: return -1; michael@0: } michael@0: michael@0: /* can't push EOF */ michael@0: if (c < 0) michael@0: return -1; michael@0: michael@0: /* if output buffer empty, put byte at end (allows more pushing) */ michael@0: if (state->have == 0) { michael@0: state->have = 1; michael@0: state->next = state->out + (state->size << 1) - 1; michael@0: state->next[0] = c; michael@0: state->pos--; michael@0: return c; michael@0: } michael@0: michael@0: /* if no room, give up (must have already done a gzungetc()) */ michael@0: if (state->have == (state->size << 1)) { michael@0: gz_error(state, Z_BUF_ERROR, "out of room to push characters"); michael@0: return -1; michael@0: } michael@0: michael@0: /* slide output data if needed and insert byte before existing data */ michael@0: if (state->next == state->out) { michael@0: unsigned char *src = state->out + state->have; michael@0: unsigned char *dest = state->out + (state->size << 1); michael@0: while (src > state->out) michael@0: *--dest = *--src; michael@0: state->next = dest; michael@0: } michael@0: state->have++; michael@0: state->next--; michael@0: state->next[0] = c; michael@0: state->pos--; michael@0: return c; michael@0: } michael@0: michael@0: /* -- see zlib.h -- */ michael@0: char * ZEXPORT gzgets(file, buf, len) michael@0: gzFile file; michael@0: char *buf; michael@0: int len; michael@0: { michael@0: unsigned left, n; michael@0: char *str; michael@0: unsigned char *eol; michael@0: gz_statep state; michael@0: michael@0: /* check parameters and get internal structure */ michael@0: if (file == NULL || buf == NULL || len < 1) michael@0: return NULL; michael@0: state = (gz_statep)file; michael@0: michael@0: /* check that we're reading and that there's no error */ michael@0: if (state->mode != GZ_READ || state->err != Z_OK) michael@0: return NULL; michael@0: michael@0: /* process a skip request */ michael@0: if (state->seek) { michael@0: state->seek = 0; michael@0: if (gz_skip(state, state->skip) == -1) michael@0: return NULL; michael@0: } michael@0: michael@0: /* copy output bytes up to new line or len - 1, whichever comes first -- michael@0: append a terminating zero to the string (we don't check for a zero in michael@0: the contents, let the user worry about that) */ michael@0: str = buf; michael@0: left = (unsigned)len - 1; michael@0: if (left) do { michael@0: /* assure that something is in the output buffer */ michael@0: if (state->have == 0) { michael@0: if (gz_make(state) == -1) michael@0: return NULL; /* error */ michael@0: if (state->have == 0) { /* end of file */ michael@0: if (buf == str) /* got bupkus */ michael@0: return NULL; michael@0: break; /* got something -- return it */ michael@0: } michael@0: } michael@0: michael@0: /* look for end-of-line in current output buffer */ michael@0: n = state->have > left ? left : state->have; michael@0: eol = memchr(state->next, '\n', n); michael@0: if (eol != NULL) michael@0: n = (unsigned)(eol - state->next) + 1; michael@0: michael@0: /* copy through end-of-line, or remainder if not found */ michael@0: memcpy(buf, state->next, n); michael@0: state->have -= n; michael@0: state->next += n; michael@0: state->pos += n; michael@0: left -= n; michael@0: buf += n; michael@0: } while (left && eol == NULL); michael@0: michael@0: /* found end-of-line or out of space -- terminate string and return it */ michael@0: buf[0] = 0; michael@0: return str; michael@0: } michael@0: michael@0: /* -- see zlib.h -- */ michael@0: int ZEXPORT gzdirect(file) michael@0: gzFile file; michael@0: { michael@0: gz_statep state; michael@0: michael@0: /* get internal structure */ michael@0: if (file == NULL) michael@0: return 0; michael@0: state = (gz_statep)file; michael@0: michael@0: /* check that we're reading */ michael@0: if (state->mode != GZ_READ) michael@0: return 0; michael@0: michael@0: /* if the state is not known, but we can find out, then do so (this is michael@0: mainly for right after a gzopen() or gzdopen()) */ michael@0: if (state->how == LOOK && state->have == 0) michael@0: (void)gz_head(state); michael@0: michael@0: /* return 1 if reading direct, 0 if decompressing a gzip stream */ michael@0: return state->direct; michael@0: } michael@0: michael@0: /* -- see zlib.h -- */ michael@0: int ZEXPORT gzclose_r(file) michael@0: gzFile file; michael@0: { michael@0: int ret; michael@0: gz_statep state; michael@0: michael@0: /* get internal structure */ michael@0: if (file == NULL) michael@0: return Z_STREAM_ERROR; michael@0: state = (gz_statep)file; michael@0: michael@0: /* check that we're reading */ michael@0: if (state->mode != GZ_READ) michael@0: return Z_STREAM_ERROR; michael@0: michael@0: /* free memory and close file */ michael@0: if (state->size) { michael@0: inflateEnd(&(state->strm)); michael@0: free(state->out); michael@0: free(state->in); michael@0: } michael@0: gz_error(state, Z_OK, NULL); michael@0: free(state->path); michael@0: ret = close(state->fd); michael@0: free(state); michael@0: return ret ? Z_ERRNO : Z_OK; michael@0: }