extensions/spellcheck/hunspell/src/hunzip.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/spellcheck/hunspell/src/hunzip.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,227 @@
     1.4 +/******* BEGIN LICENSE BLOCK *******
     1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     1.6 + * 
     1.7 + * The contents of this file are subject to the Mozilla Public License Version
     1.8 + * 1.1 (the "License"); you may not use this file except in compliance with
     1.9 + * the License. You may obtain a copy of the License at
    1.10 + * http://www.mozilla.org/MPL/
    1.11 + * 
    1.12 + * Software distributed under the License is distributed on an "AS IS" basis,
    1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    1.14 + * for the specific language governing rights and limitations under the
    1.15 + * License.
    1.16 + * 
    1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers
    1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    1.20 + * 
    1.21 + * Contributor(s): László Németh (nemethl@gyorsposta.hu)
    1.22 + *                 Caolan McNamara (caolanm@redhat.com)
    1.23 + * 
    1.24 + * Alternatively, the contents of this file may be used under the terms of
    1.25 + * either the GNU General Public License Version 2 or later (the "GPL"), or
    1.26 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    1.27 + * in which case the provisions of the GPL or the LGPL are applicable instead
    1.28 + * of those above. If you wish to allow use of your version of this file only
    1.29 + * under the terms of either the GPL or the LGPL, and not to allow others to
    1.30 + * use your version of this file under the terms of the MPL, indicate your
    1.31 + * decision by deleting the provisions above and replace them with the notice
    1.32 + * and other provisions required by the GPL or the LGPL. If you do not delete
    1.33 + * the provisions above, a recipient may use your version of this file under
    1.34 + * the terms of any one of the MPL, the GPL or the LGPL.
    1.35 + *
    1.36 + ******* END LICENSE BLOCK *******/
    1.37 +
    1.38 +#include <stdlib.h> 
    1.39 +#include <string.h>
    1.40 +#include <stdio.h> 
    1.41 +
    1.42 +#include "hunzip.hxx"
    1.43 +
    1.44 +#define CODELEN  65536
    1.45 +#define BASEBITREC 5000
    1.46 +
    1.47 +#define UNCOMPRESSED '\002'
    1.48 +#define MAGIC "hz0"
    1.49 +#define MAGIC_ENCRYPT "hz1"
    1.50 +#define MAGICLEN (sizeof(MAGIC) - 1)
    1.51 +
    1.52 +int Hunzip::fail(const char * err, const char * par) {
    1.53 +    fprintf(stderr, err, par);
    1.54 +    return -1;
    1.55 +}
    1.56 +
    1.57 +Hunzip::Hunzip(const char * file, const char * key) {
    1.58 +    bufsiz = 0;
    1.59 +    lastbit = 0;
    1.60 +    inc = 0;
    1.61 +    outc = 0;
    1.62 +    dec = NULL;
    1.63 +    fin = NULL;
    1.64 +    filename = (char *) malloc(strlen(file) + 1);
    1.65 +    if (filename) strcpy(filename, file);
    1.66 +    if (getcode(key) == -1) bufsiz = -1;
    1.67 +    else bufsiz = getbuf();
    1.68 +}
    1.69 +
    1.70 +int Hunzip::getcode(const char * key) {
    1.71 +    unsigned char c[2];
    1.72 +    int i, j, n, p;
    1.73 +    int allocatedbit = BASEBITREC;
    1.74 +    const char * enc = key;
    1.75 +
    1.76 +    if (!filename) return -1;
    1.77 +
    1.78 +    fin = fopen(filename, "rb");
    1.79 +    if (!fin) return -1;
    1.80 +
    1.81 +    // read magic number
    1.82 +    if ((fread(in, 1, 3, fin) < MAGICLEN)
    1.83 +        || !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
    1.84 +                strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
    1.85 +            return fail(MSG_FORMAT, filename);
    1.86 +    }
    1.87 +
    1.88 +    // check encryption
    1.89 +    if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
    1.90 +        unsigned char cs;
    1.91 +        if (!key) return fail(MSG_KEY, filename);
    1.92 +        if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
    1.93 +        for (cs = 0; *enc; enc++) cs ^= *enc;
    1.94 +        if (cs != c[0]) return fail(MSG_KEY, filename);
    1.95 +        enc = key;
    1.96 +    } else key = NULL;
    1.97 +
    1.98 +    // read record count
    1.99 +    if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
   1.100 +
   1.101 +    if (key) {
   1.102 +        c[0] ^= *enc;
   1.103 +        if (*(++enc) == '\0') enc = key;
   1.104 +        c[1] ^= *enc;
   1.105 +    }        
   1.106 +    
   1.107 +    n = ((int) c[0] << 8) + c[1];
   1.108 +    dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit));
   1.109 +    if (!dec) return fail(MSG_MEMORY, filename);
   1.110 +    dec[0].v[0] = 0;
   1.111 +    dec[0].v[1] = 0;
   1.112 +
   1.113 +    // read codes
   1.114 +    for (i = 0; i < n; i++) {
   1.115 +        unsigned char l;
   1.116 +        if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
   1.117 +        if (key) {
   1.118 +            if (*(++enc) == '\0') enc = key;
   1.119 +            c[0] ^= *enc;
   1.120 +            if (*(++enc) == '\0') enc = key;            
   1.121 +            c[1] ^= *enc;
   1.122 +        }        
   1.123 +        if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
   1.124 +        if (key) {
   1.125 +            if (*(++enc) == '\0') enc = key;
   1.126 +            l ^= *enc;
   1.127 +        }
   1.128 +        if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename);
   1.129 +        if (key) for (j = 0; j <= l/8; j++) {
   1.130 +            if (*(++enc) == '\0') enc = key;
   1.131 +            in[j] ^= *enc;
   1.132 +        }
   1.133 +        p = 0;
   1.134 +        for (j = 0; j < l; j++) {
   1.135 +            int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0;
   1.136 +            int oldp = p;
   1.137 +            p = dec[p].v[b];
   1.138 +            if (p == 0) {
   1.139 +                lastbit++;
   1.140 +                if (lastbit == allocatedbit) {
   1.141 +                    allocatedbit += BASEBITREC;
   1.142 +                    dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit));
   1.143 +                }
   1.144 +                dec[lastbit].v[0] = 0;
   1.145 +                dec[lastbit].v[1] = 0;
   1.146 +                dec[oldp].v[b] = lastbit;
   1.147 +                p = lastbit;
   1.148 +            }
   1.149 +        }
   1.150 +        dec[p].c[0] = c[0];
   1.151 +        dec[p].c[1] = c[1];
   1.152 +    }
   1.153 +    return 0;
   1.154 +}
   1.155 +
   1.156 +Hunzip::~Hunzip()
   1.157 +{
   1.158 +    if (dec) free(dec);
   1.159 +    if (fin) fclose(fin);
   1.160 +    if (filename) free(filename);
   1.161 +}
   1.162 +
   1.163 +int Hunzip::getbuf() {
   1.164 +    int p = 0;
   1.165 +    int o = 0;
   1.166 +    do {
   1.167 +        if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8;
   1.168 +        for (; inc < inbits; inc++) {
   1.169 +            int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
   1.170 +            int oldp = p;
   1.171 +            p = dec[p].v[b];
   1.172 +            if (p == 0) {
   1.173 +                if (oldp == lastbit) {
   1.174 +                    fclose(fin);
   1.175 +                    fin = NULL;
   1.176 +                    // add last odd byte
   1.177 +                    if (dec[lastbit].c[0]) out[o++]  = dec[lastbit].c[1];
   1.178 +                    return o;
   1.179 +                }
   1.180 +                out[o++] = dec[oldp].c[0];
   1.181 +                out[o++] = dec[oldp].c[1];
   1.182 +                if (o == BUFSIZE) return o;
   1.183 +                p = dec[p].v[b];
   1.184 +            }
   1.185 +        }
   1.186 +        inc = 0;
   1.187 +    } while (inbits == BUFSIZE * 8);
   1.188 +    return fail(MSG_FORMAT, filename);
   1.189 +}
   1.190 +
   1.191 +const char * Hunzip::getline() {
   1.192 +    char linebuf[BUFSIZE];
   1.193 +    int l = 0, eol = 0, left = 0, right = 0;
   1.194 +    if (bufsiz == -1) return NULL;
   1.195 +    while (l < bufsiz && !eol) {
   1.196 +        linebuf[l++] = out[outc];
   1.197 +        switch (out[outc]) {
   1.198 +            case '\t': break;
   1.199 +            case 31: { // escape
   1.200 +                if (++outc == bufsiz) {
   1.201 +                    bufsiz = getbuf();
   1.202 +                    outc = 0;
   1.203 +                }
   1.204 +                linebuf[l - 1] = out[outc];
   1.205 +                break;
   1.206 +            }
   1.207 +            case ' ': break;
   1.208 +            default: if (((unsigned char) out[outc]) < 47) {
   1.209 +                if (out[outc] > 32) {
   1.210 +                    right = out[outc] - 31;
   1.211 +                    if (++outc == bufsiz) {
   1.212 +                        bufsiz = getbuf();
   1.213 +                        outc = 0;
   1.214 +                    }
   1.215 +                }
   1.216 +                if (out[outc] == 30) left = 9; else left = out[outc];
   1.217 +                linebuf[l-1] = '\n';
   1.218 +                eol = 1;
   1.219 +            }
   1.220 +        }
   1.221 +        if (++outc == bufsiz) {
   1.222 +            outc = 0;
   1.223 +            bufsiz = fin ? getbuf(): -1;
   1.224 +        }
   1.225 +    }
   1.226 +    if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
   1.227 +    else linebuf[l] = '\0';
   1.228 +    strcpy(line + left, linebuf);
   1.229 +    return line;
   1.230 +}

mercurial