mfbt/lz4.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/mfbt/lz4.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1162 @@
     1.4 +/*
     1.5 +   LZ4 - Fast LZ compression algorithm
     1.6 +   Copyright (C) 2011-2014, Yann Collet.
     1.7 +   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
     1.8 +
     1.9 +   Redistribution and use in source and binary forms, with or without
    1.10 +   modification, are permitted provided that the following conditions are
    1.11 +   met:
    1.12 +
    1.13 +       * Redistributions of source code must retain the above copyright
    1.14 +   notice, this list of conditions and the following disclaimer.
    1.15 +       * Redistributions in binary form must reproduce the above
    1.16 +   copyright notice, this list of conditions and the following disclaimer
    1.17 +   in the documentation and/or other materials provided with the
    1.18 +   distribution.
    1.19 +
    1.20 +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    1.21 +   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    1.22 +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    1.23 +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    1.24 +   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    1.25 +   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    1.26 +   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    1.27 +   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    1.28 +   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.29 +   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.30 +   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.31 +
    1.32 +   You can contact the author at :
    1.33 +   - LZ4 source repository : http://code.google.com/p/lz4/
    1.34 +   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
    1.35 +*/
    1.36 +
    1.37 +/**************************************
    1.38 +   Tuning parameters
    1.39 +**************************************/
    1.40 +/*
    1.41 + * HEAPMODE :
    1.42 + * Select how default compression functions will allocate memory for their hash table,
    1.43 + * in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)).
    1.44 + */
    1.45 +#define HEAPMODE 0
    1.46 +
    1.47 +
    1.48 +/**************************************
    1.49 +   CPU Feature Detection
    1.50 +**************************************/
    1.51 +/* 32 or 64 bits ? */
    1.52 +#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
    1.53 +  || defined(__powerpc64__) || defined(__powerpc64le__) \
    1.54 +  || defined(__ppc64__) || defined(__ppc64le__) \
    1.55 +  || defined(__PPC64__) || defined(__PPC64LE__) \
    1.56 +  || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) )   /* Detects 64 bits mode */
    1.57 +#  define LZ4_ARCH64 1
    1.58 +#else
    1.59 +#  define LZ4_ARCH64 0
    1.60 +#endif
    1.61 +
    1.62 +/*
    1.63 + * Little Endian or Big Endian ?
 * Overwrite the #define below if you know your architecture's endianness
    1.65 + */
    1.66 +#include <stdlib.h>   /* Apparently required to detect endianess */
    1.67 +#if defined (__GLIBC__)
    1.68 +#  include <endian.h>
    1.69 +#  if (__BYTE_ORDER == __BIG_ENDIAN)
    1.70 +#     define LZ4_BIG_ENDIAN 1
    1.71 +#  endif
    1.72 +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
    1.73 +#  define LZ4_BIG_ENDIAN 1
    1.74 +#elif defined(__sparc) || defined(__sparc__) \
    1.75 +   || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
    1.76 +   || defined(__hpux)  || defined(__hppa) \
    1.77 +   || defined(_MIPSEB) || defined(__s390__)
    1.78 +#  define LZ4_BIG_ENDIAN 1
    1.79 +#else
    1.80 +/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */
    1.81 +#endif
    1.82 +
    1.83 +/*
 * Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
 * For other CPUs, such as ARM, the compiler may be more cautious and insert extra code to guarantee aligned accesses.
 * If you know your target CPU supports unaligned memory access, you may force this option manually to improve performance.
    1.87 + */
    1.88 +#if defined(__ARM_FEATURE_UNALIGNED)
    1.89 +#  define LZ4_FORCE_UNALIGNED_ACCESS 1
    1.90 +#endif
    1.91 +
    1.92 +/* Define this parameter if your target system or compiler does not support hardware bit count */
    1.93 +#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
    1.94 +#  define LZ4_FORCE_SW_BITCOUNT
    1.95 +#endif
    1.96 +
    1.97 +/*
    1.98 + * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
    1.99 + * This option may provide a small boost to performance for some big endian cpu, although probably modest.
   1.100 + * You may set this option to 1 if data will remain within closed environment.
   1.101 + * This option is useless on Little_Endian CPU (such as x86)
   1.102 + */
   1.103 +
   1.104 +/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
   1.105 +
   1.106 +
   1.107 +/**************************************
   1.108 + Compiler Options
   1.109 +**************************************/
   1.110 +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
   1.111 +/* "restrict" is a known keyword */
   1.112 +#else
   1.113 +#  define restrict /* Disable restrict */
   1.114 +#endif
   1.115 +
   1.116 +#ifdef _MSC_VER    /* Visual Studio */
   1.117 +#  define FORCE_INLINE static __forceinline
   1.118 +#  include <intrin.h>                    /* For Visual 2005 */
   1.119 +#  if LZ4_ARCH64   /* 64-bits */
   1.120 +#    pragma intrinsic(_BitScanForward64) /* For Visual 2005 */
   1.121 +#    pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */
   1.122 +#  else            /* 32-bits */
   1.123 +#    pragma intrinsic(_BitScanForward)   /* For Visual 2005 */
   1.124 +#    pragma intrinsic(_BitScanReverse)   /* For Visual 2005 */
   1.125 +#  endif
   1.126 +#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
   1.127 +#else
   1.128 +#  ifdef __GNUC__
   1.129 +#    define FORCE_INLINE static inline __attribute__((always_inline))
   1.130 +#  else
   1.131 +#    define FORCE_INLINE static inline
   1.132 +#  endif
   1.133 +#endif
   1.134 +
   1.135 +#ifdef _MSC_VER  /* Visual Studio */
   1.136 +#  define lz4_bswap16(x) _byteswap_ushort(x)
   1.137 +#else
   1.138 +#  define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
   1.139 +#endif
   1.140 +
   1.141 +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
   1.142 +
   1.143 +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
   1.144 +#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
   1.145 +#else
   1.146 +#  define expect(expr,value)    (expr)
   1.147 +#endif
   1.148 +
   1.149 +#define likely(expr)     expect((expr) != 0, 1)
   1.150 +#define unlikely(expr)   expect((expr) != 0, 0)
   1.151 +
   1.152 +
   1.153 +/**************************************
   1.154 +   Memory routines
   1.155 +**************************************/
   1.156 +#include <stdlib.h>   /* malloc, calloc, free */
   1.157 +#define ALLOCATOR(n,s) calloc(n,s)
   1.158 +#define FREEMEM        free
   1.159 +#include <string.h>   /* memset, memcpy */
   1.160 +#define MEM_INIT       memset
   1.161 +
   1.162 +
   1.163 +/**************************************
   1.164 +   Includes
   1.165 +**************************************/
   1.166 +#include "lz4.h"
   1.167 +
   1.168 +
   1.169 +/**************************************
   1.170 +   Basic Types
   1.171 +**************************************/
   1.172 +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
   1.173 +# include <stdint.h>
   1.174 +  typedef  uint8_t BYTE;
   1.175 +  typedef uint16_t U16;
   1.176 +  typedef uint32_t U32;
   1.177 +  typedef  int32_t S32;
   1.178 +  typedef uint64_t U64;
   1.179 +#else
   1.180 +  typedef unsigned char       BYTE;
   1.181 +  typedef unsigned short      U16;
   1.182 +  typedef unsigned int        U32;
   1.183 +  typedef   signed int        S32;
   1.184 +  typedef unsigned long long  U64;
   1.185 +#endif
   1.186 +
   1.187 +#if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
   1.188 +#  define _PACKED __attribute__ ((packed))
   1.189 +#else
   1.190 +#  define _PACKED
   1.191 +#endif
   1.192 +
   1.193 +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
   1.194 +#  if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
   1.195 +#    pragma pack(1)
   1.196 +#  else
   1.197 +#    pragma pack(push, 1)
   1.198 +#  endif
   1.199 +#endif
   1.200 +
   1.201 +typedef struct { U16 v; }  _PACKED U16_S;
   1.202 +typedef struct { U32 v; }  _PACKED U32_S;
   1.203 +typedef struct { U64 v; }  _PACKED U64_S;
   1.204 +typedef struct {size_t v;} _PACKED size_t_S;
   1.205 +
   1.206 +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
   1.207 +#  if defined(__SUNPRO_C) || defined(__SUNPRO_CC)
   1.208 +#    pragma pack(0)
   1.209 +#  else
   1.210 +#    pragma pack(pop)
   1.211 +#  endif
   1.212 +#endif
   1.213 +
   1.214 +#define A16(x)   (((U16_S *)(x))->v)
   1.215 +#define A32(x)   (((U32_S *)(x))->v)
   1.216 +#define A64(x)   (((U64_S *)(x))->v)
   1.217 +#define AARCH(x) (((size_t_S *)(x))->v)
   1.218 +
   1.219 +
   1.220 +/**************************************
   1.221 +   Constants
   1.222 +**************************************/
   1.223 +#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
   1.224 +#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
   1.225 +#define HASH_SIZE_U32 (1 << LZ4_HASHLOG)
   1.226 +
   1.227 +#define MINMATCH 4
   1.228 +
   1.229 +#define COPYLENGTH 8
   1.230 +#define LASTLITERALS 5
   1.231 +#define MFLIMIT (COPYLENGTH+MINMATCH)
   1.232 +static const int LZ4_minLength = (MFLIMIT+1);
   1.233 +
   1.234 +#define KB *(1U<<10)
   1.235 +#define MB *(1U<<20)
   1.236 +#define GB *(1U<<30)
   1.237 +
   1.238 +#define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1))
   1.239 +#define SKIPSTRENGTH 6   /* Increasing this value will make the compression run slower on incompressible data */
   1.240 +
   1.241 +#define MAXD_LOG 16
   1.242 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
   1.243 +
   1.244 +#define ML_BITS  4
   1.245 +#define ML_MASK  ((1U<<ML_BITS)-1)
   1.246 +#define RUN_BITS (8-ML_BITS)
   1.247 +#define RUN_MASK ((1U<<RUN_BITS)-1)
   1.248 +
   1.249 +
   1.250 +/**************************************
   1.251 +   Structures and local types
   1.252 +**************************************/
   1.253 +typedef struct {
   1.254 +    U32  hashTable[HASH_SIZE_U32];
   1.255 +    U32  currentOffset;
   1.256 +    U32  initCheck;
   1.257 +    const BYTE* dictionary;
   1.258 +    const BYTE* bufferStart;
   1.259 +    U32  dictSize;
   1.260 +} LZ4_stream_t_internal;
   1.261 +
   1.262 +typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
   1.263 +typedef enum { byPtr, byU32, byU16 } tableType_t;
   1.264 +
   1.265 +typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
   1.266 +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
   1.267 +
   1.268 +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
   1.269 +typedef enum { full = 0, partial = 1 } earlyEnd_directive;
   1.270 +
   1.271 +
   1.272 +/**************************************
   1.273 +   Architecture-specific macros
   1.274 +**************************************/
   1.275 +#define STEPSIZE                  sizeof(size_t)
   1.276 +#define LZ4_COPYSTEP(d,s)         { AARCH(d) = AARCH(s); d+=STEPSIZE; s+=STEPSIZE; }
   1.277 +#define LZ4_COPY8(d,s)            { LZ4_COPYSTEP(d,s); if (STEPSIZE<8) LZ4_COPYSTEP(d,s); }
   1.278 +
   1.279 +#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
   1.280 +#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
   1.281 +#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
   1.282 +#else      /* Little Endian */
   1.283 +#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
   1.284 +#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
   1.285 +#endif
   1.286 +
   1.287 +
   1.288 +/**************************************
   1.289 +   Macros
   1.290 +**************************************/
   1.291 +#define LZ4_STATIC_ASSERT(c)    { enum { LZ4_static_assert = 1/(!!(c)) }; }   /* use only *after* variable declarations */
   1.292 +#if LZ4_ARCH64 || !defined(__GNUC__)
   1.293 +#  define LZ4_WILDCOPY(d,s,e)   { do { LZ4_COPY8(d,s) } while (d<e); }        /* at the end, d>=e; */
   1.294 +#else
   1.295 +#  define LZ4_WILDCOPY(d,s,e)   { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d<e); }
   1.296 +#endif
   1.297 +
   1.298 +
   1.299 +/****************************
   1.300 +   Private local functions
   1.301 +****************************/
   1.302 +#if LZ4_ARCH64
   1.303 +
   1.304 +int LZ4_NbCommonBytes (register U64 val)
   1.305 +{
   1.306 +# if defined(LZ4_BIG_ENDIAN)
   1.307 +#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.308 +    unsigned long r = 0;
   1.309 +    _BitScanReverse64( &r, val );
   1.310 +    return (int)(r>>3);
   1.311 +#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.312 +    return (__builtin_clzll(val) >> 3);
   1.313 +#   else
   1.314 +    int r;
   1.315 +    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
   1.316 +    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
   1.317 +    r += (!val);
   1.318 +    return r;
   1.319 +#   endif
   1.320 +# else
   1.321 +#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.322 +    unsigned long r = 0;
   1.323 +    _BitScanForward64( &r, val );
   1.324 +    return (int)(r>>3);
   1.325 +#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.326 +    return (__builtin_ctzll(val) >> 3);
   1.327 +#   else
   1.328 +    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
   1.329 +    return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
   1.330 +#   endif
   1.331 +# endif
   1.332 +}
   1.333 +
   1.334 +#else
   1.335 +
   1.336 +int LZ4_NbCommonBytes (register U32 val)
   1.337 +{
   1.338 +# if defined(LZ4_BIG_ENDIAN)
   1.339 +#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.340 +    unsigned long r = 0;
   1.341 +    _BitScanReverse( &r, val );
   1.342 +    return (int)(r>>3);
   1.343 +#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.344 +    return (__builtin_clz(val) >> 3);
   1.345 +#   else
   1.346 +    int r;
   1.347 +    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
   1.348 +    r += (!val);
   1.349 +    return r;
   1.350 +#   endif
   1.351 +# else
   1.352 +#   if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.353 +    unsigned long r;
   1.354 +    _BitScanForward( &r, val );
   1.355 +    return (int)(r>>3);
   1.356 +#   elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
   1.357 +    return (__builtin_ctz(val) >> 3);
   1.358 +#   else
   1.359 +    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
   1.360 +    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
   1.361 +#   endif
   1.362 +# endif
   1.363 +}
   1.364 +
   1.365 +#endif
   1.366 +
   1.367 +
   1.368 +/********************************
   1.369 +   Compression functions
   1.370 +********************************/
   1.371 +int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
   1.372 +
   1.373 +static int LZ4_hashSequence(U32 sequence, tableType_t tableType)
   1.374 +{
   1.375 +    if (tableType == byU16)
   1.376 +        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
   1.377 +    else
   1.378 +        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
   1.379 +}
   1.380 +
   1.381 +static int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); }
   1.382 +
   1.383 +static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
   1.384 +{
   1.385 +    switch (tableType)
   1.386 +    {
   1.387 +    case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; }
   1.388 +    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; }
   1.389 +    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; }
   1.390 +    }
   1.391 +}
   1.392 +
   1.393 +static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
   1.394 +{
   1.395 +    U32 h = LZ4_hashPosition(p, tableType);
   1.396 +    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
   1.397 +}
   1.398 +
   1.399 +static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
   1.400 +{
   1.401 +    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
   1.402 +    if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
   1.403 +    { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
   1.404 +}
   1.405 +
   1.406 +static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
   1.407 +{
   1.408 +    U32 h = LZ4_hashPosition(p, tableType);
   1.409 +    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
   1.410 +}
   1.411 +
   1.412 +static unsigned LZ4_count(const BYTE* pIn, const BYTE* pRef, const BYTE* pInLimit)
   1.413 +{
   1.414 +    const BYTE* const pStart = pIn;
   1.415 +
   1.416 +    while (likely(pIn<pInLimit-(STEPSIZE-1)))
   1.417 +    {
   1.418 +        size_t diff = AARCH(pRef) ^ AARCH(pIn);
   1.419 +        if (!diff) { pIn+=STEPSIZE; pRef+=STEPSIZE; continue; }
   1.420 +        pIn += LZ4_NbCommonBytes(diff);
   1.421 +        return (unsigned)(pIn - pStart);
   1.422 +    }
   1.423 +    if (sizeof(void*)==8) if ((pIn<(pInLimit-3)) && (A32(pRef) == A32(pIn))) { pIn+=4; pRef+=4; }
   1.424 +    if ((pIn<(pInLimit-1)) && (A16(pRef) == A16(pIn))) { pIn+=2; pRef+=2; }
   1.425 +    if ((pIn<pInLimit) && (*pRef == *pIn)) pIn++;
   1.426 +
   1.427 +    return (unsigned)(pIn - pStart);
   1.428 +}
   1.429 +
   1.430 +
   1.431 +static int LZ4_compress_generic(
   1.432 +                 void* ctx,
   1.433 +                 const char* source,
   1.434 +                 char* dest,
   1.435 +                 int inputSize,
   1.436 +                 int maxOutputSize,
   1.437 +
   1.438 +                 limitedOutput_directive outputLimited,
   1.439 +                 tableType_t tableType,
   1.440 +                 dict_directive dict,
   1.441 +                 dictIssue_directive dictIssue)
   1.442 +{
   1.443 +    LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx;
   1.444 +
   1.445 +    const BYTE* ip = (const BYTE*) source;
   1.446 +    const BYTE* base;
   1.447 +    const BYTE* lowLimit;
   1.448 +    const BYTE* const lowRefLimit = ip - dictPtr->dictSize;
   1.449 +    const BYTE* const dictionary = dictPtr->dictionary;
   1.450 +    const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
   1.451 +    const size_t dictDelta = dictEnd - (const BYTE*)source;
   1.452 +    const BYTE* anchor = (const BYTE*) source;
   1.453 +    const BYTE* const iend = ip + inputSize;
   1.454 +    const BYTE* const mflimit = iend - MFLIMIT;
   1.455 +    const BYTE* const matchlimit = iend - LASTLITERALS;
   1.456 +
   1.457 +    BYTE* op = (BYTE*) dest;
   1.458 +    BYTE* const olimit = op + maxOutputSize;
   1.459 +
   1.460 +    const int skipStrength = SKIPSTRENGTH;
   1.461 +    U32 forwardH;
   1.462 +    size_t refDelta=0;
   1.463 +
   1.464 +    /* Init conditions */
   1.465 +    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;          /* Unsupported input size, too large (or negative) */
   1.466 +    switch(dict)
   1.467 +    {
   1.468 +    case noDict:
   1.469 +    default:
   1.470 +        base = (const BYTE*)source;
   1.471 +        lowLimit = (const BYTE*)source;
   1.472 +        break;
   1.473 +    case withPrefix64k:
   1.474 +        base = (const BYTE*)source - dictPtr->currentOffset;
   1.475 +        lowLimit = (const BYTE*)source - dictPtr->dictSize;
   1.476 +        break;
   1.477 +    case usingExtDict:
   1.478 +        base = (const BYTE*)source - dictPtr->currentOffset;
   1.479 +        lowLimit = (const BYTE*)source;
   1.480 +        break;
   1.481 +    }
   1.482 +    if ((tableType == byU16) && (inputSize>=(int)LZ4_64KLIMIT)) return 0;   /* Size too large (not within 64K limit) */
   1.483 +    if (inputSize<LZ4_minLength) goto _last_literals;                       /* Input too small, no compression (all literals) */
   1.484 +
   1.485 +    /* First Byte */
   1.486 +    LZ4_putPosition(ip, ctx, tableType, base);
   1.487 +    ip++; forwardH = LZ4_hashPosition(ip, tableType);
   1.488 +
   1.489 +    /* Main Loop */
   1.490 +    for ( ; ; )
   1.491 +    {
   1.492 +        const BYTE* ref;
   1.493 +        BYTE* token;
   1.494 +        {
   1.495 +            const BYTE* forwardIp = ip;
   1.496 +            unsigned step=1;
   1.497 +            unsigned searchMatchNb = (1U << skipStrength);
   1.498 +
   1.499 +            /* Find a match */
   1.500 +            do {
   1.501 +                U32 h = forwardH;
   1.502 +                ip = forwardIp;
   1.503 +                forwardIp += step;
   1.504 +                step = searchMatchNb++ >> skipStrength;
   1.505 +                //if (step>8) step=8;   // required for valid forwardIp ; slows down uncompressible data a bit
   1.506 +
   1.507 +                if (unlikely(forwardIp > mflimit)) goto _last_literals;
   1.508 +
   1.509 +                ref = LZ4_getPositionOnHash(h, ctx, tableType, base);
   1.510 +                if (dict==usingExtDict)
   1.511 +                {
   1.512 +                    if (ref<(const BYTE*)source)
   1.513 +                    {
   1.514 +                        refDelta = dictDelta;
   1.515 +                        lowLimit = dictionary;
   1.516 +                    }
   1.517 +                    else
   1.518 +                    {
   1.519 +                        refDelta = 0;
   1.520 +                        lowLimit = (const BYTE*)source;
   1.521 +                    }
   1.522 +                }
   1.523 +                forwardH = LZ4_hashPosition(forwardIp, tableType);
   1.524 +                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
   1.525 +
   1.526 +            } while ( ((dictIssue==dictSmall) ? (ref < lowRefLimit) : 0)
   1.527 +                || ((tableType==byU16) ? 0 : (ref + MAX_DISTANCE < ip))
   1.528 +                || (A32(ref+refDelta) != A32(ip)) );
   1.529 +        }
   1.530 +
   1.531 +        /* Catch up */
   1.532 +        while ((ip>anchor) && (ref+refDelta > lowLimit) && (unlikely(ip[-1]==ref[refDelta-1]))) { ip--; ref--; }
   1.533 +
   1.534 +        {
   1.535 +            /* Encode Literal length */
   1.536 +            unsigned litLength = (unsigned)(ip - anchor);
   1.537 +            token = op++;
   1.538 +            if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
   1.539 +                return 0;   /* Check output limit */
   1.540 +            if (litLength>=RUN_MASK)
   1.541 +            {
   1.542 +                int len = (int)litLength-RUN_MASK;
   1.543 +                *token=(RUN_MASK<<ML_BITS);
   1.544 +                for(; len >= 255 ; len-=255) *op++ = 255;
   1.545 +                *op++ = (BYTE)len;
   1.546 +            }
   1.547 +            else *token = (BYTE)(litLength<<ML_BITS);
   1.548 +
   1.549 +            /* Copy Literals */
   1.550 +            { BYTE* end = op+litLength; LZ4_WILDCOPY(op,anchor,end); op=end; }
   1.551 +        }
   1.552 +
   1.553 +_next_match:
   1.554 +        /* Encode Offset */
   1.555 +        LZ4_WRITE_LITTLEENDIAN_16(op, (U16)(ip-ref));
   1.556 +
   1.557 +        /* Encode MatchLength */
   1.558 +        {
   1.559 +            unsigned matchLength;
   1.560 +
   1.561 +            if ((dict==usingExtDict) && (lowLimit==dictionary))
   1.562 +            {
   1.563 +                const BYTE* limit;
   1.564 +                ref += refDelta;
   1.565 +                limit = ip + (dictEnd-ref);
   1.566 +                if (limit > matchlimit) limit = matchlimit;
   1.567 +                matchLength = LZ4_count(ip+MINMATCH, ref+MINMATCH, limit);
   1.568 +                ip += MINMATCH + matchLength;
   1.569 +                if (ip==limit)
   1.570 +                {
   1.571 +                    unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit);
   1.572 +                    matchLength += more;
   1.573 +                    ip += more;
   1.574 +                }
   1.575 +            }
   1.576 +            else
   1.577 +            {
   1.578 +                matchLength = LZ4_count(ip+MINMATCH, ref+MINMATCH, matchlimit);
   1.579 +                ip += MINMATCH + matchLength;
   1.580 +            }
   1.581 +
   1.582 +            if (matchLength>=ML_MASK)
   1.583 +            {
   1.584 +                if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit)))
   1.585 +                    return 0;    /* Check output limit */
   1.586 +                *token += ML_MASK;
   1.587 +                matchLength -= ML_MASK;
   1.588 +                for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; }
   1.589 +                if (matchLength >= 255) { matchLength-=255; *op++ = 255; }
   1.590 +                *op++ = (BYTE)matchLength;
   1.591 +            }
   1.592 +            else *token += (BYTE)(matchLength);
   1.593 +        }
   1.594 +
   1.595 +        anchor = ip;
   1.596 +
   1.597 +        /* Test end of chunk */
   1.598 +        if (ip > mflimit) break;
   1.599 +
   1.600 +        /* Fill table */
   1.601 +        LZ4_putPosition(ip-2, ctx, tableType, base);
   1.602 +
   1.603 +        /* Test next position */
   1.604 +        ref = LZ4_getPosition(ip, ctx, tableType, base);
   1.605 +        if (dict==usingExtDict)
   1.606 +        {
   1.607 +            if (ref<(const BYTE*)source)
   1.608 +            {
   1.609 +                refDelta = dictDelta;
   1.610 +                lowLimit = dictionary;
   1.611 +            }
   1.612 +            else
   1.613 +            {
   1.614 +                refDelta = 0;
   1.615 +                lowLimit = (const BYTE*)source;
   1.616 +            }
   1.617 +        }
   1.618 +        LZ4_putPosition(ip, ctx, tableType, base);
   1.619 +        if ( ((dictIssue==dictSmall) ? (ref>=lowRefLimit) : 1)
   1.620 +            && (ref+MAX_DISTANCE>=ip)
   1.621 +            && (A32(ref+refDelta)==A32(ip)) )
   1.622 +        { token=op++; *token=0; goto _next_match; }
   1.623 +
   1.624 +        /* Prepare next loop */
   1.625 +        forwardH = LZ4_hashPosition(++ip, tableType);
   1.626 +    }
   1.627 +
   1.628 +_last_literals:
   1.629 +    /* Encode Last Literals */
   1.630 +    {
   1.631 +        int lastRun = (int)(iend - anchor);
   1.632 +        if ((outputLimited) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize))
   1.633 +            return 0;   /* Check output limit */
   1.634 +        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
   1.635 +        else *op++ = (BYTE)(lastRun<<ML_BITS);
   1.636 +        memcpy(op, anchor, iend - anchor);
   1.637 +        op += iend-anchor;
   1.638 +    }
   1.639 +
   1.640 +    /* End */
   1.641 +    return (int) (((char*)op)-dest);
   1.642 +}
   1.643 +
   1.644 +
   1.645 +int LZ4_compress(const char* source, char* dest, int inputSize)
   1.646 +{
   1.647 +#if (HEAPMODE)
   1.648 +    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U32, 4);   /* Aligned on 4-bytes boundaries */
   1.649 +#else
   1.650 +    U32 ctx[LZ4_STREAMSIZE_U32] = {0};      /* Ensure data is aligned on 4-bytes boundaries */
   1.651 +#endif
   1.652 +    int result;
   1.653 +
   1.654 +    if (inputSize < (int)LZ4_64KLIMIT)
   1.655 +        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue);
   1.656 +    else
   1.657 +        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue);
   1.658 +
   1.659 +#if (HEAPMODE)
   1.660 +    FREEMEM(ctx);
   1.661 +#endif
   1.662 +    return result;
   1.663 +}
   1.664 +
   1.665 +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
   1.666 +{
   1.667 +#if (HEAPMODE)
   1.668 +    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U32, 4);   /* Aligned on 4-bytes boundaries */
   1.669 +#else
   1.670 +    U32 ctx[LZ4_STREAMSIZE_U32] = {0};      /* Ensure data is aligned on 4-bytes boundaries */
   1.671 +#endif
   1.672 +    int result;
   1.673 +
   1.674 +    if (inputSize < (int)LZ4_64KLIMIT)
   1.675 +        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue);
   1.676 +    else
   1.677 +        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue);
   1.678 +
   1.679 +#if (HEAPMODE)
   1.680 +    FREEMEM(ctx);
   1.681 +#endif
   1.682 +    return result;
   1.683 +}
   1.684 +
   1.685 +
   1.686 +/*****************************************
   1.687 +   Experimental : Streaming functions
   1.688 +*****************************************/
   1.689 +
   1.690 +void* LZ4_createStream()
   1.691 +{
   1.692 +    void* lz4s = ALLOCATOR(4, LZ4_STREAMSIZE_U32);
   1.693 +    MEM_INIT(lz4s, 0, LZ4_STREAMSIZE);
   1.694 +    return lz4s;
   1.695 +}
   1.696 +
   1.697 +int LZ4_free (void* LZ4_stream)
   1.698 +{
   1.699 +    FREEMEM(LZ4_stream);
   1.700 +    return (0);
   1.701 +}
   1.702 +
   1.703 +
   1.704 +int LZ4_loadDict (void* LZ4_dict, const char* dictionary, int dictSize)   /* Installs (at most the last 64 KB of) dictionary into the stream state and registers its positions through LZ4_putPosition; always returns 1. */
   1.705 +{
   1.706 +    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
   1.707 +    const BYTE* p = (const BYTE*)dictionary;
   1.708 +    const BYTE* dictEnd;   /* computed only after dictSize has been validated */
   1.709 +    const BYTE* base;
   1.710 +
   1.711 +    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));      /* A compilation error here means LZ4_STREAMSIZE is not large enough */
   1.712 +    if (dict->initCheck) MEM_INIT(dict, 0, sizeof(LZ4_stream_t_internal));   /* Uninitialized structure detected */
   1.713 +
   1.714 +    if (dictSize < MINMATCH)   /* too short to be useful; also rejects negative sizes before any pointer arithmetic */
   1.715 +    {
   1.716 +        dict->dictionary = NULL;
   1.717 +        dict->dictSize = 0;
   1.718 +        return 1;
   1.719 +    }
   1.720 +    dictEnd = p + dictSize;
   1.721 +    if (dictSize > (int)(64 KB)) p = dictEnd - 64 KB;   /* keep only the last 64 KB; comparing sizes avoids forming a pointer below the start of a short dictionary */
   1.722 +    base = p - dict->currentOffset;
   1.723 +    dict->dictionary = p;
   1.724 +    dict->dictSize = (U32)(dictEnd - p);
   1.725 +    dict->currentOffset += dict->dictSize;
   1.726 +
   1.727 +    while (p <= dictEnd-MINMATCH)   /* stop while MINMATCH bytes are still readable at p */
   1.728 +    {
   1.729 +        LZ4_putPosition(p, dict, byU32, base);
   1.730 +        p+=3;   /* only every third position is registered */
   1.731 +    }
   1.732 +
   1.733 +    return 1;
   1.734 +}
   1.735 +
   1.736 +
   1.737 +void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
   1.738 +{   /* Resets currentOffset to 64 KB and shifts every hash table entry accordingly, once the offset exceeds 0x80000000 or the numeric value of src. */
   1.739 +    const int offsetTooHigh = (LZ4_dict->currentOffset > 0x80000000);
   1.740 +    const int offsetAboveSrc = ((size_t)LZ4_dict->currentOffset > (size_t)src);   /* address space overflow */
   1.741 +    if (!offsetTooHigh && !offsetAboveSrc) return;   /* nothing to rescale */
   1.742 +    {   /* rescale hash table */
   1.743 +        const U32 shift = LZ4_dict->currentOffset - 64 KB;
   1.744 +        const BYTE* const savedDictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
   1.745 +        int slot;
   1.746 +        for (slot=0; slot<HASH_SIZE_U32; slot++)
   1.747 +        {
   1.748 +            /* entries older than the shift collapse to 0, the others move down by shift */
   1.749 +            LZ4_dict->hashTable[slot] = (LZ4_dict->hashTable[slot] < shift) ? 0 : (LZ4_dict->hashTable[slot] - shift);
   1.750 +        }
   1.751 +        LZ4_dict->currentOffset = 64 KB;
   1.752 +        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
   1.753 +        LZ4_dict->dictionary = savedDictEnd - LZ4_dict->dictSize;   /* dictionary end stays where it was; only its start moves */
   1.754 +    }
   1.755 +}
   1.756 +
   1.757 +
   1.758 +FORCE_INLINE int LZ4_compress_continue_generic (void* LZ4_stream, const char* source, char* dest, int inputSize,
   1.759 +                                                int maxOutputSize, limitedOutput_directive limit)
   1.760 +{   /* Compresses one more block using the stream's dictionary, then updates the stream so that this block becomes (part of) the dictionary for the next call. Returns the LZ4_compress_generic result, or 0 when initCheck is set. */
   1.761 +    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream;
   1.762 +    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;   /* captured once; the overlap check below may shrink dictSize but every later test still uses this value */
   1.763 +
   1.764 +    const BYTE* smallest = (const BYTE*) source;
   1.765 +    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
   1.766 +    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;   /* lower of source and dictionary end */
   1.767 +    LZ4_renormDictT(streamPtr, smallest);
   1.768 +
   1.769 +    /* Check overlapping input/dictionary space */
   1.770 +    {
   1.771 +        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
   1.772 +        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd))   /* input ends strictly inside the dictionary */
   1.773 +        {
   1.774 +            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);   /* keep only the dictionary bytes located after the input */
   1.775 +            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
   1.776 +            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;   /* a remainder under 4 bytes is dropped entirely */
   1.777 +            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
   1.778 +        }
   1.779 +    }
   1.780 +
   1.781 +    /* prefix mode : source data follows dictionary */
   1.782 +    if (dictEnd == (const BYTE*)source)
   1.783 +    {
   1.784 +        int result;
   1.785 +        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))   /* dictSmall is selected when the dictionary is below 64 KB and smaller than currentOffset */
   1.786 +            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, dictSmall);
   1.787 +        else
   1.788 +            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, noDictIssue);
   1.789 +        streamPtr->dictSize += (U32)inputSize;   /* the block extends the dictionary in place */
   1.790 +        streamPtr->currentOffset += (U32)inputSize;
   1.791 +        return result;
   1.792 +    }
   1.793 +
   1.794 +    /* external dictionary mode */
   1.795 +    {
   1.796 +        int result;
   1.797 +        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))   /* same dictSmall selection as in prefix mode */
   1.798 +            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, dictSmall);
   1.799 +        else
   1.800 +            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, noDictIssue);
   1.801 +        streamPtr->dictionary = (const BYTE*)source;   /* the block just compressed replaces the previous dictionary */
   1.802 +        streamPtr->dictSize = (U32)inputSize;
   1.803 +        streamPtr->currentOffset += (U32)inputSize;
   1.804 +        return result;
   1.805 +    }
   1.806 +}
   1.807 +
   1.808 +
   1.809 +int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize)   /* Streaming compression without an output bound: forwards to LZ4_compress_continue_generic with maxOutputSize 0 and notLimited. */
   1.810 +{
   1.811 +    return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, 0, notLimited);
   1.812 +}
   1.813 +
   1.814 +int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize)   /* Streaming compression with an output bound: forwards maxOutputSize and limitedOutput to LZ4_compress_continue_generic. */
   1.815 +{
   1.816 +    return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput);
   1.817 +}
   1.818 +
   1.819 +
   1.820 +/* Hidden debug function, to force separate dictionary mode */
   1.821 +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
   1.822 +{
   1.823 +    LZ4_stream_t_internal* const state = (LZ4_stream_t_internal*)LZ4_dict;
   1.824 +    const BYTE* const input = (const BYTE*) source;
   1.825 +    const BYTE* const prevDictEnd = state->dictionary + state->dictSize;
   1.826 +    int written;
   1.827 +
   1.828 +    /* renormalise against the lower of the input start and the previous dictionary end */
   1.829 +    LZ4_renormDictT(state, (input < prevDictEnd) ? input : prevDictEnd);
   1.830 +
   1.831 +    written = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue);
   1.832 +
   1.833 +    state->dictionary = input;   /* the block just compressed becomes the dictionary */
   1.834 +    state->dictSize = (U32)inputSize;
   1.835 +    state->currentOffset += (U32)inputSize;
   1.836 +
   1.837 +    return written;
   1.838 +}
   1.839 +
   1.840 +
   1.841 +int LZ4_saveDict (void* LZ4_dict, char* safeBuffer, int dictSize)   /* Copies the last dictSize bytes of the current dictionary (clamped to 64 KB and to the dictionary size) into safeBuffer and makes safeBuffer the dictionary; always returns 1. */
   1.842 +{
   1.843 +    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
   1.844 +    const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
   1.845 +
   1.846 +    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB ; a negative dictSize converts to a large U32 and is clamped here too */
   1.847 +    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
   1.848 +    /* safeBuffer may overlap the dictionary being saved (nothing here forbids it), so the copy must tolerate overlap */
   1.849 +    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
   1.850 +
   1.851 +    dict->dictionary = (const BYTE*)safeBuffer;
   1.852 +    dict->dictSize = (U32)dictSize;
   1.853 +
   1.854 +    return 1;
   1.855 +}
   1.856 +
   1.857 +
   1.858 +
   1.859 +/****************************
   1.860 +   Decompression functions
   1.861 +****************************/
   1.862 +/*
   1.863 + * This generic decompression function covers all use cases.
   1.864 + * It shall be instantiated several times, using different sets of directives.
   1.865 + * Note that it is essential this generic function is really inlined,
   1.866 + * in order to remove useless branches during compilation optimisation.
   1.867 + */
   1.868 +FORCE_INLINE int LZ4_decompress_generic(
   1.869 +                 const char* source,     /* compressed block; read through ip */
   1.870 +                 char* dest,             /* output buffer; written through op */
   1.871 +                 int inputSize,          /* bytes readable at source (defines iend); the endOnOutputSize callers in this file pass 0 */
   1.872 +                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
   1.873 +
   1.874 +                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
   1.875 +                 int partialDecoding,    /* full, partial */
   1.876 +                 int targetOutputSize,   /* only used if partialDecoding==partial */
   1.877 +                 int dict,               /* noDict, withPrefix64k, usingExtDict */
   1.878 +                 const char* dictStart,  /* only if dict==usingExtDict */
   1.879 +                 int dictSize            /* note : = 0 if noDict */
   1.880 +                 )
   1.881 +{   /* Returns the number of bytes written to dest when endOnInput is set, otherwise the number of bytes read from source; a negative value signals malformed input (see _output_error). */
   1.882 +    /* Local Variables */
   1.883 +    const BYTE* restrict ip = (const BYTE*) source;   /* read cursor */
   1.884 +    const BYTE* ref;                                  /* start of the match being copied */
   1.885 +    const BYTE* const iend = ip + inputSize;          /* end of input */
   1.886 +
   1.887 +    BYTE* op = (BYTE*) dest;                          /* write cursor */
   1.888 +    BYTE* const oend = op + outputSize;               /* end of output */
   1.889 +    BYTE* cpy;
   1.890 +    BYTE* oexit = op + targetOutputSize;              /* stop point for partial decoding */
   1.891 +    const BYTE* const lowLimit = (const BYTE*)dest - dictSize;   /* lowest address a match may reference when offsets are checked */
   1.892 +
   1.893 +    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
   1.894 +//#define OLD
   1.895 +#ifdef OLD
   1.896 +    const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};   /* static reduces speed for LZ4_decompress_safe() on GCC64 */
   1.897 +#else
   1.898 +    const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0};   /* static reduces speed for LZ4_decompress_safe() on GCC64 */
   1.899 +#endif
   1.900 +    static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
   1.901 +
   1.902 +    const int checkOffset = (endOnInput) && (dictSize < (int)(64 KB));   /* offsets are validated only in endOnInput mode with less than 64 KB of dictionary */
   1.903 +
   1.904 +
   1.905 +    /* Special cases */
   1.906 +    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                        /* targetOutputSize too high => decode everything */
   1.907 +    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;   /* Empty output buffer */
   1.908 +    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);   /* empty output in endOnOutputSize mode: accepted only if the first input byte is 0 */
   1.909 +
   1.910 +
   1.911 +    /* Main Loop */
   1.912 +    while (1)   /* one iteration per sequence: token, literals, offset, match */
   1.913 +    {
   1.914 +        unsigned token;
   1.915 +        size_t length;
   1.916 +
   1.917 +        /* get runlength */
   1.918 +        token = *ip++;   /* token>>ML_BITS is the literal run length, token&ML_MASK the match length (used below) */
   1.919 +        if ((length=(token>>ML_BITS)) == RUN_MASK)
   1.920 +        {
   1.921 +            unsigned s;
   1.922 +            do
   1.923 +            {
   1.924 +                s = *ip++;
   1.925 +                length += s;
   1.926 +            }
   1.927 +            while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255));   /* every 255 byte extends the length; in endOnInput mode the loop also stops near iend */
   1.928 +            //if ((sizeof(void*)==4) && unlikely(length>LZ4_MAX_INPUT_SIZE)) goto _output_error;   /* overflow detection */
   1.929 +            if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* quickfix issue 134 */
   1.930 +            if ((endOnInput) && (sizeof(void*)==4) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* quickfix issue 134 */
   1.931 +        }
   1.932 +
   1.933 +        /* copy literals */
   1.934 +        cpy = op+length;
   1.935 +        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
   1.936 +            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
   1.937 +        {
   1.938 +            if (partialDecoding)
   1.939 +            {
   1.940 +                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
   1.941 +                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
   1.942 +            }
   1.943 +            else
   1.944 +            {
   1.945 +                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
   1.946 +                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
   1.947 +            }
   1.948 +            memcpy(op, ip, length);
   1.949 +            ip += length;
   1.950 +            op += length;
   1.951 +            break;                                       /* Necessarily EOF, due to parsing restrictions */
   1.952 +        }
   1.953 +        LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy;   /* presumably the copy may run past cpy; ip is pulled back by that overshoot and op is set to cpy */
   1.954 +
   1.955 +        /* get offset */
   1.956 +        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;   /* presumably sets ref = cpy - (16-bit little-endian offset read at ip) -- confirm against the macro */
   1.957 +        if ((checkOffset) && (unlikely(ref < lowLimit))) goto _output_error;   /* Error : offset outside destination buffer */
   1.958 +
   1.959 +        /* get matchlength */
   1.960 +        if ((length=(token&ML_MASK)) == ML_MASK)
   1.961 +        {
   1.962 +            unsigned s;
   1.963 +            do
   1.964 +            {
   1.965 +                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
   1.966 +                s = *ip++;
   1.967 +                length += s;
   1.968 +            } while (s==255);
   1.969 +            //if ((sizeof(void*)==4) && unlikely(length>LZ4_MAX_INPUT_SIZE)) goto _output_error;   /* overflow detection */
   1.970 +            if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* quickfix issue 134 */
   1.971 +        }
   1.972 +
   1.973 +        /* check external dictionary */
   1.974 +        if ((dict==usingExtDict) && (ref < (BYTE* const)dest))   /* match starts before dest, so its data is taken from the external dictionary */
   1.975 +        {
   1.976 +            if (unlikely(op+length+MINMATCH > oend-LASTLITERALS)) goto _output_error;
   1.977 +
   1.978 +            if (length+MINMATCH <= (size_t)(dest-(char*)ref))   /* the whole match lies before dest */
   1.979 +            {
   1.980 +                ref = dictEnd - (dest-(char*)ref);
   1.981 +                memcpy(op, ref, length+MINMATCH);
   1.982 +                op += length+MINMATCH;
   1.983 +            }
   1.984 +            else   /* the match spans the dictionary end and the beginning of dest */
   1.985 +            {
   1.986 +                size_t copySize = (size_t)(dest-(char*)ref);
   1.987 +                memcpy(op, dictEnd - copySize, copySize);
   1.988 +                op += copySize;
   1.989 +                copySize = length+MINMATCH - copySize;
   1.990 +                if (copySize > (size_t)((char*)op-dest))   /* overlap */
   1.991 +                {
   1.992 +                    BYTE* const cpy = op + copySize;   /* NOTE(review): shadows the function-level cpy */
   1.993 +                    const BYTE* ref = (BYTE*)dest;     /* NOTE(review): shadows the function-level ref */
   1.994 +                    while (op < cpy) *op++ = *ref++;   /* byte by byte, because source and destination overlap */
   1.995 +                }
   1.996 +                else
   1.997 +                {
   1.998 +                    memcpy(op, dest, copySize);
   1.999 +                    op += copySize;
  1.1000 +                }
  1.1001 +            }
  1.1002 +            continue;
  1.1003 +        }
  1.1004 +
  1.1005 +        /* copy repeated sequence */
  1.1006 +        if (unlikely((op-ref)<(int)STEPSIZE))   /* match distance below STEPSIZE: first bytes are copied individually */
  1.1007 +        {
  1.1008 +            const size_t dec64 = dec64table[(sizeof(void*)==4) ? 0 : op-ref];
  1.1009 +            op[0] = ref[0];
  1.1010 +            op[1] = ref[1];
  1.1011 +            op[2] = ref[2];
  1.1012 +            op[3] = ref[3];
  1.1013 +#ifdef OLD
  1.1014 +            op += 4, ref += 4; ref -= dec32table[op-ref];
  1.1015 +            A32(op) = A32(ref);
  1.1016 +            op += STEPSIZE-4; ref -= dec64;
  1.1017 +#else
  1.1018 +            ref += dec32table[op-ref];
  1.1019 +            A32(op+4) = A32(ref);
  1.1020 +            op += STEPSIZE; ref -= dec64;
  1.1021 +#endif
  1.1022 +        } else { LZ4_COPYSTEP(op,ref); }
  1.1023 +        cpy = op + length - (STEPSIZE-4);
  1.1024 +
  1.1025 +        if (unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4)))   /* match ends close to the end of the output buffer */
  1.1026 +        {
  1.1027 +            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last 5 bytes must be literals */
  1.1028 +            if (op<oend-COPYLENGTH) LZ4_WILDCOPY(op, ref, (oend-COPYLENGTH));
  1.1029 +            while(op<cpy) *op++=*ref++;
  1.1030 +            op=cpy;
  1.1031 +            continue;
  1.1032 +        }
  1.1033 +        LZ4_WILDCOPY(op, ref, cpy);
  1.1034 +        op=cpy;   /* correction */
  1.1035 +    }
  1.1036 +
  1.1037 +    /* end of decoding */
  1.1038 +    if (endOnInput)
  1.1039 +       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
  1.1040 +    else
  1.1041 +       return (int) (((char*)ip)-source);   /* Nb of input bytes read */
  1.1042 +
  1.1043 +    /* Overflow error detected */
  1.1044 +_output_error:
  1.1045 +    return (int) (-(((char*)ip)-source))-1;   /* negative result: -(input bytes consumed so far) - 1 */
  1.1046 +}
  1.1047 +
  1.1048 +
  1.1049 +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxOutputSize)   /* Full decode bounded by both compressedSize and maxOutputSize (endOnInputSize, full, noDict). */
  1.1050 +{
  1.1051 +    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, noDict, NULL, 0);
  1.1052 +}
  1.1053 +
  1.1054 +int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize)   /* Same as LZ4_decompress_safe but in partial mode, passing targetOutputSize to the generic decoder. */
  1.1055 +{
  1.1056 +    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, partial, targetOutputSize, noDict, NULL, 0);
  1.1057 +}
  1.1058 +
  1.1059 +int LZ4_decompress_fast(const char* source, char* dest, int originalSize)   /* Decode driven by the output size only (endOnOutputSize): inputSize is passed as 0 and the dictionary mode is withPrefix64k with dictSize 0. */
  1.1060 +{
  1.1061 +    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, NULL, 0);
  1.1062 +}
  1.1063 +
  1.1064 +/* streaming decompression functions */
  1.1065 +
  1.1066 +//#define LZ4_STREAMDECODESIZE_U32 4
  1.1067 +//#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int))
  1.1068 +//typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t;
  1.1069 +typedef struct   /* Streaming-decode state: the dictionary handed to LZ4_decompress_generic by the *_continue functions */
  1.1070 +{
  1.1071 +    const char* dictionary;   /* passed as dictStart */
  1.1072 +    int dictSize;             /* passed as dictSize */
  1.1073 +} LZ4_streamDecode_t_internal;
  1.1074 +
  1.1075 +/*
  1.1076 + * If you prefer dynamic allocation methods,
  1.1077 + * LZ4_createStreamDecode()
  1.1078 + * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
  1.1079 + */
  1.1080 +void* LZ4_createStreamDecode()   /* Allocates a decode state through ALLOCATOR and clears LZ4_STREAMDECODESIZE bytes of it; returns NULL when the allocation fails. */
  1.1081 +{
  1.1082 +    void* lz4s = ALLOCATOR(sizeof(U32), LZ4_STREAMDECODESIZE_U32);
  1.1083 +    if (lz4s != NULL) { MEM_INIT(lz4s, 0, LZ4_STREAMDECODESIZE); }   /* never write through a failed allocation */
  1.1084 +    return lz4s;
  1.1085 +}
  1.1086 +
  1.1087 +/*
  1.1088 + * LZ4_setDictDecode
  1.1089 + * Use this function to instruct where to find the dictionary
  1.1090 + * This function is not necessary if previous data is still available where it was decoded.
  1.1091 + * Loading a size of 0 is allowed (same effect as no dictionary).
  1.1092 + * Return : 1 if OK, 0 if error
  1.1093 + */
  1.1094 +int LZ4_setDictDecode (void* LZ4_streamDecode, const char* dictionary, int dictSize)   /* Stores the pointer and size as given, without copying or validating them. */
  1.1095 +{
  1.1096 +    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
  1.1097 +    lz4sd->dictionary = dictionary;
  1.1098 +    lz4sd->dictSize = dictSize;
  1.1099 +    return 1;   /* NOTE(review): this implementation has no path that returns 0 */
  1.1100 +}
  1.1101 +
  1.1102 +/*
  1.1103 +*_continue() :
  1.1104 +    These decoding functions allow decompression of multiple blocks in "streaming" mode.
  1.1105 +    Previously decoded blocks must still be available at the memory position where they were decoded.
  1.1106 +    If it's not possible, save the relevant part of decoded data into a safe buffer,
  1.1107 +    and indicate where it stands using LZ4_setDictDecode()
  1.1108 +*/
  1.1109 +int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
  1.1110 +{
  1.1111 +    LZ4_streamDecode_t_internal* const state = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
  1.1112 +    const int decoded = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, state->dictionary, state->dictSize);
  1.1113 +
  1.1114 +    if (decoded > 0)   /* zero or negative results leave the stream state untouched */
  1.1115 +    {
  1.1116 +        /* Output written right behind the current dictionary extends it in place; */
  1.1117 +        /* any other destination makes the freshly decoded block the new dictionary. */
  1.1118 +        const int contiguous = (state->dictionary + state->dictSize == dest);
  1.1119 +        if (!contiguous)
  1.1120 +        {
  1.1121 +            state->dictionary = dest;
  1.1122 +            state->dictSize = 0;
  1.1123 +        }
  1.1124 +        state->dictSize += decoded;
  1.1125 +    }
  1.1126 +    return decoded;
  1.1127 +}
  1.1128 +
  1.1129 +int LZ4_decompress_fast_continue (void* LZ4_streamDecode, const char* source, char* dest, int originalSize)
  1.1130 +{
  1.1131 +    LZ4_streamDecode_t_internal* const state = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
  1.1132 +    const int consumed = LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, state->dictionary, state->dictSize);
  1.1133 +
  1.1134 +    if (consumed > 0)   /* zero or negative results leave the stream state untouched */
  1.1135 +    {
  1.1136 +        /* Output written right behind the current dictionary extends it in place; */
  1.1137 +        /* any other destination becomes the start of the new dictionary. */
  1.1138 +        const int contiguous = (state->dictionary + state->dictSize == dest);
  1.1139 +        if (!contiguous)
  1.1140 +        {
  1.1141 +            state->dictionary = dest;
  1.1142 +            state->dictSize = 0;
  1.1143 +        }
  1.1144 +        state->dictSize += consumed;   /* the dictionary size is advanced by the generic decoder's return value */
  1.1145 +    }
  1.1146 +    return consumed;
  1.1147 +}
  1.1148 +
  1.1149 +
  1.1150 +/*
  1.1151 +Advanced decoding functions :
  1.1152 +*_usingDict() :
  1.1153 +    These decoding functions work the same as "_continue" ones,
  1.1154 +    the dictionary must be explicitly provided within parameters
  1.1155 +*/
  1.1156 +
  1.1157 +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)   /* Input-bounded full decode (endOnInputSize) with the caller's dictionary passed in usingExtDict mode. */
  1.1158 +{
  1.1159 +    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
  1.1160 +}
  1.1161 +
  1.1162 +int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)   /* Output-size-driven decode (endOnOutputSize, inputSize 0) with the caller's dictionary passed in usingExtDict mode. */
  1.1163 +{
  1.1164 +    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize);
  1.1165 +}

mercurial