other-licenses/bsdiff/bsdiff.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/other-licenses/bsdiff/bsdiff.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,404 @@
     1.4 +/* vim:set ts=8 sw=8 sts=8 noet: */
     1.5 +/*
     1.6 +  bsdiff.c -- Binary patch generator.
     1.7 +
     1.8 +  Copyright 2003 Colin Percival
     1.9 +
    1.10 +  For the terms under which this work may be distributed, please see
    1.11 +  the adjoining file "LICENSE".
    1.12 +
    1.13 +  ChangeLog:
    1.14 +  2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
    1.15 +               values throughout.
    1.16 +                 --Benjamin Smedberg <benjamin@smedbergs.us>
    1.17 +  2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
    1.18 +               provided by libbz2.
    1.19 +                 --Darin Fisher <darin@meer.net>
    1.20 +*/
    1.21 +
    1.22 +#include "bspatch.h"
    1.23 +
    1.24 +#include <stdlib.h>
    1.25 +#include <stdio.h>
    1.26 +#include <string.h>
    1.27 +#include <fcntl.h>
    1.28 +#include <stdarg.h>
    1.29 +#ifdef XP_WIN
    1.30 +#include <io.h>
    1.31 +#include <winsock2.h>
    1.32 +#else
    1.33 +#include <unistd.h>
    1.34 +#include <arpa/inet.h>
    1.35 +#define _O_BINARY 0
    1.36 +#endif
    1.37 +
    1.38 +#undef MIN
    1.39 +#define MIN(x,y) (((x)<(y)) ? (x) : (y))
    1.40 +
    1.41 +/*---------------------------------------------------------------------------*/
    1.42 +
    1.43 +/* This variable lives in libbz2.  It's declared in bzlib_private.h, so we just
    1.44 + * declare it here to avoid including that entire header file.
    1.45 + */
    1.46 +extern unsigned int BZ2_crc32Table[256];
    1.47 +
    1.48 +static unsigned int
    1.49 +crc32(const unsigned char *buf, unsigned int len)
    1.50 +{
    1.51 +	unsigned int crc = 0xffffffffL;
    1.52 +
    1.53 +	const unsigned char *end = buf + len;
    1.54 +	for (; buf != end; ++buf)
    1.55 +		crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
    1.56 +
    1.57 +	crc = ~crc;
    1.58 +	return crc;
    1.59 +}
    1.60 +
    1.61 +/*---------------------------------------------------------------------------*/
    1.62 +
    1.63 +static void
    1.64 +reporterr(int e, const char *fmt, ...)
    1.65 +{
    1.66 +	if (fmt) {
    1.67 +		va_list args;
    1.68 +		va_start(args, fmt);
    1.69 +		vfprintf(stderr, fmt, args);
    1.70 +		va_end(args);
    1.71 +	}
    1.72 +
    1.73 +	exit(e);
    1.74 +}
    1.75 +
    1.76 +static void
    1.77 +split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
    1.78 +{
    1.79 +	int32_t i,j,k,x,tmp,jj,kk;
    1.80 +
    1.81 +	if(len<16) {
    1.82 +		for(k=start;k<start+len;k+=j) {
    1.83 +			j=1;x=V[I[k]+h];
    1.84 +			for(i=1;k+i<start+len;i++) {
    1.85 +				if(V[I[k+i]+h]<x) {
    1.86 +					x=V[I[k+i]+h];
    1.87 +					j=0;
    1.88 +				};
    1.89 +				if(V[I[k+i]+h]==x) {
    1.90 +					tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
    1.91 +					j++;
    1.92 +				};
    1.93 +			};
    1.94 +			for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
    1.95 +			if(j==1) I[k]=-1;
    1.96 +		};
    1.97 +		return;
    1.98 +	};
    1.99 +
   1.100 +	x=V[I[start+len/2]+h];
   1.101 +	jj=0;kk=0;
   1.102 +	for(i=start;i<start+len;i++) {
   1.103 +		if(V[I[i]+h]<x) jj++;
   1.104 +		if(V[I[i]+h]==x) kk++;
   1.105 +	};
   1.106 +	jj+=start;kk+=jj;
   1.107 +
   1.108 +	i=start;j=0;k=0;
   1.109 +	while(i<jj) {
   1.110 +		if(V[I[i]+h]<x) {
   1.111 +			i++;
   1.112 +		} else if(V[I[i]+h]==x) {
   1.113 +			tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
   1.114 +			j++;
   1.115 +		} else {
   1.116 +			tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
   1.117 +			k++;
   1.118 +		};
   1.119 +	};
   1.120 +
   1.121 +	while(jj+j<kk) {
   1.122 +		if(V[I[jj+j]+h]==x) {
   1.123 +			j++;
   1.124 +		} else {
   1.125 +			tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
   1.126 +			k++;
   1.127 +		};
   1.128 +	};
   1.129 +
   1.130 +	if(jj>start) split(I,V,start,jj-start,h);
   1.131 +
   1.132 +	for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
   1.133 +	if(jj==kk-1) I[jj]=-1;
   1.134 +
   1.135 +	if(start+len>kk) split(I,V,kk,start+len-kk,h);
   1.136 +}
   1.137 +
   1.138 +static void
   1.139 +qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
   1.140 +{
   1.141 +	int32_t buckets[256];
   1.142 +	int32_t i,h,len;
   1.143 +
   1.144 +	for(i=0;i<256;i++) buckets[i]=0;
   1.145 +	for(i=0;i<oldsize;i++) buckets[old[i]]++;
   1.146 +	for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
   1.147 +	for(i=255;i>0;i--) buckets[i]=buckets[i-1];
   1.148 +	buckets[0]=0;
   1.149 +
   1.150 +	for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
   1.151 +	I[0]=oldsize;
   1.152 +	for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
   1.153 +	V[oldsize]=0;
   1.154 +	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
   1.155 +	I[0]=-1;
   1.156 +
   1.157 +	for(h=1;I[0]!=-(oldsize+1);h+=h) {
   1.158 +		len=0;
   1.159 +		for(i=0;i<oldsize+1;) {
   1.160 +			if(I[i]<0) {
   1.161 +				len-=I[i];
   1.162 +				i-=I[i];
   1.163 +			} else {
   1.164 +				if(len) I[i-len]=-len;
   1.165 +				len=V[I[i]]+1-i;
   1.166 +				split(I,V,i,len,h);
   1.167 +				i+=len;
   1.168 +				len=0;
   1.169 +			};
   1.170 +		};
   1.171 +		if(len) I[i-len]=-len;
   1.172 +	};
   1.173 +
   1.174 +	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
   1.175 +}
   1.176 +
   1.177 +static int32_t
   1.178 +matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
   1.179 +{
   1.180 +	int32_t i;
   1.181 +
   1.182 +	for(i=0;(i<oldsize)&&(i<newsize);i++)
   1.183 +		if(old[i]!=newbuf[i]) break;
   1.184 +
   1.185 +	return i;
   1.186 +}
   1.187 +
   1.188 +static int32_t
   1.189 +search(int32_t *I,unsigned char *old,int32_t oldsize,
   1.190 +       unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
   1.191 +{
   1.192 +	int32_t x,y;
   1.193 +
   1.194 +	if(en-st<2) {
   1.195 +		x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
   1.196 +		y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
   1.197 +
   1.198 +		if(x>y) {
   1.199 +			*pos=I[st];
   1.200 +			return x;
   1.201 +		} else {
   1.202 +			*pos=I[en];
   1.203 +			return y;
   1.204 +		}
   1.205 +	};
   1.206 +
   1.207 +	x=st+(en-st)/2;
   1.208 +	if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
   1.209 +		return search(I,old,oldsize,newbuf,newsize,x,en,pos);
   1.210 +	} else {
   1.211 +		return search(I,old,oldsize,newbuf,newsize,st,x,pos);
   1.212 +	};
   1.213 +}
   1.214 +
   1.215 +int main(int argc,char *argv[])
   1.216 +{
   1.217 +	int fd;
   1.218 +	unsigned char *old,*newbuf;
   1.219 +	int32_t oldsize,newsize;
   1.220 +	int32_t *I,*V;
   1.221 +
   1.222 +	int32_t scan,pos,len;
   1.223 +	int32_t lastscan,lastpos,lastoffset;
   1.224 +	int32_t oldscore,scsc;
   1.225 +
   1.226 +	int32_t s,Sf,lenf,Sb,lenb;
   1.227 +	int32_t overlap,Ss,lens;
   1.228 +	int32_t i;
   1.229 +
   1.230 +	int32_t dblen,eblen;
   1.231 +	unsigned char *db,*eb;
   1.232 +
   1.233 +	unsigned int scrc;
   1.234 +
   1.235 +	MBSPatchHeader header = {
   1.236 +		{'M','B','D','I','F','F','1','0'},
   1.237 +		0, 0, 0, 0, 0, 0
   1.238 +	};
   1.239 +
   1.240 +	uint32_t numtriples;
   1.241 +
   1.242 +	if(argc!=4)
   1.243 +		reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
   1.244 +
   1.245 +	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
   1.246 +		that we never try to malloc(0) and get a NULL pointer */
   1.247 +	if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
   1.248 +		((oldsize=lseek(fd,0,SEEK_END))==-1) ||
   1.249 +		((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
   1.250 +		(lseek(fd,0,SEEK_SET)!=0) ||
   1.251 +		(read(fd,old,oldsize)!=oldsize) ||
   1.252 +		(close(fd)==-1))
   1.253 +		reporterr(1,"%s\n",argv[1]);
   1.254 +
   1.255 +	scrc = crc32(old, oldsize);
   1.256 +
   1.257 +	if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
   1.258 +		((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
   1.259 +		reporterr(1,NULL);
   1.260 +
   1.261 +	qsufsort(I,V,old,oldsize);
   1.262 +
   1.263 +	free(V);
   1.264 +
   1.265 +	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
   1.266 +		that we never try to malloc(0) and get a NULL pointer */
   1.267 +	if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
   1.268 +		((newsize=lseek(fd,0,SEEK_END))==-1) ||
   1.269 +		((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
   1.270 +		(lseek(fd,0,SEEK_SET)!=0) ||
   1.271 +		(read(fd,newbuf,newsize)!=newsize) ||
   1.272 +		(close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
   1.273 +
   1.274 +	if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
   1.275 +		((eb=(unsigned char*) malloc(newsize+1))==NULL))
   1.276 +		reporterr(1,NULL);
   1.277 +
   1.278 +	dblen=0;
   1.279 +	eblen=0;
   1.280 +
   1.281 +	if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
   1.282 +		reporterr(1,"%s\n",argv[3]);
   1.283 +
   1.284 +	/* start writing here */
   1.285 +
   1.286 +	/* We don't know the lengths yet, so we will write the header again
   1.287 +		 at the end */
   1.288 +
   1.289 +	if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
   1.290 +		reporterr(1,"%s\n",argv[3]);
   1.291 +
   1.292 +	scan=0;len=0;
   1.293 +	lastscan=0;lastpos=0;lastoffset=0;
   1.294 +	numtriples = 0;
   1.295 +	while(scan<newsize) {
   1.296 +		oldscore=0;
   1.297 +
   1.298 +		for(scsc=scan+=len;scan<newsize;scan++) {
   1.299 +			len=search(I,old,oldsize,newbuf+scan,newsize-scan,
   1.300 +					0,oldsize,&pos);
   1.301 +
   1.302 +			for(;scsc<scan+len;scsc++)
   1.303 +			if((scsc+lastoffset<oldsize) &&
   1.304 +				(old[scsc+lastoffset] == newbuf[scsc]))
   1.305 +				oldscore++;
   1.306 +
   1.307 +			if(((len==oldscore) && (len!=0)) || 
   1.308 +				(len>oldscore+8)) break;
   1.309 +
   1.310 +			if((scan+lastoffset<oldsize) &&
   1.311 +				(old[scan+lastoffset] == newbuf[scan]))
   1.312 +				oldscore--;
   1.313 +		};
   1.314 +
   1.315 +		if((len!=oldscore) || (scan==newsize)) {
   1.316 +			MBSPatchTriple triple;
   1.317 +
   1.318 +			s=0;Sf=0;lenf=0;
   1.319 +			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
   1.320 +				if(old[lastpos+i]==newbuf[lastscan+i]) s++;
   1.321 +				i++;
   1.322 +				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
   1.323 +			};
   1.324 +
   1.325 +			lenb=0;
   1.326 +			if(scan<newsize) {
   1.327 +				s=0;Sb=0;
   1.328 +				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
   1.329 +					if(old[pos-i]==newbuf[scan-i]) s++;
   1.330 +					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
   1.331 +				};
   1.332 +			};
   1.333 +
   1.334 +			if(lastscan+lenf>scan-lenb) {
   1.335 +				overlap=(lastscan+lenf)-(scan-lenb);
   1.336 +				s=0;Ss=0;lens=0;
   1.337 +				for(i=0;i<overlap;i++) {
   1.338 +					if(newbuf[lastscan+lenf-overlap+i]==
   1.339 +					   old[lastpos+lenf-overlap+i]) s++;
   1.340 +					if(newbuf[scan-lenb+i]==
   1.341 +					   old[pos-lenb+i]) s--;
   1.342 +					if(s>Ss) { Ss=s; lens=i+1; };
   1.343 +				};
   1.344 +
   1.345 +				lenf+=lens-overlap;
   1.346 +				lenb-=lens;
   1.347 +			};
   1.348 +
   1.349 +			for(i=0;i<lenf;i++)
   1.350 +				db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
   1.351 +			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
   1.352 +				eb[eblen+i]=newbuf[lastscan+lenf+i];
   1.353 +
   1.354 +			dblen+=lenf;
   1.355 +			eblen+=(scan-lenb)-(lastscan+lenf);
   1.356 +
   1.357 +			triple.x = htonl(lenf);
   1.358 +			triple.y = htonl((scan-lenb)-(lastscan+lenf));
   1.359 +			triple.z = htonl((pos-lenb)-(lastpos+lenf));
   1.360 +			if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
   1.361 +				reporterr(1,NULL);
   1.362 +
   1.363 +#ifdef DEBUG_bsmedberg
   1.364 +			printf("Writing a block:\n"
   1.365 +			       "	X: %u\n"
   1.366 +			       "	Y: %u\n"
   1.367 +			       "	Z: %i\n",
   1.368 +			       (uint32_t) lenf,
   1.369 +			       (uint32_t) ((scan-lenb)-(lastscan+lenf)),
   1.370 +			       (uint32_t) ((pos-lenb)-(lastpos+lenf)));
   1.371 +#endif
   1.372 +
   1.373 +			++numtriples;
   1.374 +
   1.375 +			lastscan=scan-lenb;
   1.376 +			lastpos=pos-lenb;
   1.377 +			lastoffset=pos-scan;
   1.378 +		};
   1.379 +	};
   1.380 +
   1.381 +	if(write(fd,db,dblen)!=dblen)
   1.382 +		reporterr(1,NULL);
   1.383 +
   1.384 +	if(write(fd,eb,eblen)!=eblen)
   1.385 +		reporterr(1,NULL);
   1.386 +
   1.387 +	header.slen	= htonl(oldsize);
   1.388 +	header.scrc32	= htonl(scrc);
   1.389 +	header.dlen	= htonl(newsize);
   1.390 +	header.cblen	= htonl(numtriples * sizeof(MBSPatchTriple));
   1.391 +	header.difflen	= htonl(dblen);
   1.392 +	header.extralen = htonl(eblen);
   1.393 +
   1.394 +	if (lseek(fd,0,SEEK_SET) == -1 ||
   1.395 +	    write(fd,&header,sizeof(header)) != sizeof(header) ||
   1.396 +	    close(fd) == -1)
   1.397 +		reporterr(1,NULL);
   1.398 +
   1.399 +	free(db);
   1.400 +	free(eb);
   1.401 +	free(I);
   1.402 +	free(old);
   1.403 +	free(newbuf);
   1.404 +
   1.405 +	return 0;
   1.406 +}
   1.407 +

mercurial