other-licenses/bsdiff/bsdiff.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* vim:set ts=8 sw=8 sts=8 noet: */
michael@0 2 /*
michael@0 3 bsdiff.c -- Binary patch generator.
michael@0 4
michael@0 5 Copyright 2003 Colin Percival
michael@0 6
michael@0 7 For the terms under which this work may be distributed, please see
michael@0 8 the adjoining file "LICENSE".
michael@0 9
michael@0 10 ChangeLog:
michael@0 11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
michael@0 12 values throughout.
michael@0 13 --Benjamin Smedberg <benjamin@smedbergs.us>
michael@0 14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
michael@0 15 provided by libbz2.
michael@0 16 --Darin Fisher <darin@meer.net>
michael@0 17 */
michael@0 18
michael@0 19 #include "bspatch.h"
michael@0 20
michael@0 21 #include <stdlib.h>
michael@0 22 #include <stdio.h>
michael@0 23 #include <string.h>
michael@0 24 #include <fcntl.h>
michael@0 25 #include <stdarg.h>
michael@0 26 #ifdef XP_WIN
michael@0 27 #include <io.h>
michael@0 28 #include <winsock2.h>
michael@0 29 #else
michael@0 30 #include <unistd.h>
michael@0 31 #include <arpa/inet.h>
michael@0 32 #define _O_BINARY 0
michael@0 33 #endif
michael@0 34
michael@0 35 #undef MIN
michael@0 36 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
michael@0 37
michael@0 38 /*---------------------------------------------------------------------------*/
michael@0 39
michael@0 40 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just
michael@0 41 * declare it here to avoid including that entire header file.
michael@0 42 */
michael@0 43 extern unsigned int BZ2_crc32Table[256];
michael@0 44
michael@0 45 static unsigned int
michael@0 46 crc32(const unsigned char *buf, unsigned int len)
michael@0 47 {
michael@0 48 unsigned int crc = 0xffffffffL;
michael@0 49
michael@0 50 const unsigned char *end = buf + len;
michael@0 51 for (; buf != end; ++buf)
michael@0 52 crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
michael@0 53
michael@0 54 crc = ~crc;
michael@0 55 return crc;
michael@0 56 }
michael@0 57
michael@0 58 /*---------------------------------------------------------------------------*/
michael@0 59
michael@0 60 static void
michael@0 61 reporterr(int e, const char *fmt, ...)
michael@0 62 {
michael@0 63 if (fmt) {
michael@0 64 va_list args;
michael@0 65 va_start(args, fmt);
michael@0 66 vfprintf(stderr, fmt, args);
michael@0 67 va_end(args);
michael@0 68 }
michael@0 69
michael@0 70 exit(e);
michael@0 71 }
michael@0 72
michael@0 73 static void
michael@0 74 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
michael@0 75 {
michael@0 76 int32_t i,j,k,x,tmp,jj,kk;
michael@0 77
michael@0 78 if(len<16) {
michael@0 79 for(k=start;k<start+len;k+=j) {
michael@0 80 j=1;x=V[I[k]+h];
michael@0 81 for(i=1;k+i<start+len;i++) {
michael@0 82 if(V[I[k+i]+h]<x) {
michael@0 83 x=V[I[k+i]+h];
michael@0 84 j=0;
michael@0 85 };
michael@0 86 if(V[I[k+i]+h]==x) {
michael@0 87 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
michael@0 88 j++;
michael@0 89 };
michael@0 90 };
michael@0 91 for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
michael@0 92 if(j==1) I[k]=-1;
michael@0 93 };
michael@0 94 return;
michael@0 95 };
michael@0 96
michael@0 97 x=V[I[start+len/2]+h];
michael@0 98 jj=0;kk=0;
michael@0 99 for(i=start;i<start+len;i++) {
michael@0 100 if(V[I[i]+h]<x) jj++;
michael@0 101 if(V[I[i]+h]==x) kk++;
michael@0 102 };
michael@0 103 jj+=start;kk+=jj;
michael@0 104
michael@0 105 i=start;j=0;k=0;
michael@0 106 while(i<jj) {
michael@0 107 if(V[I[i]+h]<x) {
michael@0 108 i++;
michael@0 109 } else if(V[I[i]+h]==x) {
michael@0 110 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
michael@0 111 j++;
michael@0 112 } else {
michael@0 113 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
michael@0 114 k++;
michael@0 115 };
michael@0 116 };
michael@0 117
michael@0 118 while(jj+j<kk) {
michael@0 119 if(V[I[jj+j]+h]==x) {
michael@0 120 j++;
michael@0 121 } else {
michael@0 122 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
michael@0 123 k++;
michael@0 124 };
michael@0 125 };
michael@0 126
michael@0 127 if(jj>start) split(I,V,start,jj-start,h);
michael@0 128
michael@0 129 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
michael@0 130 if(jj==kk-1) I[jj]=-1;
michael@0 131
michael@0 132 if(start+len>kk) split(I,V,kk,start+len-kk,h);
michael@0 133 }
michael@0 134
michael@0 135 static void
michael@0 136 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
michael@0 137 {
michael@0 138 int32_t buckets[256];
michael@0 139 int32_t i,h,len;
michael@0 140
michael@0 141 for(i=0;i<256;i++) buckets[i]=0;
michael@0 142 for(i=0;i<oldsize;i++) buckets[old[i]]++;
michael@0 143 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
michael@0 144 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
michael@0 145 buckets[0]=0;
michael@0 146
michael@0 147 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
michael@0 148 I[0]=oldsize;
michael@0 149 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
michael@0 150 V[oldsize]=0;
michael@0 151 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
michael@0 152 I[0]=-1;
michael@0 153
michael@0 154 for(h=1;I[0]!=-(oldsize+1);h+=h) {
michael@0 155 len=0;
michael@0 156 for(i=0;i<oldsize+1;) {
michael@0 157 if(I[i]<0) {
michael@0 158 len-=I[i];
michael@0 159 i-=I[i];
michael@0 160 } else {
michael@0 161 if(len) I[i-len]=-len;
michael@0 162 len=V[I[i]]+1-i;
michael@0 163 split(I,V,i,len,h);
michael@0 164 i+=len;
michael@0 165 len=0;
michael@0 166 };
michael@0 167 };
michael@0 168 if(len) I[i-len]=-len;
michael@0 169 };
michael@0 170
michael@0 171 for(i=0;i<oldsize+1;i++) I[V[i]]=i;
michael@0 172 }
michael@0 173
michael@0 174 static int32_t
michael@0 175 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
michael@0 176 {
michael@0 177 int32_t i;
michael@0 178
michael@0 179 for(i=0;(i<oldsize)&&(i<newsize);i++)
michael@0 180 if(old[i]!=newbuf[i]) break;
michael@0 181
michael@0 182 return i;
michael@0 183 }
michael@0 184
michael@0 185 static int32_t
michael@0 186 search(int32_t *I,unsigned char *old,int32_t oldsize,
michael@0 187 unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
michael@0 188 {
michael@0 189 int32_t x,y;
michael@0 190
michael@0 191 if(en-st<2) {
michael@0 192 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
michael@0 193 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
michael@0 194
michael@0 195 if(x>y) {
michael@0 196 *pos=I[st];
michael@0 197 return x;
michael@0 198 } else {
michael@0 199 *pos=I[en];
michael@0 200 return y;
michael@0 201 }
michael@0 202 };
michael@0 203
michael@0 204 x=st+(en-st)/2;
michael@0 205 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
michael@0 206 return search(I,old,oldsize,newbuf,newsize,x,en,pos);
michael@0 207 } else {
michael@0 208 return search(I,old,oldsize,newbuf,newsize,st,x,pos);
michael@0 209 };
michael@0 210 }
michael@0 211
michael@0 212 int main(int argc,char *argv[])
michael@0 213 {
michael@0 214 int fd;
michael@0 215 unsigned char *old,*newbuf;
michael@0 216 int32_t oldsize,newsize;
michael@0 217 int32_t *I,*V;
michael@0 218
michael@0 219 int32_t scan,pos,len;
michael@0 220 int32_t lastscan,lastpos,lastoffset;
michael@0 221 int32_t oldscore,scsc;
michael@0 222
michael@0 223 int32_t s,Sf,lenf,Sb,lenb;
michael@0 224 int32_t overlap,Ss,lens;
michael@0 225 int32_t i;
michael@0 226
michael@0 227 int32_t dblen,eblen;
michael@0 228 unsigned char *db,*eb;
michael@0 229
michael@0 230 unsigned int scrc;
michael@0 231
michael@0 232 MBSPatchHeader header = {
michael@0 233 {'M','B','D','I','F','F','1','0'},
michael@0 234 0, 0, 0, 0, 0, 0
michael@0 235 };
michael@0 236
michael@0 237 uint32_t numtriples;
michael@0 238
michael@0 239 if(argc!=4)
michael@0 240 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
michael@0 241
michael@0 242 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
michael@0 243 that we never try to malloc(0) and get a NULL pointer */
michael@0 244 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
michael@0 245 ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
michael@0 246 ((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
michael@0 247 (lseek(fd,0,SEEK_SET)!=0) ||
michael@0 248 (read(fd,old,oldsize)!=oldsize) ||
michael@0 249 (close(fd)==-1))
michael@0 250 reporterr(1,"%s\n",argv[1]);
michael@0 251
michael@0 252 scrc = crc32(old, oldsize);
michael@0 253
michael@0 254 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
michael@0 255 ((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
michael@0 256 reporterr(1,NULL);
michael@0 257
michael@0 258 qsufsort(I,V,old,oldsize);
michael@0 259
michael@0 260 free(V);
michael@0 261
michael@0 262 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
michael@0 263 that we never try to malloc(0) and get a NULL pointer */
michael@0 264 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
michael@0 265 ((newsize=lseek(fd,0,SEEK_END))==-1) ||
michael@0 266 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
michael@0 267 (lseek(fd,0,SEEK_SET)!=0) ||
michael@0 268 (read(fd,newbuf,newsize)!=newsize) ||
michael@0 269 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
michael@0 270
michael@0 271 if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
michael@0 272 ((eb=(unsigned char*) malloc(newsize+1))==NULL))
michael@0 273 reporterr(1,NULL);
michael@0 274
michael@0 275 dblen=0;
michael@0 276 eblen=0;
michael@0 277
michael@0 278 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
michael@0 279 reporterr(1,"%s\n",argv[3]);
michael@0 280
michael@0 281 /* start writing here */
michael@0 282
michael@0 283 /* We don't know the lengths yet, so we will write the header again
michael@0 284 at the end */
michael@0 285
michael@0 286 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
michael@0 287 reporterr(1,"%s\n",argv[3]);
michael@0 288
michael@0 289 scan=0;len=0;
michael@0 290 lastscan=0;lastpos=0;lastoffset=0;
michael@0 291 numtriples = 0;
michael@0 292 while(scan<newsize) {
michael@0 293 oldscore=0;
michael@0 294
michael@0 295 for(scsc=scan+=len;scan<newsize;scan++) {
michael@0 296 len=search(I,old,oldsize,newbuf+scan,newsize-scan,
michael@0 297 0,oldsize,&pos);
michael@0 298
michael@0 299 for(;scsc<scan+len;scsc++)
michael@0 300 if((scsc+lastoffset<oldsize) &&
michael@0 301 (old[scsc+lastoffset] == newbuf[scsc]))
michael@0 302 oldscore++;
michael@0 303
michael@0 304 if(((len==oldscore) && (len!=0)) ||
michael@0 305 (len>oldscore+8)) break;
michael@0 306
michael@0 307 if((scan+lastoffset<oldsize) &&
michael@0 308 (old[scan+lastoffset] == newbuf[scan]))
michael@0 309 oldscore--;
michael@0 310 };
michael@0 311
michael@0 312 if((len!=oldscore) || (scan==newsize)) {
michael@0 313 MBSPatchTriple triple;
michael@0 314
michael@0 315 s=0;Sf=0;lenf=0;
michael@0 316 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
michael@0 317 if(old[lastpos+i]==newbuf[lastscan+i]) s++;
michael@0 318 i++;
michael@0 319 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
michael@0 320 };
michael@0 321
michael@0 322 lenb=0;
michael@0 323 if(scan<newsize) {
michael@0 324 s=0;Sb=0;
michael@0 325 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
michael@0 326 if(old[pos-i]==newbuf[scan-i]) s++;
michael@0 327 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
michael@0 328 };
michael@0 329 };
michael@0 330
michael@0 331 if(lastscan+lenf>scan-lenb) {
michael@0 332 overlap=(lastscan+lenf)-(scan-lenb);
michael@0 333 s=0;Ss=0;lens=0;
michael@0 334 for(i=0;i<overlap;i++) {
michael@0 335 if(newbuf[lastscan+lenf-overlap+i]==
michael@0 336 old[lastpos+lenf-overlap+i]) s++;
michael@0 337 if(newbuf[scan-lenb+i]==
michael@0 338 old[pos-lenb+i]) s--;
michael@0 339 if(s>Ss) { Ss=s; lens=i+1; };
michael@0 340 };
michael@0 341
michael@0 342 lenf+=lens-overlap;
michael@0 343 lenb-=lens;
michael@0 344 };
michael@0 345
michael@0 346 for(i=0;i<lenf;i++)
michael@0 347 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
michael@0 348 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
michael@0 349 eb[eblen+i]=newbuf[lastscan+lenf+i];
michael@0 350
michael@0 351 dblen+=lenf;
michael@0 352 eblen+=(scan-lenb)-(lastscan+lenf);
michael@0 353
michael@0 354 triple.x = htonl(lenf);
michael@0 355 triple.y = htonl((scan-lenb)-(lastscan+lenf));
michael@0 356 triple.z = htonl((pos-lenb)-(lastpos+lenf));
michael@0 357 if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
michael@0 358 reporterr(1,NULL);
michael@0 359
michael@0 360 #ifdef DEBUG_bsmedberg
michael@0 361 printf("Writing a block:\n"
michael@0 362 " X: %u\n"
michael@0 363 " Y: %u\n"
michael@0 364 " Z: %i\n",
michael@0 365 (uint32_t) lenf,
michael@0 366 (uint32_t) ((scan-lenb)-(lastscan+lenf)),
michael@0 367 (uint32_t) ((pos-lenb)-(lastpos+lenf)));
michael@0 368 #endif
michael@0 369
michael@0 370 ++numtriples;
michael@0 371
michael@0 372 lastscan=scan-lenb;
michael@0 373 lastpos=pos-lenb;
michael@0 374 lastoffset=pos-scan;
michael@0 375 };
michael@0 376 };
michael@0 377
michael@0 378 if(write(fd,db,dblen)!=dblen)
michael@0 379 reporterr(1,NULL);
michael@0 380
michael@0 381 if(write(fd,eb,eblen)!=eblen)
michael@0 382 reporterr(1,NULL);
michael@0 383
michael@0 384 header.slen = htonl(oldsize);
michael@0 385 header.scrc32 = htonl(scrc);
michael@0 386 header.dlen = htonl(newsize);
michael@0 387 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple));
michael@0 388 header.difflen = htonl(dblen);
michael@0 389 header.extralen = htonl(eblen);
michael@0 390
michael@0 391 if (lseek(fd,0,SEEK_SET) == -1 ||
michael@0 392 write(fd,&header,sizeof(header)) != sizeof(header) ||
michael@0 393 close(fd) == -1)
michael@0 394 reporterr(1,NULL);
michael@0 395
michael@0 396 free(db);
michael@0 397 free(eb);
michael@0 398 free(I);
michael@0 399 free(old);
michael@0 400 free(newbuf);
michael@0 401
michael@0 402 return 0;
michael@0 403 }
michael@0 404

mercurial