|
1 /* vim:set ts=8 sw=8 sts=8 noet: */ |
|
2 /* |
|
3 bsdiff.c -- Binary patch generator. |
|
4 |
|
5 Copyright 2003 Colin Percival |
|
6 |
|
7 For the terms under which this work may be distributed, please see |
|
8 the adjoining file "LICENSE". |
|
9 |
|
10 ChangeLog: |
|
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
|
12 values throughout. |
|
13 --Benjamin Smedberg <benjamin@smedbergs.us> |
|
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table |
|
15 provided by libbz2. |
|
16 --Darin Fisher <darin@meer.net> |
|
17 */ |
|
18 |
|
19 #include "bspatch.h" |
|
20 |
|
21 #include <stdlib.h> |
|
22 #include <stdio.h> |
|
23 #include <string.h> |
|
24 #include <fcntl.h> |
|
25 #include <stdarg.h> |
|
26 #ifdef XP_WIN |
|
27 #include <io.h> |
|
28 #include <winsock2.h> |
|
29 #else |
|
30 #include <unistd.h> |
|
31 #include <arpa/inet.h> |
|
32 #define _O_BINARY 0 |
|
33 #endif |
|
34 |
|
35 #undef MIN |
|
36 #define MIN(x,y) (((x)<(y)) ? (x) : (y)) |
|
37 |
|
38 /*---------------------------------------------------------------------------*/ |
|
39 |
|
40 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just |
|
41 * declare it here to avoid including that entire header file. |
|
42 */ |
|
43 extern unsigned int BZ2_crc32Table[256]; |
|
44 |
|
45 static unsigned int |
|
46 crc32(const unsigned char *buf, unsigned int len) |
|
47 { |
|
48 unsigned int crc = 0xffffffffL; |
|
49 |
|
50 const unsigned char *end = buf + len; |
|
51 for (; buf != end; ++buf) |
|
52 crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf]; |
|
53 |
|
54 crc = ~crc; |
|
55 return crc; |
|
56 } |
|
57 |
|
58 /*---------------------------------------------------------------------------*/ |
|
59 |
|
60 static void |
|
61 reporterr(int e, const char *fmt, ...) |
|
62 { |
|
63 if (fmt) { |
|
64 va_list args; |
|
65 va_start(args, fmt); |
|
66 vfprintf(stderr, fmt, args); |
|
67 va_end(args); |
|
68 } |
|
69 |
|
70 exit(e); |
|
71 } |
|
72 |
|
73 static void |
|
74 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h) |
|
75 { |
|
76 int32_t i,j,k,x,tmp,jj,kk; |
|
77 |
|
78 if(len<16) { |
|
79 for(k=start;k<start+len;k+=j) { |
|
80 j=1;x=V[I[k]+h]; |
|
81 for(i=1;k+i<start+len;i++) { |
|
82 if(V[I[k+i]+h]<x) { |
|
83 x=V[I[k+i]+h]; |
|
84 j=0; |
|
85 }; |
|
86 if(V[I[k+i]+h]==x) { |
|
87 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; |
|
88 j++; |
|
89 }; |
|
90 }; |
|
91 for(i=0;i<j;i++) V[I[k+i]]=k+j-1; |
|
92 if(j==1) I[k]=-1; |
|
93 }; |
|
94 return; |
|
95 }; |
|
96 |
|
97 x=V[I[start+len/2]+h]; |
|
98 jj=0;kk=0; |
|
99 for(i=start;i<start+len;i++) { |
|
100 if(V[I[i]+h]<x) jj++; |
|
101 if(V[I[i]+h]==x) kk++; |
|
102 }; |
|
103 jj+=start;kk+=jj; |
|
104 |
|
105 i=start;j=0;k=0; |
|
106 while(i<jj) { |
|
107 if(V[I[i]+h]<x) { |
|
108 i++; |
|
109 } else if(V[I[i]+h]==x) { |
|
110 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; |
|
111 j++; |
|
112 } else { |
|
113 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; |
|
114 k++; |
|
115 }; |
|
116 }; |
|
117 |
|
118 while(jj+j<kk) { |
|
119 if(V[I[jj+j]+h]==x) { |
|
120 j++; |
|
121 } else { |
|
122 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; |
|
123 k++; |
|
124 }; |
|
125 }; |
|
126 |
|
127 if(jj>start) split(I,V,start,jj-start,h); |
|
128 |
|
129 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; |
|
130 if(jj==kk-1) I[jj]=-1; |
|
131 |
|
132 if(start+len>kk) split(I,V,kk,start+len-kk,h); |
|
133 } |
|
134 |
|
135 static void |
|
136 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize) |
|
137 { |
|
138 int32_t buckets[256]; |
|
139 int32_t i,h,len; |
|
140 |
|
141 for(i=0;i<256;i++) buckets[i]=0; |
|
142 for(i=0;i<oldsize;i++) buckets[old[i]]++; |
|
143 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; |
|
144 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; |
|
145 buckets[0]=0; |
|
146 |
|
147 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; |
|
148 I[0]=oldsize; |
|
149 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; |
|
150 V[oldsize]=0; |
|
151 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; |
|
152 I[0]=-1; |
|
153 |
|
154 for(h=1;I[0]!=-(oldsize+1);h+=h) { |
|
155 len=0; |
|
156 for(i=0;i<oldsize+1;) { |
|
157 if(I[i]<0) { |
|
158 len-=I[i]; |
|
159 i-=I[i]; |
|
160 } else { |
|
161 if(len) I[i-len]=-len; |
|
162 len=V[I[i]]+1-i; |
|
163 split(I,V,i,len,h); |
|
164 i+=len; |
|
165 len=0; |
|
166 }; |
|
167 }; |
|
168 if(len) I[i-len]=-len; |
|
169 }; |
|
170 |
|
171 for(i=0;i<oldsize+1;i++) I[V[i]]=i; |
|
172 } |
|
173 |
|
174 static int32_t |
|
175 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize) |
|
176 { |
|
177 int32_t i; |
|
178 |
|
179 for(i=0;(i<oldsize)&&(i<newsize);i++) |
|
180 if(old[i]!=newbuf[i]) break; |
|
181 |
|
182 return i; |
|
183 } |
|
184 |
|
185 static int32_t |
|
186 search(int32_t *I,unsigned char *old,int32_t oldsize, |
|
187 unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos) |
|
188 { |
|
189 int32_t x,y; |
|
190 |
|
191 if(en-st<2) { |
|
192 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); |
|
193 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); |
|
194 |
|
195 if(x>y) { |
|
196 *pos=I[st]; |
|
197 return x; |
|
198 } else { |
|
199 *pos=I[en]; |
|
200 return y; |
|
201 } |
|
202 }; |
|
203 |
|
204 x=st+(en-st)/2; |
|
205 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) { |
|
206 return search(I,old,oldsize,newbuf,newsize,x,en,pos); |
|
207 } else { |
|
208 return search(I,old,oldsize,newbuf,newsize,st,x,pos); |
|
209 }; |
|
210 } |
|
211 |
|
212 int main(int argc,char *argv[]) |
|
213 { |
|
214 int fd; |
|
215 unsigned char *old,*newbuf; |
|
216 int32_t oldsize,newsize; |
|
217 int32_t *I,*V; |
|
218 |
|
219 int32_t scan,pos,len; |
|
220 int32_t lastscan,lastpos,lastoffset; |
|
221 int32_t oldscore,scsc; |
|
222 |
|
223 int32_t s,Sf,lenf,Sb,lenb; |
|
224 int32_t overlap,Ss,lens; |
|
225 int32_t i; |
|
226 |
|
227 int32_t dblen,eblen; |
|
228 unsigned char *db,*eb; |
|
229 |
|
230 unsigned int scrc; |
|
231 |
|
232 MBSPatchHeader header = { |
|
233 {'M','B','D','I','F','F','1','0'}, |
|
234 0, 0, 0, 0, 0, 0 |
|
235 }; |
|
236 |
|
237 uint32_t numtriples; |
|
238 |
|
239 if(argc!=4) |
|
240 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]); |
|
241 |
|
242 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure |
|
243 that we never try to malloc(0) and get a NULL pointer */ |
|
244 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) || |
|
245 ((oldsize=lseek(fd,0,SEEK_END))==-1) || |
|
246 ((old=(unsigned char*) malloc(oldsize+1))==NULL) || |
|
247 (lseek(fd,0,SEEK_SET)!=0) || |
|
248 (read(fd,old,oldsize)!=oldsize) || |
|
249 (close(fd)==-1)) |
|
250 reporterr(1,"%s\n",argv[1]); |
|
251 |
|
252 scrc = crc32(old, oldsize); |
|
253 |
|
254 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) || |
|
255 ((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL)) |
|
256 reporterr(1,NULL); |
|
257 |
|
258 qsufsort(I,V,old,oldsize); |
|
259 |
|
260 free(V); |
|
261 |
|
262 /* Allocate newsize+1 bytes instead of newsize bytes to ensure |
|
263 that we never try to malloc(0) and get a NULL pointer */ |
|
264 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) || |
|
265 ((newsize=lseek(fd,0,SEEK_END))==-1) || |
|
266 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) || |
|
267 (lseek(fd,0,SEEK_SET)!=0) || |
|
268 (read(fd,newbuf,newsize)!=newsize) || |
|
269 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]); |
|
270 |
|
271 if(((db=(unsigned char*) malloc(newsize+1))==NULL) || |
|
272 ((eb=(unsigned char*) malloc(newsize+1))==NULL)) |
|
273 reporterr(1,NULL); |
|
274 |
|
275 dblen=0; |
|
276 eblen=0; |
|
277 |
|
278 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0) |
|
279 reporterr(1,"%s\n",argv[3]); |
|
280 |
|
281 /* start writing here */ |
|
282 |
|
283 /* We don't know the lengths yet, so we will write the header again |
|
284 at the end */ |
|
285 |
|
286 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader)) |
|
287 reporterr(1,"%s\n",argv[3]); |
|
288 |
|
289 scan=0;len=0; |
|
290 lastscan=0;lastpos=0;lastoffset=0; |
|
291 numtriples = 0; |
|
292 while(scan<newsize) { |
|
293 oldscore=0; |
|
294 |
|
295 for(scsc=scan+=len;scan<newsize;scan++) { |
|
296 len=search(I,old,oldsize,newbuf+scan,newsize-scan, |
|
297 0,oldsize,&pos); |
|
298 |
|
299 for(;scsc<scan+len;scsc++) |
|
300 if((scsc+lastoffset<oldsize) && |
|
301 (old[scsc+lastoffset] == newbuf[scsc])) |
|
302 oldscore++; |
|
303 |
|
304 if(((len==oldscore) && (len!=0)) || |
|
305 (len>oldscore+8)) break; |
|
306 |
|
307 if((scan+lastoffset<oldsize) && |
|
308 (old[scan+lastoffset] == newbuf[scan])) |
|
309 oldscore--; |
|
310 }; |
|
311 |
|
312 if((len!=oldscore) || (scan==newsize)) { |
|
313 MBSPatchTriple triple; |
|
314 |
|
315 s=0;Sf=0;lenf=0; |
|
316 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) { |
|
317 if(old[lastpos+i]==newbuf[lastscan+i]) s++; |
|
318 i++; |
|
319 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }; |
|
320 }; |
|
321 |
|
322 lenb=0; |
|
323 if(scan<newsize) { |
|
324 s=0;Sb=0; |
|
325 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { |
|
326 if(old[pos-i]==newbuf[scan-i]) s++; |
|
327 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }; |
|
328 }; |
|
329 }; |
|
330 |
|
331 if(lastscan+lenf>scan-lenb) { |
|
332 overlap=(lastscan+lenf)-(scan-lenb); |
|
333 s=0;Ss=0;lens=0; |
|
334 for(i=0;i<overlap;i++) { |
|
335 if(newbuf[lastscan+lenf-overlap+i]== |
|
336 old[lastpos+lenf-overlap+i]) s++; |
|
337 if(newbuf[scan-lenb+i]== |
|
338 old[pos-lenb+i]) s--; |
|
339 if(s>Ss) { Ss=s; lens=i+1; }; |
|
340 }; |
|
341 |
|
342 lenf+=lens-overlap; |
|
343 lenb-=lens; |
|
344 }; |
|
345 |
|
346 for(i=0;i<lenf;i++) |
|
347 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i]; |
|
348 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) |
|
349 eb[eblen+i]=newbuf[lastscan+lenf+i]; |
|
350 |
|
351 dblen+=lenf; |
|
352 eblen+=(scan-lenb)-(lastscan+lenf); |
|
353 |
|
354 triple.x = htonl(lenf); |
|
355 triple.y = htonl((scan-lenb)-(lastscan+lenf)); |
|
356 triple.z = htonl((pos-lenb)-(lastpos+lenf)); |
|
357 if (write(fd,&triple,sizeof(triple)) != sizeof(triple)) |
|
358 reporterr(1,NULL); |
|
359 |
|
360 #ifdef DEBUG_bsmedberg |
|
361 printf("Writing a block:\n" |
|
362 " X: %u\n" |
|
363 " Y: %u\n" |
|
364 " Z: %i\n", |
|
365 (uint32_t) lenf, |
|
366 (uint32_t) ((scan-lenb)-(lastscan+lenf)), |
|
367 (uint32_t) ((pos-lenb)-(lastpos+lenf))); |
|
368 #endif |
|
369 |
|
370 ++numtriples; |
|
371 |
|
372 lastscan=scan-lenb; |
|
373 lastpos=pos-lenb; |
|
374 lastoffset=pos-scan; |
|
375 }; |
|
376 }; |
|
377 |
|
378 if(write(fd,db,dblen)!=dblen) |
|
379 reporterr(1,NULL); |
|
380 |
|
381 if(write(fd,eb,eblen)!=eblen) |
|
382 reporterr(1,NULL); |
|
383 |
|
384 header.slen = htonl(oldsize); |
|
385 header.scrc32 = htonl(scrc); |
|
386 header.dlen = htonl(newsize); |
|
387 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple)); |
|
388 header.difflen = htonl(dblen); |
|
389 header.extralen = htonl(eblen); |
|
390 |
|
391 if (lseek(fd,0,SEEK_SET) == -1 || |
|
392 write(fd,&header,sizeof(header)) != sizeof(header) || |
|
393 close(fd) == -1) |
|
394 reporterr(1,NULL); |
|
395 |
|
396 free(db); |
|
397 free(eb); |
|
398 free(I); |
|
399 free(old); |
|
400 free(newbuf); |
|
401 |
|
402 return 0; |
|
403 } |
|
404 |