blob: 870c66100e52b377ac421704150d05d7324d574f [file] [log] [blame]
/*
* Copyright (c) 2005, Joe Desbonnet, (jdesbonnet@gmail.com)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the <organization> nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY <copyright holder> ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package ie.wombat.jbdiff;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;
/**
* Java Binary Diff utility. Based on bsdiff (v4.2) by Colin Percival (see
* http://www.daemonology.net/bsdiff/ ) and distributed under BSD license.
*
* <p>
* Running this on large files will probably require an increase of the default
* maximum heap size (use java -Xmx200m)
* </p>
*
* @author Joe Desbonnet, joe@galway.net
*
*/
public class JBDiff {
// JBDiff extensions by Stefan.Liebig@compeople.de:
//
// - uses GZIP compressor to compress ALL of the blocks (ctrl,diff,extra).
// - added interfaces that allows using of JBDiff with streams and byte
// arrays.
private static final String VERSION = "jbdiff-0.1.0.1";
// This is ´jbdiff40´.
private static final byte[] MAGIC_BYTES = new byte[] { 0x6a, 0x62, 0x64,
0x69, 0x66, 0x66, 0x34, 0x30 };
private final static void split(int[] I, int[] V, int start, int len, int h) {
int i, j, k, x, tmp, jj, kk;
if (len < 16) {
for (k = start; k < start + len; k += j) {
j = 1;
x = V[I[k] + h];
for (i = 1; k + i < start + len; i++) {
if (V[I[k + i] + h] < x) {
x = V[I[k + i] + h];
j = 0;
}
if (V[I[k + i] + h] == x) {
tmp = I[k + j];
I[k + j] = I[k + i];
I[k + i] = tmp;
j++;
}
}
for (i = 0; i < j; i++) {
V[I[k + i]] = k + j - 1;
}
if (j == 1) {
I[k] = -1;
}
}
return;
}
x = V[I[start + len / 2] + h];
jj = 0;
kk = 0;
for (i = start; i < start + len; i++) {
if (V[I[i] + h] < x) {
jj++;
}
if (V[I[i] + h] == x) {
kk++;
}
}
jj += start;
kk += jj;
i = start;
j = 0;
k = 0;
while (i < jj) {
if (V[I[i] + h] < x) {
i++;
} else if (V[I[i] + h] == x) {
tmp = I[i];
I[i] = I[jj + j];
I[jj + j] = tmp;
j++;
} else {
tmp = I[i];
I[i] = I[kk + k];
I[kk + k] = tmp;
k++;
}
}
while (jj + j < kk) {
if (V[I[jj + j] + h] == x) {
j++;
} else {
tmp = I[jj + j];
I[jj + j] = I[kk + k];
I[kk + k] = tmp;
k++;
}
}
if (jj > start) {
split(I, V, start, jj - start, h);
}
for (i = 0; i < kk - jj; i++) {
V[I[jj + i]] = kk - 1;
}
if (jj == kk - 1) {
I[jj] = -1;
}
if (start + len > kk) {
split(I, V, kk, start + len - kk, h);
}
}
/**
* Fast suffix sporting. Larsson and Sadakane's qsufsort algorithm. See
* http://www.cs.lth.se/Research/Algorithms/Papers/jesper5.ps
*
* @param I
* @param V
* @param oldBuf
* @param oldsize
*/
private static void qsufsort(int[] I, int[] V, byte[] oldBuf, int oldsize) {
// int oldsize = oldBuf.length;
int[] buckets = new int[256];
// No need to do that in Java.
// for ( int i = 0; i < 256; i++ ) {
// buckets[i] = 0;
// }
for (int i = 0; i < oldsize; i++) {
buckets[oldBuf[i] & 0xff]++;
}
for (int i = 1; i < 256; i++) {
buckets[i] += buckets[i - 1];
}
for (int i = 255; i > 0; i--) {
buckets[i] = buckets[i - 1];
}
buckets[0] = 0;
for (int i = 0; i < oldsize; i++) {
I[++buckets[oldBuf[i] & 0xff]] = i;
}
I[0] = oldsize;
for (int i = 0; i < oldsize; i++) {
V[i] = buckets[oldBuf[i] & 0xff];
}
V[oldsize] = 0;
for (int i = 1; i < 256; i++) {
if (buckets[i] == buckets[i - 1] + 1) {
I[buckets[i]] = -1;
}
}
I[0] = -1;
for (int h = 1; I[0] != -(oldsize + 1); h += h) {
int len = 0;
int i;
for (i = 0; i < oldsize + 1;) {
if (I[i] < 0) {
len -= I[i];
i -= I[i];
} else {
// if(len) I[i-len]=-len;
if (len != 0) {
I[i - len] = -len;
}
len = V[I[i]] + 1 - i;
split(I, V, i, len, h);
i += len;
len = 0;
}
}
if (len != 0) {
I[i - len] = -len;
}
}
for (int i = 0; i < oldsize + 1; i++) {
I[V[i]] = i;
}
}
/**
* Count the number of bytes that match in oldBuf (starting at offset
* oldOffset) and newBuf (starting at offset newOffset).
*
* @param oldBuf
* @param oldOffset
* @param newBuf
* @param newOffset
* @return
*/
private final static int matchlen(byte[] oldBuf, int oldSize,
int oldOffset, byte[] newBuf, int newSize, int newOffset) {
// int end = Math
// .min(oldBuf.length - oldOffset, newBuf.length - newOffset);
int end = Math.min(oldSize - oldOffset, newSize - newOffset);
for (int i = 0; i < end; i++) {
if (oldBuf[oldOffset + i] != newBuf[newOffset + i]) {
return i;
}
}
return end;
}
private final static int search(int[] I, byte[] oldBuf, int oldSize,
byte[] newBuf, int newSize, int newBufOffset, int start, int end,
IntByRef pos) {
if (end - start < 2) {
int x = matchlen(oldBuf, oldSize, I[start], newBuf, newSize,
newBufOffset);
int y = matchlen(oldBuf, oldSize, I[end], newBuf, newSize,
newBufOffset);
if (x > y) {
pos.value = I[start];
return x;
} else {
pos.value = I[end];
return y;
}
}
int x = start + (end - start) / 2;
if (Util.memcmp(oldBuf, oldSize, I[x], newBuf, newSize, newBufOffset) < 0) {
return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, x,
end, pos);
} else {
return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset,
start, x, pos);
}
}
/**
* @param oldFile
* @param newFile
* @param diffFile
* @throws IOException
*/
public static void bsdiff(File oldFile, File newFile, File diffFile)
throws IOException {
InputStream oldInputStream = new BufferedInputStream(
new FileInputStream(oldFile));
InputStream newInputStream = new BufferedInputStream(
new FileInputStream(newFile));
OutputStream diffOutputStream = new FileOutputStream(diffFile);
byte[] diffBytes = bsdiff(oldInputStream, (int) oldFile.length(),
newInputStream, (int) newFile.length());
diffOutputStream.write(diffBytes);
diffOutputStream.close();
}
/**
* @param oldInputStream
* @param oldsize
* @param newInputStream
* @param newsize
* @return
* @throws IOException
*/
public static byte[] bsdiff(InputStream oldInputStream, int oldsize,
InputStream newInputStream, int newsize) throws IOException {
byte[] oldBuf = new byte[oldsize];
Util.readFromStream(oldInputStream, oldBuf, 0, oldsize);
oldInputStream.close();
byte[] newBuf = new byte[newsize];
Util.readFromStream(newInputStream, newBuf, 0, newsize);
newInputStream.close();
return bsdiff(oldBuf, oldsize, newBuf, newsize);
}
/**
* @param oldBuf
* @param oldsize
* @param newBuf
* @param newsize
* @return
* @throws IOException
*/
public static byte[] bsdiff(byte[] oldBuf, int oldsize, byte[] newBuf,
int newsize) throws IOException {
int[] I = new int[oldsize + 1];
qsufsort(I, new int[oldsize + 1], oldBuf, oldsize);
// diff block
int dblen = 0;
byte[] db = new byte[newsize];
// extra block
int eblen = 0;
byte[] eb = new byte[newsize];
/*
* Diff file is composed as follows:
*
* Header (32 bytes) Data (from offset 32 to end of file)
*
* Header: Offset 0, length 8 bytes: file magic "jbdiff40" Offset 8,
* length 8 bytes: length of compressed ctrl block Offset 16, length 8
* bytes: length of compressed diff block Offset 24, length 8 bytes:
* length of new file
*
* Data: 32 (length ctrlBlockLen): ctrlBlock (bzip2) 32+ctrlBlockLen
* (length diffBlockLen): diffBlock (bzip2) 32+ctrlBlockLen+diffBlockLen
* (to end of file): extraBlock (bzip2)
*
* ctrlBlock comprises a set of records, each record 12 bytes. A record
* comprises 3 x 32 bit integers. The ctrlBlock is not compressed.
*/
ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
DataOutputStream diffOut = new DataOutputStream(byteOut);
/*
* Write as much of header as we have now. Size of ctrlBlock and
* diffBlock must be filled in later.
*/
diffOut.write(MAGIC_BYTES);
diffOut.writeLong(-1); // place holder for ctrlBlockLen
diffOut.writeLong(-1); // place holder for diffBlockLen
diffOut.writeLong(newsize);
diffOut.flush();
GZIPOutputStream bzip2Out = new GZIPOutputStream(diffOut);
DataOutputStream dataOut = new DataOutputStream(bzip2Out);
int oldscore, scsc;
int overlap, Ss, lens;
int i;
int scan = 0;
int len = 0;
int lastscan = 0;
int lastpos = 0;
int lastoffset = 0;
IntByRef pos = new IntByRef();
// int ctrlBlockLen = 0;
while (scan < newsize) {
oldscore = 0;
for (scsc = scan += len; scan < newsize; scan++) {
len = search(I, oldBuf, oldsize, newBuf, newsize, scan, 0,
oldsize, pos);
for (; scsc < scan + len; scsc++) {
if ((scsc + lastoffset < oldsize)
&& (oldBuf[scsc + lastoffset] == newBuf[scsc])) {
oldscore++;
}
}
if (((len == oldscore) && (len != 0)) || (len > oldscore + 8)) {
break;
}
if ((scan + lastoffset < oldsize)
&& (oldBuf[scan + lastoffset] == newBuf[scan])) {
oldscore--;
}
}
if ((len != oldscore) || (scan == newsize)) {
int s = 0;
int Sf = 0;
int lenf = 0;
for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) {
if (oldBuf[lastpos + i] == newBuf[lastscan + i])
s++;
i++;
if (s * 2 - i > Sf * 2 - lenf) {
Sf = s;
lenf = i;
}
}
int lenb = 0;
if (scan < newsize) {
s = 0;
int Sb = 0;
for (i = 1; (scan >= lastscan + i) && (pos.value >= i); i++) {
if (oldBuf[pos.value - i] == newBuf[scan - i])
s++;
if (s * 2 - i > Sb * 2 - lenb) {
Sb = s;
lenb = i;
}
}
}
if (lastscan + lenf > scan - lenb) {
overlap = (lastscan + lenf) - (scan - lenb);
s = 0;
Ss = 0;
lens = 0;
for (i = 0; i < overlap; i++) {
if (newBuf[lastscan + lenf - overlap + i] == oldBuf[lastpos
+ lenf - overlap + i]) {
s++;
}
if (newBuf[scan - lenb + i] == oldBuf[pos.value - lenb
+ i]) {
s--;
}
if (s > Ss) {
Ss = s;
lens = i + 1;
}
}
lenf += lens - overlap;
lenb -= lens;
}
// ? byte casting introduced here -- might affect things
for (i = 0; i < lenf; i++) {
db[dblen + i] = (byte) (newBuf[lastscan + i] - oldBuf[lastpos
+ i]);
}
for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) {
eb[eblen + i] = newBuf[lastscan + lenf + i];
}
dblen += lenf;
eblen += (scan - lenb) - (lastscan + lenf);
/*
* Write control block entry (3 x int)
*/
// diffOut.writeInt( lenf );
// diffOut.writeInt( ( scan - lenb ) - ( lastscan + lenf ) );
// diffOut.writeInt( ( pos[0] - lenb ) - ( lastpos + lenf ) );
// ctrlBlockLen += 12;
dataOut.writeInt(lenf);
dataOut.writeInt((scan - lenb) - (lastscan + lenf));
dataOut.writeInt((pos.value - lenb) - (lastpos + lenf));
lastscan = scan - lenb;
lastpos = pos.value - lenb;
lastoffset = pos.value - scan;
} // end if
} // end while loop
dataOut.flush();
bzip2Out.finish();
// now compressed ctrlBlockLen
int ctrlBlockLen = diffOut.size() - Util.HEADER_SIZE;
// System.err.println( "Diff: ctrlBlockLen=" + ctrlBlockLen );
// GZIPOutputStream gzOut;
/*
* Write diff block
*/
// gzOut = new GZIPOutputStream( diffOut );
bzip2Out = new GZIPOutputStream(diffOut);
bzip2Out.write(db, 0, dblen);
bzip2Out.finish();
bzip2Out.flush();
int diffBlockLen = diffOut.size() - ctrlBlockLen - Util.HEADER_SIZE;
// System.err.println( "Diff: diffBlockLen=" + diffBlockLen );
/*
* Write extra block
*/
// gzOut = new GZIPOutputStream( diffOut );
bzip2Out = new GZIPOutputStream(diffOut);
bzip2Out.write(eb, 0, eblen);
bzip2Out.finish();
bzip2Out.flush();
// long extraBlockLen = diffOut.size() - diffBlockLen - ctrlBlockLen -
// HEADER_SIZE;
// System.err.println( "Diff: extraBlockLen=" + extraBlockLen );
diffOut.close();
/*
* Write missing header info.
*/
ByteArrayOutputStream byteHeaderOut = new ByteArrayOutputStream(
Util.HEADER_SIZE);
DataOutputStream headerOut = new DataOutputStream(byteHeaderOut);
headerOut.write(MAGIC_BYTES);
headerOut.writeLong(ctrlBlockLen); // place holder for ctrlBlockLen
headerOut.writeLong(diffBlockLen); // place holder for diffBlockLen
headerOut.writeLong(newsize);
headerOut.close();
// Copy header information into the diff
byte[] diffBytes = byteOut.toByteArray();
byte[] headerBytes = byteHeaderOut.toByteArray();
System.arraycopy(headerBytes, 0, diffBytes, 0, headerBytes.length);
return diffBytes;
// /*
// * Write missing header info. Need to reopen the file with
// RandomAccessFile
// * for this.
// */
// RandomAccessFile diff = new RandomAccessFile( diffFile, "rw" );
// diff.seek( 8 );
// diff.writeLong( ctrlBlockLen ); // ctrlBlockLen (compressed) @offset
// 8
// diff.writeLong( diffBlockLen ); // diffBlockLen (compressed) @offset
// 16
// diff.close();
}
/**
* Run JBDiff from the command line. Params: oldfile newfile difffile. diff
* file will be created.
*
* @param arg
* @throws IOException
*/
public static void main(String[] arg) throws IOException {
if (arg.length != 3) {
System.err
.println("usage example: java -Xmx200m ie.wombat.jbdiff.JBDiff oldfile newfile patchfile\n");
return;
}
File oldFile = new File(arg[0]);
File newFile = new File(arg[1]);
File diffFile = new File(arg[2]);
bsdiff(oldFile, newFile, diffFile);
}
private static class IntByRef {
private int value;
}
}