| /* |
| * Copyright (c) 2005, Joe Desbonnet, (jdesbonnet@gmail.com) |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * * Neither the name of the <organization> nor the |
| * names of its contributors may be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY <copyright holder> ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY |
| * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package ie.wombat.jbdiff; |
| |
| import java.io.BufferedInputStream; |
| import java.io.ByteArrayOutputStream; |
| import java.io.DataOutputStream; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.FileOutputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.util.zip.GZIPOutputStream; |
| |
| /** |
| * Java Binary Diff utility. Based on bsdiff (v4.2) by Colin Percival (see |
| * http://www.daemonology.net/bsdiff/ ) and distributed under BSD license. |
| * |
| * <p> |
| * Running this on large files will probably require an increase of the default |
| * maximum heap size (use java -Xmx200m) |
| * </p> |
| * |
| * @author Joe Desbonnet, joe@galway.net |
| * |
| */ |
| public class JBDiff { |
| |
| // JBDiff extensions by Stefan.Liebig@compeople.de: |
| // |
| // - uses GZIP compressor to compress ALL of the blocks (ctrl,diff,extra). |
| // - added interfaces that allows using of JBDiff with streams and byte |
| // arrays. |
| |
| private static final String VERSION = "jbdiff-0.1.0.1"; |
| |
| // This is ´jbdiff40´. |
| private static final byte[] MAGIC_BYTES = new byte[] { 0x6a, 0x62, 0x64, |
| 0x69, 0x66, 0x66, 0x34, 0x30 }; |
| |
| private final static void split(int[] I, int[] V, int start, int len, int h) { |
| |
| int i, j, k, x, tmp, jj, kk; |
| |
| if (len < 16) { |
| for (k = start; k < start + len; k += j) { |
| j = 1; |
| x = V[I[k] + h]; |
| for (i = 1; k + i < start + len; i++) { |
| if (V[I[k + i] + h] < x) { |
| x = V[I[k + i] + h]; |
| j = 0; |
| } |
| |
| if (V[I[k + i] + h] == x) { |
| tmp = I[k + j]; |
| I[k + j] = I[k + i]; |
| I[k + i] = tmp; |
| j++; |
| } |
| |
| } |
| |
| for (i = 0; i < j; i++) { |
| V[I[k + i]] = k + j - 1; |
| } |
| if (j == 1) { |
| I[k] = -1; |
| } |
| } |
| |
| return; |
| } |
| |
| x = V[I[start + len / 2] + h]; |
| jj = 0; |
| kk = 0; |
| for (i = start; i < start + len; i++) { |
| if (V[I[i] + h] < x) { |
| jj++; |
| } |
| if (V[I[i] + h] == x) { |
| kk++; |
| } |
| } |
| |
| jj += start; |
| kk += jj; |
| |
| i = start; |
| j = 0; |
| k = 0; |
| while (i < jj) { |
| if (V[I[i] + h] < x) { |
| i++; |
| } else if (V[I[i] + h] == x) { |
| tmp = I[i]; |
| I[i] = I[jj + j]; |
| I[jj + j] = tmp; |
| j++; |
| } else { |
| tmp = I[i]; |
| I[i] = I[kk + k]; |
| I[kk + k] = tmp; |
| k++; |
| } |
| |
| } |
| |
| while (jj + j < kk) { |
| if (V[I[jj + j] + h] == x) { |
| j++; |
| } else { |
| tmp = I[jj + j]; |
| I[jj + j] = I[kk + k]; |
| I[kk + k] = tmp; |
| k++; |
| } |
| |
| } |
| |
| if (jj > start) { |
| split(I, V, start, jj - start, h); |
| } |
| |
| for (i = 0; i < kk - jj; i++) { |
| V[I[jj + i]] = kk - 1; |
| } |
| |
| if (jj == kk - 1) { |
| I[jj] = -1; |
| } |
| |
| if (start + len > kk) { |
| split(I, V, kk, start + len - kk, h); |
| } |
| |
| } |
| |
| /** |
| * Fast suffix sporting. Larsson and Sadakane's qsufsort algorithm. See |
| * http://www.cs.lth.se/Research/Algorithms/Papers/jesper5.ps |
| * |
| * @param I |
| * @param V |
| * @param oldBuf |
| * @param oldsize |
| */ |
| private static void qsufsort(int[] I, int[] V, byte[] oldBuf, int oldsize) { |
| |
| // int oldsize = oldBuf.length; |
| int[] buckets = new int[256]; |
| |
| // No need to do that in Java. |
| // for ( int i = 0; i < 256; i++ ) { |
| // buckets[i] = 0; |
| // } |
| |
| for (int i = 0; i < oldsize; i++) { |
| buckets[oldBuf[i] & 0xff]++; |
| } |
| |
| for (int i = 1; i < 256; i++) { |
| buckets[i] += buckets[i - 1]; |
| } |
| |
| for (int i = 255; i > 0; i--) { |
| buckets[i] = buckets[i - 1]; |
| } |
| |
| buckets[0] = 0; |
| |
| for (int i = 0; i < oldsize; i++) { |
| I[++buckets[oldBuf[i] & 0xff]] = i; |
| } |
| |
| I[0] = oldsize; |
| for (int i = 0; i < oldsize; i++) { |
| V[i] = buckets[oldBuf[i] & 0xff]; |
| } |
| V[oldsize] = 0; |
| |
| for (int i = 1; i < 256; i++) { |
| if (buckets[i] == buckets[i - 1] + 1) { |
| I[buckets[i]] = -1; |
| } |
| } |
| |
| I[0] = -1; |
| |
| for (int h = 1; I[0] != -(oldsize + 1); h += h) { |
| int len = 0; |
| int i; |
| for (i = 0; i < oldsize + 1;) { |
| if (I[i] < 0) { |
| len -= I[i]; |
| i -= I[i]; |
| } else { |
| // if(len) I[i-len]=-len; |
| if (len != 0) { |
| I[i - len] = -len; |
| } |
| len = V[I[i]] + 1 - i; |
| split(I, V, i, len, h); |
| i += len; |
| len = 0; |
| } |
| |
| } |
| |
| if (len != 0) { |
| I[i - len] = -len; |
| } |
| } |
| |
| for (int i = 0; i < oldsize + 1; i++) { |
| I[V[i]] = i; |
| } |
| } |
| |
| /** |
| * Count the number of bytes that match in oldBuf (starting at offset |
| * oldOffset) and newBuf (starting at offset newOffset). |
| * |
| * @param oldBuf |
| * @param oldOffset |
| * @param newBuf |
| * @param newOffset |
| * @return |
| */ |
| private final static int matchlen(byte[] oldBuf, int oldSize, |
| int oldOffset, byte[] newBuf, int newSize, int newOffset) { |
| // int end = Math |
| // .min(oldBuf.length - oldOffset, newBuf.length - newOffset); |
| int end = Math.min(oldSize - oldOffset, newSize - newOffset); |
| for (int i = 0; i < end; i++) { |
| if (oldBuf[oldOffset + i] != newBuf[newOffset + i]) { |
| return i; |
| } |
| } |
| return end; |
| } |
| |
| private final static int search(int[] I, byte[] oldBuf, int oldSize, |
| byte[] newBuf, int newSize, int newBufOffset, int start, int end, |
| IntByRef pos) { |
| |
| if (end - start < 2) { |
| int x = matchlen(oldBuf, oldSize, I[start], newBuf, newSize, |
| newBufOffset); |
| int y = matchlen(oldBuf, oldSize, I[end], newBuf, newSize, |
| newBufOffset); |
| |
| if (x > y) { |
| pos.value = I[start]; |
| return x; |
| } else { |
| pos.value = I[end]; |
| return y; |
| } |
| } |
| |
| int x = start + (end - start) / 2; |
| if (Util.memcmp(oldBuf, oldSize, I[x], newBuf, newSize, newBufOffset) < 0) { |
| return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, x, |
| end, pos); |
| } else { |
| return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, |
| start, x, pos); |
| } |
| |
| } |
| |
| /** |
| * @param oldFile |
| * @param newFile |
| * @param diffFile |
| * @throws IOException |
| */ |
| public static void bsdiff(File oldFile, File newFile, File diffFile) |
| throws IOException { |
| InputStream oldInputStream = new BufferedInputStream( |
| new FileInputStream(oldFile)); |
| InputStream newInputStream = new BufferedInputStream( |
| new FileInputStream(newFile)); |
| OutputStream diffOutputStream = new FileOutputStream(diffFile); |
| |
| byte[] diffBytes = bsdiff(oldInputStream, (int) oldFile.length(), |
| newInputStream, (int) newFile.length()); |
| |
| diffOutputStream.write(diffBytes); |
| diffOutputStream.close(); |
| } |
| |
| /** |
| * @param oldInputStream |
| * @param oldsize |
| * @param newInputStream |
| * @param newsize |
| * @return |
| * @throws IOException |
| */ |
| public static byte[] bsdiff(InputStream oldInputStream, int oldsize, |
| InputStream newInputStream, int newsize) throws IOException { |
| |
| byte[] oldBuf = new byte[oldsize]; |
| |
| Util.readFromStream(oldInputStream, oldBuf, 0, oldsize); |
| oldInputStream.close(); |
| |
| byte[] newBuf = new byte[newsize]; |
| Util.readFromStream(newInputStream, newBuf, 0, newsize); |
| newInputStream.close(); |
| |
| return bsdiff(oldBuf, oldsize, newBuf, newsize); |
| } |
| |
| /** |
| * @param oldBuf |
| * @param oldsize |
| * @param newBuf |
| * @param newsize |
| * @return |
| * @throws IOException |
| */ |
| public static byte[] bsdiff(byte[] oldBuf, int oldsize, byte[] newBuf, |
| int newsize) throws IOException { |
| |
| int[] I = new int[oldsize + 1]; |
| qsufsort(I, new int[oldsize + 1], oldBuf, oldsize); |
| |
| // diff block |
| int dblen = 0; |
| byte[] db = new byte[newsize]; |
| |
| // extra block |
| int eblen = 0; |
| byte[] eb = new byte[newsize]; |
| |
| /* |
| * Diff file is composed as follows: |
| * |
| * Header (32 bytes) Data (from offset 32 to end of file) |
| * |
| * Header: Offset 0, length 8 bytes: file magic "jbdiff40" Offset 8, |
| * length 8 bytes: length of compressed ctrl block Offset 16, length 8 |
| * bytes: length of compressed diff block Offset 24, length 8 bytes: |
| * length of new file |
| * |
| * Data: 32 (length ctrlBlockLen): ctrlBlock (bzip2) 32+ctrlBlockLen |
| * (length diffBlockLen): diffBlock (bzip2) 32+ctrlBlockLen+diffBlockLen |
| * (to end of file): extraBlock (bzip2) |
| * |
| * ctrlBlock comprises a set of records, each record 12 bytes. A record |
| * comprises 3 x 32 bit integers. The ctrlBlock is not compressed. |
| */ |
| |
| ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); |
| DataOutputStream diffOut = new DataOutputStream(byteOut); |
| |
| /* |
| * Write as much of header as we have now. Size of ctrlBlock and |
| * diffBlock must be filled in later. |
| */ |
| diffOut.write(MAGIC_BYTES); |
| diffOut.writeLong(-1); // place holder for ctrlBlockLen |
| diffOut.writeLong(-1); // place holder for diffBlockLen |
| diffOut.writeLong(newsize); |
| diffOut.flush(); |
| |
| GZIPOutputStream bzip2Out = new GZIPOutputStream(diffOut); |
| DataOutputStream dataOut = new DataOutputStream(bzip2Out); |
| |
| int oldscore, scsc; |
| |
| int overlap, Ss, lens; |
| int i; |
| int scan = 0; |
| int len = 0; |
| int lastscan = 0; |
| int lastpos = 0; |
| int lastoffset = 0; |
| |
| IntByRef pos = new IntByRef(); |
| // int ctrlBlockLen = 0; |
| |
| while (scan < newsize) { |
| oldscore = 0; |
| |
| for (scsc = scan += len; scan < newsize; scan++) { |
| |
| len = search(I, oldBuf, oldsize, newBuf, newsize, scan, 0, |
| oldsize, pos); |
| |
| for (; scsc < scan + len; scsc++) { |
| if ((scsc + lastoffset < oldsize) |
| && (oldBuf[scsc + lastoffset] == newBuf[scsc])) { |
| oldscore++; |
| } |
| } |
| |
| if (((len == oldscore) && (len != 0)) || (len > oldscore + 8)) { |
| break; |
| } |
| |
| if ((scan + lastoffset < oldsize) |
| && (oldBuf[scan + lastoffset] == newBuf[scan])) { |
| oldscore--; |
| } |
| } |
| |
| if ((len != oldscore) || (scan == newsize)) { |
| int s = 0; |
| int Sf = 0; |
| int lenf = 0; |
| for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) { |
| if (oldBuf[lastpos + i] == newBuf[lastscan + i]) |
| s++; |
| i++; |
| if (s * 2 - i > Sf * 2 - lenf) { |
| Sf = s; |
| lenf = i; |
| } |
| } |
| |
| int lenb = 0; |
| if (scan < newsize) { |
| s = 0; |
| int Sb = 0; |
| for (i = 1; (scan >= lastscan + i) && (pos.value >= i); i++) { |
| if (oldBuf[pos.value - i] == newBuf[scan - i]) |
| s++; |
| if (s * 2 - i > Sb * 2 - lenb) { |
| Sb = s; |
| lenb = i; |
| } |
| } |
| } |
| |
| if (lastscan + lenf > scan - lenb) { |
| overlap = (lastscan + lenf) - (scan - lenb); |
| s = 0; |
| Ss = 0; |
| lens = 0; |
| for (i = 0; i < overlap; i++) { |
| if (newBuf[lastscan + lenf - overlap + i] == oldBuf[lastpos |
| + lenf - overlap + i]) { |
| s++; |
| } |
| if (newBuf[scan - lenb + i] == oldBuf[pos.value - lenb |
| + i]) { |
| s--; |
| } |
| if (s > Ss) { |
| Ss = s; |
| lens = i + 1; |
| } |
| } |
| |
| lenf += lens - overlap; |
| lenb -= lens; |
| } |
| |
| // ? byte casting introduced here -- might affect things |
| for (i = 0; i < lenf; i++) { |
| db[dblen + i] = (byte) (newBuf[lastscan + i] - oldBuf[lastpos |
| + i]); |
| } |
| |
| for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) { |
| eb[eblen + i] = newBuf[lastscan + lenf + i]; |
| } |
| |
| dblen += lenf; |
| eblen += (scan - lenb) - (lastscan + lenf); |
| |
| /* |
| * Write control block entry (3 x int) |
| */ |
| // diffOut.writeInt( lenf ); |
| // diffOut.writeInt( ( scan - lenb ) - ( lastscan + lenf ) ); |
| // diffOut.writeInt( ( pos[0] - lenb ) - ( lastpos + lenf ) ); |
| // ctrlBlockLen += 12; |
| dataOut.writeInt(lenf); |
| dataOut.writeInt((scan - lenb) - (lastscan + lenf)); |
| dataOut.writeInt((pos.value - lenb) - (lastpos + lenf)); |
| |
| lastscan = scan - lenb; |
| lastpos = pos.value - lenb; |
| lastoffset = pos.value - scan; |
| } // end if |
| } // end while loop |
| |
| dataOut.flush(); |
| bzip2Out.finish(); |
| |
| // now compressed ctrlBlockLen |
| int ctrlBlockLen = diffOut.size() - Util.HEADER_SIZE; |
| // System.err.println( "Diff: ctrlBlockLen=" + ctrlBlockLen ); |
| |
| // GZIPOutputStream gzOut; |
| |
| /* |
| * Write diff block |
| */ |
| // gzOut = new GZIPOutputStream( diffOut ); |
| bzip2Out = new GZIPOutputStream(diffOut); |
| bzip2Out.write(db, 0, dblen); |
| bzip2Out.finish(); |
| bzip2Out.flush(); |
| int diffBlockLen = diffOut.size() - ctrlBlockLen - Util.HEADER_SIZE; |
| // System.err.println( "Diff: diffBlockLen=" + diffBlockLen ); |
| |
| /* |
| * Write extra block |
| */ |
| // gzOut = new GZIPOutputStream( diffOut ); |
| bzip2Out = new GZIPOutputStream(diffOut); |
| bzip2Out.write(eb, 0, eblen); |
| bzip2Out.finish(); |
| bzip2Out.flush(); |
| // long extraBlockLen = diffOut.size() - diffBlockLen - ctrlBlockLen - |
| // HEADER_SIZE; |
| // System.err.println( "Diff: extraBlockLen=" + extraBlockLen ); |
| |
| diffOut.close(); |
| |
| /* |
| * Write missing header info. |
| */ |
| ByteArrayOutputStream byteHeaderOut = new ByteArrayOutputStream( |
| Util.HEADER_SIZE); |
| DataOutputStream headerOut = new DataOutputStream(byteHeaderOut); |
| headerOut.write(MAGIC_BYTES); |
| headerOut.writeLong(ctrlBlockLen); // place holder for ctrlBlockLen |
| headerOut.writeLong(diffBlockLen); // place holder for diffBlockLen |
| headerOut.writeLong(newsize); |
| headerOut.close(); |
| |
| // Copy header information into the diff |
| byte[] diffBytes = byteOut.toByteArray(); |
| byte[] headerBytes = byteHeaderOut.toByteArray(); |
| |
| System.arraycopy(headerBytes, 0, diffBytes, 0, headerBytes.length); |
| |
| return diffBytes; |
| // /* |
| // * Write missing header info. Need to reopen the file with |
| // RandomAccessFile |
| // * for this. |
| // */ |
| // RandomAccessFile diff = new RandomAccessFile( diffFile, "rw" ); |
| // diff.seek( 8 ); |
| // diff.writeLong( ctrlBlockLen ); // ctrlBlockLen (compressed) @offset |
| // 8 |
| // diff.writeLong( diffBlockLen ); // diffBlockLen (compressed) @offset |
| // 16 |
| // diff.close(); |
| } |
| |
| /** |
| * Run JBDiff from the command line. Params: oldfile newfile difffile. diff |
| * file will be created. |
| * |
| * @param arg |
| * @throws IOException |
| */ |
| public static void main(String[] arg) throws IOException { |
| |
| if (arg.length != 3) { |
| System.err |
| .println("usage example: java -Xmx200m ie.wombat.jbdiff.JBDiff oldfile newfile patchfile\n"); |
| return; |
| } |
| |
| File oldFile = new File(arg[0]); |
| File newFile = new File(arg[1]); |
| File diffFile = new File(arg[2]); |
| |
| bsdiff(oldFile, newFile, diffFile); |
| |
| } |
| |
| private static class IntByRef { |
| private int value; |
| } |
| } |