Use a bucket sort for PackReverseIndex.

Previously it took 1200ms to create a reverse index (sorted by offset).
Using a simple bucket sort algorithm, that time is reduced to 450ms.
The bucket index into the offset array is kept, in order to decrease
the binary search window.

Don't keep a copy of the offsets. Instead, use nth position
to lookup the offset in the PackIndex.

Change-Id: If51ab76752622e04a4430d9a14db95ad02f5329d
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java
index c5aa5d3..fe9f700 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java
@@ -44,7 +44,6 @@
 package org.eclipse.jgit.internal.storage.file;
 
 import java.text.MessageFormat;
-import java.util.Arrays;
 
 import org.eclipse.jgit.errors.CorruptObjectException;
 import org.eclipse.jgit.internal.JGitText;
@@ -65,21 +64,22 @@
 	/** Index we were created from, and that has our ObjectId data. */
 	private final PackIndex index;
 
-	/**
-	 * (offset31, truly) Offsets accommodating in 31 bits.
-	 */
-	private final int offsets32[];
+	/** The number of bytes per entry in the offsetIndex. */
+	private final long bucketSize;
 
 	/**
-	 * Offsets not accommodating in 31 bits.
+	 * An index into the nth mapping, where the value is the position after the
+	 * the last index that contains the values of the bucket. For example given
+	 * offset o (and bucket = o / bucketSize), the offset will be contained in
+	 * the range nth[offsetIndex[bucket - 1]] inclusive to
+	 * nth[offsetIndex[bucket]] exclusive.
+	 *
+	 * See {@link #binarySearch}
 	 */
-	private final long offsets64[];
+	private final int[] offsetIndex;
 
-	/** Position of the corresponding {@link #offsets32} in {@link #index}. */
-	private final int nth32[];
-
-	/** Position of the corresponding {@link #offsets64} in {@link #index}. */
-	private final int nth64[];
+	/** Mapping from indices in offset order to indices in SHA-1 order. */
+	private final int[] nth;
 
 	/**
 	 * Create reverse index from straight/forward pack index, by indexing all
@@ -92,38 +92,58 @@
 		index = packIndex;
 
 		final long cnt = index.getObjectCount();
-		final long n64 = index.getOffset64Count();
-		final long n32 = cnt - n64;
-		if (n32 > Integer.MAX_VALUE || n64 > Integer.MAX_VALUE
-				|| cnt > 0xffffffffL)
+		if (cnt + 1 > Integer.MAX_VALUE)
 			throw new IllegalArgumentException(
 					JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
 
-		offsets32 = new int[(int) n32];
-		offsets64 = new long[(int) n64];
-		nth32 = new int[offsets32.length];
-		nth64 = new int[offsets64.length];
-
-		int i32 = 0;
-		int i64 = 0;
-		for (final MutableEntry me : index) {
-			final long o = me.getOffset();
-			if (o <= Integer.MAX_VALUE)
-				offsets32[i32++] = (int) o;
-			else
-				offsets64[i64++] = o;
+		if (cnt == 0) {
+			bucketSize = Long.MAX_VALUE;
+			offsetIndex = new int[1];
+			nth = new int[0];
+			return;
 		}
 
-		Arrays.sort(offsets32);
-		Arrays.sort(offsets64);
+		final long[] offsetsBySha1 = new long[(int) cnt];
 
-		int nth = 0;
+		long maxOffset = 0;
+		int ith = 0;
 		for (final MutableEntry me : index) {
 			final long o = me.getOffset();
-			if (o <= Integer.MAX_VALUE)
-				nth32[Arrays.binarySearch(offsets32, (int) o)] = nth++;
-			else
-				nth64[Arrays.binarySearch(offsets64, o)] = nth++;
+			offsetsBySha1[ith++] = o;
+			if (o > maxOffset)
+				maxOffset = o;
+		}
+
+		bucketSize = maxOffset / cnt + 1;
+		int[] bucketIndex = new int[(int) cnt];
+		int[] bucketValues = new int[(int) cnt + 1];
+		for (int oi = 0; oi < offsetsBySha1.length; oi++) {
+			final long o = offsetsBySha1[oi];
+			final int bucket = (int) (o / bucketSize);
+			final int bucketValuesPos = oi + 1;
+			final int current = bucketIndex[bucket];
+			bucketIndex[bucket] = bucketValuesPos;
+			bucketValues[bucketValuesPos] = current;
+		}
+
+		int nthByOffset = 0;
+		nth = new int[offsetsBySha1.length];
+		offsetIndex = bucketIndex; // Reuse the allocation
+		for (int bi = 0; bi < bucketIndex.length; bi++) {
+			final int start = nthByOffset;
+			// Insertion sort of the values in the bucket.
+			for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
+				final int nthBySha1 = vi - 1;
+				final long o = offsetsBySha1[nthBySha1];
+				int insertion = nthByOffset++;
+				for (; start < insertion; insertion--) {
+					if (o > offsetsBySha1[nth[insertion - 1]])
+						break;
+					nth[insertion] = nth[insertion - 1];
+				}
+				nth[insertion] = nthBySha1;
+			}
+			offsetIndex[bi] = nthByOffset;
 		}
 	}
 
@@ -136,17 +156,10 @@
 	 * @return object id for this offset, or null if no object was found.
 	 */
 	public ObjectId findObject(final long offset) {
-		if (offset <= Integer.MAX_VALUE) {
-			final int i32 = Arrays.binarySearch(offsets32, (int) offset);
-			if (i32 < 0)
-				return null;
-			return index.getObjectId(nth32[i32]);
-		} else {
-			final int i64 = Arrays.binarySearch(offsets64, offset);
-			if (i64 < 0)
-				return null;
-			return index.getObjectId(nth64[i64]);
-		}
+		final int ith = binarySearch(offset);
+		if (ith < 0)
+			return null;
+		return index.getObjectId(nth[ith]);
 	}
 
 	/**
@@ -166,52 +179,40 @@
 	 */
 	public long findNextOffset(final long offset, final long maxOffset)
 			throws CorruptObjectException {
-		if (offset <= Integer.MAX_VALUE) {
-			final int i32 = Arrays.binarySearch(offsets32, (int) offset);
-			if (i32 < 0)
-				throw new CorruptObjectException(
-						MessageFormat.format(
-								JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
-								Long.valueOf(offset)));
+		final int ith = binarySearch(offset);
+		if (ith < 0)
+			throw new CorruptObjectException(
+					MessageFormat.format(
+							JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
+							Long.valueOf(offset)));
 
-			if (i32 + 1 == offsets32.length) {
-				if (offsets64.length > 0)
-					return offsets64[0];
-				return maxOffset;
-			}
-			return offsets32[i32 + 1];
-		} else {
-			final int i64 = Arrays.binarySearch(offsets64, offset);
-			if (i64 < 0)
-				throw new CorruptObjectException(
-						MessageFormat.format(
-								JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
-								Long.valueOf(offset)));
-
-			if (i64 + 1 == offsets64.length)
-				return maxOffset;
-			return offsets64[i64 + 1];
-		}
+		if (ith + 1 == nth.length)
+			return maxOffset;
+		return index.getOffset(nth[ith + 1]);
 	}
 
 	int findPostion(long offset) {
-		if (offset <= Integer.MAX_VALUE) {
-			final int i32 = Arrays.binarySearch(offsets32, (int) offset);
-			if (i32 < 0)
-				return -1;
-			return i32;
-		} else {
-			final int i64 = Arrays.binarySearch(offsets64, offset);
-			if (i64 < 0)
-				return -1;
-			return nth32.length + i64;
+		return binarySearch(offset);
+	}
+
+	private int binarySearch(final long offset) {
+		int bucket = (int) (offset / bucketSize);
+		int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
+		int high = offsetIndex[bucket];
+		while (low < high) {
+			final int mid = (low + high) >>> 1;
+			final long o = index.getOffset(nth[mid]);
+			if (offset < o)
+				high = mid;
+			else if (offset == o)
+				return mid;
+			else
+				low = mid + 1;
 		}
+		return -1;
 	}
 
 	ObjectId findObjectByPosition(int nthPosition) {
-		if (nthPosition < nth32.length)
-			return index.getObjectId(nth32[nthPosition]);
-		final int i64 = nthPosition - nth32.length;
-		return index.getObjectId(nth64[i64]);
+		return index.getObjectId(nth[nthPosition]);
 	}
 }