/*******************************************************************************
* Copyright (c) 2000, 2007 IBM Corporation and others.
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License v. 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
*******************************************************************************/
package org.eclipse.dltk.core.search.index;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.regex.Pattern;
import org.eclipse.dltk.compiler.CharOperation;
import org.eclipse.dltk.compiler.util.HashtableOfIntValues;
import org.eclipse.dltk.compiler.util.HashtableOfObject;
import org.eclipse.dltk.compiler.util.SimpleLookupTable;
import org.eclipse.dltk.compiler.util.SimpleSet;
import org.eclipse.dltk.core.search.SearchPattern;
import org.eclipse.dltk.core.search.indexing.IIndexConstants;
import org.eclipse.dltk.internal.core.util.Messages;
import org.eclipse.dltk.internal.core.util.SimpleWordSet;
import org.eclipse.dltk.internal.core.util.Util;
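/**
 * Reads and writes the on-disk portion of an index. The file starts with the
 * signature and a header offset, followed by compressed chunks of document
 * names, the category tables (word -> document numbers), and finally the
 * header info itself. Callers must bracket read access so cached chunks and
 * tables stay alive; a typical query sequence looks roughly like this
 * (variable names are illustrative):
 *
 * <pre>
 * diskIndex.startQuery();
 * try {
 *     HashtableOfObject results = diskIndex.addQueryResults(categories, key,
 *             matchRule, memoryIndex); // memoryIndex may be null
 * } finally {
 *     diskIndex.stopQuery();
 * }
 * </pre>
 */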
public class DiskIndex {
String fileName;
private int headerInfoOffset;
private int numberOfChunks;
private int sizeOfLastChunk;
private int[] chunkOffsets;
private int documentReferenceSize; // 1, 2 or more bytes... depends on # of document names
private int startOfCategoryTables;
private HashtableOfIntValues categoryOffsets;
private int cacheUserCount;
private String[][] cachedChunks; // decompressed chunks of document names
private HashtableOfObject categoryTables; // category name -> HashtableOfObject(word -> int[] of document #'s), or offset if not read yet
private char[] cachedCategoryName;
char separator = Index.DEFAULT_SEPARATOR;
public static final String SIGNATURE = "DLTK INDEX VERSION 1.014"; //$NON-NLS-1$
public final static boolean DEBUG = false;
private static final int RE_INDEXED = -1;
private static final int DELETED = -2;
private static final int CHUNK_SIZE = 100;
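// Minimal growable int array used to accumulate document numbers while
// merging; see copyQueryResults() and mergeCategory().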
static class IntList {
int size;
int[] elements;
IntList(int[] elements) {
this.elements = elements;
this.size = elements.length;
}
void add(int newElement) {
if (this.size == this.elements.length) {
int newSize = this.size * 3;
if (newSize < 7)
newSize = 7;
System.arraycopy(this.elements, 0,
this.elements = new int[newSize], 0, this.size);
}
this.elements[this.size++] = newElement;
}
int[] asArray() {
int[] result = new int[this.size];
System.arraycopy(this.elements, 0, result, 0, this.size);
return result;
}
}
DiskIndex(String fileName) {
this.fileName = fileName;
// clear cached items
this.headerInfoOffset = -1;
this.numberOfChunks = -1;
this.sizeOfLastChunk = -1;
this.chunkOffsets = null;
this.documentReferenceSize = -1;
this.cacheUserCount = -1;
this.cachedChunks = null;
this.categoryTables = null;
this.cachedCategoryName = null;
this.categoryOffsets = null;
}
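/**
 * Returns the names of all documents on disk that start with the given
 * substring (all documents if the substring is null), skipping any that
 * have been added, changed, or deleted in the given memory index.
 */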
SimpleSet addDocumentNames(String substring, MemoryIndex memoryIndex)
throws IOException {
// must skip over documents which have been added/changed/deleted in the
// memory index
String[] docNames = readAllDocumentNames();
SimpleSet results = new SimpleSet(docNames.length);
if (substring == null) {
if (memoryIndex == null) {
for (int i = 0, l = docNames.length; i < l; i++)
results.add(docNames[i]);
} else {
SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
for (int i = 0, l = docNames.length; i < l; i++) {
String docName = docNames[i];
if (!docsToRefs.containsKey(docName))
results.add(docName);
}
}
} else {
if (memoryIndex == null) {
for (int i = 0, l = docNames.length; i < l; i++)
if (docNames[i].startsWith(substring, 0))
results.add(docNames[i]);
} else {
SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
for (int i = 0, l = docNames.length; i < l; i++) {
String docName = docNames[i];
if (docName.startsWith(substring, 0)
&& !docsToRefs.containsKey(docName))
results.add(docName);
}
}
}
return results;
}
private HashtableOfObject addQueryResult(HashtableOfObject results,
char[] word, HashtableOfObject wordsToDocNumbers,
MemoryIndex memoryIndex) throws IOException {
// must skip over documents which have been added/changed/deleted in the
// memory index
if (results == null)
results = new HashtableOfObject(13);
EntryResult result = (EntryResult) results.get(word);
if (memoryIndex == null) {
if (result == null)
results.put(word, new EntryResult(word, wordsToDocNumbers));
else
result.addDocumentTable(wordsToDocNumbers);
} else {
SimpleLookupTable docsToRefs = memoryIndex.docsToReferences;
if (result == null)
result = new EntryResult(word, null);
int[] docNumbers = readDocumentNumbers(wordsToDocNumbers.get(word));
for (int i = 0, l = docNumbers.length; i < l; i++) {
String docName = readDocumentName(docNumbers[i]);
if (!docsToRefs.containsKey(docName))
result.addDocumentName(docName);
}
if (!result.isEmpty())
results.put(word, result);
}
return results;
}
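/**
 * Looks up the given key in each category table and collects the matching
 * words and their documents into EntryResults. A null key matches every
 * word; otherwise the match rule selects exact, prefix, regexp, or the
 * generic Index.isMatch() comparison.
 */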
HashtableOfObject addQueryResults(char[][] categories, char[] key,
int matchRule, MemoryIndex memoryIndex) throws IOException {
// assumes sender has called startQuery() & will call stopQuery() when
// finished
if (this.categoryOffsets == null)
return null; // file is empty
HashtableOfObject results = null; // initialized if needed
if (key == null) {
for (int i = 0, l = categories.length; i < l; i++) {
HashtableOfObject wordsToDocNumbers = readCategoryTable(
categories[i], true); // cache if key is null since it's a definite match
if (wordsToDocNumbers != null) {
char[][] words = wordsToDocNumbers.keyTable;
if (results == null)
results = new HashtableOfObject(
wordsToDocNumbers.elementSize);
for (int j = 0, m = words.length; j < m; j++)
if (words[j] != null)
results = addQueryResult(results, words[j],
wordsToDocNumbers, memoryIndex);
}
}
if (results != null && this.cachedChunks == null)
cacheDocumentNames();
} else {
switch (matchRule) {
case SearchPattern.R_EXACT_MATCH | SearchPattern.R_CASE_SENSITIVE:
for (int i = 0, l = categories.length; i < l; i++) {
HashtableOfObject wordsToDocNumbers = readCategoryTable(
categories[i], false);
if (wordsToDocNumbers != null
&& wordsToDocNumbers.containsKey(key))
results = addQueryResult(results, key,
wordsToDocNumbers, memoryIndex);
}
break;
case SearchPattern.R_PREFIX_MATCH | SearchPattern.R_CASE_SENSITIVE:
for (int i = 0, l = categories.length; i < l; i++) {
HashtableOfObject wordsToDocNumbers = readCategoryTable(
categories[i], false);
if (wordsToDocNumbers != null) {
char[][] words = wordsToDocNumbers.keyTable;
for (int j = 0, m = words.length; j < m; j++) {
char[] word = words[j];
if (word != null && key[0] == word[0]
&& CharOperation.prefixEquals(key, word))
results = addQueryResult(results, word,
wordsToDocNumbers, memoryIndex);
}
}
}
break;
case SearchPattern.R_REGEXP_MATCH:
case SearchPattern.R_REGEXP_MATCH | SearchPattern.R_CASE_SENSITIVE:
Pattern regexpPattern = Pattern
.compile(
new String(key),
(matchRule & SearchPattern.R_CASE_SENSITIVE) == 0 ? Pattern.CASE_INSENSITIVE
: 0);
for (int i = 0, l = categories.length; i < l; i++) {
HashtableOfObject wordsToDocNumbers = readCategoryTable(
categories[i], false);
if (wordsToDocNumbers != null) {
char[][] words = wordsToDocNumbers.keyTable;
for (int j = 0, m = words.length; j < m; j++) {
char[] word = words[j];
if (word != null) {
char[] decodedWord;
int sep = CharOperation.indexOf(
IIndexConstants.SEPARATOR, word);
if (sep > 0) {
decodedWord = CharOperation.subarray(word,
0, sep);
} else {
decodedWord = word;
}
if (regexpPattern.matcher(
new String(decodedWord)).matches()) {
results = addQueryResult(results, word,
wordsToDocNumbers, memoryIndex);
}
}
}
}
}
break;
default:
for (int i = 0, l = categories.length; i < l; i++) {
HashtableOfObject wordsToDocNumbers = readCategoryTable(
categories[i], false);
if (wordsToDocNumbers != null) {
char[][] words = wordsToDocNumbers.keyTable;
for (int j = 0, m = words.length; j < m; j++) {
char[] word = words[j];
if (word != null
&& Index.isMatch(key, word, matchRule)) {
results = addQueryResult(results, word,
wordsToDocNumbers, memoryIndex);
}
}
}
}
}
}
return results;
}
private void cacheDocumentNames() throws IOException {
// will need all document names so get them now
this.cachedChunks = new String[this.numberOfChunks][];
DataInputStream stream = new DataInputStream(new BufferedInputStream(
new FileInputStream(getIndexFile()),
this.numberOfChunks > 5 ? 4096 : 2048));
try {
stream.skip(this.chunkOffsets[0]);
for (int i = 0; i < this.numberOfChunks; i++) {
int size = i == this.numberOfChunks - 1 ? this.sizeOfLastChunk
: CHUNK_SIZE;
readChunk(this.cachedChunks[i] = new String[size], stream, 0,
size);
}
} finally {
stream.close();
}
}
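// Maps every on-disk document to its position in the merged, sorted name
// list. For example (illustrative): on-disk names {"a", "b", "c"} where
// "b" was deleted and "d" was added in memory yield positions
// {0, DELETED, 1} and the new names {"a", "c", "d"}.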
private String[] computeDocumentNames(String[] onDiskNames,
int[] positions, SimpleLookupTable indexedDocuments,
MemoryIndex memoryIndex) {
int onDiskLength = onDiskNames.length;
Object[] docNames = memoryIndex.docsToReferences.keyTable;
Object[] referenceTables = memoryIndex.docsToReferences.valueTable;
if (onDiskLength == 0) {
// disk index was empty, so add every indexed document
for (int i = 0, l = referenceTables.length; i < l; i++)
if (referenceTables[i] != null)
indexedDocuments.put(docNames[i], null); // remember each new document
String[] newDocNames = new String[indexedDocuments.elementSize];
int count = 0;
Object[] added = indexedDocuments.keyTable;
for (int i = 0, l = added.length; i < l; i++)
if (added[i] != null)
newDocNames[count++] = (String) added[i];
Util.sort(newDocNames);
for (int i = 0, l = newDocNames.length; i < l; i++)
indexedDocuments.put(newDocNames[i], i);
return newDocNames;
}
// initialize positions as if each document will remain in the same
// position
for (int i = 0; i < onDiskLength; i++)
positions[i] = i;
// find out if the memory index has any new or deleted documents, if not
// then the names & positions are the same
int numDeletedDocNames = 0;
int numReindexedDocNames = 0;
nextPath: for (int i = 0, l = docNames.length; i < l; i++) {
String docName = (String) docNames[i];
if (docName != null) {
for (int j = 0; j < onDiskLength; j++) {
if (docName.equals(onDiskNames[j])) {
if (referenceTables[i] == null) {
positions[j] = DELETED;
numDeletedDocNames++;
} else {
positions[j] = RE_INDEXED;
numReindexedDocNames++;
}
continue nextPath;
}
}
if (referenceTables[i] != null)
indexedDocuments.put(docName, null); // remember each new document; skip deleted documents which were never saved
}
}
String[] newDocNames = onDiskNames;
if (numDeletedDocNames > 0 || indexedDocuments.elementSize > 0) {
// some new documents have been added or some old ones deleted
newDocNames = new String[onDiskLength
+ indexedDocuments.elementSize - numDeletedDocNames];
int count = 0;
for (int i = 0; i < onDiskLength; i++)
if (positions[i] >= RE_INDEXED)
newDocNames[count++] = onDiskNames[i]; // keep each unchanged document
Object[] added = indexedDocuments.keyTable;
Object[] added = indexedDocuments.keyTable;
for (int i = 0, l = added.length; i < l; i++)
if (added[i] != null)
newDocNames[count++] = (String) added[i]; // add each new document
Util.sort(newDocNames);
for (int i = 0, l = newDocNames.length; i < l; i++)
if (indexedDocuments.containsKey(newDocNames[i]))
indexedDocuments.put(newDocNames[i], i); // remember the position for each new document
}
// need to be able to look up an old position (ref# from a ref[]) and
// map it to its new position
// if its old position == DELETED then it's forgotten
// if its old position == RE_INDEXED then it's also forgotten, but its
// new position is needed to map references
int count = -1;
for (int i = 0; i < onDiskLength;) {
switch (positions[i]) {
case DELETED:
i++; // skip over deleted... references are forgotten
break;
case RE_INDEXED:
String newName = newDocNames[++count];
if (newName.equals(onDiskNames[i])) {
indexedDocuments.put(newName, count); // the re-indexed docName that was at position i is now at position count
i++;
}
break;
default:
if (newDocNames[++count].equals(onDiskNames[i]))
positions[i++] = count; // the unchanged docName that was at position i is now at position count
}
}
return newDocNames;
}
private void copyQueryResults(HashtableOfObject categoryToWords,
int newPosition) {
char[][] categoryNames = categoryToWords.keyTable;
Object[] wordSets = categoryToWords.valueTable;
for (int i = 0, l = categoryNames.length; i < l; i++) {
char[] categoryName = categoryNames[i];
if (categoryName != null) {
SimpleWordSet wordSet = (SimpleWordSet) wordSets[i];
HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables
.get(categoryName);
if (wordsToDocs == null)
this.categoryTables.put(categoryName,
wordsToDocs = new HashtableOfObject(
wordSet.elementSize));
char[][] words = wordSet.words;
for (int j = 0, m = words.length; j < m; j++) {
char[] word = words[j];
if (word != null) {
Object o = wordsToDocs.get(word);
if (o == null) {
wordsToDocs.put(word, new int[] { newPosition });
} else if (o instanceof IntList) {
((IntList) o).add(newPosition);
} else {
IntList list = new IntList((int[]) o);
list.add(newPosition);
wordsToDocs.put(word, list);
}
}
}
}
}
}
File getIndexFile() {
if (this.fileName == null)
return null;
return new File(this.fileName);
}
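/**
 * Opens an existing index file and reads its header info, or (when the
 * file is missing or reuse is not requested) creates a fresh empty index
 * containing only the signature and an empty header offset.
 */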
void initialize(boolean reuseExistingFile) throws IOException {
File indexFile = getIndexFile();
if (indexFile.exists()) {
if (reuseExistingFile) {
RandomAccessFile file = new RandomAccessFile(this.fileName, "r"); //$NON-NLS-1$
try {
String signature = file.readUTF();
if (!signature.equals(SIGNATURE))
throw new IOException(Messages.exception_wrongFormat);
this.headerInfoOffset = file.readInt();
if (this.headerInfoOffset > 0) // file is empty if it's not set
readHeaderInfo(file);
} finally {
file.close();
}
return;
}
if (!indexFile.delete()) {
if (DEBUG)
System.out
.println("initialize - Failed to delete index " + this.fileName); //$NON-NLS-1$
throw new IOException("Failed to delete index " + this.fileName); //$NON-NLS-1$
}
}
if (indexFile.createNewFile()) {
RandomAccessFile file = new RandomAccessFile(this.fileName, "rw"); //$NON-NLS-1$
try {
file.writeUTF(SIGNATURE);
file.writeInt(-1); // file is empty
} finally {
file.close();
}
} else {
if (DEBUG)
System.out
.println("initialize - Failed to create new index " + this.fileName); //$NON-NLS-1$
throw new IOException("Failed to create new index " + this.fileName); //$NON-NLS-1$
}
}
private void initializeFrom(DiskIndex diskIndex, File newIndexFile)
throws IOException {
if (newIndexFile.exists() && !newIndexFile.delete()) { // delete the temporary index file
if (DEBUG)
System.out
.println("initializeFrom - Failed to delete temp index " + this.fileName); //$NON-NLS-1$
} else if (!newIndexFile.createNewFile()) {
if (DEBUG)
System.out
.println("initializeFrom - Failed to create temp index " + this.fileName); //$NON-NLS-1$
throw new IOException(
"Failed to create temp index " + this.fileName); //$NON-NLS-1$
}
int size = diskIndex.categoryOffsets == null ? 8
: diskIndex.categoryOffsets.elementSize;
this.categoryOffsets = new HashtableOfIntValues(size);
this.categoryTables = new HashtableOfObject(size);
this.separator = diskIndex.separator;
}
private void mergeCategories(DiskIndex onDisk, int[] positions,
DataOutputStream stream) throws IOException {
// at this point, this.categoryTables contains the names -> wordsToDocs
// added in copyQueryResults()
char[][] oldNames = onDisk.categoryOffsets.keyTable;
for (int i = 0, l = oldNames.length; i < l; i++) {
char[] oldName = oldNames[i];
if (oldName != null && !this.categoryTables.containsKey(oldName))
this.categoryTables.put(oldName, null);
}
char[][] categoryNames = this.categoryTables.keyTable;
for (int i = 0, l = categoryNames.length; i < l; i++)
if (categoryNames[i] != null)
mergeCategory(categoryNames[i], onDisk, positions, stream);
this.categoryTables = null;
}
private void mergeCategory(char[] categoryName, DiskIndex onDisk,
int[] positions, DataOutputStream stream) throws IOException {
HashtableOfObject wordsToDocs = (HashtableOfObject) this.categoryTables
.get(categoryName);
if (wordsToDocs == null)
wordsToDocs = new HashtableOfObject(3);
HashtableOfObject oldWordsToDocs = onDisk.readCategoryTable(
categoryName, true);
if (oldWordsToDocs != null) {
char[][] oldWords = oldWordsToDocs.keyTable;
Object[] oldArrayOffsets = oldWordsToDocs.valueTable;
nextWord: for (int i = 0, l = oldWords.length; i < l; i++) {
char[] oldWord = oldWords[i];
if (oldWord != null) {
int[] oldDocNumbers = (int[]) oldArrayOffsets[i];
int length = oldDocNumbers.length;
int[] mappedNumbers = new int[length];
int count = 0;
for (int j = 0; j < length; j++) {
int pos = positions[oldDocNumbers[j]];
if (pos > RE_INDEXED) // forget any reference to a document which was deleted or re-indexed
mappedNumbers[count++] = pos;
}
if (count < length) {
if (count == 0)
continue nextWord; // skip words which no longer have any references
System.arraycopy(mappedNumbers, 0,
mappedNumbers = new int[count], 0, count);
}
Object o = wordsToDocs.get(oldWord);
if (o == null) {
wordsToDocs.put(oldWord, mappedNumbers);
} else {
IntList list = null;
if (o instanceof IntList) {
list = (IntList) o;
} else {
list = new IntList((int[]) o);
wordsToDocs.put(oldWord, list);
}
for (int j = 0; j < count; j++)
list.add(mappedNumbers[j]);
}
}
}
onDisk.categoryTables.put(categoryName, null); // flush cached table
}
writeCategoryTable(categoryName, wordsToDocs, stream);
}
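/**
 * Merges the in-memory index into this one by streaming a new index file:
 * writes the merged document names, copies the per-document results of
 * each new/changed document, merges each category table, then writes the
 * header and replaces the old file with the new one via delete + rename.
 */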
DiskIndex mergeWith(MemoryIndex memoryIndex) throws IOException {
// assume write lock is held
// compute & write out new docNames
String[] docNames = readAllDocumentNames();
int previousLength = docNames.length;
int[] positions = new int[previousLength]; // keeps track of the position of each document in the new sorted docNames
SimpleLookupTable indexedDocuments = new SimpleLookupTable(3); // for each new/changed document in the memoryIndex
docNames = computeDocumentNames(docNames, positions, indexedDocuments,
memoryIndex);
if (docNames.length == 0) {
if (previousLength == 0)
return this; // nothing to do... memory index contained deleted documents that had never been saved
// index is now empty since all the saved documents were removed
DiskIndex newDiskIndex = new DiskIndex(this.fileName);
newDiskIndex.initialize(false);
return newDiskIndex;
}
DiskIndex newDiskIndex = new DiskIndex(this.fileName + ".tmp"); //$NON-NLS-1$
File newIndexFile = newDiskIndex.getIndexFile();
try {
newDiskIndex.initializeFrom(this, newIndexFile);
DataOutputStream stream = new DataOutputStream(
new BufferedOutputStream(new FileOutputStream(newIndexFile,
false), 2048));
int offsetToHeader = -1;
try {
newDiskIndex.writeAllDocumentNames(docNames, stream);
docNames = null; // free up the space
// add each new/changed doc to empty category tables using its
// new position #
if (indexedDocuments.elementSize > 0) {
Object[] names = indexedDocuments.keyTable;
Object[] integerPositions = indexedDocuments.valueTable;
for (int i = 0, l = names.length; i < l; i++)
if (names[i] != null)
newDiskIndex
.copyQueryResults(
(HashtableOfObject) memoryIndex.docsToReferences
.get(names[i]),
((Integer) integerPositions[i])
.intValue());
}
indexedDocuments = null; // free up the space
// merge each category table with the new ones & write them out
if (previousLength == 0)
newDiskIndex.writeCategories(stream);
else
newDiskIndex.mergeCategories(this, positions, stream);
offsetToHeader = stream.size();
newDiskIndex.writeHeaderInfo(stream);
positions = null; // free up the space
} finally {
stream.close();
}
newDiskIndex.writeOffsetToHeader(offsetToHeader);
// rename file by deleting previous index file & renaming temp one
File old = getIndexFile();
if (old.exists() && !old.delete()) {
if (DEBUG)
System.out
.println("mergeWith - Failed to delete " + this.fileName); //$NON-NLS-1$
throw new IOException(
"Failed to delete index file " + this.fileName); //$NON-NLS-1$
}
if (!newIndexFile.renameTo(old)) {
if (DEBUG)
System.out
.println("mergeWith - Failed to rename " + this.fileName); //$NON-NLS-1$
throw new IOException(
"Failed to rename index file " + this.fileName); //$NON-NLS-1$
}
} catch (IOException e) {
if (newIndexFile.exists() && !newIndexFile.delete()) {
if (DEBUG)
System.out
.println("mergeWith - Failed to delete temp index " + newDiskIndex.fileName); //$NON-NLS-1$
}
throw e;
}
newDiskIndex.fileName = this.fileName;
return newDiskIndex;
}
private synchronized String[] readAllDocumentNames() throws IOException {
if (this.numberOfChunks <= 0)
return new String[0];
DataInputStream stream = new DataInputStream(new BufferedInputStream(
new FileInputStream(getIndexFile()),
this.numberOfChunks > 5 ? 4096 : 2048));
try {
stream.skip(this.chunkOffsets[0]);
int lastIndex = this.numberOfChunks - 1;
String[] docNames = new String[lastIndex * CHUNK_SIZE
+ sizeOfLastChunk];
for (int i = 0; i < this.numberOfChunks; i++)
readChunk(docNames, stream, i * CHUNK_SIZE,
i < lastIndex ? CHUNK_SIZE : sizeOfLastChunk);
return docNames;
} finally {
stream.close();
}
}
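/**
 * Reads the table for the given category from disk (or returns the cached
 * copy) as word -> int[] of document numbers. Large arrays (>= 256
 * elements) are stored outside the table; their entries hold an Integer
 * file offset until readDocNumbers forces them to be resolved.
 */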
private synchronized HashtableOfObject readCategoryTable(
char[] categoryName, boolean readDocNumbers) throws IOException {
// result will be null if categoryName is unknown
int offset = this.categoryOffsets.get(categoryName);
if (offset == HashtableOfIntValues.NO_VALUE)
return null;
if (this.categoryTables == null) {
this.categoryTables = new HashtableOfObject(3);
} else {
HashtableOfObject cachedTable = (HashtableOfObject) this.categoryTables
.get(categoryName);
if (cachedTable != null) {
if (readDocNumbers) { // must cache remaining document number arrays
Object[] arrayOffsets = cachedTable.valueTable;
for (int i = 0, l = arrayOffsets.length; i < l; i++)
if (arrayOffsets[i] instanceof Integer)
arrayOffsets[i] = readDocumentNumbers(arrayOffsets[i]);
}
return cachedTable;
}
}
DataInputStream stream = new DataInputStream(new BufferedInputStream(
new FileInputStream(getIndexFile()), 2048));
HashtableOfObject categoryTable = null;
char[][] matchingWords = null;
int count = 0;
int firstOffset = -1;
try {
stream.skip(offset);
int size = stream.readInt();
try {
if (size < 0) { // DEBUG
System.err
.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
System.err.println("file = " + getIndexFile()); //$NON-NLS-1$
System.err.println("offset = " + offset); //$NON-NLS-1$
System.err.println("size = " + size); //$NON-NLS-1$
System.err
.println("-------------------- END --------------------"); //$NON-NLS-1$
}
categoryTable = new HashtableOfObject(size);
} catch (OutOfMemoryError oom) {
// DEBUG
oom.printStackTrace();
System.err
.println("-------------------- DEBUG --------------------"); //$NON-NLS-1$
System.err.println("file = " + getIndexFile()); //$NON-NLS-1$
System.err.println("offset = " + offset); //$NON-NLS-1$
System.err.println("size = " + size); //$NON-NLS-1$
System.err
.println("-------------------- END --------------------"); //$NON-NLS-1$
throw oom;
}
int largeArraySize = 256;
for (int i = 0; i < size; i++) {
char[] word = Util.readUTF(stream);
int arrayOffset = stream.readInt();
// if arrayOffset is:
// <= 0, the array size == 1 and the single value is -arrayOffset
// > 0 & < 256, it is the size of the array, and the document array
// follows immediately
// 256, the array size >= 256 and another int follows, which is the
// offset to the array (written prior to the table)
if (arrayOffset <= 0) {
categoryTable.put(word, new int[] { -arrayOffset }); // store 1-element array by negating documentNumber
} else if (arrayOffset < largeArraySize) {
categoryTable.put(word,
readDocumentArray(stream, arrayOffset));
// read in-lined array providing size
} else {
arrayOffset = stream.readInt(); // read actual offset
if (readDocNumbers) {
if (matchingWords == null)
matchingWords = new char[size][];
if (count == 0)
firstOffset = arrayOffset;
matchingWords[count++] = word;
}
categoryTable.put(word, arrayOffset); // offset to array in the file
}
}
this.categoryTables.put(categoryName, categoryTable);
// cache the table as long as it's not too big
// in practice, some tables can be greater than 500K when they
// contain more than 10K elements
this.cachedCategoryName = categoryTable.elementSize < 10000 ? categoryName
: null;
} finally {
stream.close();
}
if (matchingWords != null && count > 0) {
stream = new DataInputStream(new BufferedInputStream(
new FileInputStream(getIndexFile()), 2048));
try {
stream.skip(firstOffset);
for (int i = 0; i < count; i++)
// each array follows the previous one
categoryTable.put(matchingWords[i],
readDocumentArray(stream, stream.readInt()));
} finally {
stream.close();
}
}
return categoryTable;
}
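// Reverses the delta encoding of writeAllDocumentNames(): each name after
// the first is stored as (shared prefix length, shared suffix length,
// middle part) relative to the previous name.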
private void readChunk(String[] docNames, DataInputStream stream,
int index, int size) throws IOException {
String current = stream.readUTF();
docNames[index++] = current;
for (int i = 1; i < size; i++) {
int start = stream.readUnsignedByte(); // number of identical characters at the beginning
int end = stream.readUnsignedByte(); // number of identical characters at the end
String next = stream.readUTF();
if (start > 0) {
if (end > 0) {
int length = current.length();
next = current.substring(0, start) + next
+ current.substring(length - end, length);
} else {
next = current.substring(0, start) + next;
}
} else if (end > 0) {
int length = current.length();
next = next + current.substring(length - end, length);
}
docNames[index++] = next;
current = next;
}
}
private int[] readDocumentArray(DataInputStream stream, int arraySize)
throws IOException {
int[] result = new int[arraySize];
switch (this.documentReferenceSize) {
case 1:
for (int i = 0; i < arraySize; i++)
result[i] = stream.readUnsignedByte();
break;
case 2:
for (int i = 0; i < arraySize; i++)
result[i] = stream.readUnsignedShort();
break;
default:
for (int i = 0; i < arraySize; i++)
result[i] = stream.readInt();
break;
}
return result;
}
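/**
 * Resolves a document number to its name by locating the containing chunk
 * (CHUNK_SIZE names per chunk), lazily reading and decompressing that
 * chunk, and caching it for later lookups.
 */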
synchronized String readDocumentName(int docNumber) throws IOException {
if (this.cachedChunks == null)
this.cachedChunks = new String[this.numberOfChunks][];
int chunkNumber = docNumber / CHUNK_SIZE;
String[] chunk = this.cachedChunks[chunkNumber];
if (chunk == null) {
boolean isLastChunk = chunkNumber == this.numberOfChunks - 1;
int start = this.chunkOffsets[chunkNumber];
int numberOfBytes = (isLastChunk ? this.startOfCategoryTables
: this.chunkOffsets[chunkNumber + 1]) - start;
if (numberOfBytes < 0)
throw new IllegalArgumentException();
byte[] bytes = new byte[numberOfBytes];
FileInputStream file = new FileInputStream(getIndexFile());
try {
file.skip(start);
if (file.read(bytes, 0, numberOfBytes) != numberOfBytes)
throw new IOException();
} finally {
file.close();
}
DataInputStream stream = new DataInputStream(
new ByteArrayInputStream(bytes));
int numberOfNames = isLastChunk ? this.sizeOfLastChunk : CHUNK_SIZE;
chunk = this.cachedChunks[chunkNumber] = new String[numberOfNames];
readChunk(chunk, stream, 0, numberOfNames);
}
return chunk[docNumber - (chunkNumber * CHUNK_SIZE)];
}
synchronized int[] readDocumentNumbers(Object arrayOffset)
throws IOException {
// arrayOffset is either a cached array of docNumbers or an Integer
// offset in the file
if (arrayOffset instanceof int[])
return (int[]) arrayOffset;
DataInputStream stream = new DataInputStream(new BufferedInputStream(
new FileInputStream(getIndexFile()), 2048));
try {
stream.skip(((Integer) arrayOffset).intValue());
return readDocumentArray(stream, stream.readInt());
} finally {
stream.close();
}
}
private void readHeaderInfo(RandomAccessFile file) throws IOException {
file.seek(this.headerInfoOffset);
// must be same order as writeHeaderInfo()
this.numberOfChunks = file.readInt();
this.sizeOfLastChunk = file.readUnsignedByte();
this.documentReferenceSize = file.readUnsignedByte();
this.separator = (char) file.readUnsignedByte();
this.chunkOffsets = new int[this.numberOfChunks];
for (int i = 0; i < this.numberOfChunks; i++)
this.chunkOffsets[i] = file.readInt();
this.startOfCategoryTables = file.readInt();
int size = file.readInt();
this.categoryOffsets = new HashtableOfIntValues(size);
for (int i = 0; i < size; i++)
this.categoryOffsets.put(Util.readUTF(file), file.readInt()); // cache offset to category table
this.categoryTables = new HashtableOfObject(3);
}
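// startQuery()/stopQuery() reference-count the users of the caches; when
// the last user stops, the document name chunks are dropped and at most
// the one small cached category table is kept.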
synchronized void startQuery() {
this.cacheUserCount++;
}
synchronized void stopQuery() {
if (--this.cacheUserCount < 0) {
// clear cached items
this.cacheUserCount = -1;
this.cachedChunks = null;
if (this.categoryTables != null) {
if (this.cachedCategoryName == null) {
this.categoryTables = null;
} else if (this.categoryTables.elementSize > 1) {
HashtableOfObject newTables = new HashtableOfObject(3);
newTables.put(this.cachedCategoryName,
this.categoryTables.get(this.cachedCategoryName));
this.categoryTables = newTables;
}
}
}
}
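// Worked example of the name compression below (illustrative): after
// writing "src/Foo.java", the name "src/Bar.java" is stored as start=4
// ("src/"), end=5 (".java"), plus the UTF string "Bar".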
private void writeAllDocumentNames(String[] sortedDocNames,
DataOutputStream stream) throws IOException {
if (sortedDocNames.length == 0)
throw new IllegalArgumentException();
// assume the file was just created by initializeFrom()
// in order, write: SIGNATURE & headerInfoOffset placeholder, then each
// compressed chunk of document names
stream.writeUTF(SIGNATURE);
this.headerInfoOffset = stream.size();
stream.writeInt(-1); // will overwrite with correct value later
int size = sortedDocNames.length;
this.numberOfChunks = (size / CHUNK_SIZE) + 1;
this.sizeOfLastChunk = size % CHUNK_SIZE;
if (this.sizeOfLastChunk == 0) {
this.numberOfChunks--;
this.sizeOfLastChunk = CHUNK_SIZE;
}
this.documentReferenceSize = size <= 0x7F ? 1
: (size <= 0x7FFF ? 2 : 4); // number of bytes used to encode a reference
this.chunkOffsets = new int[this.numberOfChunks];
int lastIndex = this.numberOfChunks - 1;
for (int i = 0; i < this.numberOfChunks; i++) {
this.chunkOffsets[i] = stream.size();
int chunkSize = i == lastIndex ? this.sizeOfLastChunk : CHUNK_SIZE;
int chunkIndex = i * CHUNK_SIZE;
String current = sortedDocNames[chunkIndex];
stream.writeUTF(current);
for (int j = 1; j < chunkSize; j++) {
String next = sortedDocNames[chunkIndex + j];
int len1 = current.length();
int len2 = next.length();
int max = len1 < len2 ? len1 : len2;
int start = 0; // number of identical characters at the beginning (also the index of the first character that is different)
while (current.charAt(start) == next.charAt(start)) {
start++;
if (max == start)
break; // current is 'abba', next is 'abbab'
}
if (start > 255)
start = 255;
int end = 0; // number of identical characters at the end
while (current.charAt(--len1) == next.charAt(--len2)) {
end++;
if (len2 == start)
break; // current is 'abbba', next is 'abba'
if (len1 == 0)
break; // current is 'xabc', next is 'xyabc'
}
if (end > 255)
end = 255;
stream.writeByte(start);
stream.writeByte(end);
int last = next.length() - end;
stream.writeUTF(start < last ? next.substring(start, last) : ""); //$NON-NLS-1$
current = next;
}
}
this.startOfCategoryTables = stream.size() + 1;
}
private void writeCategories(DataOutputStream stream) throws IOException {
char[][] categoryNames = this.categoryTables.keyTable;
Object[] tables = this.categoryTables.valueTable;
for (int i = 0, l = categoryNames.length; i < l; i++)
if (categoryNames[i] != null)
writeCategoryTable(categoryNames[i],
(HashtableOfObject) tables[i], stream);
this.categoryTables = null;
}
private void writeCategoryTable(char[] categoryName,
HashtableOfObject wordsToDocs, DataOutputStream stream)
throws IOException {
// the format of a category table is as follows:
// any document number arrays with >= 256 elements are written before
// the table (the offset to each array is remembered)
// then the number of word -> int[] pairs in the table is written
// for each word -> int[] pair, the word is written followed by:
// an int <= 0 if the array size == 1 (the negated document number)
// an int > 0 & < 256 for the size of the array if it's > 1 & < 256;
// the document array follows immediately
// 256 if the array size >= 256, followed by another int which is the
// offset to the array (written prior to the table)
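// For example (illustrative): a word with documents {3} is written as
// the int -3; a word with documents {5, 9} as the length 2 followed by
// 5 and 9 (each using documentReferenceSize bytes); a word with 300
// documents as the marker 256 followed by the file offset of its
// pre-written array.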
int largeArraySize = 256;
Object[] values = wordsToDocs.valueTable;
for (int i = 0, l = values.length; i < l; i++) {
Object o = values[i];
if (o != null) {
if (o instanceof IntList)
o = values[i] = ((IntList) values[i]).asArray();
int[] documentNumbers = (int[]) o;
if (documentNumbers.length >= largeArraySize) {
values[i] = stream.size();
writeDocumentNumbers(documentNumbers, stream);
}
}
}
this.categoryOffsets.put(categoryName, stream.size()); // remember the offset to the start of the table
this.categoryTables.put(categoryName, null); // flush cached table
stream.writeInt(wordsToDocs.elementSize);
char[][] words = wordsToDocs.keyTable;
for (int i = 0, l = words.length; i < l; i++) {
Object o = values[i];
if (o != null) {
Util.writeUTF(stream, words[i]);
if (o instanceof int[]) {
int[] documentNumbers = (int[]) o;
if (documentNumbers.length == 1)
stream.writeInt(-documentNumbers[0]); // store an array of 1 element by negating the documentNumber (can be zero)
else
writeDocumentNumbers(documentNumbers, stream);
} else {
stream.writeInt(largeArraySize); // mark to identify that an offset follows
stream.writeInt(((Integer) o).intValue()); // offset in the file of the array of document numbers
}
}
}
}
private void writeDocumentNumbers(int[] documentNumbers,
DataOutputStream stream) throws IOException {
// must store length as a positive int to detect in-lined array of 1
// element
int length = documentNumbers.length;
stream.writeInt(length);
Util.sort(documentNumbers);
switch (this.documentReferenceSize) {
case 1:
for (int i = 0; i < length; i++)
stream.writeByte(documentNumbers[i]);
break;
case 2:
for (int i = 0; i < length; i++)
stream.writeShort(documentNumbers[i]);
break;
default:
for (int i = 0; i < length; i++)
stream.writeInt(documentNumbers[i]);
break;
}
}
private void writeHeaderInfo(DataOutputStream stream) throws IOException {
stream.writeInt(this.numberOfChunks);
stream.writeByte(this.sizeOfLastChunk);
stream.writeByte(this.documentReferenceSize);
stream.writeByte(this.separator);
// append the file with chunk offsets
for (int i = 0; i < this.numberOfChunks; i++)
stream.writeInt(this.chunkOffsets[i]);
stream.writeInt(this.startOfCategoryTables);
// append the file with the category offsets... # of name -> offset
// pairs, followed by each name & an offset to its word->doc# table
stream.writeInt(this.categoryOffsets.elementSize);
char[][] categoryNames = this.categoryOffsets.keyTable;
int[] offsets = this.categoryOffsets.valueTable;
for (int i = 0, l = categoryNames.length; i < l; i++) {
if (categoryNames[i] != null) {
Util.writeUTF(stream, categoryNames[i]);
stream.writeInt(offsets[i]);
}
}
}
private void writeOffsetToHeader(int offsetToHeader) throws IOException {
if (offsetToHeader > 0) {
RandomAccessFile file = new RandomAccessFile(this.fileName, "rw"); //$NON-NLS-1$
try {
file.seek(this.headerInfoOffset); // offset to position in header
file.writeInt(offsetToHeader);
this.headerInfoOffset = offsetToHeader; // update to reflect the correct offset
} finally {
file.close();
}
}
}
}