blob: a75b9a191d82d6112e14d96b38ca7453a6c71339 [file] [log] [blame]
package org.apache.lucene.codecs.lucene3x;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.text.ParseException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
 * Lucene 3x implementation of {@link SegmentInfoReader}.
 * <p>
 * Handles two on-disk shapes: (1) legacy per-segment entries embedded directly
 * inside a 3.x {@code segments_N} file (via {@link #readLegacyInfos}), and
 * (2) the standalone "upgraded" segment info file that 4.x writes for imported
 * 3.x segments (via {@link #read}).
 * @lucene.experimental
 * @deprecated Only for reading existing 3.x indexes
 */
@Deprecated
public class Lucene3xSegmentInfoReader extends SegmentInfoReader {

  /**
   * Reads all legacy (3.x) segment entries from a {@code segments_N} stream and
   * appends one {@link SegmentCommitInfo} per segment to {@code infos}.
   * Also populates {@code infos.version}, {@code infos.counter} and
   * {@code infos.userData}.
   *
   * @param infos     destination {@link SegmentInfos} to populate
   * @param directory directory holding the index files
   * @param input     stream positioned just past the segments_N format header
   * @param format    legacy format version previously read from the header
   * @throws IOException if there is a low-level IO error, or the index is too
   *         old/new/corrupt for this reader
   */
  public static void readLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format) throws IOException {
    infos.version = input.readLong(); // read version
    infos.counter = input.readInt(); // read counter
    Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader();
    for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
      SegmentCommitInfo siPerCommit = reader.readLegacySegmentInfo(directory, format, input);
      SegmentInfo si = siPerCommit.info;

      if (si.getVersion() == null) {
        // Could be a 3.0 - try to open the doc stores - if it fails, it's a
        // 2.x segment, and an IndexFormatTooOldException will be thrown,
        // which is what we want.
        Directory dir = directory;
        if (Lucene3xSegmentInfoFormat.getDocStoreOffset(si) != -1) {
          if (Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(si)) {
            // Stored fields live in a shared compound doc-store file; open it
            // so the version check below reads from the right place.
            dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(
                Lucene3xSegmentInfoFormat.getDocStoreSegment(si), "",
                Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READONCE, false);
          }
        } else if (si.getUseCompoundFile()) {
          // Segment's own files are inside its compound file.
          dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(
              si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false);
        }

        try {
          Lucene3xStoredFieldsReader.checkCodeVersion(dir, Lucene3xSegmentInfoFormat.getDocStoreSegment(si));
        } finally {
          // If we opened the directory, close it
          if (dir != directory) dir.close();
        }

        // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
        // time the segment is read, its version won't be null and we won't
        // need to open FieldsReader every time for each such segment.
        si.setVersion(Version.LUCENE_3_0_0);
      } else if (si.getVersion().equals("2.x")) {
        // If it's a 3x index touched by 3.1+ code, then segments record their
        // version, whether they are 2.x ones or not. We detect that and throw
        // appropriate exception.
        // NOTE(review): getVersion() yields a Version here (setVersion above
        // takes Version.LUCENE_3_0_0), so equals("2.x") against a String looks
        // like it can never be true; 2.x strings would instead fail in
        // Version.parse inside readLegacySegmentInfo. Confirm against the
        // SegmentInfo/Version API of this revision.
        throw new IndexFormatTooOldException("segment " + si.name + " in resource " + input, si.getVersion().toString());
      }
      infos.add(siPerCommit);
    }

    // Trailing commit user data map closes out the segments_N payload.
    infos.userData = input.readStringStringMap();
  }

  /**
   * Reads the "upgraded" segment info file ({@code .si}-style, written by 4.x
   * when it upgrades a 3.x segment) for {@code segmentName}.
   *
   * @throws IOException if there is a low-level IO error or the file is corrupt
   */
  @Override
  public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
    // NOTE: this is NOT how 3.x is really written...
    String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION);

    boolean success = false;

    IndexInput input = directory.openInput(fileName, context);

    try {
      SegmentInfo si = readUpgradedSegmentInfo(segmentName, directory, input);
      success = true;
      return si;
    } finally {
      if (!success) {
        // Don't mask the original exception with one from close().
        IOUtils.closeWhileHandlingException(input);
      } else {
        input.close();
      }
    }
  }

  /** Adds {@code fileName} to {@code files} only if it actually exists in {@code dir}. */
  private static void addIfExists(Directory dir, Set<String> files, String fileName) throws IOException {
    if (dir.fileExists(fileName)) {
      files.add(fileName);
    }
  }

  /**
   * Reads a single per-segment entry from a legacy 3.x {@code segments_N}
   * stream, translating it into a 4.x {@link SegmentCommitInfo}. Legacy
   * doc-store and norms-generation details are preserved as string attributes
   * (see the {@code Lucene3xSegmentInfoFormat} keys) since 4.x SegmentInfo has
   * no first-class fields for them.
   */
  private SegmentCommitInfo readLegacySegmentInfo(Directory dir, int format, IndexInput input) throws IOException {
    // check that it is a format we can understand
    // (valid range is FORMAT_3_1 .. FORMAT_DIAGNOSTICS per the two checks below)
    if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) {
      throw new IndexFormatTooOldException(input, format,
          Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
    }
    if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) {
      throw new IndexFormatTooNewException(input, format,
          Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
    }
    final Version version;
    if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) {
      // 3.1+ records an explicit version string per segment.
      try {
        version = Version.parse(input.readString());
      } catch (ParseException pe) {
        throw new CorruptIndexException("unable to parse version string (input: " + input + "): " + pe.getMessage(), pe);
      }
    } else {
      // Pre-3.1 segment: version unknown; caller probes doc stores to tell 3.0 from 2.x.
      version = null;
    }

    final String name = input.readString();

    final int docCount = input.readInt();
    final long delGen = input.readLong();

    final int docStoreOffset = input.readInt();
    final Map<String,String> attributes = new HashMap<>();

    // parse the docstore stuff and shove it into attributes
    // (offset == -1 means the segment wrote its own stored fields/vectors,
    // otherwise they live in a shared doc store at this offset)
    final String docStoreSegment;
    final boolean docStoreIsCompoundFile;
    if (docStoreOffset != -1) {
      docStoreSegment = input.readString();
      docStoreIsCompoundFile = input.readByte() == SegmentInfo.YES;
      attributes.put(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY, Integer.toString(docStoreOffset));
      attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, docStoreSegment);
      attributes.put(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile));
    } else {
      docStoreSegment = name;
      docStoreIsCompoundFile = false;
    }

    // pre-4.0 indexes write a byte if there is a single norms file
    byte b = input.readByte();
    //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);
    assert 1 == b : "expected 1 but was: "+ b + " format: " + format;
    final int numNormGen = input.readInt();
    final Map<Integer,Long> normGen;
    if (numNormGen == SegmentInfo.NO) {
      normGen = null;
    } else {
      // One generation entry per field number, keyed by field index.
      normGen = new HashMap<>();
      for(int j=0;j<numNormGen;j++) {
        normGen.put(j, input.readLong());
      }
    }
    final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;

    final int delCount = input.readInt();
    assert delCount <= docCount;

    final boolean hasProx = input.readByte() == 1;

    final Map<String,String> diagnostics = input.readStringStringMap();

    if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) {
      // NOTE: unused -- byte must still be consumed to keep the stream aligned
      final int hasVectors = input.readByte();
    }

    // Replicate logic from 3.x's SegmentInfo.files():
    // 3.x did not store the file list, so reconstruct it from naming
    // conventions, probing the directory for optional files.
    final Set<String> files = new HashSet<>();
    if (isCompoundFile) {
      files.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
    } else {
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
    }

    if (docStoreOffset != -1) {
      if (docStoreIsCompoundFile) {
        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
      } else {
        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
        files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
        addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
      }
    } else if (!isCompoundFile) {
      files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
      files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
      addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
    }

    // parse the normgen stuff and shove it into attributes
    if (normGen != null) {
      attributes.put(Lucene3xSegmentInfoFormat.NORMGEN_KEY, Integer.toString(numNormGen));
      for(Map.Entry<Integer,Long> ent : normGen.entrySet()) {
        long gen = ent.getValue();
        if (gen >= SegmentInfo.YES) {
          // Definitely a separate norm file, with generation:
          files.add(IndexFileNames.fileNameFromGeneration(name, "s" + ent.getKey(), gen));
          attributes.put(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.getKey(), Long.toString(gen));
        } else if (gen == SegmentInfo.NO) {
          // No separate norm
        } else {
          // We should have already hit indexformat too old exception
          assert false;
        }
      }
    }

    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile,
        null, diagnostics, Collections.unmodifiableMap(attributes));
    info.setFiles(files);

    SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1, -1);
    return infoPerCommit;
  }

  /**
   * Reads the codec-headered "upgraded" segment info format. Unlike the legacy
   * path this format stores the file set and attributes explicitly, so no
   * directory probing is needed.
   */
  private SegmentInfo readUpgradedSegmentInfo(String name, Directory dir, IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME,
        Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START,
        Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT);
    final Version version;
    try {
      version = Version.parse(input.readString());
    } catch (ParseException pe) {
      throw new CorruptIndexException("unable to parse version string (input: " + input + "): " + pe.getMessage(), pe);
    }

    final int docCount = input.readInt();

    final Map<String,String> attributes = input.readStringStringMap();

    final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;

    final Map<String,String> diagnostics = input.readStringStringMap();

    final Set<String> files = input.readStringSet();

    SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile,
        null, diagnostics, Collections.unmodifiableMap(attributes));
    info.setFiles(files);
    return info;
  }
}