| package org.apache.lucene.codecs.lucene3x; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.text.ParseException; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.SegmentInfoReader; |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.IndexFormatTooNewException; |
| import org.apache.lucene.index.IndexFormatTooOldException; |
| import org.apache.lucene.index.SegmentCommitInfo; |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.index.SegmentInfos; |
| import org.apache.lucene.store.CompoundFileDirectory; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.Version; |
| |
| /** |
| * Lucene 3x implementation of {@link SegmentInfoReader}. |
| * @lucene.experimental |
| * @deprecated Only for reading existing 3.x indexes |
| */ |
| @Deprecated |
| public class Lucene3xSegmentInfoReader extends SegmentInfoReader { |
| |
| public static void readLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format) throws IOException { |
| infos.version = input.readLong(); // read version |
| infos.counter = input.readInt(); // read counter |
| Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader(); |
| for (int i = input.readInt(); i > 0; i--) { // read segmentInfos |
| SegmentCommitInfo siPerCommit = reader.readLegacySegmentInfo(directory, format, input); |
| SegmentInfo si = siPerCommit.info; |
| |
| if (si.getVersion() == null) { |
| // Could be a 3.0 - try to open the doc stores - if it fails, it's a |
| // 2.x segment, and an IndexFormatTooOldException will be thrown, |
| // which is what we want. |
| Directory dir = directory; |
| if (Lucene3xSegmentInfoFormat.getDocStoreOffset(si) != -1) { |
| if (Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(si)) { |
| dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( |
| Lucene3xSegmentInfoFormat.getDocStoreSegment(si), "", |
| Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READONCE, false); |
| } |
| } else if (si.getUseCompoundFile()) { |
| dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( |
| si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false); |
| } |
| |
| try { |
| Lucene3xStoredFieldsReader.checkCodeVersion(dir, Lucene3xSegmentInfoFormat.getDocStoreSegment(si)); |
| } finally { |
| // If we opened the directory, close it |
| if (dir != directory) dir.close(); |
| } |
| |
| // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next |
| // time the segment is read, its version won't be null and we won't |
| // need to open FieldsReader every time for each such segment. |
| si.setVersion(Version.LUCENE_3_0_0); |
| } else if (si.getVersion().equals("2.x")) { |
| // If it's a 3x index touched by 3.1+ code, then segments record their |
| // version, whether they are 2.x ones or not. We detect that and throw |
| // appropriate exception. |
| throw new IndexFormatTooOldException("segment " + si.name + " in resource " + input, si.getVersion().toString()); |
| } |
| infos.add(siPerCommit); |
| } |
| |
| infos.userData = input.readStringStringMap(); |
| } |
| |
| @Override |
| public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException { |
| // NOTE: this is NOT how 3.x is really written... |
| String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); |
| |
| boolean success = false; |
| |
| IndexInput input = directory.openInput(fileName, context); |
| |
| try { |
| SegmentInfo si = readUpgradedSegmentInfo(segmentName, directory, input); |
| success = true; |
| return si; |
| } finally { |
| if (!success) { |
| IOUtils.closeWhileHandlingException(input); |
| } else { |
| input.close(); |
| } |
| } |
| } |
| |
| private static void addIfExists(Directory dir, Set<String> files, String fileName) throws IOException { |
| if (dir.fileExists(fileName)) { |
| files.add(fileName); |
| } |
| } |
| |
| /** reads from legacy 3.x segments_N */ |
| private SegmentCommitInfo readLegacySegmentInfo(Directory dir, int format, IndexInput input) throws IOException { |
| // check that it is a format we can understand |
| if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) { |
| throw new IndexFormatTooOldException(input, format, |
| Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); |
| } |
| if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) { |
| throw new IndexFormatTooNewException(input, format, |
| Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); |
| } |
| final Version version; |
| if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) { |
| try { |
| version = Version.parse(input.readString()); |
| } catch (ParseException pe) { |
| throw new CorruptIndexException("unable to parse version string (input: " + input + "): " + pe.getMessage(), pe); |
| } |
| } else { |
| version = null; |
| } |
| |
| final String name = input.readString(); |
| |
| final int docCount = input.readInt(); |
| final long delGen = input.readLong(); |
| |
| final int docStoreOffset = input.readInt(); |
| final Map<String,String> attributes = new HashMap<>(); |
| |
| // parse the docstore stuff and shove it into attributes |
| final String docStoreSegment; |
| final boolean docStoreIsCompoundFile; |
| if (docStoreOffset != -1) { |
| docStoreSegment = input.readString(); |
| docStoreIsCompoundFile = input.readByte() == SegmentInfo.YES; |
| attributes.put(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY, Integer.toString(docStoreOffset)); |
| attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, docStoreSegment); |
| attributes.put(Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY, Boolean.toString(docStoreIsCompoundFile)); |
| } else { |
| docStoreSegment = name; |
| docStoreIsCompoundFile = false; |
| } |
| |
| // pre-4.0 indexes write a byte if there is a single norms file |
| byte b = input.readByte(); |
| |
| //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format); |
| |
| assert 1 == b : "expected 1 but was: "+ b + " format: " + format; |
| final int numNormGen = input.readInt(); |
| final Map<Integer,Long> normGen; |
| if (numNormGen == SegmentInfo.NO) { |
| normGen = null; |
| } else { |
| normGen = new HashMap<>(); |
| for(int j=0;j<numNormGen;j++) { |
| normGen.put(j, input.readLong()); |
| } |
| } |
| final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| |
| final int delCount = input.readInt(); |
| assert delCount <= docCount; |
| |
| final boolean hasProx = input.readByte() == 1; |
| |
| final Map<String,String> diagnostics = input.readStringStringMap(); |
| |
| if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) { |
| // NOTE: unused |
| final int hasVectors = input.readByte(); |
| } |
| |
| // Replicate logic from 3.x's SegmentInfo.files(): |
| final Set<String> files = new HashSet<>(); |
| if (isCompoundFile) { |
| files.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); |
| } else { |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION)); |
| } |
| |
| if (docStoreOffset != -1) { |
| if (docStoreIsCompoundFile) { |
| files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); |
| } else { |
| files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); |
| files.add(IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); |
| } |
| } else if (!isCompoundFile) { |
| files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); |
| files.add(IndexFileNames.segmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); |
| addIfExists(dir, files, IndexFileNames.segmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); |
| } |
| |
| // parse the normgen stuff and shove it into attributes |
| if (normGen != null) { |
| attributes.put(Lucene3xSegmentInfoFormat.NORMGEN_KEY, Integer.toString(numNormGen)); |
| for(Map.Entry<Integer,Long> ent : normGen.entrySet()) { |
| long gen = ent.getValue(); |
| if (gen >= SegmentInfo.YES) { |
| // Definitely a separate norm file, with generation: |
| files.add(IndexFileNames.fileNameFromGeneration(name, "s" + ent.getKey(), gen)); |
| attributes.put(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.getKey(), Long.toString(gen)); |
| } else if (gen == SegmentInfo.NO) { |
| // No separate norm |
| } else { |
| // We should have already hit indexformat too old exception |
| assert false; |
| } |
| } |
| } |
| |
| SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, |
| null, diagnostics, Collections.unmodifiableMap(attributes)); |
| info.setFiles(files); |
| |
| SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1, -1); |
| return infoPerCommit; |
| } |
| |
| private SegmentInfo readUpgradedSegmentInfo(String name, Directory dir, IndexInput input) throws IOException { |
| CodecUtil.checkHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, |
| Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START, |
| Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT); |
| final Version version; |
| try { |
| version = Version.parse(input.readString()); |
| } catch (ParseException pe) { |
| throw new CorruptIndexException("unable to parse version string (input: " + input + "): " + pe.getMessage(), pe); |
| } |
| |
| final int docCount = input.readInt(); |
| |
| final Map<String,String> attributes = input.readStringStringMap(); |
| |
| final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; |
| |
| final Map<String,String> diagnostics = input.readStringStringMap(); |
| |
| final Set<String> files = input.readStringSet(); |
| |
| SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, |
| null, diagnostics, Collections.unmodifiableMap(attributes)); |
| info.setFiles(files); |
| return info; |
| } |
| } |