| package org.apache.lucene.index; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.SortedMap; |
| import java.util.TreeMap; |
| |
| import org.apache.lucene.index.FieldInfo.DocValuesType; |
| import org.apache.lucene.index.FieldInfo.IndexOptions; |
| |
| /** |
| * Collection of {@link FieldInfo}s (accessible by number or by name). |
| * @lucene.experimental |
| */ |
| public class FieldInfos implements Iterable<FieldInfo> { |
| private final boolean hasFreq; |
| private final boolean hasProx; |
| private final boolean hasPayloads; |
| private final boolean hasOffsets; |
| private final boolean hasVectors; |
| private final boolean hasNorms; |
| private final boolean hasDocValues; |
| |
| private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<>(); |
| private final HashMap<String,FieldInfo> byName = new HashMap<>(); |
| private final Collection<FieldInfo> values; // for an unmodifiable iterator |
| |
| /** |
| * Constructs a new FieldInfos from an array of FieldInfo objects |
| */ |
| public FieldInfos(FieldInfo[] infos) { |
| boolean hasVectors = false; |
| boolean hasProx = false; |
| boolean hasPayloads = false; |
| boolean hasOffsets = false; |
| boolean hasFreq = false; |
| boolean hasNorms = false; |
| boolean hasDocValues = false; |
| |
| for (FieldInfo info : infos) { |
| if (info.number < 0) { |
| throw new IllegalArgumentException("illegal field number: " + info.number + " for field " + info.name); |
| } |
| FieldInfo previous = byNumber.put(info.number, info); |
| if (previous != null) { |
| throw new IllegalArgumentException("duplicate field numbers: " + previous.name + " and " + info.name + " have: " + info.number); |
| } |
| previous = byName.put(info.name, info); |
| if (previous != null) { |
| throw new IllegalArgumentException("duplicate field names: " + previous.number + " and " + info.number + " have: " + info.name); |
| } |
| |
| hasVectors |= info.hasVectors(); |
| hasProx |= info.isIndexed() && info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; |
| hasFreq |= info.isIndexed() && info.getIndexOptions() != IndexOptions.DOCS_ONLY; |
| hasOffsets |= info.isIndexed() && info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| hasNorms |= info.hasNorms(); |
| hasDocValues |= info.hasDocValues(); |
| hasPayloads |= info.hasPayloads(); |
| } |
| |
| this.hasVectors = hasVectors; |
| this.hasProx = hasProx; |
| this.hasPayloads = hasPayloads; |
| this.hasOffsets = hasOffsets; |
| this.hasFreq = hasFreq; |
| this.hasNorms = hasNorms; |
| this.hasDocValues = hasDocValues; |
| this.values = Collections.unmodifiableCollection(byNumber.values()); |
| } |
| |
| /** Returns true if any fields have freqs */ |
| public boolean hasFreq() { |
| return hasFreq; |
| } |
| |
| /** Returns true if any fields have positions */ |
| public boolean hasProx() { |
| return hasProx; |
| } |
| |
| /** Returns true if any fields have payloads */ |
| public boolean hasPayloads() { |
| return hasPayloads; |
| } |
| |
| /** Returns true if any fields have offsets */ |
| public boolean hasOffsets() { |
| return hasOffsets; |
| } |
| |
| /** Returns true if any fields have vectors */ |
| public boolean hasVectors() { |
| return hasVectors; |
| } |
| |
| /** Returns true if any fields have norms */ |
| public boolean hasNorms() { |
| return hasNorms; |
| } |
| |
| /** Returns true if any fields have DocValues */ |
| public boolean hasDocValues() { |
| return hasDocValues; |
| } |
| |
| /** Returns the number of fields */ |
| public int size() { |
| assert byNumber.size() == byName.size(); |
| return byNumber.size(); |
| } |
| |
| /** |
| * Returns an iterator over all the fieldinfo objects present, |
| * ordered by ascending field number |
| */ |
| // TODO: what happens if in fact a different order is used? |
| @Override |
| public Iterator<FieldInfo> iterator() { |
| return values.iterator(); |
| } |
| |
| /** |
| * Return the fieldinfo object referenced by the field name |
| * @return the FieldInfo object or null when the given fieldName |
| * doesn't exist. |
| */ |
| public FieldInfo fieldInfo(String fieldName) { |
| return byName.get(fieldName); |
| } |
| |
| /** |
| * Return the fieldinfo object referenced by the fieldNumber. |
| * @param fieldNumber field's number. |
| * @return the FieldInfo object or null when the given fieldNumber |
| * doesn't exist. |
| * @throws IllegalArgumentException if fieldNumber is negative |
| */ |
| public FieldInfo fieldInfo(int fieldNumber) { |
| if (fieldNumber < 0) { |
| throw new IllegalArgumentException("Illegal field number: " + fieldNumber); |
| } |
| return byNumber.get(fieldNumber); |
| } |
| |
| static final class FieldNumbers { |
| |
| private final Map<Integer,String> numberToName; |
| private final Map<String,Integer> nameToNumber; |
| // We use this to enforce that a given field never |
| // changes DV type, even across segments / IndexWriter |
| // sessions: |
| private final Map<String,DocValuesType> docValuesType; |
| |
| // TODO: we should similarly catch an attempt to turn |
| // norms back on after they were already ommitted; today |
| // we silently discard the norm but this is badly trappy |
| private int lowestUnassignedFieldNumber = -1; |
| |
| FieldNumbers() { |
| this.nameToNumber = new HashMap<>(); |
| this.numberToName = new HashMap<>(); |
| this.docValuesType = new HashMap<>(); |
| } |
| |
| /** |
| * Returns the global field number for the given field name. If the name |
| * does not exist yet it tries to add it with the given preferred field |
| * number assigned if possible otherwise the first unassigned field number |
| * is used as the field number. |
| */ |
| synchronized int addOrGet(String fieldName, int preferredFieldNumber, DocValuesType dvType) { |
| if (dvType != null) { |
| DocValuesType currentDVType = docValuesType.get(fieldName); |
| if (currentDVType == null) { |
| docValuesType.put(fieldName, dvType); |
| } else if (currentDVType != null && currentDVType != dvType) { |
| throw new IllegalArgumentException("cannot change DocValues type from " + currentDVType + " to " + dvType + " for field \"" + fieldName + "\""); |
| } |
| } |
| Integer fieldNumber = nameToNumber.get(fieldName); |
| if (fieldNumber == null) { |
| final Integer preferredBoxed = Integer.valueOf(preferredFieldNumber); |
| |
| if (preferredFieldNumber != -1 && !numberToName.containsKey(preferredBoxed)) { |
| // cool - we can use this number globally |
| fieldNumber = preferredBoxed; |
| } else { |
| // find a new FieldNumber |
| while (numberToName.containsKey(++lowestUnassignedFieldNumber)) { |
| // might not be up to date - lets do the work once needed |
| } |
| fieldNumber = lowestUnassignedFieldNumber; |
| } |
| |
| numberToName.put(fieldNumber, fieldName); |
| nameToNumber.put(fieldName, fieldNumber); |
| } |
| |
| return fieldNumber.intValue(); |
| } |
| |
| // used by assert |
| synchronized boolean containsConsistent(Integer number, String name, DocValuesType dvType) { |
| return name.equals(numberToName.get(number)) |
| && number.equals(nameToNumber.get(name)) && |
| (dvType == null || docValuesType.get(name) == null || dvType == docValuesType.get(name)); |
| } |
| |
| /** |
| * Returns true if the {@code fieldName} exists in the map and is of the |
| * same {@code dvType}. |
| */ |
| synchronized boolean contains(String fieldName, DocValuesType dvType) { |
| // used by IndexWriter.updateNumericDocValue |
| if (!nameToNumber.containsKey(fieldName)) { |
| return false; |
| } else { |
| // only return true if the field has the same dvType as the requested one |
| return dvType == docValuesType.get(fieldName); |
| } |
| } |
| |
| synchronized void clear() { |
| numberToName.clear(); |
| nameToNumber.clear(); |
| docValuesType.clear(); |
| } |
| |
| synchronized void setDocValuesType(int number, String name, DocValuesType dvType) { |
| assert containsConsistent(number, name, dvType); |
| docValuesType.put(name, dvType); |
| } |
| } |
| |
| static final class Builder { |
| private final HashMap<String,FieldInfo> byName = new HashMap<>(); |
| final FieldNumbers globalFieldNumbers; |
| |
| Builder() { |
| this(new FieldNumbers()); |
| } |
| |
| /** |
| * Creates a new instance with the given {@link FieldNumbers}. |
| */ |
| Builder(FieldNumbers globalFieldNumbers) { |
| assert globalFieldNumbers != null; |
| this.globalFieldNumbers = globalFieldNumbers; |
| } |
| |
| public void add(FieldInfos other) { |
| for(FieldInfo fieldInfo : other){ |
| add(fieldInfo); |
| } |
| } |
| |
| /** NOTE: this method does not carry over termVector |
| * booleans nor docValuesType; the indexer chain |
| * (TermVectorsConsumerPerField, DocFieldProcessor) must |
| * set these fields when they succeed in consuming |
| * the document */ |
| public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) { |
| // TODO: really, indexer shouldn't even call this |
| // method (it's only called from DocFieldProcessor); |
| // rather, each component in the chain should update |
| // what it "owns". EG fieldType.indexOptions() should |
| // be updated by maybe FreqProxTermsWriterPerField: |
| return addOrUpdateInternal(name, -1, fieldType.indexed(), false, |
| fieldType.omitNorms(), false, |
| fieldType.indexOptions(), fieldType.docValueType(), null); |
| } |
| |
| private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed, |
| boolean storeTermVector, |
| boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType) { |
| FieldInfo fi = fieldInfo(name); |
| if (fi == null) { |
| // This field wasn't yet added to this in-RAM |
| // segment's FieldInfo, so now we get a global |
| // number for this field. If the field was seen |
| // before then we'll get the same name and number, |
| // else we'll allocate a new one: |
| final int fieldNumber = globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues); |
| fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, -1, null); |
| assert !byName.containsKey(fi.name); |
| assert globalFieldNumbers.containsConsistent(Integer.valueOf(fi.number), fi.name, fi.getDocValuesType()); |
| byName.put(fi.name, fi); |
| } else { |
| fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions); |
| |
| if (docValues != null) { |
| // only pay the synchronization cost if fi does not already have a DVType |
| boolean updateGlobal = !fi.hasDocValues(); |
| fi.setDocValuesType(docValues); // this will also perform the consistency check. |
| if (updateGlobal) { |
| // must also update docValuesType map so it's |
| // aware of this field's DocValueType |
| globalFieldNumbers.setDocValuesType(fi.number, name, docValues); |
| } |
| } |
| |
| if (!fi.omitsNorms() && normType != null) { |
| fi.setNormValueType(normType); |
| } |
| } |
| return fi; |
| } |
| |
| public FieldInfo add(FieldInfo fi) { |
| // IMPORTANT - reuse the field number if possible for consistent field numbers across segments |
| return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed(), fi.hasVectors(), |
| fi.omitsNorms(), fi.hasPayloads(), |
| fi.getIndexOptions(), fi.getDocValuesType(), fi.getNormType()); |
| } |
| |
| public FieldInfo fieldInfo(String fieldName) { |
| return byName.get(fieldName); |
| } |
| |
| final FieldInfos finish() { |
| return new FieldInfos(byName.values().toArray(new FieldInfo[byName.size()])); |
| } |
| } |
| } |