core/org.eclipse.smila.solr/lib/source/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java - smila/org.eclipse.smila.core - Git at Google

 package org.apache.lucene.codecs.memory;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.NoSuchElementException;

 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
 import org.apache.lucene.util.packed.PackedInts;

 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.VERSION_CURRENT;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_SIZE;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BYTES;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.NUMBER;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.FST;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_SET;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_SET_SINGLETON;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_NUMERIC;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_NUMERIC_SINGLETON;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.DELTA_COMPRESSED;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_COMPRESSED;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.GCD_COMPRESSED;
 import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.TABLE_COMPRESSED;

 /**
  * Writer for {@link MemoryDocValuesFormat}
  */
 class MemoryDocValuesConsumer extends DocValuesConsumer {
   IndexOutput data, meta;
   final int maxDoc;
   final float acceptableOverheadRatio;

   MemoryDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension, float acceptableOverheadRatio) throws IOException {
     this.acceptableOverheadRatio = acceptableOverheadRatio;
     maxDoc = state.segmentInfo.getDocCount();
     boolean success = false;
     try {
       String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
       data = state.directory.createOutput(dataName, state.context);
       CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
       String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
       meta = state.directory.createOutput(metaName, state.context);
       CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
       success = true;
     } finally {
       if (!success) {
         IOUtils.closeWhileHandlingException(this);
       }
     }
   }

   @Override
   public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
     addNumericField(field, values, true);
   }

   void addNumericField(FieldInfo field, Iterable<Number> values, boolean optimizeStorage) throws IOException {
     meta.writeVInt(field.number);
     meta.writeByte(NUMBER);
     meta.writeLong(data.getFilePointer());
     long minValue = Long.MAX_VALUE;
     long maxValue = Long.MIN_VALUE;
     long blockSum = 0;
     long gcd = 0;
     boolean missing = false;
     // TODO: more efficient?
     HashSet<Long> uniqueValues = null;
     long count = 0;

     if (optimizeStorage) {
       uniqueValues = new HashSet<>();

       long currentBlockMin = Long.MAX_VALUE;
       long currentBlockMax = Long.MIN_VALUE;
       for (Number nv : values) {
         final long v;
         if (nv == null) {
           v = 0;
           missing = true;
         } else {
           v = nv.longValue();
         }

         if (gcd != 1) {
           if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
             // in that case v - minValue might overflow and make the GCD computation return
             // wrong results. Since these extreme values are unlikely, we just discard
             // GCD computation for them
             gcd = 1;
           } else if (count != 0) { // minValue needs to be set first
             gcd = MathUtil.gcd(gcd, v - minValue);
           }
         }

         currentBlockMin = Math.min(minValue, v);
         currentBlockMax = Math.max(maxValue, v);

         minValue = Math.min(minValue, v);
         maxValue = Math.max(maxValue, v);

         if (uniqueValues != null) {
           if (uniqueValues.add(v)) {
             if (uniqueValues.size() > 256) {
               uniqueValues = null;
             }
           }
         }

         ++count;
         if (count % BLOCK_SIZE == 0) {
           final long blockDelta = currentBlockMax - currentBlockMin;
           final int blockDeltaRequired = PackedInts.unsignedBitsRequired(blockDelta);
           final int blockBPV = PackedInts.fastestFormatAndBits(BLOCK_SIZE, blockDeltaRequired, acceptableOverheadRatio).bitsPerValue;
           blockSum += blockBPV;
           currentBlockMax = Long.MIN_VALUE;
           currentBlockMin = Long.MAX_VALUE;
         }
       }
     } else {
       for (Number nv : values) {
         long v = nv.longValue();
         maxValue = Math.max(v, maxValue);
         minValue = Math.min(v, minValue);
         count++;
       }
     }

     if (missing) {
       long start = data.getFilePointer();
       writeMissingBitset(values);
       meta.writeLong(start);
       meta.writeLong(data.getFilePointer() - start);
     } else {
       meta.writeLong(-1L);
     }

     final long delta = maxValue - minValue;
     final int deltaRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
     final FormatAndBits deltaBPV = PackedInts.fastestFormatAndBits(maxDoc, deltaRequired, acceptableOverheadRatio);

     final FormatAndBits tableBPV;
     if (count < Integer.MAX_VALUE && uniqueValues != null) {
       tableBPV = PackedInts.fastestFormatAndBits(maxDoc, PackedInts.bitsRequired(uniqueValues.size()-1), acceptableOverheadRatio);
     } else {
       tableBPV = null;
     }

     final FormatAndBits gcdBPV;
     if (count < Integer.MAX_VALUE && gcd != 0 && gcd != 1) {
       final long gcdDelta = (maxValue - minValue) / gcd;
       final int gcdRequired = gcdDelta < 0 ? 64 : PackedInts.bitsRequired(gcdDelta);
       gcdBPV = PackedInts.fastestFormatAndBits(maxDoc, gcdRequired, acceptableOverheadRatio);
     } else {
       gcdBPV = null;
     }

     boolean doBlock = false;
     if (blockSum != 0) {
       int numBlocks = maxDoc / BLOCK_SIZE;
       float avgBPV = blockSum / (float)numBlocks;
       // just a heuristic, with tiny amounts of blocks our estimate is skewed as we ignore the final "incomplete" block.
       // with at least 4 blocks its pretty accurate. The difference must also be significant (according to acceptable overhead).
       if (numBlocks >= 4 && (avgBPV+avgBPV*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
         doBlock = true;
       }
     }
     // blockpackedreader allows us to read in huge streams of ints
     if (count >= Integer.MAX_VALUE) {
       doBlock = true;
     }

     if (tableBPV != null && (tableBPV.bitsPerValue+tableBPV.bitsPerValue*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
       // small number of unique values
       meta.writeByte(TABLE_COMPRESSED); // table-compressed
       Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
       final HashMap<Long,Integer> encode = new HashMap<>();
       int length = 1 << tableBPV.bitsPerValue;
       data.writeVInt(length);
       for (int i = 0; i < decode.length; i++) {
         data.writeLong(decode[i]);
         encode.put(decode[i], i);
       }
       for (int i = decode.length; i < length; i++) {
         data.writeLong(0);
       }

       meta.writeVInt(PackedInts.VERSION_CURRENT);
       meta.writeLong(count);
       data.writeVInt(tableBPV.format.getId());
       data.writeVInt(tableBPV.bitsPerValue);

       final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, tableBPV.format, (int)count, tableBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
       for(Number nv : values) {
         writer.add(encode.get(nv == null ? 0 : nv.longValue()));
       }
       writer.finish();
     } else if (gcdBPV != null && (gcdBPV.bitsPerValue+gcdBPV.bitsPerValue*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
       meta.writeByte(GCD_COMPRESSED);
       meta.writeVInt(PackedInts.VERSION_CURRENT);
       meta.writeLong(count);
       data.writeLong(minValue);
       data.writeLong(gcd);
       data.writeVInt(gcdBPV.format.getId());
       data.writeVInt(gcdBPV.bitsPerValue);

       final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, gcdBPV.format, (int)count, gcdBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
       for (Number nv : values) {
         long value = nv == null ? 0 : nv.longValue();
         writer.add((value - minValue) / gcd);
       }
       writer.finish();
     } else if (doBlock) {
       meta.writeByte(BLOCK_COMPRESSED); // block delta-compressed
       meta.writeVInt(PackedInts.VERSION_CURRENT);
       meta.writeLong(count);
       data.writeVInt(BLOCK_SIZE);
       final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
       for (Number nv : values) {
         writer.add(nv == null ? 0 : nv.longValue());
       }
       writer.finish();
     } else {
       meta.writeByte(DELTA_COMPRESSED); // delta-compressed
       meta.writeVInt(PackedInts.VERSION_CURRENT);
       meta.writeLong(count);
       final long minDelta = deltaBPV.bitsPerValue == 64 ? 0 : minValue;
       data.writeLong(minDelta);
       data.writeVInt(deltaBPV.format.getId());
       data.writeVInt(deltaBPV.bitsPerValue);

       final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, deltaBPV.format, (int)count, deltaBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
       for (Number nv : values) {
         long v = nv == null ? 0 : nv.longValue();
         writer.add(v - minDelta);
       }
       writer.finish();
     }
   }

   @Override
   public void close() throws IOException {
     boolean success = false;
     try {
       if (meta != null) {
         meta.writeVInt(-1); // write EOF marker
         CodecUtil.writeFooter(meta); // write checksum
       }
       if (data != null) {
         CodecUtil.writeFooter(data);
       }
       success = true;
     } finally {
       if (success) {
         IOUtils.close(data, meta);
       } else {
         IOUtils.closeWhileHandlingException(data, meta);
       }
       data = meta = null;
     }
   }

   @Override
   public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
     // write the byte[] data
     meta.writeVInt(field.number);
     meta.writeByte(BYTES);
     int minLength = Integer.MAX_VALUE;
     int maxLength = Integer.MIN_VALUE;
     final long startFP = data.getFilePointer();
     boolean missing = false;
     for(BytesRef v : values) {
       final int length;
       if (v == null) {
         length = 0;
         missing = true;
       } else {
         length = v.length;
       }
       if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
         throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
       }
       minLength = Math.min(minLength, length);
       maxLength = Math.max(maxLength, length);
       if (v != null) {
         data.writeBytes(v.bytes, v.offset, v.length);
       }
     }
     meta.writeLong(startFP);
     meta.writeLong(data.getFilePointer() - startFP);
     if (missing) {
       long start = data.getFilePointer();
       writeMissingBitset(values);
       meta.writeLong(start);
       meta.writeLong(data.getFilePointer() - start);
     } else {
       meta.writeLong(-1L);
     }
     meta.writeVInt(minLength);
     meta.writeVInt(maxLength);

     // if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
     // otherwise, we need to record the length fields...
     if (minLength != maxLength) {
       meta.writeVInt(PackedInts.VERSION_CURRENT);
       meta.writeVInt(BLOCK_SIZE);

       final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
       long addr = 0;
       for (BytesRef v : values) {
         if (v != null) {
           addr += v.length;
         }
         writer.add(addr);
       }
       writer.finish();
     }
   }

   private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
     meta.writeVInt(field.number);
     meta.writeByte(FST);
     meta.writeLong(data.getFilePointer());
     PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
     Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
     IntsRefBuilder scratch = new IntsRefBuilder();
     long ord = 0;
     for (BytesRef v : values) {
       builder.add(Util.toIntsRef(v, scratch), ord);
       ord++;
     }
     FST<Long> fst = builder.finish();
     if (fst != null) {
       fst.save(data);
     }
     meta.writeVLong(ord);
   }

   // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
   // but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode)
   void writeMissingBitset(Iterable<?> values) throws IOException {
     long bits = 0;
     int count = 0;
     for (Object v : values) {
       if (count == 64) {
         data.writeLong(bits);
         count = 0;
         bits = 0;
       }
       if (v != null) {
         bits |= 1L << (count & 0x3f);
       }
       count++;
     }
     if (count > 0) {
       data.writeLong(bits);
     }
   }

   @Override
   public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
     // write the ordinals as numerics
     addNumericField(field, docToOrd, false);

     // write the values as FST
     writeFST(field, values);
   }

   @Override
   public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
     meta.writeVInt(field.number);

     if (isSingleValued(docToValueCount)) {
       meta.writeByte(SORTED_NUMERIC_SINGLETON);
       addNumericField(field, singletonView(docToValueCount, values, null), true);
     } else {
       meta.writeByte(SORTED_NUMERIC);

       // write the addresses:
       meta.writeVInt(PackedInts.VERSION_CURRENT);
       meta.writeVInt(BLOCK_SIZE);
       meta.writeLong(data.getFilePointer());
       final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
       long addr = 0;
       writer.add(addr);
       for (Number v : docToValueCount) {
         addr += v.longValue();
         writer.add(addr);
       }
       writer.finish();
       long valueCount = writer.ord();
       meta.writeLong(valueCount);

       // write the values
       addNumericField(field, values, true);
     }
   }

   // note: this might not be the most efficient... but its fairly simple
   @Override
   public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
     meta.writeVInt(field.number);

     if (isSingleValued(docToOrdCount)) {
       meta.writeByte(SORTED_SET_SINGLETON);
       addSortedField(field, values, singletonView(docToOrdCount, ords, -1L));
     } else {
       meta.writeByte(SORTED_SET);
       // write the ordinals as a binary field
       addBinaryField(field, new Iterable<BytesRef>() {
         @Override
         public Iterator<BytesRef> iterator() {
           return new SortedSetIterator(docToOrdCount.iterator(), ords.iterator());
         }
       });

       // write the values as FST
       writeFST(field, values);
     }
   }

   // per-document vint-encoded byte[]
   static class SortedSetIterator implements Iterator<BytesRef> {
     byte[] buffer = new byte[10];
     ByteArrayDataOutput out = new ByteArrayDataOutput();
     BytesRef ref = new BytesRef();

     final Iterator<Number> counts;
     final Iterator<Number> ords;

     SortedSetIterator(Iterator<Number> counts, Iterator<Number> ords) {
       this.counts = counts;
       this.ords = ords;
     }

     @Override
     public boolean hasNext() {
       return counts.hasNext();
     }

     @Override
     public BytesRef next() {
       if (!hasNext()) {
         throw new NoSuchElementException();
       }

       int count = counts.next().intValue();
       int maxSize = count*9; // worst case
       if (maxSize > buffer.length) {
         buffer = ArrayUtil.grow(buffer, maxSize);
       }

       try {
         encodeValues(count);
       } catch (IOException bogus) {
         throw new RuntimeException(bogus);
       }

       ref.bytes = buffer;
       ref.offset = 0;
       ref.length = out.getPosition();

       return ref;
     }

     // encodes count values to buffer
     private void encodeValues(int count) throws IOException {
       out.reset(buffer);
       long lastOrd = 0;
       for (int i = 0; i < count; i++) {
         long ord = ords.next().longValue();
         out.writeVLong(ord - lastOrd);
         lastOrd = ord;
       }
     }

     @Override
     public void remove() {
       throw new UnsupportedOperationException();
     }
   }
 }
	package org.apache.lucene.codecs.memory;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Iterator;
	import java.util.NoSuchElementException;

	import org.apache.lucene.codecs.CodecUtil;
	import org.apache.lucene.codecs.DocValuesConsumer;
	import org.apache.lucene.index.FieldInfo;
	import org.apache.lucene.index.IndexFileNames;
	import org.apache.lucene.index.SegmentWriteState;
	import org.apache.lucene.store.ByteArrayDataOutput;
	import org.apache.lucene.store.IndexOutput;
	import org.apache.lucene.util.ArrayUtil;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.IOUtils;
	import org.apache.lucene.util.IntsRef;
	import org.apache.lucene.util.IntsRefBuilder;
	import org.apache.lucene.util.MathUtil;
	import org.apache.lucene.util.fst.Builder;
	import org.apache.lucene.util.fst.FST.INPUT_TYPE;
	import org.apache.lucene.util.fst.FST;
	import org.apache.lucene.util.fst.PositiveIntOutputs;
	import org.apache.lucene.util.fst.Util;
	import org.apache.lucene.util.packed.BlockPackedWriter;
	import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
	import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
	import org.apache.lucene.util.packed.PackedInts;

	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.VERSION_CURRENT;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_SIZE;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BYTES;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.NUMBER;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.FST;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_SET;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_SET_SINGLETON;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_NUMERIC;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.SORTED_NUMERIC_SINGLETON;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.DELTA_COMPRESSED;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.BLOCK_COMPRESSED;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.GCD_COMPRESSED;
	import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.TABLE_COMPRESSED;

	/**
	* Writer for {@link MemoryDocValuesFormat}
	*/
	class MemoryDocValuesConsumer extends DocValuesConsumer {
	IndexOutput data, meta;
	final int maxDoc;
	final float acceptableOverheadRatio;

	MemoryDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension, float acceptableOverheadRatio) throws IOException {
	this.acceptableOverheadRatio = acceptableOverheadRatio;
	maxDoc = state.segmentInfo.getDocCount();
	boolean success = false;
	try {
	String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
	data = state.directory.createOutput(dataName, state.context);
	CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
	String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
	meta = state.directory.createOutput(metaName, state.context);
	CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
	success = true;
	} finally {
	if (!success) {
	IOUtils.closeWhileHandlingException(this);
	}
	}
	}

	@Override
	public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
	addNumericField(field, values, true);
	}

	void addNumericField(FieldInfo field, Iterable<Number> values, boolean optimizeStorage) throws IOException {
	meta.writeVInt(field.number);
	meta.writeByte(NUMBER);
	meta.writeLong(data.getFilePointer());
	long minValue = Long.MAX_VALUE;
	long maxValue = Long.MIN_VALUE;
	long blockSum = 0;
	long gcd = 0;
	boolean missing = false;
	// TODO: more efficient?
	HashSet<Long> uniqueValues = null;
	long count = 0;

	if (optimizeStorage) {
	uniqueValues = new HashSet<>();

	long currentBlockMin = Long.MAX_VALUE;
	long currentBlockMax = Long.MIN_VALUE;
	for (Number nv : values) {
	final long v;
	if (nv == null) {
	v = 0;
	missing = true;
	} else {
	v = nv.longValue();
	}

	if (gcd != 1) {
	if (v < Long.MIN_VALUE / 2 \|\| v > Long.MAX_VALUE / 2) {
	// in that case v - minValue might overflow and make the GCD computation return
	// wrong results. Since these extreme values are unlikely, we just discard
	// GCD computation for them
	gcd = 1;
	} else if (count != 0) { // minValue needs to be set first
	gcd = MathUtil.gcd(gcd, v - minValue);
	}
	}

	currentBlockMin = Math.min(minValue, v);
	currentBlockMax = Math.max(maxValue, v);

	minValue = Math.min(minValue, v);
	maxValue = Math.max(maxValue, v);

	if (uniqueValues != null) {
	if (uniqueValues.add(v)) {
	if (uniqueValues.size() > 256) {
	uniqueValues = null;
	}
	}
	}

	++count;
	if (count % BLOCK_SIZE == 0) {
	final long blockDelta = currentBlockMax - currentBlockMin;
	final int blockDeltaRequired = PackedInts.unsignedBitsRequired(blockDelta);
	final int blockBPV = PackedInts.fastestFormatAndBits(BLOCK_SIZE, blockDeltaRequired, acceptableOverheadRatio).bitsPerValue;
	blockSum += blockBPV;
	currentBlockMax = Long.MIN_VALUE;
	currentBlockMin = Long.MAX_VALUE;
	}
	}
	} else {
	for (Number nv : values) {
	long v = nv.longValue();
	maxValue = Math.max(v, maxValue);
	minValue = Math.min(v, minValue);
	count++;
	}
	}

	if (missing) {
	long start = data.getFilePointer();
	writeMissingBitset(values);
	meta.writeLong(start);
	meta.writeLong(data.getFilePointer() - start);
	} else {
	meta.writeLong(-1L);
	}

	final long delta = maxValue - minValue;
	final int deltaRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
	final FormatAndBits deltaBPV = PackedInts.fastestFormatAndBits(maxDoc, deltaRequired, acceptableOverheadRatio);

	final FormatAndBits tableBPV;
	if (count < Integer.MAX_VALUE && uniqueValues != null) {
	tableBPV = PackedInts.fastestFormatAndBits(maxDoc, PackedInts.bitsRequired(uniqueValues.size()-1), acceptableOverheadRatio);
	} else {
	tableBPV = null;
	}

	final FormatAndBits gcdBPV;
	if (count < Integer.MAX_VALUE && gcd != 0 && gcd != 1) {
	final long gcdDelta = (maxValue - minValue) / gcd;
	final int gcdRequired = gcdDelta < 0 ? 64 : PackedInts.bitsRequired(gcdDelta);
	gcdBPV = PackedInts.fastestFormatAndBits(maxDoc, gcdRequired, acceptableOverheadRatio);
	} else {
	gcdBPV = null;
	}

	boolean doBlock = false;
	if (blockSum != 0) {
	int numBlocks = maxDoc / BLOCK_SIZE;
	float avgBPV = blockSum / (float)numBlocks;
	// just a heuristic, with tiny amounts of blocks our estimate is skewed as we ignore the final "incomplete" block.
	// with at least 4 blocks its pretty accurate. The difference must also be significant (according to acceptable overhead).
	if (numBlocks >= 4 && (avgBPV+avgBPV*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
	doBlock = true;
	}
	}
	// blockpackedreader allows us to read in huge streams of ints
	if (count >= Integer.MAX_VALUE) {
	doBlock = true;
	}

	if (tableBPV != null && (tableBPV.bitsPerValue+tableBPV.bitsPerValue*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
	// small number of unique values
	meta.writeByte(TABLE_COMPRESSED); // table-compressed
	Long[] decode = uniqueValues.toArray(new Long[uniqueValues.size()]);
	final HashMap<Long,Integer> encode = new HashMap<>();
	int length = 1 << tableBPV.bitsPerValue;
	data.writeVInt(length);
	for (int i = 0; i < decode.length; i++) {
	data.writeLong(decode[i]);
	encode.put(decode[i], i);
	}
	for (int i = decode.length; i < length; i++) {
	data.writeLong(0);
	}

	meta.writeVInt(PackedInts.VERSION_CURRENT);
	meta.writeLong(count);
	data.writeVInt(tableBPV.format.getId());
	data.writeVInt(tableBPV.bitsPerValue);

	final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, tableBPV.format, (int)count, tableBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
	for(Number nv : values) {
	writer.add(encode.get(nv == null ? 0 : nv.longValue()));
	}
	writer.finish();
	} else if (gcdBPV != null && (gcdBPV.bitsPerValue+gcdBPV.bitsPerValue*acceptableOverheadRatio) < deltaBPV.bitsPerValue) {
	meta.writeByte(GCD_COMPRESSED);
	meta.writeVInt(PackedInts.VERSION_CURRENT);
	meta.writeLong(count);
	data.writeLong(minValue);
	data.writeLong(gcd);
	data.writeVInt(gcdBPV.format.getId());
	data.writeVInt(gcdBPV.bitsPerValue);

	final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, gcdBPV.format, (int)count, gcdBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
	for (Number nv : values) {
	long value = nv == null ? 0 : nv.longValue();
	writer.add((value - minValue) / gcd);
	}
	writer.finish();
	} else if (doBlock) {
	meta.writeByte(BLOCK_COMPRESSED); // block delta-compressed
	meta.writeVInt(PackedInts.VERSION_CURRENT);
	meta.writeLong(count);
	data.writeVInt(BLOCK_SIZE);
	final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
	for (Number nv : values) {
	writer.add(nv == null ? 0 : nv.longValue());
	}
	writer.finish();
	} else {
	meta.writeByte(DELTA_COMPRESSED); // delta-compressed
	meta.writeVInt(PackedInts.VERSION_CURRENT);
	meta.writeLong(count);
	final long minDelta = deltaBPV.bitsPerValue == 64 ? 0 : minValue;
	data.writeLong(minDelta);
	data.writeVInt(deltaBPV.format.getId());
	data.writeVInt(deltaBPV.bitsPerValue);

	final PackedInts.Writer writer = PackedInts.getWriterNoHeader(data, deltaBPV.format, (int)count, deltaBPV.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
	for (Number nv : values) {
	long v = nv == null ? 0 : nv.longValue();
	writer.add(v - minDelta);
	}
	writer.finish();
	}
	}

	@Override
	public void close() throws IOException {
	boolean success = false;
	try {
	if (meta != null) {
	meta.writeVInt(-1); // write EOF marker
	CodecUtil.writeFooter(meta); // write checksum
	}
	if (data != null) {
	CodecUtil.writeFooter(data);
	}
	success = true;
	} finally {
	if (success) {
	IOUtils.close(data, meta);
	} else {
	IOUtils.closeWhileHandlingException(data, meta);
	}
	data = meta = null;
	}
	}

	@Override
	public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
	// write the byte[] data
	meta.writeVInt(field.number);
	meta.writeByte(BYTES);
	int minLength = Integer.MAX_VALUE;
	int maxLength = Integer.MIN_VALUE;
	final long startFP = data.getFilePointer();
	boolean missing = false;
	for(BytesRef v : values) {
	final int length;
	if (v == null) {
	length = 0;
	missing = true;
	} else {
	length = v.length;
	}
	if (length > MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH) {
	throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + MemoryDocValuesFormat.MAX_BINARY_FIELD_LENGTH);
	}
	minLength = Math.min(minLength, length);
	maxLength = Math.max(maxLength, length);
	if (v != null) {
	data.writeBytes(v.bytes, v.offset, v.length);
	}
	}
	meta.writeLong(startFP);
	meta.writeLong(data.getFilePointer() - startFP);
	if (missing) {
	long start = data.getFilePointer();
	writeMissingBitset(values);
	meta.writeLong(start);
	meta.writeLong(data.getFilePointer() - start);
	} else {
	meta.writeLong(-1L);
	}
	meta.writeVInt(minLength);
	meta.writeVInt(maxLength);

	// if minLength == maxLength, its a fixed-length byte[], we are done (the addresses are implicit)
	// otherwise, we need to record the length fields...
	if (minLength != maxLength) {
	meta.writeVInt(PackedInts.VERSION_CURRENT);
	meta.writeVInt(BLOCK_SIZE);

	final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
	long addr = 0;
	for (BytesRef v : values) {
	if (v != null) {
	addr += v.length;
	}
	writer.add(addr);
	}
	writer.finish();
	}
	}

	private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
	meta.writeVInt(field.number);
	meta.writeByte(FST);
	meta.writeLong(data.getFilePointer());
	PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
	Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
	IntsRefBuilder scratch = new IntsRefBuilder();
	long ord = 0;
	for (BytesRef v : values) {
	builder.add(Util.toIntsRef(v, scratch), ord);
	ord++;
	}
	FST<Long> fst = builder.finish();
	if (fst != null) {
	fst.save(data);
	}
	meta.writeVLong(ord);
	}

	// TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
	// but this is very simple, and algorithms only check this for values of 0 anyway (doesnt slow down normal decode)
	void writeMissingBitset(Iterable<?> values) throws IOException {
	long bits = 0;
	int count = 0;
	for (Object v : values) {
	if (count == 64) {
	data.writeLong(bits);
	count = 0;
	bits = 0;
	}
	if (v != null) {
	bits \|= 1L << (count & 0x3f);
	}
	count++;
	}
	if (count > 0) {
	data.writeLong(bits);
	}
	}

	@Override
	public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
	// write the ordinals as numerics
	addNumericField(field, docToOrd, false);

	// write the values as FST
	writeFST(field, values);
	}

	@Override
	public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
	meta.writeVInt(field.number);

	if (isSingleValued(docToValueCount)) {
	meta.writeByte(SORTED_NUMERIC_SINGLETON);
	addNumericField(field, singletonView(docToValueCount, values, null), true);
	} else {
	meta.writeByte(SORTED_NUMERIC);

	// write the addresses:
	meta.writeVInt(PackedInts.VERSION_CURRENT);
	meta.writeVInt(BLOCK_SIZE);
	meta.writeLong(data.getFilePointer());
	final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
	long addr = 0;
	writer.add(addr);
	for (Number v : docToValueCount) {
	addr += v.longValue();
	writer.add(addr);
	}
	writer.finish();
	long valueCount = writer.ord();
	meta.writeLong(valueCount);

	// write the values
	addNumericField(field, values, true);
	}
	}

	// note: this might not be the most efficient... but its fairly simple
	@Override
	public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
	meta.writeVInt(field.number);

	if (isSingleValued(docToOrdCount)) {
	meta.writeByte(SORTED_SET_SINGLETON);
	addSortedField(field, values, singletonView(docToOrdCount, ords, -1L));
	} else {
	meta.writeByte(SORTED_SET);
	// write the ordinals as a binary field
	addBinaryField(field, new Iterable<BytesRef>() {
	@Override
	public Iterator<BytesRef> iterator() {
	return new SortedSetIterator(docToOrdCount.iterator(), ords.iterator());
	}
	});

	// write the values as FST
	writeFST(field, values);
	}
	}

	// per-document vint-encoded byte[]
	static class SortedSetIterator implements Iterator<BytesRef> {
	byte[] buffer = new byte[10];
	ByteArrayDataOutput out = new ByteArrayDataOutput();
	BytesRef ref = new BytesRef();

	final Iterator<Number> counts;
	final Iterator<Number> ords;

	SortedSetIterator(Iterator<Number> counts, Iterator<Number> ords) {
	this.counts = counts;
	this.ords = ords;
	}

	@Override
	public boolean hasNext() {
	return counts.hasNext();
	}

	@Override
	public BytesRef next() {
	if (!hasNext()) {
	throw new NoSuchElementException();
	}

	int count = counts.next().intValue();
	int maxSize = count*9; // worst case
	if (maxSize > buffer.length) {
	buffer = ArrayUtil.grow(buffer, maxSize);
	}

	try {
	encodeValues(count);
	} catch (IOException bogus) {
	throw new RuntimeException(bogus);
	}

	ref.bytes = buffer;
	ref.offset = 0;
	ref.length = out.getPosition();

	return ref;
	}

	// encodes count values to buffer
	private void encodeValues(int count) throws IOException {
	out.reset(buffer);
	long lastOrd = 0;
	for (int i = 0; i < count; i++) {
	long ord = ords.next().longValue();
	out.writeVLong(ord - lastOrd);
	lastOrd = ord;
	}
	}

	@Override
	public void remove() {
	throw new UnsupportedOperationException();
	}
	}
	}