// blob: d769bdba92bc7ee843e6870d629d7ddee30fc43d [file] [log] [blame]
package org.apache.lucene.analysis.synonym;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;
import java.util.*;
/**
 * Mapping rules for use with {@link SlowSynonymFilter}.
 * <p>
 * Every instance acts as one node of a tree keyed by match strings: {@link #submap} leads to the
 * nodes for longer match sequences, while {@link #synonyms} holds the replacement tokens
 * registered for the sequence that ends at this node.
 *
 * @deprecated (3.4) use {@link SynonymFilterFactory} instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
 */
@Deprecated
class SlowSynonymMap {
  /** @lucene.internal */
  public CharArrayMap<SlowSynonymMap> submap; // recursive: Map<String, SynonymMap>
  /** @lucene.internal */
  public Token[] synonyms;
  /** Bit set built from {@link #INCLUDE_ORIG} and {@link #IGNORE_CASE}. */
  int flags;

  /** Flag bit reported by {@link #includeOrig()}. */
  static final int INCLUDE_ORIG=0x01;
  /** Flag bit reported by {@link #ignoreCase()}. */
  static final int IGNORE_CASE=0x02;

  /** Creates an empty map with no flags set. */
  public SlowSynonymMap() {}

  /**
   * Creates an empty map.
   *
   * @param ignoreCase when true the {@link #IGNORE_CASE} flag is set on this map
   */
  public SlowSynonymMap(boolean ignoreCase) {
    if (ignoreCase) {
      flags |= IGNORE_CASE;
    }
  }

  /** @return true if the {@link #INCLUDE_ORIG} bit is set in {@link #flags} */
  public boolean includeOrig() {
    return (flags & INCLUDE_ORIG) != 0;
  }

  /** @return true if the {@link #IGNORE_CASE} bit is set in {@link #flags} */
  public boolean ignoreCase() {
    return (flags & IGNORE_CASE) != 0;
  }

  /**
   * Registers a replacement for a sequence of match strings, creating the intermediate nodes
   * as needed.
   *
   * @param singleMatch {@code List<String>}, the sequence of strings to match
   * @param replacement {@code List<Token>}, the list of tokens to use on a match
   * @param includeOrig sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
   * @param mergeExisting merge the replacement tokens with any other mappings that exist
   * @throws IllegalArgumentException if the sequence already has a mapping and
   *         {@code mergeExisting} is false
   */
  public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
    // Descend one node per match string, building the path where it is missing.
    SlowSynonymMap node = this;
    for (String key : singleMatch) {
      if (node.submap == null) {
        // The match version is fixed here instead of being stored in every submap.
        node.submap = new CharArrayMap<>(Version.LUCENE_CURRENT, 1, ignoreCase());
      }
      SlowSynonymMap child = node.submap.get(key);
      if (child == null) {
        child = new SlowSynonymMap();
        // New nodes take over the case-sensitivity bit of the map add() was called on.
        child.flags |= flags & IGNORE_CASE;
        node.submap.put(key, child);
      }
      node = child;
    }

    final boolean alreadyMapped = node.synonyms != null;
    if (alreadyMapped && !mergeExisting) {
      throw new IllegalArgumentException("SynonymFilter: there is already a mapping for " + singleMatch);
    }

    final List<Token> combined;
    if (alreadyMapped) {
      combined = mergeTokens(Arrays.asList(node.synonyms), replacement);
    } else {
      combined = replacement;
    }
    node.synonyms = combined.toArray(new Token[combined.size()]);

    // The flag is only ever switched on here; an earlier INCLUDE_ORIG stays set.
    if (includeOrig) {
      node.flags |= INCLUDE_ORIG;
    }
  }

  /**
   * Renders this node as {@code <[tok,tok,...],submap>}. The bracketed part is only present
   * when {@link #synonyms} is non-null and ends with {@code ,ORIG} when the
   * {@link #INCLUDE_ORIG} flag is set.
   */
  @Override
  public String toString() {
    StringBuilder out = new StringBuilder("<");
    if (synonyms != null) {
      out.append("[");
      String separator = "";
      for (Token synonym : synonyms) {
        out.append(separator);
        out.append(synonym);
        separator = ",";
      }
      if ((flags & INCLUDE_ORIG) != 0) {
        out.append(",ORIG");
      }
      out.append("],");
    }
    out.append(submap);
    out.append(">");
    return out.toString();
  }

  /**
   * Produces a {@code List<Token>} from a {@code List<String>}: one token per string, each
   * created with start offset 0, end offset 0 and type {@code "SYNONYM"}.
   */
  public static List<Token> makeTokens(List<String> strings) {
    List<Token> tokens = new ArrayList<>(strings.size());
    for (String text : strings) {
      tokens.add(new Token(text, 0, 0, "SYNONYM"));
    }
    return tokens;
  }

  /**
   * Merges two token lists into one, rewriting position increments so that every token keeps
   * the absolute position it had in its own list.
   * <p>
   * Writing {@code x,n} for a token x whose position increment is n:
   * <ul>
   *   <li>[a,1 b,1] merged with [c,1 d,1] yields [a,1 c,0 d,1 b,0]</li>
   *   <li>[a,5 b,2] merged with [c,1 d,4 e,4] yields [c,1 d,4 a,0 b,2 e,2]</li>
   * </ul>
   * When both lists are non-null the result holds copies made from each token's term buffer,
   * offsets and type; nothing else is read from the source tokens. When either list is null
   * the elements of the other list (if any) are returned in a new list without being copied.
   *
   * @param lst1 first list, may be null
   * @param lst2 second list, may be null
   * @return a new list with the merged tokens
   */
  public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
    ArrayList<Token> merged = new ArrayList<>();
    if (lst1 == null || lst2 == null) {
      List<Token> present = (lst1 != null) ? lst1 : lst2;
      if (present != null) {
        merged.addAll(present);
      }
      return merged;
    }

    Iterator<Token> source1 = lst1.iterator();
    Iterator<Token> source2 = lst2.iterator();
    Token head1 = nextOrNull(source1);
    Token head2 = nextOrNull(source2);
    // Absolute position of each pending head token within its own list.
    int abs1 = (head1 == null) ? 0 : head1.getPositionIncrement();
    int abs2 = (head2 == null) ? 0 : head2.getPositionIncrement();
    // Absolute position of the token emitted last.
    int emitted = 0;

    // The two inner loops alternate in this fixed order; that order determines how tokens
    // sharing a position are interleaved, so it must not be collapsed into a single loop.
    while (head1 != null || head2 != null) {
      while (head1 != null && (head2 == null || abs1 <= abs2)) {
        merged.add(copyWithIncrement(head1, abs1 - emitted));
        emitted = abs1;
        head1 = nextOrNull(source1);
        if (head1 != null) {
          abs1 += head1.getPositionIncrement();
        }
      }
      while (head2 != null && (head1 == null || abs2 <= abs1)) {
        merged.add(copyWithIncrement(head2, abs2 - emitted));
        emitted = abs2;
        head2 = nextOrNull(source2);
        if (head2 != null) {
          abs2 += head2.getPositionIncrement();
        }
      }
    }
    return merged;
  }

  /** Returns the iterator's next token, or null once the iterator is exhausted. */
  private static Token nextOrNull(Iterator<Token> tokens) {
    return tokens.hasNext() ? tokens.next() : null;
  }

  /**
   * Builds a new token from the source's offsets, type and term buffer and gives it the
   * supplied position increment.
   */
  private static Token copyWithIncrement(Token source, int positionIncrement) {
    Token copy = new Token(source.startOffset(), source.endOffset(), source.type());
    copy.copyBuffer(source.buffer(), 0, source.length());
    copy.setPositionIncrement(positionIncrement);
    return copy;
  }
}