| package org.apache.lucene.analysis.hunspell; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.List; |
| |
| import org.apache.lucene.analysis.util.CharArraySet; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CharsRef; |
| import org.apache.lucene.util.IntsRef; |
| import org.apache.lucene.util.automaton.CharacterRunAutomaton; |
| import org.apache.lucene.util.fst.FST; |
| import org.apache.lucene.util.fst.Outputs; |
| |
| /** |
| * Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word. It |
| * conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping. |
| */ |
| final class Stemmer { |
| private final Dictionary dictionary; |
| private final BytesRef scratch = new BytesRef(); |
| private final StringBuilder segment = new StringBuilder(); |
| private final ByteArrayDataInput affixReader; |
| |
| // used for normalization |
| private final StringBuilder scratchSegment = new StringBuilder(); |
| private char scratchBuffer[] = new char[32]; |
| |
| // its '1' if we have no stem exceptions, otherwise every other form |
| // is really an ID pointing to the exception table |
| private final int formStep; |
| |
| /** |
| * Constructs a new Stemmer which will use the provided Dictionary to create its stems. |
| * |
| * @param dictionary Dictionary that will be used to create the stems |
| */ |
| public Stemmer(Dictionary dictionary) { |
| this.dictionary = dictionary; |
| this.affixReader = new ByteArrayDataInput(dictionary.affixData); |
| for (int level = 0; level < 3; level++) { |
| if (dictionary.prefixes != null) { |
| prefixArcs[level] = new FST.Arc<>(); |
| prefixReaders[level] = dictionary.prefixes.getBytesReader(); |
| } |
| if (dictionary.suffixes != null) { |
| suffixArcs[level] = new FST.Arc<>(); |
| suffixReaders[level] = dictionary.suffixes.getBytesReader(); |
| } |
| } |
| formStep = dictionary.hasStemExceptions ? 2 : 1; |
| } |
| |
| /** |
| * Find the stem(s) of the provided word. |
| * |
| * @param word Word to find the stems for |
| * @return List of stems for the word |
| */ |
| public List<CharsRef> stem(String word) { |
| return stem(word.toCharArray(), word.length()); |
| } |
| |
| /** |
| * Find the stem(s) of the provided word |
| * |
| * @param word Word to find the stems for |
| * @return List of stems for the word |
| */ |
| public List<CharsRef> stem(char word[], int length) { |
| |
| if (dictionary.needsInputCleaning) { |
| scratchSegment.setLength(0); |
| scratchSegment.append(word, 0, length); |
| CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment); |
| scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length()); |
| length = segment.length(); |
| segment.getChars(0, length, scratchBuffer, 0); |
| word = scratchBuffer; |
| } |
| |
| int caseType = caseOf(word, length); |
| if (caseType == UPPER_CASE) { |
| // upper: union exact, title, lower |
| caseFoldTitle(word, length); |
| caseFoldLower(titleBuffer, length); |
| List<CharsRef> list = doStem(word, length, false); |
| list.addAll(doStem(titleBuffer, length, true)); |
| list.addAll(doStem(lowerBuffer, length, true)); |
| return list; |
| } else if (caseType == TITLE_CASE) { |
| // title: union exact, lower |
| caseFoldLower(word, length); |
| List<CharsRef> list = doStem(word, length, false); |
| list.addAll(doStem(lowerBuffer, length, true)); |
| return list; |
| } else { |
| // exact match only |
| return doStem(word, length, false); |
| } |
| } |
| |
| // temporary buffers for case variants |
| private char[] lowerBuffer = new char[8]; |
| private char[] titleBuffer = new char[8]; |
| |
| private static final int EXACT_CASE = 0; |
| private static final int TITLE_CASE = 1; |
| private static final int UPPER_CASE = 2; |
| |
| /** returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word */ |
| private int caseOf(char word[], int length) { |
| if (dictionary.ignoreCase || length == 0 || !Character.isUpperCase(word[0])) { |
| return EXACT_CASE; |
| } |
| |
| // determine if we are title or lowercase (or something funky, in which its exact) |
| boolean seenUpper = false; |
| boolean seenLower = false; |
| for (int i = 1; i < length; i++) { |
| boolean v = Character.isUpperCase(word[i]); |
| seenUpper |= v; |
| seenLower |= !v; |
| } |
| |
| if (!seenLower) { |
| return UPPER_CASE; |
| } else if (!seenUpper) { |
| return TITLE_CASE; |
| } else { |
| return EXACT_CASE; |
| } |
| } |
| |
| /** folds titlecase variant of word to titleBuffer */ |
| private void caseFoldTitle(char word[], int length) { |
| titleBuffer = ArrayUtil.grow(titleBuffer, length); |
| System.arraycopy(word, 0, titleBuffer, 0, length); |
| for (int i = 1; i < length; i++) { |
| titleBuffer[i] = dictionary.caseFold(titleBuffer[i]); |
| } |
| } |
| |
| /** folds lowercase variant of word (title cased) to lowerBuffer */ |
| private void caseFoldLower(char word[], int length) { |
| lowerBuffer = ArrayUtil.grow(lowerBuffer, length); |
| System.arraycopy(word, 0, lowerBuffer, 0, length); |
| lowerBuffer[0] = dictionary.caseFold(lowerBuffer[0]); |
| } |
| |
| private List<CharsRef> doStem(char word[], int length, boolean caseVariant) { |
| List<CharsRef> stems = new ArrayList<>(); |
| IntsRef forms = dictionary.lookupWord(word, 0, length); |
| if (forms != null) { |
| for (int i = 0; i < forms.length; i += formStep) { |
| boolean checkKeepCase = caseVariant && dictionary.keepcase != -1; |
| boolean checkNeedAffix = dictionary.needaffix != -1; |
| boolean checkOnlyInCompound = dictionary.onlyincompound != -1; |
| if (checkKeepCase || checkNeedAffix || checkOnlyInCompound) { |
| dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch); |
| char wordFlags[] = Dictionary.decodeFlags(scratch); |
| // we are looking for a case variant, but this word does not allow it |
| if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) { |
| continue; |
| } |
| // we can't add this form, its a pseudostem requiring an affix |
| if (checkNeedAffix && Dictionary.hasFlag(wordFlags, (char)dictionary.needaffix)) { |
| continue; |
| } |
| // we can't add this form, it only belongs inside a compound word |
| if (checkOnlyInCompound && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) { |
| continue; |
| } |
| } |
| stems.add(newStem(word, length, forms, i)); |
| } |
| } |
| try { |
| boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant)); |
| } catch (IOException bogus) { |
| throw new RuntimeException(bogus); |
| } |
| return stems; |
| } |
| |
| /** |
| * Find the unique stem(s) of the provided word |
| * |
| * @param word Word to find the stems for |
| * @return List of stems for the word |
| */ |
| public List<CharsRef> uniqueStems(char word[], int length) { |
| List<CharsRef> stems = stem(word, length); |
| if (stems.size() < 2) { |
| return stems; |
| } |
| CharArraySet terms = new CharArraySet(8, dictionary.ignoreCase); |
| List<CharsRef> deduped = new ArrayList<>(); |
| for (CharsRef s : stems) { |
| if (!terms.contains(s)) { |
| deduped.add(s); |
| terms.add(s); |
| } |
| } |
| return deduped; |
| } |
| |
| private CharsRef newStem(char buffer[], int length, IntsRef forms, int formID) { |
| final String exception; |
| if (dictionary.hasStemExceptions) { |
| int exceptionID = forms.ints[forms.offset + formID + 1]; |
| if (exceptionID > 0) { |
| exception = dictionary.getStemException(exceptionID); |
| } else { |
| exception = null; |
| } |
| } else { |
| exception = null; |
| } |
| |
| if (dictionary.needsOutputCleaning) { |
| scratchSegment.setLength(0); |
| if (exception != null) { |
| scratchSegment.append(exception); |
| } else { |
| scratchSegment.append(buffer, 0, length); |
| } |
| try { |
| Dictionary.applyMappings(dictionary.oconv, scratchSegment); |
| } catch (IOException bogus) { |
| throw new RuntimeException(bogus); |
| } |
| char cleaned[] = new char[scratchSegment.length()]; |
| scratchSegment.getChars(0, cleaned.length, cleaned, 0); |
| return new CharsRef(cleaned, 0, cleaned.length); |
| } else { |
| if (exception != null) { |
| return new CharsRef(exception); |
| } else { |
| return new CharsRef(buffer, 0, length); |
| } |
| } |
| } |
| |
| // ================================================= Helper Methods ================================================ |
| |
| // some state for traversing FSTs |
| final FST.BytesReader prefixReaders[] = new FST.BytesReader[3]; |
| @SuppressWarnings("unchecked") |
| final FST.Arc<IntsRef> prefixArcs[] = new FST.Arc[3]; |
| |
| final FST.BytesReader suffixReaders[] = new FST.BytesReader[3]; |
| @SuppressWarnings("unchecked") |
| final FST.Arc<IntsRef> suffixArcs[] = new FST.Arc[3]; |
| |
| |
| /** |
| * Generates a list of stems for the provided word |
| * |
| * @param word Word to generate the stems for |
| * @param previous previous affix that was removed (so we dont remove same one twice) |
| * @param prevFlag Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step |
| * @param prefixFlag flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word |
| * @param recursionDepth current recursiondepth |
| * @param doPrefix true if we should remove prefixes |
| * @param doSuffix true if we should remove suffixes |
| * @param previousWasPrefix true if the previous removal was a prefix: |
| * if we are removing a suffix, and it has no continuation requirements, its ok. |
| * but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. |
| * @param circumfix true if the previous prefix removal was signed as a circumfix |
| * this means inner most suffix must also contain circumfix flag. |
| * @param caseVariant true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed. |
| * @return List of stems, or empty list if no stems are found |
| */ |
| private List<CharsRef> stem(char word[], int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, boolean doPrefix, boolean doSuffix, boolean previousWasPrefix, boolean circumfix, boolean caseVariant) throws IOException { |
| |
| // TODO: allow this stuff to be reused by tokenfilter |
| List<CharsRef> stems = new ArrayList<>(); |
| |
| if (doPrefix && dictionary.prefixes != null) { |
| FST<IntsRef> fst = dictionary.prefixes; |
| Outputs<IntsRef> outputs = fst.outputs; |
| FST.BytesReader bytesReader = prefixReaders[recursionDepth]; |
| FST.Arc<IntsRef> arc = prefixArcs[recursionDepth]; |
| fst.getFirstArc(arc); |
| IntsRef NO_OUTPUT = outputs.getNoOutput(); |
| IntsRef output = NO_OUTPUT; |
| int limit = dictionary.fullStrip ? length : length-1; |
| for (int i = 0; i < limit; i++) { |
| if (i > 0) { |
| int ch = word[i-1]; |
| if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { |
| break; |
| } else if (arc.output != NO_OUTPUT) { |
| output = fst.outputs.add(output, arc.output); |
| } |
| } |
| IntsRef prefixes = null; |
| if (!arc.isFinal()) { |
| continue; |
| } else { |
| prefixes = fst.outputs.add(output, arc.nextFinalOutput); |
| } |
| |
| for (int j = 0; j < prefixes.length; j++) { |
| int prefix = prefixes.ints[prefixes.offset + j]; |
| if (prefix == previous) { |
| continue; |
| } |
| affixReader.setPosition(8 * prefix); |
| char flag = (char) (affixReader.readShort() & 0xffff); |
| char stripOrd = (char) (affixReader.readShort() & 0xffff); |
| int condition = (char) (affixReader.readShort() & 0xffff); |
| boolean crossProduct = (condition & 1) == 1; |
| condition >>>= 1; |
| char append = (char) (affixReader.readShort() & 0xffff); |
| |
| final boolean compatible; |
| if (recursionDepth == 0) { |
| if (dictionary.onlyincompound == -1) { |
| compatible = true; |
| } else { |
| // check if affix is allowed in a non-compound word |
| dictionary.flagLookup.get(append, scratch); |
| char appendFlags[] = Dictionary.decodeFlags(scratch); |
| compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); |
| } |
| } else if (crossProduct) { |
| // cross check incoming continuation class (flag of previous affix) against list. |
| dictionary.flagLookup.get(append, scratch); |
| char appendFlags[] = Dictionary.decodeFlags(scratch); |
| assert prevFlag >= 0; |
| boolean allowed = dictionary.onlyincompound == -1 || |
| !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); |
| compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false); |
| } else { |
| compatible = false; |
| } |
| |
| if (compatible) { |
| int deAffixedStart = i; |
| int deAffixedLength = length - deAffixedStart; |
| |
| int stripStart = dictionary.stripOffsets[stripOrd]; |
| int stripEnd = dictionary.stripOffsets[stripOrd+1]; |
| int stripLength = stripEnd - stripStart; |
| |
| if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength)) { |
| continue; |
| } |
| |
| char strippedWord[] = new char[stripLength + deAffixedLength]; |
| System.arraycopy(dictionary.stripData, stripStart, strippedWord, 0, stripLength); |
| System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength); |
| |
| List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix, caseVariant); |
| |
| stems.addAll(stemList); |
| } |
| } |
| } |
| } |
| |
| if (doSuffix && dictionary.suffixes != null) { |
| FST<IntsRef> fst = dictionary.suffixes; |
| Outputs<IntsRef> outputs = fst.outputs; |
| FST.BytesReader bytesReader = suffixReaders[recursionDepth]; |
| FST.Arc<IntsRef> arc = suffixArcs[recursionDepth]; |
| fst.getFirstArc(arc); |
| IntsRef NO_OUTPUT = outputs.getNoOutput(); |
| IntsRef output = NO_OUTPUT; |
| int limit = dictionary.fullStrip ? 0 : 1; |
| for (int i = length; i >= limit; i--) { |
| if (i < length) { |
| int ch = word[i]; |
| if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) { |
| break; |
| } else if (arc.output != NO_OUTPUT) { |
| output = fst.outputs.add(output, arc.output); |
| } |
| } |
| IntsRef suffixes = null; |
| if (!arc.isFinal()) { |
| continue; |
| } else { |
| suffixes = fst.outputs.add(output, arc.nextFinalOutput); |
| } |
| |
| for (int j = 0; j < suffixes.length; j++) { |
| int suffix = suffixes.ints[suffixes.offset + j]; |
| if (suffix == previous) { |
| continue; |
| } |
| affixReader.setPosition(8 * suffix); |
| char flag = (char) (affixReader.readShort() & 0xffff); |
| char stripOrd = (char) (affixReader.readShort() & 0xffff); |
| int condition = (char) (affixReader.readShort() & 0xffff); |
| boolean crossProduct = (condition & 1) == 1; |
| condition >>>= 1; |
| char append = (char) (affixReader.readShort() & 0xffff); |
| |
| final boolean compatible; |
| if (recursionDepth == 0) { |
| if (dictionary.onlyincompound == -1) { |
| compatible = true; |
| } else { |
| // check if affix is allowed in a non-compound word |
| dictionary.flagLookup.get(append, scratch); |
| char appendFlags[] = Dictionary.decodeFlags(scratch); |
| compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); |
| } |
| } else if (crossProduct) { |
| // cross check incoming continuation class (flag of previous affix) against list. |
| dictionary.flagLookup.get(append, scratch); |
| char appendFlags[] = Dictionary.decodeFlags(scratch); |
| assert prevFlag >= 0; |
| boolean allowed = dictionary.onlyincompound == -1 || |
| !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); |
| compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix); |
| } else { |
| compatible = false; |
| } |
| |
| if (compatible) { |
| int appendLength = length - i; |
| int deAffixedLength = length - appendLength; |
| |
| int stripStart = dictionary.stripOffsets[stripOrd]; |
| int stripEnd = dictionary.stripOffsets[stripOrd+1]; |
| int stripLength = stripEnd - stripStart; |
| |
| if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength)) { |
| continue; |
| } |
| |
| char strippedWord[] = new char[stripLength + deAffixedLength]; |
| System.arraycopy(word, 0, strippedWord, 0, deAffixedLength); |
| System.arraycopy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength); |
| |
| List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant); |
| |
| stems.addAll(stemList); |
| } |
| } |
| } |
| } |
| |
| return stems; |
| } |
| |
| /** checks condition of the concatenation of two strings */ |
| // note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem |
| // but this is a little bit more complicated. |
| private boolean checkCondition(int condition, char c1[], int c1off, int c1len, char c2[], int c2off, int c2len) { |
| if (condition != 0) { |
| CharacterRunAutomaton pattern = dictionary.patterns.get(condition); |
| int state = pattern.getInitialState(); |
| for (int i = c1off; i < c1off + c1len; i++) { |
| state = pattern.step(state, c1[i]); |
| if (state == -1) { |
| return false; |
| } |
| } |
| for (int i = c2off; i < c2off + c2len; i++) { |
| state = pattern.step(state, c2[i]); |
| if (state == -1) { |
| return false; |
| } |
| } |
| return pattern.isAccept(state); |
| } |
| return true; |
| } |
| |
| /** |
| * Applies the affix rule to the given word, producing a list of stems if any are found |
| * |
| * @param strippedWord Word the affix has been removed and the strip added |
| * @param length valid length of stripped word |
| * @param affix HunspellAffix representing the affix rule itself |
| * @param prefixFlag when we already stripped a prefix, we cant simply recurse and check the suffix, unless both are compatible |
| * so we must check dictionary form against both to add it as a stem! |
| * @param recursionDepth current recursion depth |
| * @param prefix true if we are removing a prefix (false if its a suffix) |
| * @return List of stems for the word, or an empty list if none are found |
| */ |
| List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int prefixFlag, int recursionDepth, boolean prefix, boolean circumfix, boolean caseVariant) throws IOException { |
| // TODO: just pass this in from before, no need to decode it twice |
| affixReader.setPosition(8 * affix); |
| char flag = (char) (affixReader.readShort() & 0xffff); |
| affixReader.skipBytes(2); // strip |
| int condition = (char) (affixReader.readShort() & 0xffff); |
| boolean crossProduct = (condition & 1) == 1; |
| condition >>>= 1; |
| char append = (char) (affixReader.readShort() & 0xffff); |
| |
| List<CharsRef> stems = new ArrayList<>(); |
| |
| IntsRef forms = dictionary.lookupWord(strippedWord, 0, length); |
| if (forms != null) { |
| for (int i = 0; i < forms.length; i += formStep) { |
| dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch); |
| char wordFlags[] = Dictionary.decodeFlags(scratch); |
| if (Dictionary.hasFlag(wordFlags, flag)) { |
| // confusing: in this one exception, we already chained the first prefix against the second, |
| // so it doesnt need to be checked against the word |
| boolean chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix; |
| if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char)prefixFlag)) { |
| // see if we can chain prefix thru the suffix continuation class (only if it has any!) |
| dictionary.flagLookup.get(append, scratch); |
| char appendFlags[] = Dictionary.decodeFlags(scratch); |
| if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false)) { |
| continue; |
| } |
| } |
| |
| // if circumfix was previously set by a prefix, we must check this suffix, |
| // to ensure it has it, and vice versa |
| if (dictionary.circumfix != -1) { |
| dictionary.flagLookup.get(append, scratch); |
| char appendFlags[] = Dictionary.decodeFlags(scratch); |
| boolean suffixCircumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix); |
| if (circumfix != suffixCircumfix) { |
| continue; |
| } |
| } |
| |
| // we are looking for a case variant, but this word does not allow it |
| if (caseVariant && dictionary.keepcase != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) { |
| continue; |
| } |
| // we aren't decompounding (yet) |
| if (dictionary.onlyincompound != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) { |
| continue; |
| } |
| stems.add(newStem(strippedWord, length, forms, i)); |
| } |
| } |
| } |
| |
| // if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag |
| if (dictionary.circumfix != -1 && !circumfix && prefix) { |
| dictionary.flagLookup.get(append, scratch); |
| char appendFlags[] = Dictionary.decodeFlags(scratch); |
| circumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix); |
| } |
| |
| if (crossProduct) { |
| if (recursionDepth == 0) { |
| if (prefix) { |
| // we took away the first prefix. |
| // COMPLEXPREFIXES = true: combine with a second prefix and another suffix |
| // COMPLEXPREFIXES = false: combine with a suffix |
| stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant)); |
| } else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) { |
| // we took away a suffix. |
| // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed |
| // COMPLEXPREFIXES = false: combine with another suffix |
| stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant)); |
| } |
| } else if (recursionDepth == 1) { |
| if (prefix && dictionary.complexPrefixes) { |
| // we took away the second prefix: go look for another suffix |
| stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant)); |
| } else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix) { |
| // we took away a prefix, then a suffix: go look for another suffix |
| stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant)); |
| } |
| } |
| } |
| |
| return stems; |
| } |
| |
| /** |
| * Checks if the given flag cross checks with the given array of flags |
| * |
| * @param flag Flag to cross check with the array of flags |
| * @param flags Array of flags to cross check against. Can be {@code null} |
| * @return {@code true} if the flag is found in the array or the array is {@code null}, {@code false} otherwise |
| */ |
| private boolean hasCrossCheckedFlag(char flag, char[] flags, boolean matchEmpty) { |
| return (flags.length == 0 && matchEmpty) || Arrays.binarySearch(flags, flag) >= 0; |
| } |
| } |