blob: 39199a5d57804365f757e160aaeb8917c8e7bc1f [file] [log] [blame]
/*=============================================================================#
# Copyright (c) 2010, 2020 Stephan Wahlbrink and others.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
# which is available at https://www.apache.org/licenses/LICENSE-2.0.
#
# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
#
# Contributors:
# Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
#=============================================================================*/
package org.eclipse.statet.internal.rhelp.core.index;
import java.io.Reader;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.tartarus.snowball.ext.EnglishStemmer;
/**
 * Analyzer for English text: standard tokenization, possessive removal, lower-casing,
 * English stop word removal and Snowball (English) stemming.
 * 
 * <p>Each token is duplicated by a {@link KeywordRepeatFilter} before stemming, so that both
 * the original and the stemmed form are emitted; duplicates (stem equals original) are
 * dropped again by a {@link RemoveDuplicatesTokenFilter}.</p>
 * 
 * <p>An optional {@link CharFilterFactory} can be supplied to preprocess the character stream
 * before it is tokenized.</p>
 */
final class DefaultAnalyzer extends StopwordAnalyzerBase {
	
	
	/** The stop words used by this analyzer (the English stop word set of Lucene). */
	public static final CharArraySet STOP_WORDS_SET= EnglishAnalyzer.ENGLISH_STOP_WORDS_SET;
	
	
	/** Factory of the char filter wrapping the input reader, or {@code null} for none. */
	private final CharFilterFactory charFilterFactory;
	
	
	/**
	 * Creates an analyzer without a char filter.
	 */
	public DefaultAnalyzer() {
		this(null);
	}
	
	/**
	 * Creates an analyzer which optionally preprocesses the input with a char filter.
	 * 
	 * @param charFilterFactory the factory used to wrap each input reader,
	 *     or {@code null} if the input should be tokenized as is
	 */
	public DefaultAnalyzer(final CharFilterFactory charFilterFactory) {
		super(STOP_WORDS_SET);
		this.charFilterFactory= charFilterFactory;
	}
	
	
	/**
	 * Wraps the reader with the configured char filter, if any, before delegating to the
	 * super implementation.
	 */
	@Override
	protected Reader initReader(final String fieldName, Reader reader) {
		if (this.charFilterFactory != null) {
			reader= this.charFilterFactory.create(reader);
		}
		return super.initReader(fieldName, reader);
	}
	
	/**
	 * Builds the analysis chain:
	 * tokenizer, possessive filter, lower case, stop words, keyword repeat, stemmer,
	 * duplicate removal.
	 */
	@Override
	protected TokenStreamComponents createComponents(final String fieldName) {
		final Tokenizer source= new StandardTokenizer();
		TokenStream result= source;
		result= new EnglishPossessiveFilter(result);
		result= new LowerCaseFilter(result);
		result= new StopFilter(result, this.stopwords);
		// Emit every token twice (once marked as keyword) so the unstemmed form is kept
		result= new KeywordRepeatFilter(result);
		result= new SnowballFilter(result, new EnglishStemmer());
		// Drop the second token again if stemming did not change it
		result= new RemoveDuplicatesTokenFilter(result);
		return new TokenStreamComponents(source, result);
	}
	
	/**
	 * Applies the case normalization of the analysis chain to terms which are normalized
	 * via {@code Analyzer.normalize(String, String)} instead of being fully analyzed.
	 * 
	 * <p>Without this override the default implementation returns the stream unchanged, so
	 * such terms would keep their case and could not match the lower-cased terms produced
	 * by {@link #createComponents(String)}.</p>
	 */
	@Override
	protected TokenStream normalize(final String fieldName, final TokenStream in) {
		return new LowerCaseFilter(in);
	}
	
}