| /*=============================================================================# |
| # Copyright (c) 2010, 2020 Stephan Wahlbrink and others. |
| # |
| # This program and the accompanying materials are made available under the |
| # terms of the Eclipse Public License 2.0 which is available at |
| # https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 |
| # which is available at https://www.apache.org/licenses/LICENSE-2.0. |
| # |
| # SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 |
| # |
| # Contributors: |
| # Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation |
| #=============================================================================*/ |
| |
| package org.eclipse.statet.internal.rhelp.core.index; |
| |
| import java.io.Reader; |
| |
| import org.apache.lucene.analysis.CharArraySet; |
| import org.apache.lucene.analysis.StopwordAnalyzerBase; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.core.LowerCaseFilter; |
| import org.apache.lucene.analysis.core.StopFilter; |
| import org.apache.lucene.analysis.en.EnglishAnalyzer; |
| import org.apache.lucene.analysis.en.EnglishPossessiveFilter; |
| import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter; |
| import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter; |
| import org.apache.lucene.analysis.snowball.SnowballFilter; |
| import org.apache.lucene.analysis.standard.StandardTokenizer; |
| import org.apache.lucene.analysis.util.CharFilterFactory; |
| import org.tartarus.snowball.ext.EnglishStemmer; |
| |
| |
| final class DefaultAnalyzer extends StopwordAnalyzerBase { |
| |
| |
| public static final CharArraySet STOP_WORDS_SET= EnglishAnalyzer.ENGLISH_STOP_WORDS_SET; |
| |
| private final CharFilterFactory charFilterFactory; |
| |
| |
| public DefaultAnalyzer() { |
| this(null); |
| } |
| |
| public DefaultAnalyzer(final CharFilterFactory charFilterFactory) { |
| super(STOP_WORDS_SET); |
| |
| this.charFilterFactory= charFilterFactory; |
| } |
| |
| |
| @Override |
| protected Reader initReader(final String fieldName, Reader reader) { |
| if (this.charFilterFactory != null) { |
| reader= this.charFilterFactory.create(reader); |
| } |
| return super.initReader(fieldName, reader); |
| } |
| |
| @Override |
| protected TokenStreamComponents createComponents(final String fieldName) { |
| final Tokenizer source= new StandardTokenizer(); |
| TokenStream result= source; |
| result= new EnglishPossessiveFilter(result); |
| result= new LowerCaseFilter(result); |
| result= new StopFilter(result, this.stopwords); |
| result= new KeywordRepeatFilter(result); |
| result= new SnowballFilter(result, new EnglishStemmer()); |
| result= new RemoveDuplicatesTokenFilter(result); |
| return new TokenStreamComponents(source, result); |
| } |
| |
| } |