blob: e19dddd14413ed7c10341d2bd811417c91ad6673 [file] [log] [blame]
//------------------------------------------------------------------------------
// Copyright (c) 2005, 2006 IBM Corporation and others.
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// which accompanies this distribution, and is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// Contributors:
// IBM Corporation - initial implementation
//------------------------------------------------------------------------------
package org.eclipse.epf.search.analysis;
import java.io.IOException;
import java.util.ResourceBundle;
import java.util.Set;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.eclipse.epf.common.utils.StrUtil;
/**
 * A token filter that removes stop words from the wrapped token stream.
 * <p>
 * The stop word list is read from the <code>Search.stopWords</code> key of
 * the <code>StopWords</code> resource bundle located in this class's package.
 * If that list cannot be loaded, the filter falls back to
 * {@link StopAnalyzer#ENGLISH_STOP_WORDS}.
 * <p>
 * The stop word set is built once, during class initialization, and is then
 * only read, so it is shared by every instance of this filter.
 *
 * @author Kelvin Low
 * @since 1.0
 */
public final class TextFilter extends TokenFilter {

    /**
     * The stop words shared by all instances. Built during class
     * initialization rather than lazily from the constructor, so that filters
     * created concurrently never race on populating it.
     */
    private static final Set stopWords = loadStopWords();

    /**
     * Creates a new instance.
     *
     * @param in
     *            the token stream to be filtered
     */
    public TextFilter(TokenStream in) {
        super(in);
    }

    /**
     * Returns the next token from the wrapped stream whose term text is not a
     * stop word.
     *
     * @return the next non-stop-word token, or <code>null</code> once the
     *         wrapped stream is exhausted
     * @throws IOException
     *             if the wrapped stream fails while producing a token
     * @see org.apache.lucene.analysis.TokenStream#next()
     */
    public final Token next() throws IOException {
        for (Token token = input.next(); token != null; token = input.next()) {
            // The term text is looked up as-is; any case normalization is
            // presumably done by a filter earlier in the chain (not visible
            // here).
            String tokenText = token.termText();
            if (!stopWords.contains(tokenText)) {
                return token;
            }
        }
        return null;
    }

    /**
     * Loads the stop words defined in the StopWords.properties file.
     * <p>
     * The property value is split on spaces and commas. Any failure while
     * locating or reading the bundle results in the default English stop
     * words being used instead.
     *
     * @return a set containing the stop words
     */
    private static Set loadStopWords() {
        String[] words = null;
        try {
            ResourceBundle bundle = ResourceBundle.getBundle(TextFilter.class
                    .getPackage().getName()
                    + ".StopWords"); //$NON-NLS-1$
            String property = bundle.getString("Search.stopWords"); //$NON-NLS-1$
            words = StrUtil.split(property, " ,"); //$NON-NLS-1$
        } catch (Exception e) {
            // Deliberate best-effort fallback: a missing or malformed bundle
            // must not prevent the filter from being used, so any failure
            // here simply selects the default English list.
            words = StopAnalyzer.ENGLISH_STOP_WORDS;
        }
        return StopFilter.makeStopSet(words);
    }
}