org.eclipse.epf.web.search/src/org/eclipse/epf/web/search/analysis/CJKAnalyzer.java - epf/org.eclipse.epf.projects - Git at Google

 package org.eclipse.epf.web.search.analysis;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;

 import java.io.Reader;
 import java.util.Set;

 /**
  * Analyzer that tokenizes its input with {@link CJKTokenizer} and removes
  * stop words from the resulting stream with a {@link StopFilter}.
  */
 public class CJKAnalyzer extends Analyzer {
   //~ Static fields/initializers ---------------------------------------------

   /**
    * Default stop words: common English words that are not usually useful
    * for searching, plus the empty string and "www".
    */
   public final static String[] STOP_WORDS = {
     "a", "and", "are", "as", "at", "be",
     "but", "by", "for", "if", "in",
     "into", "is", "it", "no", "not",
     "of", "on", "or", "s", "such", "t",
     "that", "the", "their", "then",
     "there", "these", "they", "this",
     "to", "was", "will", "with", "",
     "www"
   };

   //~ Instance fields --------------------------------------------------------

   /**
    * Stop word set handed to every {@link StopFilter} this analyzer creates.
    * Final: it is assigned once in the constructor and never reassigned.
    */
   private final Set stopTable;

   //~ Constructors -----------------------------------------------------------

   /**
    * Builds an analyzer which removes words in {@link #STOP_WORDS}.
    */
   public CJKAnalyzer() {
     // Delegate so the stop set is built in exactly one place.
     this(STOP_WORDS);
   }

   /**
    * Builds an analyzer which removes words in the provided array.
    *
    * @param stopWords stop word array, converted to a set with
    *                  {@link StopFilter#makeStopSet(String[])}
    */
   public CJKAnalyzer(String[] stopWords) {
     stopTable = StopFilter.makeStopSet(stopWords);
   }

   //~ Methods ----------------------------------------------------------------

   /**
    * Creates the token stream for a field: a {@link CJKTokenizer} over the
    * reader, wrapped in a {@link StopFilter} using this analyzer's stop set.
    *
    * @param fieldName lucene field name (not used by this implementation)
    * @param reader    input reader
    * @return the stop-filtered token stream
    */
   public final TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream tokens = new CJKTokenizer(reader);
     return new StopFilter(tokens, stopTable);
   }
 }
	package org.eclipse.epf.web.search.analysis;

	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.StopFilter;
	import org.apache.lucene.analysis.TokenStream;

	import java.io.Reader;
	import java.util.Set;

	/**
	 * Analyzer that tokenizes its input with {@link CJKTokenizer} and removes
	 * stop words from the resulting stream with a {@link StopFilter}.
	 */
	public class CJKAnalyzer extends Analyzer {
		//~ Static fields/initializers ---------------------------------------------

		/**
		 * Default stop words: common English words that are not usually useful
		 * for searching, plus the empty string and "www".
		 */
		public final static String[] STOP_WORDS = {
			"a", "and", "are", "as", "at", "be",
			"but", "by", "for", "if", "in",
			"into", "is", "it", "no", "not",
			"of", "on", "or", "s", "such", "t",
			"that", "the", "their", "then",
			"there", "these", "they", "this",
			"to", "was", "will", "with", "",
			"www"
		};

		//~ Instance fields --------------------------------------------------------

		/**
		 * Stop word set handed to every {@link StopFilter} this analyzer creates.
		 * Final: it is assigned once in the constructor and never reassigned.
		 */
		private final Set stopTable;

		//~ Constructors -----------------------------------------------------------

		/**
		 * Builds an analyzer which removes words in {@link #STOP_WORDS}.
		 */
		public CJKAnalyzer() {
			// Delegate so the stop set is built in exactly one place.
			this(STOP_WORDS);
		}

		/**
		 * Builds an analyzer which removes words in the provided array.
		 *
		 * @param stopWords stop word array, converted to a set with
		 *                  {@link StopFilter#makeStopSet(String[])}
		 */
		public CJKAnalyzer(String[] stopWords) {
			stopTable = StopFilter.makeStopSet(stopWords);
		}

		//~ Methods ----------------------------------------------------------------

		/**
		 * Creates the token stream for a field: a {@link CJKTokenizer} over the
		 * reader, wrapped in a {@link StopFilter} using this analyzer's stop set.
		 *
		 * @param fieldName lucene field name (not used by this implementation)
		 * @param reader input reader
		 * @return the stop-filtered token stream
		 */
		public final TokenStream tokenStream(String fieldName, Reader reader) {
			TokenStream tokens = new CJKTokenizer(reader);
			return new StopFilter(tokens, stopTable);
		}
	}