org.eclipse.epf.web.search/src/org/eclipse/epf/web/search/IndexSearch.java - epf/org.eclipse.epf.projects - Git at Google

 package org.eclipse.epf.web.search;

 import java.util.StringTokenizer;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.RAMDirectory;
 import org.eclipse.epf.web.search.analysis.ChineseAnalyzer;
 import org.eclipse.epf.web.search.analysis.TextAnalyzer;

 import com.ibm.icu.text.DecimalFormat;

 public class IndexSearch {

 	private static Searcher searcher;

 	/**
 	 * Runs a query against the Lucene index and returns the matching hits.
 	 * <p>
 	 * The raw query is first passed through <code>detectHyphenated</code>. It is
 	 * then parsed with <code>TextAnalyzer</code>; only when that attempt throws or
 	 * produces no hits object is the query parsed again with
 	 * <code>ChineseAnalyzer</code>.
 	 * <p>
 	 * The searcher is created from <code>indexLocation</code> on the first call
 	 * that reaches that point and is kept in the static <code>searcher</code>
 	 * field. Later calls reuse it, so a different <code>indexLocation</code> has
 	 * no effect once the searcher exists. The lazy initialisation is not
 	 * synchronised.
 	 *
 	 * @param indexLocation location of the index, handed to
 	 *        <code>RAMDirectory</code> when the searcher is first created
 	 * @param inputQuery the query text as entered by the user
 	 * @param searchField name of the index field to search
 	 * @return the hits, or <code>null</code> when an argument is null, when
 	 *         <code>inputQuery</code> or <code>searchField</code> is empty, when
 	 *         the prepared query is empty, or when parsing/searching failed (the
 	 *         failure is printed via <code>printStackTrace</code>)
 	 */
 	public static Hits search(String indexLocation, String inputQuery,
 		String searchField) {

 		if (indexLocation == null || inputQuery == null || searchField == null
 			|| searchField.length() == 0 || inputQuery.length() == 0) {
 			return null;
 		}

 		Hits hits = null;

 		try {
 			String queryString = detectHyphenated(inputQuery);
 			if (queryString == null || queryString.length() == 0) {
 				return null;
 			}

 			// Created once and reused by every later call.
 			if (searcher == null) {
 				searcher = new IndexSearcher(new RAMDirectory(indexLocation));
 			}

 			// per user's request, the sorting should be based on the hit rate,
 			// so don't pass in any sorting criteria
 			Sort sort = null;

 			try {
 				Query q = QueryParser.parse(queryString, searchField,
 						new TextAnalyzer());
 				if (q != null) {
 					hits = searcher.search(q, sort);
 				}
 			} catch (Throwable ignored) {
 				// note: icu4j failed with jre 1.5 in server mode.
 				// Deliberately silent (and deliberately Throwable, so that
 				// Errors are covered too): the fallback below takes over.
 			}

 			// Fall back to the old analyzer only when the first attempt gave
 			// nothing; previously this always ran and discarded the result
 			// obtained above.
 			if (hits == null) {
 				Query q = QueryParser.parse(queryString, searchField,
 						new ChineseAnalyzer());
 				if (q != null) {
 					hits = searcher.search(q, sort);
 				}
 			}
 		} catch (ParseException pe) {
 			pe.printStackTrace();
 		} catch (java.io.IOException io) {
 			io.printStackTrace();
 		} catch (Throwable t) {
 			t.printStackTrace();
 		}

 		return hits;
 	}

 	/**
 	 * Puts double quotes around hyphenated words so that they are considered one
 	 * word, for example "use-case" as opposed to use-case, which Lucene would
 	 * otherwise interpret as "use" NOT "case".
 	 * <p>
 	 * The query is split on whitespace. A quoted result is produced only when
 	 * every word contains a hyphen and does not already start with a double
 	 * quote; the quoted words are joined with single spaces. As soon as one word
 	 * fails that test the original query string is returned untouched
 	 * (RATLC00251031).
 	 *
 	 * @param queryString the query text; null is not handled here
 	 * @return the quoted form, the unchanged input, or an empty string when the
 	 *         input contains no words
 	 */
 	public static String detectHyphenated(String queryString) {
 		StringTokenizer tokens = new StringTokenizer(queryString);
 		StringBuffer quoted = new StringBuffer();

 		while (tokens.hasMoreTokens()) {
 			String token = tokens.nextToken();
 			boolean hasHyphen = token.indexOf('-') >= 0;
 			boolean alreadyPhrase = token.startsWith("\""); //$NON-NLS-1$

 			if (!hasHyphen || alreadyPhrase) {
 				// A plain or already-quoted word: give the query back as typed.
 				return queryString; //RATLC00251031
 			}

 			quoted.append('"').append(token).append('"');
 			if (tokens.hasMoreTokens()) {
 				quoted.append(' ');
 			}
 		}

 		return quoted.toString();
 	}

 	/**
 	 * @param args
 	 */
 //	public static void main(String[] args) {
 //		if (args.length > 0) {
 //			// String rupPath = System.getProperty( "user.home" ) + java.io.File.separator
 //			// + "rup";
 //			String rupPath = args[0];
 //			SortedHits sortedHits = IndexSearch.search(rupPath + "\\index", "Analyst",
 //				"contents");
 //			System.out
 //				.println("There are " + sortedHits.length() + " hits for 'analyst'");
 //		}
 //
 //	}
 }
	package org.eclipse.epf.web.search;

	import java.util.StringTokenizer;

	import org.apache.lucene.document.Document;
	import org.apache.lucene.queryParser.ParseException;
	import org.apache.lucene.queryParser.QueryParser;
	import org.apache.lucene.search.Hits;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.Searcher;
	import org.apache.lucene.search.Sort;
	import org.apache.lucene.search.SortField;
	import org.apache.lucene.store.RAMDirectory;
	import org.eclipse.epf.web.search.analysis.ChineseAnalyzer;
	import org.eclipse.epf.web.search.analysis.TextAnalyzer;

	import com.ibm.icu.text.DecimalFormat;

	public class IndexSearch {

	private static Searcher searcher;

	public static Hits search(String indexLocation, String inputQuery,
	String searchField) {

	Hits hits = null;
	Query q = null;
	String queryString = null;
	DecimalFormat scoreFormatter = new DecimalFormat("0.######");

	if (indexLocation == null \|\| inputQuery == null \|\| searchField == null
	\|\| searchField.length() == 0 \|\| inputQuery.length() == 0)
	return null;

	try {
	queryString = detectHyphenated(inputQuery);

	if (queryString == null \|\| queryString.length() == 0)
	return null;

	if (searcher == null) {
	searcher = new IndexSearcher(new RAMDirectory(indexLocation));
	// searcher = new IndexSearcher(FSDirectory.getDirectory(indexLocation,
	// false));
	}

	if ( searcher == null ) {
	return hits;
	}

	// Sort sort = new Sort(new SortField[]{
	// new SortField("title"),
	// new SortField("summary")
	// });

	// per user's request, the sorting should be based on the hit rate,
	// so don't pass in any sorting criteria
	Sort sort = null;

	try {

	q = QueryParser.parse(queryString, searchField,
	new TextAnalyzer());
	if ( q != null ) {
	hits = searcher.search(q, (Sort)sort);
	}
	} catch (Throwable th ) {

	}

	// note: icu4j failed with jre 1.5 in server mode.
	// in such a case, try the old one
	q = QueryParser.parse(queryString, searchField,
	new ChineseAnalyzer());
	if ( q != null ) {
	hits = searcher.search(q, (Sort)sort);
	}

	return hits;


	} catch (ParseException pe) {
	pe.printStackTrace();
	} catch (java.io.IOException io) {
	io.printStackTrace();
	} catch (Throwable p1) {
	p1.printStackTrace();
	}

	return hits;
	}

	/**
	 * Puts double quotes around hyphenated words so that they are considered one
	 * word, for example "use-case" as opposed to use-case, which Lucene would
	 * otherwise interpret as "use" NOT "case".
	 * <p>
	 * The query is split on whitespace. A quoted result is produced only when
	 * every word contains a hyphen and does not already start with a double
	 * quote; the quoted words are joined with single spaces. As soon as one word
	 * fails that test the original query string is returned untouched
	 * (RATLC00251031).
	 *
	 * @param queryString the query text; null is not handled here
	 * @return the quoted form, the unchanged input, or an empty string when the
	 *         input contains no words
	 */
	public static String detectHyphenated(String queryString) {
		StringTokenizer tokens = new StringTokenizer(queryString);
		StringBuffer quoted = new StringBuffer();

		while (tokens.hasMoreTokens()) {
			String token = tokens.nextToken();
			boolean hasHyphen = token.indexOf('-') >= 0;
			boolean alreadyPhrase = token.startsWith("\""); //$NON-NLS-1$

			if (!hasHyphen || alreadyPhrase) {
				// A plain or already-quoted word: give the query back as typed.
				return queryString; //RATLC00251031
			}

			quoted.append('"').append(token).append('"');
			if (tokens.hasMoreTokens()) {
				quoted.append(' ');
			}
		}

		return quoted.toString();
	}

	/**
	* @param args
	*/
	// public static void main(String[] args) {
	// if (args.length > 0) {
	// // String rupPath = System.getProperty( "user.home" ) + java.io.File.separator
	// // + "rup";
	// String rupPath = args[0];
	// SortedHits sortedHits = IndexSearch.search(rupPath + "\\index", "Analyst",
	// "contents");
	// System.out
	// .println("There are " + sortedHits.length() + " hits for 'analyst'");
	// }
	//
	// }
	}