// blob: a894a84352a43cbc849eace2d6d7f0e820dc1c17 [file] [log] [blame]
package org.eclipse.epf.web.search;
import java.util.StringTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.eclipse.epf.web.search.analysis.ChineseAnalyzer;
import org.eclipse.epf.web.search.analysis.TextAnalyzer;
import com.ibm.icu.text.DecimalFormat;
/**
 * Static helper that runs Lucene queries against a search index.
 *
 * <p>The index is opened through a {@link RAMDirectory} the first time
 * {@link #search(String, String, String)} is called and the resulting
 * searcher is then reused by every later call.
 */
public class IndexSearch {

    /**
     * Searcher shared by all calls to {@link #search(String, String, String)}.
     *
     * <p>NOTE: it is created lazily, without synchronization, from the
     * <em>first</em> index location that is passed in. Later calls reuse it
     * even if they pass a different location.
     */
    private static Searcher searcher;

    /**
     * Searches the index for the given query.
     *
     * <p>The query is first parsed with {@link TextAnalyzer}. Only if that
     * attempt fails (for example because icu4j cannot be used on the running
     * JRE) is the query parsed and run again with {@link ChineseAnalyzer}.
     * Results are ordered by relevance; no explicit sort is applied.
     *
     * <p>Errors are not propagated to the caller: they are printed with
     * {@code printStackTrace()} and whatever result is available is returned.
     *
     * @param indexLocation path of the index; only used to open the shared
     *                      searcher on the first call
     * @param inputQuery    the query as typed by the user
     * @param searchField   name of the default field to search
     * @return the hits, or {@code null} if any argument is {@code null}, the
     *         query or field is empty, or the search failed
     */
    public static Hits search(String indexLocation, String inputQuery,
            String searchField) {
        if (indexLocation == null || inputQuery == null || searchField == null
                || searchField.length() == 0 || inputQuery.length() == 0) {
            return null;
        }

        Hits hits = null;
        try {
            String queryString = detectHyphenated(inputQuery);
            if (queryString == null || queryString.length() == 0) {
                return null;
            }

            if (searcher == null) {
                searcher = new IndexSearcher(new RAMDirectory(indexLocation));
            }

            // Per user's request the ordering is based on the hit rate, so
            // no sorting criteria are passed. The typed null selects the
            // search(Query, Sort) overload.
            Sort sort = null;

            try {
                Query primary = QueryParser.parse(queryString, searchField,
                        new TextAnalyzer());
                if (primary != null) {
                    hits = searcher.search(primary, sort);
                }
            } catch (Throwable ignored) {
                // Deliberately best-effort: icu4j failed with jre 1.5 in
                // server mode. Throwable (not Exception) is caught so that
                // linkage/initialisation errors also lead to the fallback.
                hits = null;
            }

            if (hits == null) {
                // The primary analyzer produced nothing usable; try the old one.
                Query fallback = QueryParser.parse(queryString, searchField,
                        new ChineseAnalyzer());
                if (fallback != null) {
                    hits = searcher.search(fallback, sort);
                }
            }
            return hits;
        } catch (ParseException pe) {
            pe.printStackTrace();
        } catch (java.io.IOException io) {
            io.printStackTrace();
        } catch (Throwable p1) {
            p1.printStackTrace();
        }
        return hits;
    }

    /**
     * Quotes hyphenated words so that Lucene treats each one as a single
     * term, e.g. {@code use-case} becomes {@code "use-case"} instead of being
     * interpreted as {@code use} NOT {@code case}.
     *
     * <p>The query is split on whitespace. Quoting is applied only when
     * <em>every</em> word contains a hyphen and does not already start with a
     * double quote; the quoted words are then joined with single spaces. As
     * soon as a word is found that does not qualify, the original query
     * string is returned unchanged (RATLC00251031).
     *
     * <p>Examples: {@code use-case} gives {@code "use-case"};
     * {@code use-case work-flow} gives {@code "use-case" "work-flow"};
     * {@code use-case diagram} is returned as is. A query with no words at
     * all (empty or only whitespace) gives the empty string.
     *
     * @param queryString the raw query; may be {@code null}
     * @return the rewritten or original query, or {@code null} if
     *         {@code queryString} is {@code null}
     */
    public static String detectHyphenated(String queryString) {
        if (queryString == null) {
            return null;
        }

        StringTokenizer spaceTokenizer = new StringTokenizer(queryString);
        StringBuffer wordBuffer = new StringBuffer();
        while (spaceTokenizer.hasMoreTokens()) {
            // isolate into words
            String word = spaceTokenizer.nextToken();
            boolean needsQuotes = word.indexOf('-') != -1
                    && !word.startsWith("\""); //$NON-NLS-1$
            if (!needsQuotes) {
                // RATLC00251031: a plain or already quoted word means the
                // whole query is handed back untouched.
                return queryString;
            }

            // hyphen found, and word is not already an exact phrase,
            // so add quotes to beginning and end of word
            wordBuffer.append("\""); //$NON-NLS-1$
            wordBuffer.append(word);
            wordBuffer.append("\""); //$NON-NLS-1$

            if (spaceTokenizer.hasMoreTokens()) {
                wordBuffer.append(" "); //$NON-NLS-1$
            }
        }
        return wordBuffer.toString();
    }
}