| package org.eclipse.epf.web.search; |
| |
| import java.util.StringTokenizer; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.queryParser.ParseException; |
| import org.apache.lucene.queryParser.QueryParser; |
| import org.apache.lucene.search.Hits; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.Searcher; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.SortField; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.eclipse.epf.web.search.analysis.ChineseAnalyzer; |
| import org.eclipse.epf.web.search.analysis.TextAnalyzer; |
| |
| import com.ibm.icu.text.DecimalFormat; |
| |
| public class IndexSearch { |
| |
| private static Searcher searcher; |
| |
| public static Hits search(String indexLocation, String inputQuery, |
| String searchField) { |
| |
| Hits hits = null; |
| Query q = null; |
| String queryString = null; |
| DecimalFormat scoreFormatter = new DecimalFormat("0.######"); |
| |
| if (indexLocation == null || inputQuery == null || searchField == null |
| || searchField.length() == 0 || inputQuery.length() == 0) |
| return null; |
| |
| try { |
| queryString = detectHyphenated(inputQuery); |
| |
| if (queryString == null || queryString.length() == 0) |
| return null; |
| |
| if (searcher == null) { |
| searcher = new IndexSearcher(new RAMDirectory(indexLocation)); |
| // searcher = new IndexSearcher(FSDirectory.getDirectory(indexLocation, |
| // false)); |
| } |
| |
| if ( searcher == null ) { |
| return hits; |
| } |
| |
| // Sort sort = new Sort(new SortField[]{ |
| // new SortField("title"), |
| // new SortField("summary") |
| // }); |
| |
| // per user's request, the sorting should be based on the hit rate, |
| // so don't pass in any sorting criteria |
| Sort sort = null; |
| |
| try { |
| |
| q = QueryParser.parse(queryString, searchField, |
| new TextAnalyzer()); |
| if ( q != null ) { |
| hits = searcher.search(q, (Sort)sort); |
| } |
| } catch (Throwable th ) { |
| hits = null; |
| } |
| |
| if ( hits == null ) { |
| // note: icu4j failed with jre 1.5 in server mode. |
| // in such a case, try the old one |
| q = QueryParser.parse(queryString, searchField, |
| new ChineseAnalyzer()); |
| if ( q != null ) { |
| hits = searcher.search(q, (Sort)sort); |
| } |
| } |
| |
| return hits; |
| |
| |
| } catch (ParseException pe) { |
| pe.printStackTrace(); |
| } catch (java.io.IOException io) { |
| io.printStackTrace(); |
| } catch (Throwable p1) { |
| p1.printStackTrace(); |
| } |
| |
| return hits; |
| } |
| |
| /* |
| * detect hyphenated words and if hyphenate, put quotes around so that they are |
| * considered one word for example,"use-case" as opposed to use-case, which Lucene |
| * interprets as "use" NOT "case" which is not what we want. algorithm: first tokenize |
| * by space to isolate words next, if words have hyphens, put quotes at the beginning |
| * and end append to buffer |
| */ |
| public static String detectHyphenated(String queryString) { |
| StringTokenizer spaceTokenizer = new StringTokenizer(queryString); |
| StringBuffer wordBuffer = new StringBuffer(); |
| |
| while (spaceTokenizer.hasMoreTokens()) { |
| // isolate into words |
| String word = spaceTokenizer.nextToken(); |
| if ((word.indexOf('-') != -1 ) && !word.startsWith("\"")) //$NON-NLS-1$ |
| { |
| // hyphen found, and word is not already an exact phrase, |
| // so add quotes to beginning and end of word |
| wordBuffer.append("\""); //$NON-NLS-1$ |
| wordBuffer.append(word); |
| wordBuffer.append("\""); //$NON-NLS-1$ |
| //System.out.println("if: " + wordBuffer.toString()); |
| } else { |
| //System.out.println("else: " + queryString); |
| return queryString; //RATLC00251031 |
| //wordBuffer.append(word); |
| } |
| if (spaceTokenizer.hasMoreTokens()) { |
| wordBuffer.append(" "); //$NON-NLS-1$ |
| } |
| } |
| |
| |
| return wordBuffer.toString(); |
| } |
| |
| /** |
| * @param args |
| */ |
| // public static void main(String[] args) { |
| // if (args.length > 0) { |
| // // String rupPath = System.getProperty( "user.home" ) + java.io.File.separator |
| // // + "rup"; |
| // String rupPath = args[0]; |
| // SortedHits sortedHits = IndexSearch.search(rupPath + "\\index", "Analyst", |
| // "contents"); |
| // System.out |
| // .println("There are " + sortedHits.length() + " hits for 'analyst'"); |
| // } |
| // |
| // } |
| } |