| //------------------------------------------------------------------------------ |
| // Copyright (c) 2005, 2006 IBM Corporation and others. |
| // All rights reserved. This program and the accompanying materials |
| // are made available under the terms of the Eclipse Public License v1.0 |
| // which accompanies this distribution, and is available at |
| // http://www.eclipse.org/legal/epl-v10.html |
| // |
| // Contributors: |
| // IBM Corporation - initial implementation |
| //------------------------------------------------------------------------------ |
| package org.eclipse.epf.search.configuration.internal; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.util.Enumeration; |
| import java.util.Properties; |
| |
| import org.apache.lucene.document.DateField; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.queryParser.QueryParser; |
| import org.apache.lucene.search.Hits; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.Searcher; |
| import org.eclipse.core.runtime.IProgressMonitor; |
| import org.eclipse.epf.common.html.HTMLParser; |
| import org.eclipse.epf.search.GenerateSearchIndexException; |
| import org.eclipse.epf.search.SearchConfigurationException; |
| import org.eclipse.epf.search.SearchResources; |
| import org.eclipse.epf.search.SearchServiceException; |
| import org.eclipse.epf.search.analysis.TextAnalyzer; |
| import org.eclipse.epf.search.configuration.ConfigurationHitEntry; |
| import org.eclipse.epf.search.configuration.ConfigurationSearchQuery; |
| |
| /** |
| * The default implementation for searching a published configuration. |
| * |
| * @author Kelvin Low |
| * @since 1.0 |
| */ |
| public class ConfigurationSearchService { |
| |
| /** |
| * Document fields. |
| */ |
| public static final String BRIEF_DESCRIPTION_FIELD = "briefDescription"; //$NON-NLS-1$ |
| |
| public static final String CONTENT_FIELD = "content"; //$NON-NLS-1$ |
| |
| public static final String ID_FIELD = "id"; //$NON-NLS-1$ |
| |
| public static final String MODIFIED_FIELD = "modified"; //$NON-NLS-1$ |
| |
| public static final String NAME_FIELD = "name"; //$NON-NLS-1$ |
| |
| public static final String ROLE_FIELD = "role"; //$NON-NLS-1$ |
| |
| public static final String SUMMARY_FIELD = "summary"; //$NON-NLS-1$ |
| |
| public static final String TYPE_FIELD = "type"; //$NON-NLS-1$ |
| |
| public static final String URL_FIELD = "url"; //$NON-NLS-1$ |
| |
| // The HTML parser. |
| private HTMLParser parser; |
| |
| // The directory containing the documents to be indexed. |
| private String docDir = null; |
| |
| // The directory where the index files will be generated. |
| private String indexDir = null; |
| |
| /** |
| * Creates a new instance. |
| * |
| * @param docDir |
| * the absolute path to a published configuration |
| */ |
| public ConfigurationSearchService(String docDir) { |
| String userHome = System.getProperty("user.home"); //$NON-NLS-1$ |
| int hashCode = Math.abs(docDir.hashCode()); |
| String indexPath = userHome + File.separator |
| + "rup" + File.separator + hashCode + File.separator + "index"; //$NON-NLS-1$ //$NON-NLS-2$ |
| setDirs(docDir, indexPath); |
| } |
| |
| /** |
| * Creates a new instance. |
| * |
| * @param docDir |
| * the absolute path to a published configuration |
| * @param indexDir |
| * the absolute path to the indexes |
| */ |
| public ConfigurationSearchService(String docDir, String indexDir) { |
| setDirs(docDir, indexDir); |
| } |
| |
| /** |
| * Sets up the configuration and index directory. |
| * |
| * @param docDir |
| * the absolute path to a published configuration |
| * @param indexDir |
| * the absolute path to the indexes |
| */ |
| public void setDirs(String docDir, String indexDir) { |
| this.docDir = docDir; |
| this.indexDir = indexDir; |
| } |
| |
| /** |
| * Performs a search based on the given search query. |
| * |
| * @param query |
| * the search query string |
| * @return an array of <code>ConfigurationHitEntry</code> objects |
| * @throws SearchServiceException |
| * if an error occurs while executing the operation |
| */ |
| public ConfigurationHitEntry[] search(ConfigurationSearchQuery query) |
| throws SearchServiceException { |
| return search(query.getQueryString()); |
| } |
| |
| /** |
| * Performs a search on the given query string. |
| * |
| * @param qstr |
| * a Lucene compatible query string |
| * @return an array of <code>ConfigurationHitEntry</code> objects |
| * @throws SearchServiceException |
| * if an error occurs while executing the operation |
| */ |
| public ConfigurationHitEntry[] search(String qstr) |
| throws SearchServiceException { |
| Searcher searcher = null; |
| try { |
| searcher = new IndexSearcher(indexDir); |
| Query query = QueryParser.parse(qstr, CONTENT_FIELD, |
| new TextAnalyzer()); |
| |
| Hits lhits = searcher.search(query); |
| ConfigurationHitEntry[] hits = new ConfigurationHitEntry[lhits |
| .length()]; |
| |
| for (int i = 0; i < hits.length; i++) { |
| Document doc = lhits.doc(i); |
| hits[i] = new ConfigurationHitEntry(); |
| hits[i].setName(doc.get(NAME_FIELD)); |
| hits[i].setUrl(doc.get(URL_FIELD)); |
| hits[i].setBriefDesc(doc.get(BRIEF_DESCRIPTION_FIELD)); |
| hits[i].setId(doc.get(ID_FIELD)); |
| hits[i].setType(doc.get(TYPE_FIELD)); |
| } |
| return hits; |
| } catch (Exception e) { |
| throw new SearchConfigurationException(e); |
| } finally { |
| if (searcher != null) { |
| try { |
| searcher.close(); |
| } catch (IOException e) { |
| } |
| } |
| } |
| } |
| |
| /** |
| * Returns true of the index database already exists. |
| * |
| * @return <code>true</code> if the index database already exists |
| */ |
| public boolean indexExists() { |
| File[] files = new File(indexDir).listFiles(); |
| return (files != null && files.length != 0); |
| } |
| |
| /** |
| * Creates the search index. |
| * |
| * @throws SearchServiceException |
| * if an error occurs while executing the operation |
| */ |
| public void index() throws SearchServiceException { |
| index(null, false); |
| } |
| |
| /** |
| * Creates the search index. |
| * |
| * @param reindex |
| * if <code>true</code>, performs a reindex |
| * @throws SearchServiceException |
| * if an error occurs while executing the operation |
| */ |
| public void index(boolean reindex) throws SearchServiceException { |
| index(null, reindex); |
| } |
| |
| /** |
| * Creates the index database. |
| * |
| * @param pm |
| * a progress monitor |
| * @param reindex |
| * if <code>true</code>, performs a reindex |
| * @throws SearchServiceException |
| * if an error occurs while executing the operation |
| */ |
| public void index(IProgressMonitor pm, boolean reindex) |
| throws SearchServiceException { |
| synchronized (ConfigurationSearchService.class) { |
| if (indexDir == null || docDir == null) { |
| throw new IllegalStateException("Invalid indexDir or docDir"); //$NON-NLS-1$ |
| } |
| |
| if (!reindex && indexExists()) { |
| return; |
| } |
| |
| if (pm != null) { |
| pm.beginTask(SearchResources.indexConfigFilesTask_name, |
| getTotalDocsToIndex(docDir)); |
| } |
| |
| try { |
| deleteAllIndexDirs(); |
| new File(indexDir).mkdirs(); |
| IndexWriter writer = new IndexWriter(indexDir, |
| new TextAnalyzer(), true); |
| writer.maxFieldLength = 1000000; |
| parser = new HTMLParser(); |
| indexDocs(new File(docDir), writer, pm); |
| writer.optimize(); |
| writer.close(); |
| } catch (Exception e) { |
| throw new GenerateSearchIndexException(e); |
| } finally { |
| if (pm != null) { |
| pm.done(); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Deletes all existing index databases. |
| */ |
| public void deleteAllIndexDirs() { |
| File parent = new File(indexDir).getParentFile(); |
| File[] files = parent.listFiles(); |
| if (files != null) { |
| for (int i = 0; i < files.length; i++) { |
| if (files[i].getName().startsWith("index")) { //$NON-NLS-1$ |
| File[] file = files[i].listFiles(); |
| for (int j = 0; j < file.length; j++) { |
| file[j].delete(); |
| } |
| files[i].delete(); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Returns the total number of files that will be added to the search index. |
| * |
| * @param dir |
| * the directory name |
| * @return the number of files that will be added to the search index, |
| * including all sub-directories |
| */ |
| public static int getTotalDocsToIndex(String dir) { |
| int total = 0; |
| File dirFile = new File(dir); |
| File files[] = dirFile.listFiles(); |
| if (files == null) { |
| return total; |
| } |
| for (int i = 0; i < files.length; i++) { |
| if (isXMIDoc(files[i])) { |
| total++; |
| } else if (files[i].isDirectory()) { |
| total += getTotalDocsToIndex(files[i].getAbsolutePath()); |
| } |
| } |
| return total; |
| } |
| |
| /** |
| * Returns true if the file needs to be indexed. |
| * |
| * @param file |
| * the file to be verified |
| * @return <code>true</code> if the file needs to be indexed |
| */ |
| private static boolean isXMIDoc(File file) { |
| return file.getPath().endsWith(".xmi"); //$NON-NLS-1$ |
| } |
| |
| /** |
| * Returns true if the file needs to be indexed. |
| * |
| * @param file |
| * the file to be verified |
| * @return <code>true</code> if the file needs to be indexed |
| */ |
| private static boolean isHtmlOrTextDoc(File file) { |
| String path = file.getPath(); |
| return path.endsWith(".html") || path.endsWith(".htm") || path.endsWith(".txt"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
| } |
| |
| /** |
| * A recursive function to index files. |
| * |
| * @param file |
| * a directory or file to be indexed |
| * @param writer |
| * the index writer |
| * @param pm |
| * a progress monitor |
| * @throws Exception |
| */ |
| private void indexDocs(File file, IndexWriter writer, IProgressMonitor pm) |
| throws Exception { |
| if (file.isDirectory()) { |
| if (file.getName().equalsIgnoreCase("applet")) { //$NON-NLS-1$ |
| return; |
| } |
| String[] files = file.list(); |
| for (int i = 0; i < files.length; i++) { |
| indexDocs(new File(file, files[i]), writer, pm); |
| } |
| } else if (isHtmlOrTextDoc(file)) { |
| if (pm != null) { |
| pm.subTask(file.getName()); |
| pm.worked(1); |
| } |
| if (shouldBeExcluded(file)) { |
| return; |
| } |
| Document doc = getHTMLDocument(file); |
| writer.addDocument(doc); |
| } |
| } |
| |
| /** |
| * Returns <ocde>true</code> if the file should be excluded from indexing. |
| * |
| * @param file |
| * a file |
| * @return <code>true</code> if the file should be excluded from indexing |
| */ |
| public boolean shouldBeExcluded(File file) { |
| String p = file.getParentFile().getAbsolutePath(); |
| return docDir.startsWith(p); |
| } |
| |
| /** |
| * Gets the document object of a HTML file. |
| * |
| * @param file |
| * the HTML file to be indexed |
| * @return a document object |
| * @throws IOException |
| * if an I/O error occurs while parsing the HTML file |
| * @throws InterruptedException |
| * if the operation is interrupted by a user |
| */ |
| public Document getHTMLDocument(File file) throws IOException, |
| InterruptedException { |
| Document doc = new Document(); |
| doc.add(Field.UnIndexed(URL_FIELD, file.getPath().replace( |
| File.pathSeparatorChar, '/'))); |
| doc.add(Field.Keyword(MODIFIED_FIELD, DateField.timeToString(file |
| .lastModified()))); |
| |
| try { |
| parser.parse(file); |
| } catch (Exception e) { |
| } |
| |
| doc.add(Field.Text(CONTENT_FIELD, parser.getText())); |
| doc.add(Field.UnIndexed(SUMMARY_FIELD, parser.getSummary())); |
| |
| Properties metaTags = parser.getMetaTags(); |
| for (Enumeration e = metaTags.propertyNames(); e.hasMoreElements();) { |
| String tagName = (String) e.nextElement(); |
| doc.add(Field.Text(tagName, metaTags.getProperty(tagName))); |
| } |
| |
| if (doc.getField(ROLE_FIELD) == null) { |
| doc.add(Field.Text(ROLE_FIELD, "NORUPROLE")); //$NON-NLS-1$ |
| } |
| |
| return doc; |
| } |
| |
| } |