| //------------------------------------------------------------------------------ |
| // Copyright (c) 2005, 2006 IBM Corporation and others. |
| // All rights reserved. This program and the accompanying materials |
| // are made available under the terms of the Eclipse Public License v1.0 |
| // which accompanies this distribution, and is available at |
| // http://www.eclipse.org/legal/epl-v10.html |
| // |
| // Contributors: |
| // IBM Corporation - initial implementation |
| //------------------------------------------------------------------------------ |
| package org.eclipse.epf.publishing.services.search; |
| |
| import java.io.File; |
| import java.io.Reader; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.Map; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| |
| /** |
| * This class returns a Lucene document for indexing and searching. |
| * A document is a unit for indexing and searching. It consists of a |
| * list of fields that can be indexed and searched. Each field has |
| * a name and a text value. <br> |
| * |
| * The fields that are maintained by this class are: <br> |
| * <UL> |
| * <LI>document title</LI> |
| * <LI>document URL</LI> |
| * <LI>summary (usually the first few lines of text)</LI> |
| * <LI>text of document</LI> |
| * </UL> |
| * <br> |
| * All types of document, such as HTML and PDF documents, must |
| * subclass AbstractDocument. <br> |
| * |
| */ |
| public abstract class AbstractDocument |
| { |
| public static final String FIELD_URL = "url"; //$NON-NLS-1$ |
| public static final String FIELD_CONTENTS = "contents"; //$NON-NLS-1$ |
| public static final String FIELD_SUMMARY = "summary"; //$NON-NLS-1$ |
| public static final String FIELD_TITLE = "title"; //$NON-NLS-1$ |
| public static final String FIELD_VALUE_UNDEFINED = ""; //$NON-NLS-1$ |
| |
| protected String _docTitle = null; |
| protected String _docUrl = null; |
| protected String _summary = null; |
| protected String _contents = null; |
| protected Reader _contentReader = null; |
| |
| protected Map additionalFields = new HashMap(); |
| |
| /** |
| * Default constructor. |
| */ |
| public AbstractDocument() |
| { |
| } |
| |
| /** |
| * Parses and compiles the document given the file. |
| */ |
| public Document document( File file ) |
| { |
| // first compile the document fields |
| compileDocument( file ); |
| |
| if( _docTitle == null ) |
| { |
| _docTitle = FIELD_VALUE_UNDEFINED; |
| } |
| if( _docUrl == null ) |
| { |
| _docUrl = FIELD_VALUE_UNDEFINED; |
| } |
| if( _summary == null ) |
| { |
| _summary = FIELD_VALUE_UNDEFINED; |
| } |
| |
| // create a new Lucene document |
| Document luceneDocument = new Document(); |
| |
| // add the url as a field named "url". Use an UnIndexed field, so |
| // that the url is just stored with the document, but is not searchable. |
| luceneDocument.add( Field.UnIndexed( FIELD_URL, _docUrl ) ); |
| |
| // add the contents so it will get tokenized and indexed. |
| if( null != _contents ) |
| { |
| luceneDocument.add( Field.Text( FIELD_CONTENTS, _contents ) ); |
| } |
| else |
| { |
| luceneDocument.add( Field.Text( FIELD_CONTENTS, _contentReader ) ); |
| } |
| |
| // add the summary as an UnIndexed field, so that it is stored and returned |
| // with hit documents for display. |
| luceneDocument.add( Field.UnIndexed( FIELD_SUMMARY, _summary ) ); |
| |
| // Add the title as a separate Text field, so that it can be searched |
| // separately. |
| luceneDocument.add( Field.Text( FIELD_TITLE, _docTitle ) ); |
| |
| if ( additionalFields.size() > 0 ) |
| { |
| for ( Iterator it = additionalFields.entrySet().iterator(); it.hasNext(); ) |
| { |
| Map.Entry entry = (Map.Entry) it.next(); |
| luceneDocument.add( Field.Text((String)entry.getKey(), (String)entry.getValue() ) ); |
| } |
| } |
| return( luceneDocument ); |
| } |
| |
| /** |
| * Sets the document title. |
| */ |
| protected void setDocTitle( String title ) |
| { |
| //System.out.println( "TITLE === " + title ); |
| _docTitle = title; |
| } |
| |
| /** |
| * Sets the document url. |
| */ |
| protected void setDocUrl( String url ) |
| { |
| // System.out.println( "URL === " + url ); |
| _docUrl = url; |
| } |
| |
| /** |
| * Sets the document summary. |
| */ |
| protected void setSummary( String summary ) |
| { |
| //System.out.println( "SUMMARY === " + summary ); |
| _summary = summary; |
| } |
| |
| /** |
| * Sets the document content with the given string. |
| * Mutually exclusive with setting the document content |
| * with a reader. |
| * @see #setContentReader() |
| */ |
| protected void setContentString( String contents ) |
| { |
| System.out.println( contents ); |
| _contents = contents; |
| _contentReader = null; |
| } |
| |
| /** |
| * Sets the document content with the given reader. |
| * Mutually exclusive with setting the document content |
| * with a string. |
| * @see #setContentString() |
| */ |
| protected void setContentReader( Reader contentReader ) |
| { |
| _contentReader = contentReader; |
| _contents = null; |
| } |
| |
| /** |
| * Parses and compiles the document fields from the given file. |
| */ |
| protected abstract void compileDocument( File file ); |
| |
| } |
| |