plugins/org.eclipse.epf.publishing/src/org/eclipse/epf/publishing/services/search/AbstractDocument.java - epf/org.eclipse.epf.archive - Git at Google

 //------------------------------------------------------------------------------
 // Copyright (c) 2005, 2006 IBM Corporation and others.
 // All rights reserved. This program and the accompanying materials
 // are made available under the terms of the Eclipse Public License v1.0
 // which accompanies this distribution, and is available at
 // http://www.eclipse.org/legal/epl-v10.html
 //
 // Contributors:
 // IBM Corporation - initial implementation
 //------------------------------------------------------------------------------
 package org.eclipse.epf.publishing.services.search;

 import java.io.File;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;

 /**
  * Base class for turning a file into a Lucene {@link Document} that can be
  * indexed and searched. A Lucene document is a list of named fields, each
  * with a text value. <br>
  *
  * The fields maintained by this class are: <br>
  * <UL>
  * <LI>document title (searchable)</LI>
  * <LI>document URL (stored only)</LI>
  * <LI>summary (stored only)</LI>
  * <LI>text of the document (searchable)</LI>
  * <LI>any entries placed in {@link #additionalFields} (searchable)</LI>
  * </UL>
  * <br>
  * Each concrete document type (for example HTML or PDF) subclasses this class
  * and implements {@link #compileDocument(File)} to populate the fields through
  * the protected setters. <br>
  *
  * Note that {@link #document(File)} does not clear values left over from a
  * previous call; an instance that is reused relies on
  * {@link #compileDocument(File)} to overwrite them.
  */
 public abstract class AbstractDocument
 {
 	/** Name of the field holding the document URL. */
 	public static final String FIELD_URL = "url"; //$NON-NLS-1$
 	/** Name of the field holding the document text. */
 	public static final String FIELD_CONTENTS = "contents"; //$NON-NLS-1$
 	/** Name of the field holding the document summary. */
 	public static final String FIELD_SUMMARY = "summary"; //$NON-NLS-1$
 	/** Name of the field holding the document title. */
 	public static final String FIELD_TITLE = "title"; //$NON-NLS-1$
 	/** Value used for any field the subclass left unset. */
 	public static final String FIELD_VALUE_UNDEFINED = ""; //$NON-NLS-1$

 	protected String _docTitle = null;
 	protected String _docUrl = null;
 	protected String _summary = null;
 	// At most one of _contents / _contentReader is non-null; the setters
 	// below clear the other one.
 	protected String _contents = null;
 	protected Reader _contentReader = null;

 	/**
 	 * Extra fields to add to the document. Keys (field names) and values are
 	 * cast to <code>String</code> when the document is built.
 	 */
 	protected Map additionalFields = new HashMap();

 	/**
 	 * Default constructor.
 	 */
 	public AbstractDocument()
 	{
 	}

 	/**
 	 * Compiles the fields from the given file and assembles them into a
 	 * new Lucene document.
 	 *
 	 * @param file the file to parse; passed unchanged to
 	 *             {@link #compileDocument(File)}
 	 * @return a new Lucene document holding the url, contents, summary and
 	 *         title fields followed by any additional fields
 	 */
 	public Document document( File file )
 	{
 		// let the subclass populate the fields first
 		compileDocument( file );

 		// anything the subclass left unset is indexed as the undefined value
 		if( _docTitle == null )
 		{
 			_docTitle = FIELD_VALUE_UNDEFINED;
 		}
 		if( _docUrl == null )
 		{
 			_docUrl = FIELD_VALUE_UNDEFINED;
 		}
 		if( _summary == null )
 		{
 			_summary = FIELD_VALUE_UNDEFINED;
 		}

 		Document luceneDocument = new Document();

 		// The url is an UnIndexed field: stored with the document so it can
 		// be returned with a hit, but not searchable.
 		luceneDocument.add( Field.UnIndexed( FIELD_URL, _docUrl ) );

 		// The contents are a Text field so they get tokenized and indexed.
 		if( _contents != null )
 		{
 			luceneDocument.add( Field.Text( FIELD_CONTENTS, _contents ) );
 		}
 		else if( _contentReader != null )
 		{
 			luceneDocument.add( Field.Text( FIELD_CONTENTS, _contentReader ) );
 		}
 		else
 		{
 			// Neither content source was set. Never hand a null reader to
 			// Lucene; treat the contents like the other unset fields.
 			luceneDocument.add( Field.Text( FIELD_CONTENTS, FIELD_VALUE_UNDEFINED ) );
 		}

 		// The summary is an UnIndexed field: stored and returned with hit
 		// documents for display.
 		luceneDocument.add( Field.UnIndexed( FIELD_SUMMARY, _summary ) );

 		// The title is its own Text field so it can be searched separately.
 		luceneDocument.add( Field.Text( FIELD_TITLE, _docTitle ) );

 		// Every additional field is added as a Text field.
 		for ( Iterator it = additionalFields.entrySet().iterator(); it.hasNext(); )
 		{
 			Map.Entry entry = (Map.Entry) it.next();
 			luceneDocument.add( Field.Text( (String) entry.getKey(), (String) entry.getValue() ) );
 		}
 		return luceneDocument;
 	}

 	/**
 	 * Sets the document title.
 	 *
 	 * @param title the title; <code>null</code> is replaced by
 	 *              {@link #FIELD_VALUE_UNDEFINED} when the document is built
 	 */
 	protected void setDocTitle( String title )
 	{
 		_docTitle = title;
 	}

 	/**
 	 * Sets the document url.
 	 *
 	 * @param url the url; <code>null</code> is replaced by
 	 *            {@link #FIELD_VALUE_UNDEFINED} when the document is built
 	 */
 	protected void setDocUrl( String url )
 	{
 		_docUrl = url;
 	}

 	/**
 	 * Sets the document summary.
 	 *
 	 * @param summary the summary; <code>null</code> is replaced by
 	 *                {@link #FIELD_VALUE_UNDEFINED} when the document is built
 	 */
 	protected void setSummary( String summary )
 	{
 		_summary = summary;
 	}

 	/**
 	 * Sets the document content with the given string.
 	 * Mutually exclusive with setting the document content
 	 * with a reader: any reader set earlier is discarded.
 	 *
 	 * @param contents the full text of the document
 	 * @see #setContentReader(Reader)
 	 */
 	protected void setContentString( String contents )
 	{
 		// No debug output here: echoing the whole text of every indexed
 		// document to stdout floods the console.
 		_contents = contents;
 		_contentReader = null;
 	}

 	/**
 	 * Sets the document content with the given reader.
 	 * Mutually exclusive with setting the document content
 	 * with a string: any string set earlier is discarded.
 	 *
 	 * @param contentReader a reader over the full text of the document
 	 * @see #setContentString(String)
 	 */
 	protected void setContentReader( Reader contentReader )
 	{
 		_contentReader = contentReader;
 		_contents = null;
 	}

 	/**
 	 * Parses the given file and populates the document fields. Called by
 	 * {@link #document(File)} before the Lucene document is assembled.
 	 *
 	 * @param file the file to parse
 	 */
 	protected abstract void compileDocument( File file );

 }
	//------------------------------------------------------------------------------
	// Copyright (c) 2005, 2006 IBM Corporation and others.
	// All rights reserved. This program and the accompanying materials
	// are made available under the terms of the Eclipse Public License v1.0
	// which accompanies this distribution, and is available at
	// http://www.eclipse.org/legal/epl-v10.html
	//
	// Contributors:
	// IBM Corporation - initial implementation
	//------------------------------------------------------------------------------
	package org.eclipse.epf.publishing.services.search;

	import java.io.File;
	import java.io.Reader;
	import java.util.HashMap;
	import java.util.Iterator;
	import java.util.Map;

	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;

	/**
	 * Base class for turning a file into a Lucene {@link Document} that can be
	 * indexed and searched. A Lucene document is a list of named fields, each
	 * with a text value. <br>
	 *
	 * The fields maintained by this class are: <br>
	 * <UL>
	 * <LI>document title (searchable)</LI>
	 * <LI>document URL (stored only)</LI>
	 * <LI>summary (stored only)</LI>
	 * <LI>text of the document (searchable)</LI>
	 * <LI>any entries placed in {@link #additionalFields} (searchable)</LI>
	 * </UL>
	 * <br>
	 * Each concrete document type (for example HTML or PDF) subclasses this class
	 * and implements {@link #compileDocument(File)} to populate the fields through
	 * the protected setters. <br>
	 *
	 * Note that {@link #document(File)} does not clear values left over from a
	 * previous call; an instance that is reused relies on
	 * {@link #compileDocument(File)} to overwrite them.
	 */
	public abstract class AbstractDocument
	{
		/** Name of the field holding the document URL. */
		public static final String FIELD_URL = "url"; //$NON-NLS-1$
		/** Name of the field holding the document text. */
		public static final String FIELD_CONTENTS = "contents"; //$NON-NLS-1$
		/** Name of the field holding the document summary. */
		public static final String FIELD_SUMMARY = "summary"; //$NON-NLS-1$
		/** Name of the field holding the document title. */
		public static final String FIELD_TITLE = "title"; //$NON-NLS-1$
		/** Value used for any field the subclass left unset. */
		public static final String FIELD_VALUE_UNDEFINED = ""; //$NON-NLS-1$

		protected String _docTitle = null;
		protected String _docUrl = null;
		protected String _summary = null;
		// At most one of _contents / _contentReader is non-null; the setters
		// below clear the other one.
		protected String _contents = null;
		protected Reader _contentReader = null;

		/**
		 * Extra fields to add to the document. Keys (field names) and values are
		 * cast to <code>String</code> when the document is built.
		 */
		protected Map additionalFields = new HashMap();

		/**
		 * Default constructor.
		 */
		public AbstractDocument()
		{
		}

		/**
		 * Compiles the fields from the given file and assembles them into a
		 * new Lucene document.
		 *
		 * @param file the file to parse; passed unchanged to
		 *             {@link #compileDocument(File)}
		 * @return a new Lucene document holding the url, contents, summary and
		 *         title fields followed by any additional fields
		 */
		public Document document( File file )
		{
			// let the subclass populate the fields first
			compileDocument( file );

			// anything the subclass left unset is indexed as the undefined value
			if( _docTitle == null )
			{
				_docTitle = FIELD_VALUE_UNDEFINED;
			}
			if( _docUrl == null )
			{
				_docUrl = FIELD_VALUE_UNDEFINED;
			}
			if( _summary == null )
			{
				_summary = FIELD_VALUE_UNDEFINED;
			}

			Document luceneDocument = new Document();

			// The url is an UnIndexed field: stored with the document so it can
			// be returned with a hit, but not searchable.
			luceneDocument.add( Field.UnIndexed( FIELD_URL, _docUrl ) );

			// The contents are a Text field so they get tokenized and indexed.
			if( _contents != null )
			{
				luceneDocument.add( Field.Text( FIELD_CONTENTS, _contents ) );
			}
			else if( _contentReader != null )
			{
				luceneDocument.add( Field.Text( FIELD_CONTENTS, _contentReader ) );
			}
			else
			{
				// Neither content source was set. Never hand a null reader to
				// Lucene; treat the contents like the other unset fields.
				luceneDocument.add( Field.Text( FIELD_CONTENTS, FIELD_VALUE_UNDEFINED ) );
			}

			// The summary is an UnIndexed field: stored and returned with hit
			// documents for display.
			luceneDocument.add( Field.UnIndexed( FIELD_SUMMARY, _summary ) );

			// The title is its own Text field so it can be searched separately.
			luceneDocument.add( Field.Text( FIELD_TITLE, _docTitle ) );

			// Every additional field is added as a Text field.
			for ( Iterator it = additionalFields.entrySet().iterator(); it.hasNext(); )
			{
				Map.Entry entry = (Map.Entry) it.next();
				luceneDocument.add( Field.Text( (String) entry.getKey(), (String) entry.getValue() ) );
			}
			return luceneDocument;
		}

		/**
		 * Sets the document title.
		 *
		 * @param title the title; <code>null</code> is replaced by
		 *              {@link #FIELD_VALUE_UNDEFINED} when the document is built
		 */
		protected void setDocTitle( String title )
		{
			_docTitle = title;
		}

		/**
		 * Sets the document url.
		 *
		 * @param url the url; <code>null</code> is replaced by
		 *            {@link #FIELD_VALUE_UNDEFINED} when the document is built
		 */
		protected void setDocUrl( String url )
		{
			_docUrl = url;
		}

		/**
		 * Sets the document summary.
		 *
		 * @param summary the summary; <code>null</code> is replaced by
		 *                {@link #FIELD_VALUE_UNDEFINED} when the document is built
		 */
		protected void setSummary( String summary )
		{
			_summary = summary;
		}

		/**
		 * Sets the document content with the given string.
		 * Mutually exclusive with setting the document content
		 * with a reader: any reader set earlier is discarded.
		 *
		 * @param contents the full text of the document
		 * @see #setContentReader(Reader)
		 */
		protected void setContentString( String contents )
		{
			// No debug output here: echoing the whole text of every indexed
			// document to stdout floods the console.
			_contents = contents;
			_contentReader = null;
		}

		/**
		 * Sets the document content with the given reader.
		 * Mutually exclusive with setting the document content
		 * with a string: any string set earlier is discarded.
		 *
		 * @param contentReader a reader over the full text of the document
		 * @see #setContentString(String)
		 */
		protected void setContentReader( Reader contentReader )
		{
			_contentReader = contentReader;
			_contents = null;
		}

		/**
		 * Parses the given file and populates the document fields. Called by
		 * {@link #document(File)} before the Lucene document is assembled.
		 *
		 * @param file the file to parse
		 */
		protected abstract void compileDocument( File file );

	}