blob: 559a0feac299169d68a5799c5466e476e1f22f80 [file] [log] [blame]
//------------------------------------------------------------------------------
// Copyright (c) 2005, 2006 IBM Corporation and others.
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// which accompanies this distribution, and is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// Contributors:
// IBM Corporation - initial implementation
//------------------------------------------------------------------------------
package org.eclipse.epf.publishing.services.search;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Enumeration;
import java.util.Properties;
import org.apache.lucene.demo.html.HTMLParser;
/**
 * Parses an HTML file and compiles the document fields derived from it.
 * <p>
 * The file is read as UTF-8 and handed to the Lucene demo {@link HTMLParser};
 * the parser's content reader, summary, title and meta tags are then stored
 * through the setters and the <code>additionalFields</code> map inherited
 * from {@link AbstractDocument}.
 */
public class HTMLDocument extends AbstractDocument
{
    /**
     * Default constructor.
     */
    public HTMLDocument()
    {
        super();
    }

    /**
     * Parses the given HTML file and populates the document URL, content
     * reader, summary, title and one additional field per meta tag.
     * <p>
     * Any exception raised while opening, parsing or closing the file is
     * caught and its stack trace printed; nothing is propagated to the
     * caller. The document URL is set before parsing starts, so it is
     * populated even when parsing fails.
     *
     * @param file the HTML file to parse; its path is used as the document URL
     */
    protected void compileDocument( File file )
    {
        // use the file path as the url
        setDocUrl( file.getPath() );

        // Declared outside the try block so that the finally clause can
        // release them no matter where a failure occurs.
        FileInputStream inStream = null;
        InputStreamReader streamReader = null;
        try
        {
            inStream = new FileInputStream( file );
            streamReader = new InputStreamReader( inStream, "UTF8" ); //$NON-NLS-1$
            HTMLParser parser = new HTMLParser( streamReader );

            // set the contents
            setContentReader( parser.getReader() );
            // set the summary
            setSummary( parser.getSummary() );
            // set title
            setDocTitle( parser.getTitle() );

            // expose every meta tag as an additional document field
            Properties prop = parser.getMetaTags();
            for( Enumeration enu = prop.propertyNames(); enu.hasMoreElements(); )
            {
                String tagName = (String)enu.nextElement();
                super.additionalFields.put( tagName, prop.getProperty( tagName ) );
            }
        }
        catch( Exception e )
        {
            e.printStackTrace();
        }
        finally
        {
            // NOTE(review): the streams are closed as soon as the fields above
            // have been extracted, exactly as before; whether the reader passed
            // to setContentReader() is still usable afterwards depends on
            // HTMLParser internals not visible here - confirm.
            // Each close is guarded separately so that a failure in one does
            // not prevent the other from running.
            try
            {
                if( streamReader != null )
                {
                    streamReader.close();
                }
            }
            catch( Exception e )
            {
                e.printStackTrace();
            }
            try
            {
                if( inStream != null )
                {
                    inStream.close();
                }
            }
            catch( Exception e )
            {
                e.printStackTrace();
            }
        }
    }
}