| //------------------------------------------------------------------------------ |
| // Copyright (c) 2005, 2006 IBM Corporation and others. |
| // All rights reserved. This program and the accompanying materials |
| // are made available under the terms of the Eclipse Public License v1.0 |
| // which accompanies this distribution, and is available at |
| // http://www.eclipse.org/legal/epl-v10.html |
| // |
| // Contributors: |
| // IBM Corporation - initial implementation |
| //------------------------------------------------------------------------------ |
| package org.eclipse.epf.publishing.services.search; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.InputStreamReader; |
| import java.util.Enumeration; |
| import java.util.Properties; |
| |
| import org.apache.lucene.demo.html.HTMLParser; |
| |
| /** |
| * This class parses and compiles the fields for a HTML document.<br> |
| * |
| */ |
| public class HTMLDocument extends AbstractDocument |
| { |
| /** |
| * Default constructor. |
| */ |
| public HTMLDocument() |
| { |
| super(); |
| } |
| |
| /** |
| * Parses and compiles the document fields from the given file. |
| */ |
| protected void compileDocument( File file ) |
| { |
| // use the file path as the url |
| setDocUrl( file.getPath() ); |
| |
| try |
| { |
| FileInputStream inStream = new FileInputStream( file ); |
| InputStreamReader streamReader = new InputStreamReader( inStream, "UTF8" ); //$NON-NLS-1$ |
| HTMLParser parser = new HTMLParser( streamReader ); |
| |
| // HTMLParser parser = new HTMLParser( file ); |
| |
| // set the contents |
| setContentReader( parser.getReader() ); |
| |
| // set the summary |
| setSummary( parser.getSummary() ); |
| |
| // set title |
| setDocTitle( parser.getTitle() ); |
| |
| Properties prop = parser.getMetaTags(); |
| |
| for(Enumeration enu = prop.propertyNames(); enu.hasMoreElements();) |
| { |
| String tagName = (String)enu.nextElement(); |
| super.additionalFields.put(tagName, prop.getProperty(tagName)); |
| } |
| |
| parser = null; |
| |
| // close stream |
| streamReader.close(); |
| inStream.close(); |
| |
| // System.out.println( file.getPath() ); |
| } |
| catch( Exception e ) |
| { |
| e.printStackTrace(); |
| } |
| } |
| |
| } |
| |