plugins/org.eclipse.epf.search/src/org/eclipse/epf/search/IndexBuilder.java - epf/org.eclipse.epf.archive - Git at Google

 //------------------------------------------------------------------------------
 //Copyright (c) 2005, 2007 IBM Corporation and others.
 //All rights reserved. This program and the accompanying materials
 //are made available under the terms of the Eclipse Public License v1.0
 //which accompanies this distribution, and is available at
 //http://www.eclipse.org/legal/epl-v10.html
 //
 //Contributors:
 //IBM Corporation - initial implementation
 //------------------------------------------------------------------------------
 package org.eclipse.epf.search;

 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.Enumeration;
 import java.util.List;
 import java.util.Properties;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.eclipse.epf.search.analysis.TextAnalyzer;
 import org.eclipse.epf.search.utils.LHTMLParser;
 import org.eclipse.epf.search.utils.JarCreator;
 import org.eclipse.epf.search.utils.UNCUtil;

 /**
  * Builds the Lucene search index for a published site.
  * <p>
  * The builder walks the publish directory, converts every HTML page that
  * qualifies into a Lucene document, writes the index below
  * <code>&lt;publishDir&gt;/search/index</code>, packs it into a jar and
  * finally writes a small version file next to the jar.
  */
 public class IndexBuilder {
 	static final String VERSION_FILE_NAME = "version.txt"; //$NON-NLS-1$
 	static final String VERSION_DELIMITER = "*"; //$NON-NLS-1$

 	/**
 	 * Document fields.
 	 */
 	public static final String BRIEF_DESCRIPTION_FIELD = "briefDescription"; //$NON-NLS-1$
 	public static final String CONTENT_FIELD = "contents"; //$NON-NLS-1$
 	public static final String ID_FIELD = "id"; //$NON-NLS-1$
 	public static final String MODIFIED_FIELD = "modified"; //$NON-NLS-1$
 	public static final String NAME_FIELD = "name"; //$NON-NLS-1$
 	public static final String ROLE_FIELD = "role"; //$NON-NLS-1$
 	public static final String SUMMARY_FIELD = "summary"; //$NON-NLS-1$
 	public static final String TYPE_FIELD = "type"; //$NON-NLS-1$
 	public static final String URL_FIELD = "url"; //$NON-NLS-1$
 	private static final String TITLE_FIELD = "title"; //$NON-NLS-1$
 	private static final String UMA_ELEMENT_TYPE_FIELD = "uma.type"; //$NON-NLS-1$
 	public static final String GENERAL_CONTENT = "general_content"; //$NON-NLS-1$

 	// Maximum number of terms indexed per field.
 	private static final int MAX_FIELD_LENGTH = 1000000;

 	// UMA element types whose pages must be EXCLUDED from the search index.
 	private static final List NO_SEARCHEABLE_UMA_ELEMENTS = new ArrayList();
 	static {
 		NO_SEARCHEABLE_UMA_ELEMENTS.add("summary"); //$NON-NLS-1$
 		NO_SEARCHEABLE_UMA_ELEMENTS.add("workproductdescriptor"); //$NON-NLS-1$
 		NO_SEARCHEABLE_UMA_ELEMENTS.add("taskdescriptor"); //$NON-NLS-1$
 		NO_SEARCHEABLE_UMA_ELEMENTS.add("roledescriptor"); //$NON-NLS-1$
 	}

 	// Names of the top level directories that are never indexed.
 	private static final String[] TOP_LEVEL_DIRS_TO_SKIP = {
 			"applet", //$NON-NLS-1$
 			"css", //$NON-NLS-1$
 			"ext_help", //$NON-NLS-1$
 			"icons", //$NON-NLS-1$
 			"images", //$NON-NLS-1$
 			"index", //$NON-NLS-1$
 			"logs", //$NON-NLS-1$
 			"manuals", //$NON-NLS-1$
 			"noapplet", //$NON-NLS-1$
 			"pages_not_installed", //$NON-NLS-1$
 			"process", //$NON-NLS-1$
 			"scripts", //$NON-NLS-1$
 			"stylesheets", //$NON-NLS-1$
 			"xml", //$NON-NLS-1$
 			"search" //$NON-NLS-1$
 	};

 	// Absolute paths of the top level directories that should be excluded
 	// from the search index.
 	private final java.util.List dirsToSkip = new ArrayList();
 	private String pDirectory = null;
 	private StringBuffer indexFolder = null;
 	private String productName = null;

 	/**
 	 * Creates an index builder for the given publish directory.
 	 * <p>
 	 * When <code>publishDir</code> is <code>null</code> the builder is left
 	 * unconfigured and {@link #createIndex()} throws an
 	 * <code>IllegalStateException</code>. When the path contains an
 	 * <code>applet</code> segment, everything from that segment on is cut
 	 * off. The product name is the last segment of the resulting path.
 	 *
 	 * @param publishDir
 	 *            path of the published site, may be <code>null</code>
 	 */
 	public IndexBuilder(String publishDir) {
 		if (publishDir == null) {
 			return;
 		}

 		int appletIndex = publishDir.indexOf(File.separator + "applet"); //$NON-NLS-1$
 		// Keep the separator in front of "applet" so the path still ends
 		// with a separator after truncation.
 		pDirectory = UNCUtil.convertFilename((appletIndex > -1) ? publishDir
 				.substring(0, appletIndex + 1) : publishDir);

 		// The product name is the last path segment, ignoring one trailing
 		// slash.
 		String siteName = pDirectory.replace(File.separatorChar, '/');
 		int end = siteName.length();
 		if (siteName.endsWith("/")) { //$NON-NLS-1$
 			end = end - 1;
 		}
 		int start = siteName.lastIndexOf("/", end - 1); //$NON-NLS-1$
 		productName = siteName.substring(start + 1, end);

 		// <publishDir>/search/index
 		indexFolder = new StringBuffer(pDirectory);
 		if (!pDirectory.endsWith(File.separator)) {
 			indexFolder.append(File.separator);
 		}
 		indexFolder.append("search"); //$NON-NLS-1$
 		indexFolder.append(File.separator).append("index"); //$NON-NLS-1$

 		// Build the skip list through File so that the entries have exactly
 		// the form indexDocs() compares against (getAbsolutePath of a child
 		// of the publish directory), whether or not pDirectory ends with a
 		// separator.
 		File publishRoot = new File(pDirectory);
 		for (int i = 0; i < TOP_LEVEL_DIRS_TO_SKIP.length; i++) {
 			dirsToSkip.add(new File(publishRoot, TOP_LEVEL_DIRS_TO_SKIP[i])
 					.getAbsolutePath());
 		}
 	}

 	/**
 	 * Creates the search index, jars it, removes the loose index files and
 	 * writes the version file.
 	 *
 	 * @return <code>true</code> when the index has been created
 	 * @throws SearchServiceException
 	 *             if the index cannot be built, the index jar is missing
 	 *             after jarring, or the version file cannot be written
 	 * @throws IllegalStateException
 	 *             if the builder was constructed with a <code>null</code>
 	 *             publish directory
 	 */
 	public boolean createIndex() throws SearchServiceException {
 		synchronized (IndexBuilder.class) {

 			if (indexFolder == null || pDirectory == null) {
 				throw new IllegalStateException(
 						"Invalid indexFolder or pDirectory"); //$NON-NLS-1$
 			}

 			String indexPath = indexFolder.toString();

 			buildIndex(indexPath);
 			System.out.println("index created successfully"); //$NON-NLS-1$

 			// jar up the created index.
 			JarCreator.jarFolder(indexPath);
 			System.out.println("index Jarred successfully"); //$NON-NLS-1$

 			try {
 				deleteLooseIndexFiles(indexPath);
 				writeVersionFile(indexPath);
 			} catch (IOException ioe) {
 				throw new SearchServiceException(
 						SearchResources.createSearchIndexError);
 			}

 			return true;
 		}
 	}

 	/**
 	 * Indexes the publish directory into a RAM index and merges the result
 	 * into the file system index at the given path.
 	 */
 	private void buildIndex(String indexPath) throws SearchServiceException {
 		IndexWriter fsWriter = null;
 		IndexWriter ramWriter = null;
 		try {
 			RAMDirectory ramDir = new RAMDirectory();

 			fsWriter = new IndexWriter(FSDirectory.getDirectory(indexPath,
 					true), new TextAnalyzer(), true);
 			ramWriter = new IndexWriter(ramDir, new TextAnalyzer(), true);

 			// The field length limit is applied by the writer that receives
 			// addDocument(), which is the RAM writer. It is set on the file
 			// system writer as well to keep both configured alike.
 			fsWriter.maxFieldLength = MAX_FIELD_LENGTH;
 			ramWriter.maxFieldLength = MAX_FIELD_LENGTH;

 			indexDocs(new File(pDirectory), ramWriter);

 			// The RAM writer must be closed before merging, otherwise the
 			// documents it still buffers are not part of the RAM directory
 			// and would be missing from the final index.
 			ramWriter.close();
 			ramWriter = null;

 			fsWriter.addIndexes(new Directory[] { ramDir });
 			fsWriter.optimize();
 			fsWriter.close();
 			fsWriter = null;
 		} catch (Exception e) {
 			System.out.println(e.getMessage());
 			System.out.println("createIndex");
 			e.printStackTrace();
 			// Do not go on and jar a broken or partial index.
 			throw new SearchServiceException(
 					SearchResources.createSearchIndexError);
 		} finally {
 			closeQuietly(ramWriter);
 			closeQuietly(fsWriter);
 		}
 	}

 	/**
 	 * Closes a writer that is still open after a failure; a failure to
 	 * close is only reported.
 	 */
 	private static void closeQuietly(IndexWriter writer) {
 		if (writer == null) {
 			return;
 		}
 		try {
 			writer.close();
 		} catch (IOException e) {
 			e.printStackTrace();
 		}
 	}

 	/**
 	 * Deletes every file in the index folder except the index jar.
 	 */
 	private static void deleteLooseIndexFiles(String indexPath) {
 		File[] files = new File(indexPath).listFiles();
 		if (files == null) {
 			// Not a directory or not readable; the missing jar is reported
 			// by writeVersionFile().
 			return;
 		}
 		for (int i = 0; i < files.length; i++) {
 			File tempFile = files[i];
 			if (!tempFile.getName().equals(JarCreator.INDEX_JAR)) {
 				tempFile.delete();
 			}
 		}
 	}

 	/**
 	 * Writes the version file
 	 * (<code>productName*timestamp*jarSize</code>) next to the index jar.
 	 *
 	 * @throws SearchServiceException
 	 *             if the index jar does not exist
 	 * @throws IOException
 	 *             if the version file cannot be written
 	 */
 	private void writeVersionFile(String indexPath) throws IOException,
 			SearchServiceException {
 		File newIndexJar = new File(indexPath + File.separator
 				+ JarCreator.INDEX_JAR);
 		if (!newIndexJar.exists()) {
 			throw new SearchServiceException(
 					SearchResources.createSearchIndexError);
 		}

 		long milliseconds = new Date().getTime();
 		String fileSize = "" + newIndexJar.length(); //$NON-NLS-1$

 		BufferedWriter bw = new BufferedWriter(new FileWriter(indexPath
 				+ File.separator + VERSION_FILE_NAME));
 		try {
 			bw.write(productName + VERSION_DELIMITER + milliseconds
 					+ VERSION_DELIMITER + fileSize + "\n"); //$NON-NLS-1$
 		} finally {
 			// Closing the buffered writer also closes the file writer.
 			bw.close();
 		}
 	}

 	/**
 	 * Indexes the given file, or recursively all files below it when it is
 	 * a directory. Directories on the skip list are ignored. A page that
 	 * fails to index is reported and skipped so that the remaining pages
 	 * are still indexed.
 	 */
 	private void indexDocs(File file, IndexWriter writer) throws Exception {
 		if (dirsToSkip.contains(file.getAbsolutePath())) {
 			return;
 		}
 		if (file.isDirectory()) {
 			String[] children = file.list();
 			if (children == null) {
 				// list() returns null when the directory cannot be read.
 				return;
 			}
 			for (int i = 0; i < children.length; i++) {
 				indexDocs(new File(file, children[i]), writer);
 			}
 		} else if (isHtmlDoc(file) && !shouldBeExcluded(file)) {
 			try {
 				Document doc = getHTMLDocument(file);
 				if (doc != null) {
 					writer.addDocument(doc);
 				}
 			} catch (Exception e1) {
 				System.out.println(file.getName());
 				System.out.println("indexDocs");
 				e1.printStackTrace();
 			}
 		}
 	}

 	/**
 	 * Checks whether the given file should be excluded from the search index.
 	 * <p>
 	 * A file is excluded when the absolute path of its parent directory is
 	 * a prefix of the publish directory path; in practice this matches
 	 * pages located directly in the root of the publish directory.
 	 *
 	 * @param file
 	 *            The file to be verified.
 	 * @return <code>true</code> if the given file should be excluded from the
 	 *         search index.
 	 */
 	private boolean shouldBeExcluded(File file) {
 		String parentPath = file.getParentFile().getAbsolutePath();
 		return pDirectory.startsWith(parentPath);
 	}

 	/**
 	 * Returns <code>true</code> if the path of the given file ends with
 	 * <code>.html</code> or <code>.htm</code>.
 	 */
 	private static boolean isHtmlDoc(File file) {
 		String path = file.getPath();
 		return path.endsWith(".html") || path.endsWith(".htm"); //$NON-NLS-1$ //$NON-NLS-2$
 	}

 	/**
 	 * Converts an HTML page into a Lucene document.
 	 *
 	 * @param file
 	 *            the HTML file to convert
 	 * @return the document, or <code>null</code> if the page has no content
 	 *         reader or no title, if its UMA type is not searchable, or if
 	 *         parsing failed
 	 */
 	private Document getHTMLDocument(File file) {
 		Document luceneDocument = new Document();

 		// separatorChar is the directory separator; pathSeparatorChar
 		// (':' or ';') separates entries of a path list and is not what
 		// has to be normalized here.
 		String url = file.getPath().replace(File.separatorChar, '/');
 		luceneDocument.add(Field.UnIndexed(URL_FIELD, url));

 		try {
 			// NOTE(review): the stream is intentionally not closed here.
 			// The content reader is handed to Lucene and is presumably
 			// only consumed later, inside IndexWriter.addDocument() -
 			// confirm how LHTMLParser releases the stream.
 			LHTMLParser parser = new LHTMLParser(new InputStreamReader(
 					new FileInputStream(file), "UTF-8")); //$NON-NLS-1$

 			Reader reader = parser.getReader();
 			if (reader == null) {
 				return null;
 			}
 			luceneDocument.add(Field.Text(CONTENT_FIELD, reader));

 			String title = parser.getTitle();
 			if (title == null || title.length() == 0) {
 				return null;
 			}
 			// Workaround a Linux specific issue.
 			title = title.replaceAll("\\xa0", " "); //$NON-NLS-1$ //$NON-NLS-2$
 			luceneDocument.add(Field.Text(TITLE_FIELD, title));

 			// Strip the title (and the character following it) from the
 			// beginning of the summary. A summary that consists of the
 			// title only becomes empty instead of failing the whole page.
 			String summary = parser.getSummary();
 			if (summary == null) {
 				summary = ""; //$NON-NLS-1$
 			} else if (summary.startsWith(title)) {
 				summary = (summary.length() > title.length()) ? summary
 						.substring(title.length() + 1) : ""; //$NON-NLS-1$
 			}
 			luceneDocument.add(Field.UnIndexed(SUMMARY_FIELD, summary));

 			// Every meta tag becomes a searchable field of the same name.
 			Properties metaTags = parser.getMetaTags();
 			if (metaTags != null) {
 				for (Enumeration names = metaTags.propertyNames(); names
 						.hasMoreElements();) {
 					String tagName = (String) names.nextElement();
 					if (tagName == null) {
 						continue;
 					}
 					String tagValue = metaTags.getProperty(tagName);
 					if (tagValue != null) {
 						luceneDocument.add(Field.Text(tagName, tagValue));
 					}
 				}
 			}

 			if (luceneDocument.getField(ROLE_FIELD) == null) {
 				// Default to "NORUPROLE" to support searching for files
 				// without role meta tags.
 				luceneDocument.add(Field.Text(ROLE_FIELD, "NORUPROLE")); //$NON-NLS-1$
 			}

 			Field umaTypeField = luceneDocument
 					.getField(UMA_ELEMENT_TYPE_FIELD);
 			if (umaTypeField == null) {
 				// Default to general content.
 				luceneDocument.add(Field.Text(UMA_ELEMENT_TYPE_FIELD,
 						GENERAL_CONTENT));
 			} else if (NO_SEARCHEABLE_UMA_ELEMENTS.contains(umaTypeField
 					.stringValue())) {
 				// Exclude non-searcheable elements from the search
 				// index.
 				return null;
 			}

 		} catch (Exception e) {
 			luceneDocument = null;
 			System.out.println("getHTMLDocument");
 			e.printStackTrace();
 		}

 		return luceneDocument;
 	}

 	/**
 	 * Empty entry point; it performs no work.
 	 */
 	public static void main(String[] args) {
 		// TODO Auto-generated method stub

 	}
 }
	//------------------------------------------------------------------------------
	//Copyright (c) 2005, 2007 IBM Corporation and others.
	//All rights reserved. This program and the accompanying materials
	//are made available under the terms of the Eclipse Public License v1.0
	//which accompanies this distribution, and is available at
	//http://www.eclipse.org/legal/epl-v10.html
	//
	//Contributors:
	//IBM Corporation - initial implementation
	//------------------------------------------------------------------------------
	package org.eclipse.epf.search;

	import java.io.BufferedWriter;
	import java.io.File;
	import java.io.FileInputStream;
	import java.io.FileWriter;
	import java.io.IOException;
	import java.io.InputStreamReader;
	import java.io.Reader;
	import java.util.ArrayList;
	import java.util.Date;
	import java.util.Enumeration;
	import java.util.List;
	import java.util.Properties;

	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.index.IndexWriter;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.store.FSDirectory;
	import org.apache.lucene.store.RAMDirectory;
	import org.eclipse.epf.search.analysis.TextAnalyzer;
	import org.eclipse.epf.search.utils.LHTMLParser;
	import org.eclipse.epf.search.utils.JarCreator;
	import org.eclipse.epf.search.utils.UNCUtil;

	/**
	* This class is the main class that creates the Index from the file
	* associations in the process layout.
	*/
	public class IndexBuilder {
	static final String VERSION_FILE_NAME = "version.txt"; //$NON-NLS-1$
	static final String VERSION_DELIMITER = "*"; //$NON-NLS-1$

	/**
	* Document fields.
	*/
	public static final String BRIEF_DESCRIPTION_FIELD = "briefDescription"; //$NON-NLS-1$
	public static final String CONTENT_FIELD = "contents"; //$NON-NLS-1$
	public static final String ID_FIELD = "id"; //$NON-NLS-1$
	public static final String MODIFIED_FIELD = "modified"; //$NON-NLS-1$
	public static final String NAME_FIELD = "name"; //$NON-NLS-1$
	public static final String ROLE_FIELD = "role"; //$NON-NLS-1$
	public static final String SUMMARY_FIELD = "summary"; //$NON-NLS-1$
	public static final String TYPE_FIELD = "type"; //$NON-NLS-1$
	public static final String URL_FIELD = "url"; //$NON-NLS-1$
	private static final String TITLE_FIELD = "title"; //$NON-NLS-1$
	private static final String UMA_ELEMENT_TYPE_FIELD = "uma.type"; //$NON-NLS-1$
	public static final String GENERAL_CONTENT = "general_content"; //$NON-NLS-1$

	// List of UMA elements that should be included in the search index.
	private static List NO_SEARCHEABLE_UMA_ELEMENTS = new ArrayList();
	static {
	NO_SEARCHEABLE_UMA_ELEMENTS.add("summary");
	NO_SEARCHEABLE_UMA_ELEMENTS.add("workproductdescriptor");
	NO_SEARCHEABLE_UMA_ELEMENTS.add("taskdescriptor");
	NO_SEARCHEABLE_UMA_ELEMENTS.add("roledescriptor");
	}

	// A list of top level directories that should be excluded from the search
	// index.
	private java.util.List dirsToSkip = new ArrayList();
	private String pDirectory = null;
	private StringBuffer indexFolder = null;
	private String productName = null;

	public IndexBuilder(String publishDir) {
	int appletIndex = -1;
	if (publishDir == null)
	return;

	appletIndex = publishDir.indexOf(File.separator + "applet");

	pDirectory = UNCUtil.convertFilename((appletIndex > -1) ? publishDir
	.substring(0, appletIndex + 1) : publishDir);
	String siteName = pDirectory.replace(File.separatorChar, '/');
	int index = siteName.length();
	if (siteName.endsWith("/")) //$NON-NLS-1$
	{
	index = index - 1;
	}

	int index2 = siteName.lastIndexOf("/", index - 1); //$NON-NLS-1$

	productName = siteName.substring(index2 + 1, index);

	// create the index
	StringBuffer searchFolder = new StringBuffer(pDirectory);
	if (!searchFolder.toString().endsWith(File.separator)) {
	searchFolder.append(File.separator);
	}
	searchFolder.append("search"); //$NON-NLS-2$

	indexFolder = new StringBuffer(searchFolder.toString());
	indexFolder.append(File.separator).append("index"); //$NON-NLS-1$

	dirsToSkip.add(pDirectory + "applet"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "css"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "ext_help"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "icons"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "images"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "index"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "logs"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "manuals"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "noapplet"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "pages_not_installed"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "process"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "scripts"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "stylesheets"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "xml"); //$NON-NLS-1$
	dirsToSkip.add(pDirectory + "search"); //$NON-NLS-1$
	}

	public boolean createIndex() throws SearchServiceException {
	synchronized (IndexBuilder.class) {

	if (indexFolder == null \|\| pDirectory == null) {
	throw new IllegalStateException(
	"Invalid indexFolder or pDirectory"); //$NON-NLS-1$
	}

	try {
	RAMDirectory ramDir = new RAMDirectory();

	IndexWriter fsWriter = new IndexWriter(FSDirectory
	.getDirectory(indexFolder.toString(), true),
	new TextAnalyzer(), true);

	IndexWriter ramWriter = new IndexWriter(ramDir,
	new TextAnalyzer(), true);

	if ((fsWriter != null)) {
	// fsWriter.mergeFactor = 1000;
	// fsWriter.maxMergeDocs = 10000;
	fsWriter.maxFieldLength = 1000000;

	indexDocs(new File(pDirectory), ramWriter);

	fsWriter.addIndexes(new Directory[] { ramDir });
	fsWriter.optimize();
	ramWriter.close();
	fsWriter.close();
	}
	} catch (Exception e) {
	System.out.println(e.getMessage());
	System.out.println("createIndex");
	e.printStackTrace();
	}

	System.out.println("index created successfully"); //$NON-NLS-1$

	// jar up the created index.
	JarCreator.jarFolder(indexFolder.toString());

	System.out.println("index Jarred successfully"); //$NON-NLS-1$

	try {
	// delete the files now that they've been jarred.
	File indexDir = new File(indexFolder.toString());
	File[] files = indexDir.listFiles();
	for (int i = 0; i < files.length; i++) {
	File tempFile = files[i];
	if (!tempFile.getName().equals(JarCreator.INDEX_JAR)) {
	tempFile.delete();
	}
	}

	// create the version file.
	Date today = new Date();
	long milliseconds = today.getTime();

	// String rupName = publishDir.substring(index);
	File newIndexJar = new File(indexFolder + File.separator
	+ JarCreator.INDEX_JAR);
	if (newIndexJar.exists()) {
	String fileSize = "" + newIndexJar.length(); //$NON-NLS-1$
	FileWriter fw = new FileWriter(indexFolder + File.separator
	+ VERSION_FILE_NAME);
	BufferedWriter bw = new BufferedWriter(fw);
	bw.write(productName + VERSION_DELIMITER + milliseconds
	+ VERSION_DELIMITER + fileSize + "\n"); //$NON-NLS-1$
	bw.close();
	fw.close();
	} else {
	throw new SearchServiceException(
	SearchResources.createSearchIndexError);
	}
	} catch (IOException ioe) {
	throw new SearchServiceException(
	SearchResources.createSearchIndexError);
	}

	return true;
	}
	}

	/**
	* Index the actual documents specified by the files and recursively get all
	* file in the specified folder file
	*
	*/
	private void indexDocs(File file, IndexWriter writer) throws Exception {
	if (dirsToSkip.contains(file.getAbsolutePath())) {
	return;
	}
	if (file.isDirectory()) {
	String[] files = file.list();
	for (int i = 0; i < files.length; i++) {
	indexDocs(new File(file, files[i]), writer);
	}
	} else if (isHtmlDoc(file)) {
	if (shouldBeExcluded(file)) {
	return;
	}
	try {
	Document doc = getHTMLDocument(file);

	if (doc != null) {
	writer.addDocument(doc);
	}
	} catch (Exception e1) {
	System.out.println(file.getName());
	System.out.println("indexDocs");
	e1.printStackTrace();
	}
	}
	}

	/**
	* Checks whether the given file should be excluded from the search index.
	*
	* @param file
	* The file to be verified.
	* @return <code>true</code> if the given file should be excluded from the
	* search index.
	*/
	private boolean shouldBeExcluded(File file) {
	String path = file.getParentFile().getAbsolutePath();
	if (pDirectory.startsWith(path)) {
	return true;
	}

	return false;
	}

	private static boolean isHtmlDoc(File file) {
	String path = file.getPath();
	return path.endsWith(".html") \|\| path.endsWith(".htm"); //$NON-NLS-2$ //$NON-NLS-3$
	}

	private Document getHTMLDocument(File file) {
	Document luceneDocument = new Document();

	String url = file.getPath().replace(File.pathSeparatorChar, '/');
	luceneDocument.add(Field.UnIndexed(URL_FIELD, url));

	try {
	LHTMLParser parser = new LHTMLParser(new InputStreamReader(
	new FileInputStream(file), "UTF-8")); //$NON-NLS-1$

	Reader reader = parser.getReader();
	if (reader != null) {
	luceneDocument.add(Field.Text(CONTENT_FIELD, reader));
	} else {
	return null;
	}

	String title = parser.getTitle();
	if (title != null && title.length() > 0) {
	// Workaround a Linux specific issue.
	title = title.replaceAll("\\xa0", " "); //$NON-NLS-1$ //$NON-NLS-2$
	luceneDocument.add(Field.Text(TITLE_FIELD, title));
	} else {
	return null;
	}

	String summary = parser.getSummary();
	if (summary.startsWith(title)) {
	luceneDocument.add(Field.UnIndexed(SUMMARY_FIELD, summary
	.substring(title.length() + 1)));
	} else
	luceneDocument.add(Field.UnIndexed(SUMMARY_FIELD, parser
	.getSummary()));

	Properties metaTags = parser.getMetaTags();
	for (Enumeration names = metaTags.propertyNames(); names
	.hasMoreElements();) {
	String tagName = (String) names.nextElement();
	if (tagName != null) {
	if (tagName.equals(ROLE_FIELD)) {
	String roleName = metaTags.getProperty(tagName);
	if (roleName != null) {
	luceneDocument.add(Field.Text(tagName, roleName));
	}
	} else {
	String tagValue = metaTags.getProperty(tagName);
	if (tagValue != null) {
	luceneDocument.add(Field.Text(tagName, tagValue));
	}
	}
	}
	}

	if (luceneDocument.getField(ROLE_FIELD) == null) {
	// Default to "na" to support searching for files without
	// role meta tags.
	luceneDocument.add(Field.Text(ROLE_FIELD, "NORUPROLE")); //$NON-NLS-1$
	}

	Field umaTypeField = luceneDocument
	.getField(UMA_ELEMENT_TYPE_FIELD);
	if (umaTypeField == null) {
	// Default to general content.
	luceneDocument.add(Field.Text(UMA_ELEMENT_TYPE_FIELD,
	GENERAL_CONTENT));
	} else if (NO_SEARCHEABLE_UMA_ELEMENTS.contains(umaTypeField
	.stringValue())) {
	// Exclude non-searcheable elements from the search
	// index.
	return null;
	}

	parser = null;

	} catch (Exception e) {
	luceneDocument = null;
	System.out.println("getHTMLDocument");
	e.printStackTrace();
	}

	return luceneDocument;
	}

	public static void main(String[] args) {
	// TODO Auto-generated method stub

	}
	}