//------------------------------------------------------------------------------
//Copyright (c) 2005, 2007 IBM Corporation and others.
//All rights reserved. This program and the accompanying materials
//are made available under the terms of the Eclipse Public License v1.0
//which accompanies this distribution, and is available at
//http://www.eclipse.org/legal/epl-v10.html
//
//Contributors:
//IBM Corporation - initial implementation
//------------------------------------------------------------------------------
package org.eclipse.epf.search;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.eclipse.epf.search.analysis.TextAnalyzer;
import org.eclipse.epf.search.utils.LHTMLParser;
import org.eclipse.epf.search.utils.JarCreator;
import org.eclipse.epf.search.utils.UNCUtil;

/**
 * Creates the search index for a published site from the HTML files in its
 * process layout, packages the index into a JAR and writes a version file
 * next to it.
 */
public class IndexBuilder {
	static final String VERSION_FILE_NAME = "version.txt"; //$NON-NLS-1$
	static final String VERSION_DELIMITER = "*"; //$NON-NLS-1$

	/**
	 * Document fields.
	 */
	public static final String BRIEF_DESCRIPTION_FIELD = "briefDescription"; //$NON-NLS-1$
	public static final String CONTENT_FIELD = "contents"; //$NON-NLS-1$
	public static final String ID_FIELD = "id"; //$NON-NLS-1$
	public static final String MODIFIED_FIELD = "modified"; //$NON-NLS-1$
	public static final String NAME_FIELD = "name"; //$NON-NLS-1$
	public static final String ROLE_FIELD = "role"; //$NON-NLS-1$
	public static final String SUMMARY_FIELD = "summary"; //$NON-NLS-1$
	public static final String TYPE_FIELD = "type"; //$NON-NLS-1$
	public static final String URL_FIELD = "url"; //$NON-NLS-1$
	private static final String TITLE_FIELD = "title"; //$NON-NLS-1$
	private static final String UMA_ELEMENT_TYPE_FIELD = "uma.type"; //$NON-NLS-1$
	public static final String GENERAL_CONTENT = "general_content"; //$NON-NLS-1$

	// Value assigned to the maxFieldLength setting of the index writers.
	private static final int MAX_FIELD_LENGTH = 1000000;

	// List of UMA element types that must be EXCLUDED from the search index.
	private static final List NO_SEARCHEABLE_UMA_ELEMENTS = new ArrayList();
	static {
		NO_SEARCHEABLE_UMA_ELEMENTS.add("summary"); //$NON-NLS-1$
		NO_SEARCHEABLE_UMA_ELEMENTS.add("workproductdescriptor"); //$NON-NLS-1$
		NO_SEARCHEABLE_UMA_ELEMENTS.add("taskdescriptor"); //$NON-NLS-1$
		NO_SEARCHEABLE_UMA_ELEMENTS.add("roledescriptor"); //$NON-NLS-1$
	}

	// Names of the top level directories that are never indexed.
	private static final String[] TOP_LEVEL_DIRS_TO_SKIP = { "applet", //$NON-NLS-1$
			"css", //$NON-NLS-1$
			"ext_help", //$NON-NLS-1$
			"icons", //$NON-NLS-1$
			"images", //$NON-NLS-1$
			"index", //$NON-NLS-1$
			"logs", //$NON-NLS-1$
			"manuals", //$NON-NLS-1$
			"noapplet", //$NON-NLS-1$
			"pages_not_installed", //$NON-NLS-1$
			"process", //$NON-NLS-1$
			"scripts", //$NON-NLS-1$
			"stylesheets", //$NON-NLS-1$
			"xml" }; //$NON-NLS-1$

	// Absolute paths of the top level directories that should be excluded
	// from the search index.
	private java.util.List dirsToSkip = new ArrayList();
	private String pDirectory = null;
	private StringBuffer indexFolder = null;
	private String productName = null;

	/**
	 * Creates an index builder for the given publish directory.
	 * <p>
	 * If the path contains an "applet" segment, everything from that segment
	 * on is stripped to obtain the root of the published site. The index is
	 * placed in <code>&lt;root&gt;/applet/search/index</code>.
	 * 
	 * @param publishDir
	 *            The publish directory (or a path below its "applet"
	 *            folder). If <code>null</code>, the builder is left
	 *            uninitialized and {@link #createIndex()} will throw an
	 *            <code>IllegalStateException</code>.
	 */
	public IndexBuilder(String publishDir) {
		if (publishDir == null)
			return;

		int appletIndex = publishDir.indexOf(File.separator + "applet"); //$NON-NLS-1$

		pDirectory = UNCUtil.convertFilename((appletIndex > -1) ? publishDir
				.substring(0, appletIndex + 1) : publishDir);

		// The product name is the last path segment of the site root.
		String siteName = pDirectory.replace(File.separatorChar, '/');
		int index = siteName.length();
		if (siteName.endsWith("/")) //$NON-NLS-1$
		{
			index = index - 1;
		}
		int index2 = siteName.lastIndexOf("/", index - 1); //$NON-NLS-1$
		productName = siteName.substring(index2 + 1, index);

		// Site root that is guaranteed to end with a separator, so that
		// child paths can be built by plain concatenation.
		String baseDir = pDirectory;
		if (!baseDir.endsWith(File.separator)) {
			baseDir = baseDir + File.separator;
		}

		// Location of the index.
		StringBuffer searchFolder = new StringBuffer(baseDir);
		searchFolder.append("applet").append(File.separator).append("search"); //$NON-NLS-1$ //$NON-NLS-2$

		indexFolder = new StringBuffer(searchFolder.toString());
		indexFolder.append(File.separator).append("index"); //$NON-NLS-1$

		// Use baseDir (not pDirectory) so that the skip paths are valid even
		// when the publish directory was given without a trailing separator.
		for (int i = 0; i < TOP_LEVEL_DIRS_TO_SKIP.length; i++) {
			dirsToSkip.add(baseDir + TOP_LEVEL_DIRS_TO_SKIP[i]);
		}
	}

	/**
	 * Builds the search index, jars it, removes the loose index files and
	 * writes the version file.
	 * 
	 * @return <code>true</code> when the index has been created.
	 * @throws SearchServiceException
	 *             if the index cannot be built, the index JAR is missing
	 *             after packaging, or the version file cannot be written.
	 * @throws IllegalStateException
	 *             if the builder was constructed with a <code>null</code>
	 *             publish directory.
	 */
	public boolean createIndex() throws SearchServiceException {
		synchronized (IndexBuilder.class) {

			if (indexFolder == null || pDirectory == null) {
				throw new IllegalStateException(
						"Invalid indexFolder or pDirectory"); //$NON-NLS-1$
			}

			buildIndex();

			System.out.println("index created successfully"); //$NON-NLS-1$

			// jar up the created index.
			JarCreator.jarFolder(indexFolder.toString());

			System.out.println("index Jarred successfully"); //$NON-NLS-1$

			try {
				// delete the files now that they've been jarred.
				File indexDir = new File(indexFolder.toString());
				File[] files = indexDir.listFiles();
				// listFiles() returns null if the folder cannot be read.
				if (files != null) {
					for (int i = 0; i < files.length; i++) {
						File tempFile = files[i];
						if (!tempFile.getName().equals(JarCreator.INDEX_JAR)) {
							tempFile.delete();
						}
					}
				}

				// create the version file.
				Date today = new Date();
				long milliseconds = today.getTime();

				File newIndexJar = new File(indexFolder + File.separator
						+ JarCreator.INDEX_JAR);
				if (!newIndexJar.exists()) {
					throw new SearchServiceException(
							SearchResources.createSearchIndexError);
				}

				String fileSize = "" + newIndexJar.length(); //$NON-NLS-1$
				BufferedWriter bw = new BufferedWriter(new FileWriter(
						indexFolder + File.separator + VERSION_FILE_NAME));
				try {
					bw.write(productName + VERSION_DELIMITER + milliseconds
							+ VERSION_DELIMITER + fileSize + "\n"); //$NON-NLS-1$
				} finally {
					// Closing the buffered writer also closes the file
					// writer, even when the write failed.
					bw.close();
				}
			} catch (IOException ioe) {
				throw new SearchServiceException(
						SearchResources.createSearchIndexError);
			}

			return true;
		}
	}

	/**
	 * Indexes the published site into an in-memory index and merges the
	 * result into the file system index located in the index folder.
	 * 
	 * @throws SearchServiceException
	 *             if anything goes wrong while building the index.
	 */
	private void buildIndex() throws SearchServiceException {
		IndexWriter fsWriter = null;
		IndexWriter ramWriter = null;
		try {
			RAMDirectory ramDir = new RAMDirectory();

			fsWriter = new IndexWriter(FSDirectory.getDirectory(indexFolder
					.toString(), true), new TextAnalyzer(), true);
			ramWriter = new IndexWriter(ramDir, new TextAnalyzer(), true);

			// The documents are analyzed by the RAM writer, so the limit has
			// to be raised there; setting it on the FS writer alone has no
			// effect on the indexed content.
			ramWriter.maxFieldLength = MAX_FIELD_LENGTH;
			fsWriter.maxFieldLength = MAX_FIELD_LENGTH;

			indexDocs(new File(pDirectory), ramWriter);

			// The RAM writer must be closed BEFORE its directory is merged,
			// otherwise documents still buffered by the writer are missing
			// from the merged index. The local is cleared first so that the
			// finally block never closes a writer twice.
			IndexWriter closing = ramWriter;
			ramWriter = null;
			closing.close();

			fsWriter.addIndexes(new Directory[] { ramDir });
			fsWriter.optimize();

			closing = fsWriter;
			fsWriter = null;
			closing.close();
		} catch (Exception e) {
			System.out.println(e.getMessage());
			System.out.println("createIndex");
			e.printStackTrace();
			// Do not go on and package a broken or partial index.
			throw new SearchServiceException(
					SearchResources.createSearchIndexError);
		} finally {
			closeQuietly(ramWriter);
			closeQuietly(fsWriter);
		}
	}

	/**
	 * Closes the given writer, reporting but not propagating any failure.
	 * 
	 * @param writer
	 *            The writer to close, may be <code>null</code>.
	 */
	private static void closeQuietly(IndexWriter writer) {
		if (writer == null) {
			return;
		}
		try {
			writer.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Indexes the given file or, if it is a directory, recursively indexes
	 * all the files below it. Directories listed in the skip list are
	 * ignored. A failure on a single document is reported and does not stop
	 * the indexing of the remaining files.
	 * 
	 * @param file
	 *            The file or directory to index.
	 * @param writer
	 *            The writer that receives the documents.
	 */
	private void indexDocs(File file, IndexWriter writer) throws Exception {
		if (dirsToSkip.contains(file.getAbsolutePath())) {
			return;
		}
		if (file.isDirectory()) {
			String[] files = file.list();
			// list() returns null if the directory cannot be read.
			if (files == null) {
				return;
			}
			for (int i = 0; i < files.length; i++) {
				indexDocs(new File(file, files[i]), writer);
			}
		} else if (isHtmlDoc(file)) {
			if (shouldBeExcluded(file)) {
				return;
			}
			try {
				Document doc = getHTMLDocument(file);

				if (doc != null) {
					writer.addDocument(doc);
				}
			} catch (Exception e1) {
				System.out.println(file.getName());
				System.out.println("indexDocs");
				e1.printStackTrace();
			}
		}
	}

	/**
	 * Checks whether the given file should be excluded from the search index.
	 * A file is excluded when the publish directory path starts with the
	 * absolute path of the file's parent folder, which is the case for files
	 * located directly in the publish directory.
	 * 
	 * @param file
	 *            The file to be verified.
	 * @return <code>true</code> if the given file should be excluded from the
	 *         search index.
	 */
	private boolean shouldBeExcluded(File file) {
		String path = file.getParentFile().getAbsolutePath();
		return pDirectory.startsWith(path);
	}

	/**
	 * Checks whether the given file has a ".html" or ".htm" extension. The
	 * comparison is case sensitive.
	 * 
	 * @param file
	 *            The file to be verified.
	 * @return <code>true</code> if the file path ends with one of the HTML
	 *         extensions.
	 */
	private static boolean isHtmlDoc(File file) {
		String path = file.getPath();
		return path.endsWith(".html") || path.endsWith(".htm"); //$NON-NLS-1$ //$NON-NLS-2$
	}

	/**
	 * Creates the Lucene document for the given HTML file.
	 * 
	 * @param file
	 *            The HTML file to parse.
	 * @return The document, or <code>null</code> if the file has no content
	 *         reader, has no title, has a non-searchable UMA element type, or
	 *         cannot be parsed.
	 */
	private Document getHTMLDocument(File file) {
		Document luceneDocument = new Document();

		// Normalize the directory separators (File.separatorChar); the
		// original code replaced File.pathSeparatorChar, which is the ':' or
		// ';' used between the entries of a path list.
		String url = file.getPath().replace(File.separatorChar, '/');
		luceneDocument.add(Field.UnIndexed(URL_FIELD, url));

		try {
			// NOTE(review): the stream opened here is never closed
			// explicitly; the content reader handed to the document has to
			// stay readable until the document is added to the index.
			// Confirm whether LHTMLParser closes it once it has been read.
			LHTMLParser parser = new LHTMLParser(new InputStreamReader(
					new FileInputStream(file), "UTF-8")); //$NON-NLS-1$

			Reader reader = parser.getReader();
			if (reader == null) {
				return null;
			}
			luceneDocument.add(Field.Text(CONTENT_FIELD, reader));

			String title = parser.getTitle();
			if (title == null || title.length() == 0) {
				return null;
			}
			// Workaround a Linux specific issue.
			title = title.replaceAll("\\xa0", " "); //$NON-NLS-1$ //$NON-NLS-2$
			luceneDocument.add(Field.Text(TITLE_FIELD, title));

			String summary = parser.getSummary();
			if (summary == null) {
				summary = ""; //$NON-NLS-1$
			}
			if (summary.startsWith(title)) {
				// Strip the leading title and the character following it.
				// A summary that consists of the title only becomes empty
				// instead of failing with an index out of bounds error.
				summary = (summary.length() > title.length()) ? summary
						.substring(title.length() + 1) : ""; //$NON-NLS-1$
			}
			luceneDocument.add(Field.UnIndexed(SUMMARY_FIELD, summary));

			// Every meta tag, including the role, becomes a field.
			Properties metaTags = parser.getMetaTags();
			for (Enumeration names = metaTags.propertyNames(); names
					.hasMoreElements();) {
				String tagName = (String) names.nextElement();
				if (tagName == null) {
					continue;
				}
				String tagValue = metaTags.getProperty(tagName);
				if (tagValue != null) {
					luceneDocument.add(Field.Text(tagName, tagValue));
				}
			}

			if (luceneDocument.getField(ROLE_FIELD) == null) {
				// Default to "NORUPROLE" to support searching for files
				// without role meta tags.
				luceneDocument.add(Field.Text(ROLE_FIELD, "NORUPROLE")); //$NON-NLS-1$
			}

			Field umaTypeField = luceneDocument
					.getField(UMA_ELEMENT_TYPE_FIELD);
			if (umaTypeField == null) {
				// Default to general content.
				luceneDocument.add(Field.Text(UMA_ELEMENT_TYPE_FIELD,
						GENERAL_CONTENT));
			} else if (NO_SEARCHEABLE_UMA_ELEMENTS.contains(umaTypeField
					.stringValue())) {
				// Exclude non-searcheable elements from the search
				// index.
				return null;
			}
		} catch (Exception e) {
			luceneDocument = null;
			System.out.println("getHTMLDocument");
			e.printStackTrace();
		}

		return luceneDocument;
	}

	/**
	 * Placeholder entry point; it currently does nothing.
	 * 
	 * @param args
	 *            Ignored.
	 */
	public static void main(String[] args) {
		// TODO Auto-generated method stub

	}
}