Bug 186681 [Help] Too many threads created for indexing

commit: 60a84efe77a44cdb9ceda15cf8f0eb6d9147512c [log] [tgz]
author: Chris Goldthorpe <cgoldthor> Fri Apr 24 22:36:15 2009 +0000
committer: Chris Goldthorpe <cgoldthor> Fri Apr 24 22:36:15 2009 +0000
tree: c982f59e86c5b0fe09e4a0e116d0ef5ff7826a8a
parent: ee2eac16cd0c66b257834efcb1e268fbb362d89a [diff]
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java
index a80ba97..36cc296 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java

@@ -83,6 +83,7 @@
 		else {
 			htmlParser = new HTMLParser(new InputStreamReader(inputStream));
 		}
+		htmlParser.parse();
 	}
 	/**
 	 * Releases resources (closes streams)

diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java
index 6eebb53..729dc5f 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java

@@ -62,9 +62,8 @@
 										+ name + " cannot be opened.", //$NON-NLS-1$
 								null);
 					}
-					ParsedDocument parsed = new ParsedDocument(parser.getContentReader());
-					doc.add(new Field("contents", parsed.newContentReader())); //$NON-NLS-1$
-					doc.add(new Field("exact_contents", parsed.newContentReader())); //$NON-NLS-1$
+					doc.add(new Field("contents", parser.getContentReader())); //$NON-NLS-1$
+					doc.add(new Field("exact_contents", parser.getContentReader())); //$NON-NLS-1$
 					String title = parser.getTitle();
 					doc.add(new Field("title", title, Field.Store.NO, Field.Index.TOKENIZED)); //$NON-NLS-1$
 					doc.add(new Field("exact_title", title, Field.Store.NO, Field.Index.TOKENIZED)); //$NON-NLS-1$

diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LimitedSizeCharArrayWriter.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LimitedSizeCharArrayWriter.java
new file mode 100644
index 0000000..b65c139
--- /dev/null
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LimitedSizeCharArrayWriter.java

@@ -0,0 +1,88 @@
+/*******************************************************************************
+ * Copyright (c) 2009 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ *******************************************************************************/
+
+package org.eclipse.help.internal.search;
+
+import java.io.CharArrayWriter;
+import java.io.IOException;
+
+/**
+ * A CharArrayWriter that keeps at most a fixed number of characters.
+ * Characters written once the limit has been reached are silently
+ * discarded, so a producer can keep writing without special handling.
+ */
+public class LimitedSizeCharArrayWriter extends CharArrayWriter {
+
+	// Maximum number of characters this writer will hold
+	private long maxSize;
+
+	/**
+	 * @param maxSize
+	 *            maximum number of characters to keep; further output is
+	 *            dropped
+	 */
+	public LimitedSizeCharArrayWriter(long maxSize) {
+		super();
+		this.maxSize = maxSize;
+	}
+
+	/**
+	 * Writes as many of the len characters as still fit under the limit.
+	 */
+	public void write(char[] c, int off, int len) {
+		super.write(c, off, clip(len));
+	}
+
+	public void write(char[] cbuf) throws IOException {
+		// Delegate to the limited overload; Writer.write(char[]) would call
+		// it anyway, so the characters must not be accounted for here too.
+		write(cbuf, 0, cbuf.length);
+	}
+
+	/**
+	 * Writes as many of the len characters as still fit under the limit.
+	 */
+	public void write(String str, int off, int len) {
+		super.write(str, off, clip(len));
+	}
+
+	/**
+	 * Writes a single character unless the limit has been reached.
+	 */
+	public void write(int c) {
+		if (size() < maxSize) {
+			super.write(c);
+		}
+	}
+
+	public void write(String str) throws IOException {
+		// See write(char[]): delegate so the length is only counted once.
+		write(str, 0, str.length());
+	}
+
+	/**
+	 * Returns how many of len requested characters may still be written.
+	 * Non-positive lengths are returned unchanged so that the superclass
+	 * performs its usual argument checks.
+	 */
+	private int clip(int len) {
+		if (len <= 0) {
+			return len;
+		}
+		int stored = size();
+		if (stored >= maxSize) {
+			return 0;
+		}
+		// maxSize > stored >= 0 here, so the subtraction cannot overflow
+		return (int) Math.min(len, maxSize - stored);
+	}
+
+}

diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/ParsedDocument.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/ParsedDocument.java
deleted file mode 100644
index 8adfb9d..0000000
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/ParsedDocument.java
+++ /dev/null

@@ -1,75 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2000, 2007 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- *     IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.help.internal.search;
-
-import java.io.*;
-
-/**
- * Parsed Document. It can be used to obtain multiple readers for the same
- * document.
- */
-public class ParsedDocument {
-	// Limit on how many characters will be indexed
-	// from a large document
-	private static final int charsLimit = 1000000;
-	Reader reader;
-	boolean read;
-	char[] docChars;
-
-	/**
-	 * Constructor for ParsedDocument.
-	 * 
-	 * @param reader
-	 *            reader obtained from the parser
-	 */
-	public ParsedDocument(Reader reader) {
-		this.reader = reader;
-		this.read = false;
-	}
-	public Reader newContentReader() {
-		if (!read) {
-			read = true;
-			readDocument();
-		}
-		return new CharArrayReader(docChars);
-	}
-	private void readDocument() {
-		CharArrayWriter writer = new CharArrayWriter();
-		char[] buf = new char[4096];
-		int n;
-		int charsWritten = 0;
-		try {
-			while (0 <= (n = reader.read(buf))) {
-				if (charsWritten < charsLimit) {
-					if (n > charsLimit - charsWritten) {
-						// do not exceed the specified limit of characters
-						writer.write(buf, 0, charsLimit - charsWritten);
-						charsWritten = charsLimit;
-					} else {
-						writer.write(buf, 0, n);
-						charsWritten += n;
-					}
-				} else {
-					// do not break out of the loop
-					// keep reading to avoid breaking pipes
-				}
-			}
-		} catch (IOException ioe) {
-			// do not do anything, will use characters read so far
-		} finally {
-			try {
-				reader.close();
-			} catch (IOException ioe2) {
-			}
-		}
-		docChars = writer.toCharArray();
-	}
-}

diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java
index 7d65fc8..ddbeeee 100644
--- a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java
+++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java

@@ -33,6 +33,8 @@
 import java.io.*;
 import java.util.Properties;
 
+import org.eclipse.help.internal.search.LimitedSizeCharArrayWriter;
+
 public class HTMLParser implements HTMLParserConstants {
   public static int SUMMARY_LENGTH = 175;
 
@@ -50,23 +52,9 @@
   boolean afterTag = false;
   boolean afterSpace = false;
   String eol = System.getProperty("line.separator"); //$NON-NLS-1$
-  Reader pipeIn = null;
-  Writer pipeOut;
-  private MyPipedInputStream pipeInStream = null;
-  private PipedOutputStream pipeOutStream = null;
+  private LimitedSizeCharArrayWriter writer = new LimitedSizeCharArrayWriter(1000000);
   private Exception exception = null;
 
-  private class MyPipedInputStream extends PipedInputStream{
-
-    public MyPipedInputStream(){
-      super();
-    }
-
-    public boolean full() throws IOException{
-      return this.available() >= PipedInputStream.PIPE_SIZE;
-    }
-  }
-
   /**
    * @deprecated Use HTMLParser(FileInputStream) instead
    */
@@ -74,44 +62,17 @@
     this(new FileInputStream(file));
   }
 
-  public String getTitle() throws IOException, InterruptedException {
-    if (pipeIn == null)
-      getReader();                                // spawn parsing thread
-    while (true) {
-      synchronized(this) {
-        if (titleComplete || pipeInStream.full())
-          break;
-        wait(10);
-      }
-    }
+  public String getTitle() throws IOException, InterruptedException {  // returns the title text collected so far, trimmed; no parsing thread is started here
     return title.toString().trim();
   }
 
   public Properties getMetaTags() throws IOException,
 InterruptedException {
-    if (pipeIn == null)
-      getReader();                                // spawn parsing thread
-    while (true) {
-      synchronized(this) {
-        if (titleComplete || pipeInStream.full())
-          break;
-        wait(10);
-      }
-    }
     return metaTags;
   }
 
 
-  public String getSummary() throws IOException, InterruptedException {
-    if (pipeIn == null)
-      getReader();                                // spawn parsing thread
-    while (true) {
-      synchronized(this) {
-        if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
-          break;
-        wait(10);
-      }
-    }
+  public String getSummary() throws IOException, InterruptedException {  // reads the already collected metaTags/summary; no parsing thread is started here
     String metaDescription = metaTags.getProperty("description"); //$NON-NLS-1$
     if (metaDescription != null) {
     	if (metaDescription.length() > SUMMARY_LENGTH) {
@@ -122,19 +83,17 @@
     }
     return summary.toString().trim();
   }
-
+  
   public Reader getReader() throws IOException {
-    if (pipeIn == null) {
-      pipeInStream = new MyPipedInputStream();
-      pipeOutStream = new PipedOutputStream(pipeInStream);
-      pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE"); //$NON-NLS-1$
-      pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE"); //$NON-NLS-1$
+	  return new CharArrayReader(writer.toCharArray()); // each call returns a new reader over a copy of the text written so far
+  }
 
-      Thread thread = new ParserThread(this);
-      thread.start();                             // start parsing
-    }
-
-    return pipeIn;
+  public void parse() throws IOException {
+	try { // parse the document on the calling thread; extracted text is collected in writer
+		HTMLDocument();
+	} catch (Exception e) {
+		setException(e); // record the failure instead of propagating it to the caller
+	}
   }
 
   void addToSummary(String text) {
@@ -174,7 +133,7 @@
     }
 
     length += text.length();
-    pipeOut.write(text);
+    writer.write(text);
 
     afterSpace = false;
   }
@@ -182,9 +141,9 @@
   void addMetaTag() throws IOException {
       metaTags.setProperty(currentMetaTag, currentMetaContent);
       if (currentMetaTag.equalsIgnoreCase("keywords")) { //$NON-NLS-1$
-    	  pipeOut.write(' '); 
-          pipeOut.write(currentMetaContent);
-    	  pipeOut.write(' '); 
+    	  writer.write(' '); 
+          writer.write(currentMetaContent);
+    	  writer.write(' '); 
       }
       currentMetaTag = null;
       currentMetaContent = null;
@@ -200,7 +159,7 @@
 
       String space = afterTag ? eol : " "; //$NON-NLS-1$
       length += space.length();
-      pipeOut.write(space);
+      writer.write(space);
       afterSpace = true;
     }
   }

diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java
deleted file mode 100644
index bdbb2e2..0000000
--- a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java
+++ /dev/null

@@ -1,54 +0,0 @@
-package org.apache.lucene.demo.html;
-
-/**
- * Copyright 2004, The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * 
- *     Copyright (c) 2009 IBM Corp.
- *     All rights reserved.
- */
- 
-/**
-* History
-* 2004 Initial contribution The Apache Software Foundation
-* 2009 Chris Goldthorpe, IBM Corporation, fix for bug 266649
-*/
-
-import java.io.*;
-
-class ParserThread extends Thread {
-  HTMLParser parser;
-
-  ParserThread(HTMLParser p) {
-    parser = p;
-  }
-
-  public void run() {				  // convert pipeOut to pipeIn
-
-      try {					  // parse document to pipeOut
-        parser.HTMLDocument();
-      } catch (Exception e) {
-    	  parser.setException(e);
-      }
-
-      try {
-	    parser.summary.setLength(HTMLParser.SUMMARY_LENGTH);
-	    parser.titleComplete = true;
-        parser.pipeOut.close();
-      } catch (IOException e) {
-	       e.printStackTrace();
-    }
-	parser.notifyAll();
-  }
-}
commit	60a84efe77a44cdb9ceda15cf8f0eb6d9147512c	[log] [tgz]
author	Chris Goldthorpe <cgoldthor>	Fri Apr 24 22:36:15 2009 +0000
committer	Chris Goldthorpe <cgoldthor>	Fri Apr 24 22:36:15 2009 +0000
tree	c982f59e86c5b0fe09e4a0e116d0ef5ff7826a8a
parent	ee2eac16cd0c66b257834efcb1e268fbb362d89a [diff]