Bug 186681 [Help] Too many threads created for indexing
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java
index a80ba97..36cc296 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLDocParser.java
@@ -83,6 +83,7 @@
else {
htmlParser = new HTMLParser(new InputStreamReader(inputStream));
}
+ htmlParser.parse();
}
/**
* Releases resources (closes streams)
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java
index 6eebb53..729dc5f 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/HTMLSearchParticipant.java
@@ -62,9 +62,8 @@
+ name + " cannot be opened.", //$NON-NLS-1$
null);
}
- ParsedDocument parsed = new ParsedDocument(parser.getContentReader());
- doc.add(new Field("contents", parsed.newContentReader())); //$NON-NLS-1$
- doc.add(new Field("exact_contents", parsed.newContentReader())); //$NON-NLS-1$
+ doc.add(new Field("contents", parser.getContentReader())); //$NON-NLS-1$
+ doc.add(new Field("exact_contents", parser.getContentReader())); //$NON-NLS-1$
String title = parser.getTitle();
doc.add(new Field("title", title, Field.Store.NO, Field.Index.TOKENIZED)); //$NON-NLS-1$
doc.add(new Field("exact_title", title, Field.Store.NO, Field.Index.TOKENIZED)); //$NON-NLS-1$
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LimitedSizeCharArrayWriter.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LimitedSizeCharArrayWriter.java
new file mode 100644
index 0000000..b65c139
--- /dev/null
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LimitedSizeCharArrayWriter.java
@@ -0,0 +1,60 @@
+/*******************************************************************************
+ * Copyright (c) 2009 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ *******************************************************************************/
+
+package org.eclipse.help.internal.search;
+
+import java.io.CharArrayWriter;
+import java.io.IOException;
+
+/**
+ * A CharArrayWriter that keeps at most maxSize characters; anything written
+ * past that limit is silently discarded rather than reported as an error.
+ */
+public class LimitedSizeCharArrayWriter extends CharArrayWriter {
+
+	private final long maxSize;
+
+	/** @param maxSize maximum number of characters this writer will store */
+	public LimitedSizeCharArrayWriter(long maxSize) {
+		super();
+		this.maxSize = maxSize;
+	}
+
+	/** Returns len cut down to the room left; a negative len passes through. */
+	private int clip(int len) {
+		long remaining = Math.max(0L, maxSize - size());
+		return (int) Math.min(len, remaining);
+	}
+
+	public void write(char[] c, int off, int len) {
+		super.write(c, off, clip(len));
+	}
+
+	// Forwarded explicitly: the inherited Writer version lands in the method above.
+	public void write(char[] cbuf) throws IOException {
+		write(cbuf, 0, cbuf.length);
+	}
+
+	public void write(String str, int off, int len) {
+		super.write(str, off, clip(len));
+	}
+
+	public void write(int c) {
+		if (clip(1) > 0) {
+			super.write(c);
+		}
+	}
+
+	// Forwarded explicitly so the limit is applied exactly once per call.
+	public void write(String str) throws IOException {
+		write(str, 0, str.length());
+	}
+}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/ParsedDocument.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/ParsedDocument.java
deleted file mode 100644
index 8adfb9d..0000000
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/ParsedDocument.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2000, 2007 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.help.internal.search;
-
-import java.io.*;
-
-/**
- * Parsed Document. It can be used to obtain multiple readers for the same
- * document.
- */
-public class ParsedDocument {
- // Limit on how many characters will be indexed
- // from a large document
- private static final int charsLimit = 1000000;
- Reader reader;
- boolean read;
- char[] docChars;
-
- /**
- * Constructor for ParsedDocument.
- *
- * @param reader
- * reader obtained from the parser
- */
- public ParsedDocument(Reader reader) {
- this.reader = reader;
- this.read = false;
- }
- public Reader newContentReader() {
- if (!read) {
- read = true;
- readDocument();
- }
- return new CharArrayReader(docChars);
- }
- private void readDocument() {
- CharArrayWriter writer = new CharArrayWriter();
- char[] buf = new char[4096];
- int n;
- int charsWritten = 0;
- try {
- while (0 <= (n = reader.read(buf))) {
- if (charsWritten < charsLimit) {
- if (n > charsLimit - charsWritten) {
- // do not exceed the specified limit of characters
- writer.write(buf, 0, charsLimit - charsWritten);
- charsWritten = charsLimit;
- } else {
- writer.write(buf, 0, n);
- charsWritten += n;
- }
- } else {
- // do not break out of the loop
- // keep reading to avoid breaking pipes
- }
- }
- } catch (IOException ioe) {
- // do not do anything, will use characters read so far
- } finally {
- try {
- reader.close();
- } catch (IOException ioe2) {
- }
- }
- docChars = writer.toCharArray();
- }
-}
diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java
index 7d65fc8..ddbeeee 100644
--- a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java
+++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java
@@ -33,6 +33,8 @@
import java.io.*;
import java.util.Properties;
+import org.eclipse.help.internal.search.LimitedSizeCharArrayWriter;
+
public class HTMLParser implements HTMLParserConstants {
public static int SUMMARY_LENGTH = 175;
@@ -50,23 +52,9 @@
boolean afterTag = false;
boolean afterSpace = false;
String eol = System.getProperty("line.separator"); //$NON-NLS-1$
- Reader pipeIn = null;
- Writer pipeOut;
- private MyPipedInputStream pipeInStream = null;
- private PipedOutputStream pipeOutStream = null;
+ private LimitedSizeCharArrayWriter writer = new LimitedSizeCharArrayWriter(1000000);
private Exception exception = null;
- private class MyPipedInputStream extends PipedInputStream{
-
- public MyPipedInputStream(){
- super();
- }
-
- public boolean full() throws IOException{
- return this.available() >= PipedInputStream.PIPE_SIZE;
- }
- }
-
/**
* @deprecated Use HTMLParser(FileInputStream) instead
*/
@@ -74,44 +62,17 @@
this(new FileInputStream(file));
}
- public String getTitle() throws IOException, InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (titleComplete || pipeInStream.full())
- break;
- wait(10);
- }
- }
+ public String getTitle() throws IOException, InterruptedException { // no thread is spawned here; returns the title collected so far
return title.toString().trim();
}
public Properties getMetaTags() throws IOException,
InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (titleComplete || pipeInStream.full())
- break;
- wait(10);
- }
- }
return metaTags;
}
- public String getSummary() throws IOException, InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
- break;
- wait(10);
- }
- }
+ public String getSummary() throws IOException, InterruptedException { // no thread is spawned here; uses what has been collected so far
String metaDescription = metaTags.getProperty("description"); //$NON-NLS-1$
if (metaDescription != null) {
if (metaDescription.length() > SUMMARY_LENGTH) {
@@ -122,19 +83,17 @@
}
return summary.toString().trim();
}
-
+
public Reader getReader() throws IOException {
- if (pipeIn == null) {
- pipeInStream = new MyPipedInputStream();
- pipeOutStream = new PipedOutputStream(pipeInStream);
- pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE"); //$NON-NLS-1$
- pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE"); //$NON-NLS-1$
+ return new CharArrayReader(writer.toCharArray());
+ }
- Thread thread = new ParserThread(this);
- thread.start(); // start parsing
- }
-
- return pipeIn;
+ public void parse() throws IOException {
+ try { // parse the document on the calling thread; output goes to writer
+ HTMLDocument();
+ } catch (Exception e) { // recorded via setException instead of being rethrown
+ setException(e);
+ }
}
void addToSummary(String text) {
@@ -174,7 +133,7 @@
}
length += text.length();
- pipeOut.write(text);
+ writer.write(text);
afterSpace = false;
}
@@ -182,9 +141,9 @@
void addMetaTag() throws IOException {
metaTags.setProperty(currentMetaTag, currentMetaContent);
if (currentMetaTag.equalsIgnoreCase("keywords")) { //$NON-NLS-1$
- pipeOut.write(' ');
- pipeOut.write(currentMetaContent);
- pipeOut.write(' ');
+ writer.write(' ');
+ writer.write(currentMetaContent);
+ writer.write(' ');
}
currentMetaTag = null;
currentMetaContent = null;
@@ -200,7 +159,7 @@
String space = afterTag ? eol : " "; //$NON-NLS-1$
length += space.length();
- pipeOut.write(space);
+ writer.write(space);
afterSpace = true;
}
}
diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java
deleted file mode 100644
index bdbb2e2..0000000
--- a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package org.apache.lucene.demo.html;
-
-/**
- * Copyright 2004, The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Copyright (c) 2009 IBM Corp.
- * All rights reserved.
- */
-
-/**
-* History
-* 2004 Initial contribution The Apache Software Foundation
-* 2009 Chris Goldthorpe, IBM Corporation, fix for bug 266649
-*/
-
-import java.io.*;
-
-class ParserThread extends Thread {
- HTMLParser parser;
-
- ParserThread(HTMLParser p) {
- parser = p;
- }
-
- public void run() { // convert pipeOut to pipeIn
-
- try { // parse document to pipeOut
- parser.HTMLDocument();
- } catch (Exception e) {
- parser.setException(e);
- }
-
- try {
- parser.summary.setLength(HTMLParser.SUMMARY_LENGTH);
- parser.titleComplete = true;
- parser.pipeOut.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- parser.notifyAll();
- }
-}