blob: 0444eb9367795257de318ac44cc9789a10427e97 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2010, 2013 Tasktop Technologies and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Tasktop Technologies - initial API and implementation
*******************************************************************************/
package org.eclipse.mylyn.commons.core;
import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import org.eclipse.mylyn.commons.core.HtmlStreamTokenizer.Token;
import org.eclipse.mylyn.internal.commons.core.Html2TextReader;
/**
* @author Steffen Pingel
* @since 3.5
*/
public class HtmlUtil {
/**
* Strips HTML tags from a text.
*
* @param htmlText
* a string that contains HTML tags
* @return htmlText converted to plain text
* @throws IOException
* thrown if a parsing error occurs
*/
public static String toText(String htmlText) throws IOException {
Html2TextReader reader = new Html2TextReader(new StringReader(htmlText));
try {
int c;
StringBuffer sb = new StringBuffer(htmlText.length());
while ((c = reader.read()) != -1) {
sb.append((char) c);
}
return sb.toString();
} finally {
reader.close();
}
}
/**
* @since 3.7
*/
public static String getTextContent(HtmlStreamTokenizer tokenizer) throws IOException, ParseException {
StringBuilder sb = new StringBuilder();
for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
if (token.getType() == Token.TEXT) {
sb.append(token.toString().trim());
sb.append(" "); //$NON-NLS-1$
} else if (token.getType() == Token.COMMENT) {
// ignore
} else {
break;
}
}
return sb.toString().trim();
}
}