| /******************************************************************************* |
| * Copyright (c) 2010, 2013 Tasktop Technologies and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Tasktop Technologies - initial API and implementation |
| *******************************************************************************/ |
| |
| package org.eclipse.mylyn.commons.core; |
| |
| import java.io.IOException; |
| import java.io.StringReader; |
| import java.text.ParseException; |
| |
| import org.eclipse.mylyn.commons.core.HtmlStreamTokenizer.Token; |
| import org.eclipse.mylyn.internal.commons.core.Html2TextReader; |
| |
| /** |
| * @author Steffen Pingel |
| * @since 3.5 |
| */ |
| public class HtmlUtil { |
| |
| /** |
| * Strips HTML tags from a text. |
| * |
| * @param htmlText |
| * a string that contains HTML tags |
| * @return htmlText converted to plain text |
| * @throws IOException |
| * thrown if a parsing error occurs |
| */ |
| public static String toText(String htmlText) throws IOException { |
| Html2TextReader reader = new Html2TextReader(new StringReader(htmlText)); |
| try { |
| int c; |
| StringBuffer sb = new StringBuffer(htmlText.length()); |
| while ((c = reader.read()) != -1) { |
| sb.append((char) c); |
| } |
| return sb.toString(); |
| } finally { |
| reader.close(); |
| } |
| } |
| |
| /** |
| * @since 3.7 |
| */ |
| public static String getTextContent(HtmlStreamTokenizer tokenizer) throws IOException, ParseException { |
| StringBuilder sb = new StringBuilder(); |
| for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) { |
| if (token.getType() == Token.TEXT) { |
| sb.append(token.toString().trim()); |
| sb.append(" "); //$NON-NLS-1$ |
| } else if (token.getType() == Token.COMMENT) { |
| // ignore |
| } else { |
| break; |
| } |
| } |
| return sb.toString().trim(); |
| } |
| |
| } |