org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlUtil.java - mylyn/org.eclipse.mylyn.commons - Git at Google

 /*******************************************************************************
  * Copyright (c) 2010, 2013 Tasktop Technologies and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     Tasktop Technologies - initial API and implementation
  *******************************************************************************/

 package org.eclipse.mylyn.commons.core;

 import java.io.IOException;
 import java.io.StringReader;
 import java.text.ParseException;

 import org.eclipse.mylyn.commons.core.HtmlStreamTokenizer.Token;
 import org.eclipse.mylyn.internal.commons.core.Html2TextReader;

 /**
  * Static helpers for extracting plain text from HTML input.
  *
  * @author Steffen Pingel
  * @since 3.5
  */
 public class HtmlUtil {

 	/**
 	 * Strips HTML tags from a text.
 	 * <p>
 	 * The input is read character by character through an {@link Html2TextReader} and everything the reader yields
 	 * is collected into the result. The reader is always closed, also when reading fails.
 	 *
 	 * @param htmlText
 	 *            a string that contains HTML tags; must not be <code>null</code>
 	 * @return htmlText converted to plain text
 	 * @throws IOException
 	 *             thrown if a parsing error occurs
 	 * @throws NullPointerException
 	 *             if <code>htmlText</code> is <code>null</code>
 	 */
 	public static String toText(String htmlText) throws IOException {
 		Html2TextReader reader = new Html2TextReader(new StringReader(htmlText));
 		try {
 			// The buffer never escapes this method, so the unsynchronized StringBuilder is sufficient;
 			// the input length is used as the initial capacity.
 			StringBuilder text = new StringBuilder(htmlText.length());
 			int c;
 			while ((c = reader.read()) != -1) {
 				text.append((char) c);
 			}
 			return text.toString();
 		} finally {
 			reader.close();
 		}
 	}

 	/**
 	 * Collects the text at the tokenizer's current position.
 	 * <p>
 	 * Tokens are consumed until the end of input or until the first token that is neither text nor a comment is
 	 * read. That terminating token is consumed as well. Each text token is trimmed and the pieces are joined with
 	 * single spaces; comment tokens are skipped.
 	 *
 	 * @param tokenizer
 	 *            the tokenizer to read tokens from
 	 * @return the collected text without leading or trailing whitespace; empty if no text token was read
 	 * @throws IOException
 	 *             propagated from {@link HtmlStreamTokenizer#nextToken()}
 	 * @throws ParseException
 	 *             propagated from {@link HtmlStreamTokenizer#nextToken()}
 	 * @since 3.7
 	 */
 	public static String getTextContent(HtmlStreamTokenizer tokenizer) throws IOException, ParseException {
 		StringBuilder sb = new StringBuilder();
 		for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
 			if (token.getType() == Token.TEXT) {
 				// the trailing separator of the last piece is removed by the final trim()
 				sb.append(token.toString().trim());
 				sb.append(" "); //$NON-NLS-1$
 			} else if (token.getType() == Token.COMMENT) {
 				// comments do not contribute to the text content
 			} else {
 				// any other token (e.g. a tag) ends the text run
 				break;
 			}
 		}
 		return sb.toString().trim();
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2010, 2013 Tasktop Technologies and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* Tasktop Technologies - initial API and implementation
	*******************************************************************************/

	package org.eclipse.mylyn.commons.core;

	import java.io.IOException;
	import java.io.StringReader;
	import java.text.ParseException;

	import org.eclipse.mylyn.commons.core.HtmlStreamTokenizer.Token;
	import org.eclipse.mylyn.internal.commons.core.Html2TextReader;

	/**
	 * Static helpers for extracting plain text from HTML input.
	 *
	 * @author Steffen Pingel
	 * @since 3.5
	 */
	public class HtmlUtil {

		/**
		 * Strips HTML tags from a text.
		 * <p>
		 * The input is read character by character through an {@link Html2TextReader} and everything the reader
		 * yields is collected into the result. The reader is always closed, also when reading fails.
		 *
		 * @param htmlText
		 *            a string that contains HTML tags; must not be <code>null</code>
		 * @return htmlText converted to plain text
		 * @throws IOException
		 *             thrown if a parsing error occurs
		 * @throws NullPointerException
		 *             if <code>htmlText</code> is <code>null</code>
		 */
		public static String toText(String htmlText) throws IOException {
			Html2TextReader reader = new Html2TextReader(new StringReader(htmlText));
			try {
				// The buffer never escapes this method, so the unsynchronized StringBuilder is sufficient;
				// the input length is used as the initial capacity.
				StringBuilder text = new StringBuilder(htmlText.length());
				int c;
				while ((c = reader.read()) != -1) {
					text.append((char) c);
				}
				return text.toString();
			} finally {
				reader.close();
			}
		}

		/**
		 * Collects the text at the tokenizer's current position.
		 * <p>
		 * Tokens are consumed until the end of input or until the first token that is neither text nor a comment
		 * is read. That terminating token is consumed as well. Each text token is trimmed and the pieces are
		 * joined with single spaces; comment tokens are skipped.
		 *
		 * @param tokenizer
		 *            the tokenizer to read tokens from
		 * @return the collected text without leading or trailing whitespace; empty if no text token was read
		 * @throws IOException
		 *             propagated from {@link HtmlStreamTokenizer#nextToken()}
		 * @throws ParseException
		 *             propagated from {@link HtmlStreamTokenizer#nextToken()}
		 * @since 3.7
		 */
		public static String getTextContent(HtmlStreamTokenizer tokenizer) throws IOException, ParseException {
			StringBuilder sb = new StringBuilder();
			for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) {
				if (token.getType() == Token.TEXT) {
					// the trailing separator of the last piece is removed by the final trim()
					sb.append(token.toString().trim());
					sb.append(" "); //$NON-NLS-1$
				} else if (token.getType() == Token.COMMENT) {
					// comments do not contribute to the text content
				} else {
					// any other token (e.g. a tag) ends the text run
					break;
				}
			}
			return sb.toString().trim();
		}

	}