blob: 8a61305509c630012203571d0f530380ee12e19e [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004, 2008 IBM Corporation and Others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Kentarou FUKUDA - initial API and implementation
*******************************************************************************/
package org.eclipse.actf.visualization.engines.blind.html.internal.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class DocumentCleaner {
// separated from VisualizeEngine
public static Set removeCSS(Document result, String baseUrl) {
HashSet<String> cssSet = new HashSet<String>();
URL baseURL = null;
try {
baseURL = new URL(baseUrl);
} catch (Exception e1) {
//e1.printStackTrace();
//System.out.println("DocCleaner: create base URL: " + baseUrl);
}
//Remove CSS
NodeList linkEl = result.getElementsByTagName("link");
if (linkEl != null) {
int xy = linkEl.getLength();
for (int i = xy - 1; i >= 0; i--) {
Element x = (Element) linkEl.item(i);
if ("text/css".equals(x.getAttribute("type"))) {
String hrefS = getHref(x, baseURL);
if (!hrefS.equals(""))
cssSet.add(hrefS);
x.getParentNode().removeChild(x);
}
}
}
NodeList styleEl = result.getElementsByTagName("style");
if (styleEl != null) {
int xy = styleEl.getLength();
for (int i = xy - 1; i >= 0; i--) {
Element x = (Element) styleEl.item(i);
if (x.getAttribute("type").equals("text/css")) {
NodeList tmpNL = x.getChildNodes();
for (int j = 0; j < tmpNL.getLength(); j++) {
Node tmpN = tmpNL.item(j);
getImportUrl(tmpN.toString(), baseURL, cssSet);
}
}
x.getParentNode().removeChild(x);
}
}
NodeList bodyNl = result.getElementsByTagName("body");
for (int i = 0; i < bodyNl.getLength(); i++) {
Element bodyEl = (Element) bodyNl.item(i);
bodyEl.removeAttribute("style");
}
return (cssSet);
}
private static String createFullPath(URL baseURL, String spec) {
String result = spec;
if (baseURL != null) {
try {
URL targetUrl = new URL(baseURL, spec);
//System.out.println(targetUrl.toString());
result = targetUrl.toString();
} catch (Exception e) {
//e.printStackTrace();
}
}
return (result);
}
private static String getHref(Element target, URL baseURL) {
String result = "";
if (target.hasAttribute("href")) {
result = target.getAttribute("href");
result = createFullPath(baseURL, result);
}
return (result);
}
private static void getImportUrl(String target, URL baseURL, Set<String> cssSet) {
BufferedReader tmpBR = new BufferedReader(new StringReader(target));
String tmpS;
int index;
try {
while ((tmpS = tmpBR.readLine()) != null) {
if ((index = tmpS.indexOf("@import")) > -1) {
tmpS = tmpS.substring(index + 7);
//TODO
if ((index = tmpS.indexOf("url(\"")) > -1) {
tmpS = tmpS.substring(index + 5);
if ((index = tmpS.indexOf("\");")) > -1) {
tmpS = tmpS.substring(0, index);
tmpS = createFullPath(baseURL, tmpS);
cssSet.add(tmpS);
}
}
}
}
} catch (IOException e) {
//e.printStackTrace();
}
}
public static void removeBase(Document result) {
NodeList objectNl = result.getElementsByTagName("base");
int size = objectNl.getLength();
for (int i = size - 1; i >= 0; i--) {
Element objectEl = (Element) objectNl.item(i);
objectEl.getParentNode().removeChild(objectEl);
}
}
public static void removeApplet(Document result) {
NodeList appletNl = result.getElementsByTagName("applet");
int size = appletNl.getLength();
for (int i = size - 1; i >= 0; i--) {
Element appletEl = (Element) appletNl.item(i);
String id = appletEl.getAttribute("id");
Element divEl = result.createElement("div");
divEl.setAttribute("id", id);
//remove param
Node curChild = appletEl.getLastChild();
while (curChild != null) {
Node next = curChild.getPreviousSibling();
if (!curChild.getNodeName().equals("param")) {
if (divEl.hasChildNodes()) {
divEl.insertBefore(curChild, divEl.getFirstChild());
} else {
divEl.appendChild(curChild);
}
}
curChild = next;
}
appletEl.getParentNode().insertBefore(divEl, appletEl);
appletEl.getParentNode().removeChild(appletEl);
}
}
public static void removeObject(Document result) {
NodeList objectNl = result.getElementsByTagName("object");
int size = objectNl.getLength();
for (int i = size - 1; i >= 0; i--) {
Element objectEl = (Element) objectNl.item(i);
String id = objectEl.getAttribute("id");
Element divEl = result.createElement("div");
divEl.setAttribute("id", id);
//remove param
Node curChild = objectEl.getLastChild();
while (curChild != null) {
Node next = curChild.getPreviousSibling();
if (!curChild.getNodeName().equals("param")) {
if (divEl.hasChildNodes()) {
divEl.insertBefore(curChild, divEl.getFirstChild());
} else {
divEl.appendChild(curChild);
}
}
curChild = next;
}
objectEl.getParentNode().insertBefore(divEl, objectEl);
objectEl.getParentNode().removeChild(objectEl);
}
}
public static void removeEmbed(Document doc) { //remove onload and
// onmouseover
NodeList nl = doc.getElementsByTagName("embed");
int size = nl.getLength();
for (int i = size - 1; i >= 0; i--) {
Element el = (Element) nl.item(i);
String id = el.getAttribute("id");
Element newDiv = doc.createElement("div");
newDiv.setAttribute("id", id);
Node parent = el.getParentNode();
NodeList noembedL = el.getElementsByTagName("noembed");
Node childNoembed = null;
boolean hasNoembed = false;
for (int j = 0; j < noembedL.getLength(); j++) {
//System.out.println(j + "/" + noembedL.getLength() + " " + hasNoembed);
childNoembed = noembedL.item(j);
if (el == childNoembed.getParentNode()) {
hasNoembed = true;
break;
}
}
Node startNode;
if (hasNoembed) {
NodeList tmpNL = childNoembed.getChildNodes();
for (int k = tmpNL.getLength() - 1; k >= 0; k--) {
newDiv.insertBefore(tmpNL.item(k), newDiv.getFirstChild());
}
startNode = childNoembed.getNextSibling();
} else {
newDiv.appendChild(doc.createTextNode("(Embeded element)"));
startNode = el.getFirstChild();
}
parent.insertBefore(newDiv, el);
while (startNode != null) {
Node next = startNode.getNextSibling();
parent.insertBefore(startNode, el);
startNode = next;
}
parent.removeChild(el);
}
}
public static void removeOnMouse(Element element) {
String str = element.getAttribute("onmouseover");
if ((str != null) && (str.length() > 0)) {
element.removeAttribute("onmouseover");
}
str = element.getAttribute("onmouseout");
if ((str != null) && (str.length() > 0)) {
element.removeAttribute("onmouseout");
}
str = element.getAttribute("onmousemove");
if ((str != null) && (str.length() > 0)) {
element.removeAttribute("onmousemove");
}
str = element.getAttribute("onclick");
if ((str != null) && (str.length() > 0)) {
element.removeAttribute("onclick");
}
str = element.getAttribute("onresize");
if ((str != null) && (str.length() > 0)) {
element.removeAttribute("onresize");
}
}
public static void removeOnLoad(Element element) {
String str = element.getAttribute("onload");
if ((str != null) && (str.length() > 0)) {
element.removeAttribute("onload");
}
}
public static void removeDisplayNone(Document doc) {
NodeList tmpNL = doc.getElementsByTagName("div");
for (int i = 0; i < tmpNL.getLength(); i++) {
Element tmpE = (Element) tmpNL.item(i);
//System.out.println(tmpE.getAttribute("style"));
if (tmpE.getAttribute("style").indexOf("DISPLAY: none") > -1) {
Node parent = tmpE.getParentNode();
if (parent != null) {
//System.out.println("remove display: none");
parent.removeChild(tmpE);
}
}
}
}
public static void removeBgcolor(Document doc) {
NodeList bodyNl = doc.getElementsByTagName("body");
if (bodyNl.getLength() > 0) {
Element bodyEl = (Element) bodyNl.item(0);
bodyEl.removeAttribute("bgcolor");
}
}
public static void removeMeta(Document doc) {
NodeList nl = doc.getElementsByTagName("meta");
int size = nl.getLength();
for (int i = 0; i < size; i++) {
Element el = (Element) nl.item(i); //item(i)?
String http_equiv = el.getAttribute("http-equiv");
if ((http_equiv != null) && (http_equiv.equalsIgnoreCase("refresh")||http_equiv.equalsIgnoreCase("Content-Type"))) {
el.removeAttribute("http-equiv");
}
}
}
public static void removeJavaScript(List nodeList, Document doc) {
/*
* remove onload and onmouseover
*/
NodeList nl = doc.getElementsByTagName("script");
int size = nl.getLength();
for (int i = size - 1; i >= 0; i--) {
try {
Element el = (Element) nl.item(i);
//if (el.getParentNode().getNodeName().equals("head")) {
el.getParentNode().removeChild(el);
} catch (Exception e) {
//
}
}
nl = doc.getElementsByTagName("body");
size = nl.getLength();
for (int i = 0; i < size; i++) {
try {
((Element) nl.item(i)).removeAttribute("onload");
// Added on 2004/03/03
((Element) nl.item(i)).removeAttribute("onunload");
} catch (Exception e) {
//
}
}
size = nodeList.size();
for (int i = 0; i < size; i++) {
Node node = ((VisualizationNodeInfo) nodeList.get(i)).getNode();
if (node.getNodeType() == Node.ELEMENT_NODE) {
try {
Element el = (Element) node;
String onload = el.getAttribute("onload");
if (onload.length() > 0) {
el.removeAttribute("onload");
}
String onmouseover = el.getAttribute("onmouseover");
if (onmouseover.length() > 0) {
el.removeAttribute("onmouseover");
}
} catch (Exception e) {
//
}
}
}
}
}