| /******************************************************************************* |
| * Copyright (c) 2005, 2016 IBM Corporation and Others |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Kentarou FUKUDA - initial API and implementation |
| *******************************************************************************/ |
| |
| package org.eclipse.actf.visualization.eval.html; |
| |
| import java.io.File; |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| import java.util.ArrayList; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Vector; |
| |
| import org.eclipse.actf.util.xpath.XPathService; |
| import org.eclipse.actf.util.xpath.XPathServiceFactory; |
| import org.eclipse.actf.visualization.eval.html.statistics.FlashData; |
| import org.eclipse.actf.visualization.eval.html.statistics.HeadingsData; |
| import org.eclipse.actf.visualization.eval.html.statistics.ImageStatData; |
| import org.eclipse.actf.visualization.eval.html.statistics.PageData; |
| import org.eclipse.actf.visualization.eval.problem.IProblemItem; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| import org.w3c.dom.html.HTMLImageElement; |
| |
| /** |
| * Utility class for HTML evaluation |
| */ |
| public class HtmlEvalUtil extends HtmlTagUtil { |
| |
| private static final boolean PERFORMANCE_DEBUG = false; |
| |
| private static final int LONG_TEXT_NUM = 250; // text length limit checked by isDataCell(); TODO check value |
| |
| private static final String[] HEADING_LEVEL = { "h1", "h2", "h3", "h4", "h5", "h6" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ |
| |
| public static final String[] EVENT_MOUSE_BUTTON = { ATTR_ONCLICK, ATTR_ONDBLCLICK, ATTR_ONMOUSEUP, |
| ATTR_ONMOUSEDOWN }; |
| |
| public static final String[] EVENT_MOUSE_FOCUS = { ATTR_ONMOUSEOVER, ATTR_ONMOUSEOUT, ATTR_ONMOUSEMOVE }; |
| |
| public static final String[] EVENT_ON_KEY = { ATTR_ONKEYDOWN, ATTR_ONKEYPRESS, ATTR_ONKEYUP }; |
| |
| public static final String[] EVENT_LOAD = { ATTR_ONLOAD, ATTR_ONUNLOAD, ATTR_ONABORT, ATTR_ONERROR }; |
| |
| public static final String[] EVENT_WINDOW = { ATTR_ONRESIZE, ATTR_ONMOVE, ATTR_ONDRAGDROP }; |
| |
| public static final String[] EVENT_FOCUS = { ATTR_ONFOCUS, ATTR_ONBLUR, ATTR_ONSELECT }; |
| |
| private static final XPathService xpathService = XPathServiceFactory.newService(); |
| private static final Object EXP1 = xpathService.compile(".//a[@href]"); //$NON-NLS-1$ |
| |
| private static final Object EXP2 = xpathService.compile("//h1|//h2|//h3|//h4|//h5|//h6"); //$NON-NLS-1$ |
| private Document target; |
| |
| private Document resultDoc; |
| |
| private Document srcDom; |
| |
| private Document liveDom; |
| |
| private File targetFile; |
| |
| private File srcFile; |
| |
| private File liveFile; |
| |
| private URL baseUrl; |
| |
| private Map<Node, Integer> document2IdMap; |
| |
| private boolean isDBCS; |
| |
| private boolean isLiveDom; |
| |
| private boolean hasAwithHref = false; |
| |
| private boolean hasJavascript = false; |
| |
| private Element[] aWithHref_elements; |
| |
| private String[] aWithHref_hrefs; |
| |
| private String[] aWithHref_strings; |
| |
| private HTMLImageElement[] img_elements; |
| |
| private Element[] table_elements; |
| |
| private Element[] body_elements; |
| |
| private Element[] frame_elements; |
| |
| private Element[] iframe_elements; |
| |
| private Element[] object_elements; |
| |
| private Element[] parent_table_elements; |
| |
| private Element[] bottom_data_tables; |
| |
| private Element[] bottom_1row1col_tables; |
| |
| private Element[] bottom_notdata_tables; |
| |
| private Element[] headings; |
| |
| private Element[] embed_elements; |
| |
| private Element[] script_elements; |
| |
| private Element[] javascript_elements; |
| |
| private Element[] eventMouseButtonElements; // onclick/ondblclick/onmouseup/onmousedown |
| |
| // (the attributes queried for eventMouseButtonElements in collectScriptElements()) |
| |
| private Element[] eventMouseFocusElements; // onmouseover/onmouseout/onmousemove |
| |
| private Element[] eventOnKeyElements; // onkeydown/onkeyup/onkeypress |
| |
| private Element[] eventLoadElements; // onload/onunload/onabort/onerror |
| |
| private Element[] eventWindowElements; // onresize/onmove/ondragdrop |
| |
| private Element[] eventFocusElements; // onfocus/onblur/onselect |
| |
| private Element[] javascriptHref_elements; |
| |
| private String[] javascriptHref_hrefs; |
| |
| private String[] javascriptHref_strings; |
| |
| private String curUrl; |
| |
| private double invalidLinkRatio; |
| |
| private PageData pageData; |
| |
| // private int invisibleElementCount = 0; |
| // |
| // private String[] invisibleLinkStrings = new String[0]; |
| // |
| // private HashSet<String> notExistHrefSet = new HashSet<String>(); |
| |
| // element lists for the new JIS checks; each one is built lazily by its getter |
| private List<Element> imageButtonList; |
| private List<Element> textButtonList; |
| private List<Element> areaList; |
| private List<Element> appletList; |
| private List<Element> accessKeyList; |
| private List<Element> styleList; |
| private List<Element> styleElementList; |
| private List<Element> idElementList; |
| |
| /** |
| * Creates the evaluation helper for one page. This public entry point simply |
| * forwards to the private constructor, passing <code>0</code> as the |
| * invisible element count and <code>null</code> as the invisible link |
| * strings. |
| * |
| * @param target |
| * the {@link Document} to be evaluated |
| * @param resultDoc |
| * the {@link Document} that holds the visualization result |
| * @param url |
| * URL of the target page |
| * @param document2IdMap |
| * mapping from {@link Node} to ACTF_ID |
| * @param srcDom |
| * {@link Document} parsed from the original source |
| * @param liveDom |
| * live {@link Document} obtained from the browser |
| * @param pageData |
| * {@link PageData} that receives detailed page information |
| * @param isDBCS |
| * <code>true</code> if the target page uses DBCS |
| * @param isLive |
| * <code>true</code> if the target is the live DOM |
| */ |
| public HtmlEvalUtil(Document target, Document resultDoc, String url, Map<Node, Integer> document2IdMap, |
| Document srcDom, Document liveDom, PageData pageData, boolean isDBCS, boolean isLive) { |
| // no invisible element information is supplied through this entry point |
| this(target, resultDoc, url, document2IdMap, srcDom, liveDom, pageData, 0, null, isDBCS, isLive); |
| } |
| |
| /** |
| * Constructor of the class. Stores the given documents and flags, then |
| * pre-collects frequently used elements of the target {@link Document} |
| * (anchors with href, images, tables, body/frame/iframe, object/embed, |
| * headings and script related elements) and registers the image, Flash and |
| * heading statistics in the given {@link PageData}. |
| * |
| * @param target |
| * target {@link Document} |
| * @param resultDoc |
| * visualization result {@link Document} |
| * @param url |
| * target URL; when it cannot be parsed the base URL stays |
| * <code>null</code> |
| * @param document2IdMap |
| * map between {@link Node} and ACTF_ID |
| * @param srcDom |
| * the original source {@link Document} |
| * @param liveDom |
| * the live {@link Document} obtained from browser |
| * @param pageData |
| * the detailed page information as {@link PageData}; it is |
| * updated by this constructor |
| * @param invisibleElementCount |
| * number of invisible Element inside the page (currently not |
| * stored) |
| * @param invisibleLinkStrings |
| * array of link target urls of invisible anchor Element |
| * (currently not stored) |
| * @param isDBCS |
| * true if target page uses DBCS |
| * @param isLive |
| * true if target is live DOM |
| */ |
| @SuppressWarnings("nls") |
| private HtmlEvalUtil(Document target, Document resultDoc, String url, Map<Node, Integer> document2IdMap, |
| Document srcDom, Document liveDom, PageData pageData, int invisibleElementCount, |
| String[] invisibleLinkStrings, boolean isDBCS, boolean isLive) { |
| this.target = target; |
| this.resultDoc = resultDoc; |
| |
| this.srcDom = srcDom; |
| this.liveDom = liveDom; |
| this.isLiveDom = isLive; |
| |
| this.pageData = pageData; |
| |
| this.curUrl = url; |
| baseUrl = null; |
| try { |
| baseUrl = new URL(url); // ToDo handle base |
| } catch (MalformedURLException ignored) { |
| // best effort: an unparsable URL leaves baseUrl as null |
| } |
| |
| this.invalidLinkRatio = 0; |
| |
| this.document2IdMap = document2IdMap; |
| |
| if (PERFORMANCE_DEBUG) |
| System.out.println("document2IdMap\t" + (new Date()).getTime()); |
| |
| this.isDBCS = isDBCS; |
| |
| // anchors with href: element, href value and text alternative side by side |
| NodeList tmpNL = xpathService.evalForNodeList(EXP1, target); |
| int length = tmpNL.getLength(); |
| |
| hasAwithHref = (length > 0); |
| |
| aWithHref_elements = new Element[length]; |
| aWithHref_hrefs = new String[length]; |
| aWithHref_strings = new String[length]; |
| |
| for (int i = 0; i < length; i++) { |
| Element tmpE = (Element) tmpNL.item(i); |
| aWithHref_elements[i] = tmpE; |
| aWithHref_hrefs[i] = tmpE.getAttribute(ATTR_HREF); |
| aWithHref_strings[i] = getTextAltDescendant(tmpE); |
| } |
| |
| // images: build the statistics and the lookup maps handed to pageData |
| tmpNL = target.getElementsByTagName("img"); //$NON-NLS-1$ |
| length = tmpNL.getLength(); |
| img_elements = new HTMLImageElement[length]; |
| Vector<ImageStatData> tmpV = new Vector<ImageStatData>(); |
| HashMap<HTMLImageElement, ImageStatData> tmpMap = new HashMap<HTMLImageElement, ImageStatData>(); |
| HashMap<Element, ImageStatData> linkImgMap = new HashMap<Element, ImageStatData>(); |
| for (int i = 0; i < length; i++) { |
| img_elements[i] = (HTMLImageElement) tmpNL.item(i); |
| ImageStatData isd = new ImageStatData(img_elements[i], baseUrl); |
| tmpV.add(isd); |
| tmpMap.put(img_elements[i], isd); |
| if (isd.getAncestorLink() != null) { |
| linkImgMap.put(isd.getAncestorLink(), isd); |
| } |
| } |
| pageData.setImageData(tmpV); |
| pageData.setImageDataMap(tmpMap); |
| pageData.setLinkImageDataMap(linkImgMap); |
| |
| if (PERFORMANCE_DEBUG) |
| System.out.println("process images\t" + (new Date()).getTime()); |
| |
| // tables: a table without nested table is classified as 1row1col, data |
| // or not-data; a table containing another table is a parent table |
| // TODO use XPath |
| tmpNL = target.getElementsByTagName("table"); //$NON-NLS-1$ |
| length = tmpNL.getLength(); |
| table_elements = new Element[length]; |
| Vector<Element> bottomV = new Vector<Element>(); |
| Vector<Element> parentV = new Vector<Element>(); |
| Vector<Element> b1row1colV = new Vector<Element>(); |
| Vector<Element> bNotDataV = new Vector<Element>(); |
| for (int i = 0; i < length; i++) { |
| table_elements[i] = (Element) tmpNL.item(i); |
| if (table_elements[i].getElementsByTagName("table").getLength() //$NON-NLS-1$ |
| == 0) { |
| if (is1Row1ColTable(table_elements[i])) { |
| b1row1colV.add(table_elements[i]); |
| } else if (isDataTable(table_elements[i])) { |
| bottomV.add(table_elements[i]); |
| } else { |
| bNotDataV.add(table_elements[i]); |
| } |
| } else { |
| parentV.add(table_elements[i]); |
| } |
| } |
| bottom_data_tables = new Element[bottomV.size()]; |
| bottom_1row1col_tables = new Element[b1row1colV.size()]; |
| bottom_notdata_tables = new Element[bNotDataV.size()]; |
| parent_table_elements = new Element[parentV.size()]; |
| bottomV.toArray(bottom_data_tables); |
| b1row1colV.toArray(bottom_1row1col_tables); |
| bNotDataV.toArray(bottom_notdata_tables); |
| parentV.toArray(parent_table_elements); |
| |
| if (PERFORMANCE_DEBUG) |
| System.out.println("process tables\t" + (new Date()).getTime()); |
| |
| body_elements = getElementsArray(target, "body"); |
| frame_elements = getElementsArray(target, "frame"); |
| iframe_elements = getElementsArray(target, "iframe"); |
| |
| if (PERFORMANCE_DEBUG) |
| System.out.println("process frames\t" + (new Date()).getTime()); |
| |
| // embed elements that were already handled while processing their |
| // enclosing Flash object; the stand-alone embed pass below skips them |
| HashSet<Element> embedInObjectSet = new HashSet<Element>(); |
| |
| // TODO ieDOM |
| tmpNL = target.getElementsByTagName("object"); //$NON-NLS-1$ |
| length = tmpNL.getLength(); |
| object_elements = new Element[length]; |
| for (int i = 0; i < length; i++) { |
| object_elements[i] = (Element) tmpNL.item(i); |
| if (FLASH_OBJECT.equalsIgnoreCase(object_elements[i].getAttribute("classid"))) { |
| // TODO check codebase |
| // TODO get width hight align ... loop quality... |
| NodeList paramNL = object_elements[i].getElementsByTagName("param"); |
| String src = ""; |
| for (int j = 0; j < paramNL.getLength(); j++) { |
| try { |
| Element tmpE = (Element) paramNL.item(j); |
| // the last param named "movie" wins |
| if ("movie".equalsIgnoreCase(tmpE.getAttribute("name"))) { |
| src = tmpE.getAttribute("value"); |
| } |
| } catch (RuntimeException ignored) { |
| // best effort: a param that cannot be read is skipped |
| } |
| } |
| if (src != null && src.length() > 0) { |
| FlashData flashD = new FlashData(object_elements[i], src, true); |
| pageData.addFlashData(flashD); |
| |
| NodeList embedNL = object_elements[i].getElementsByTagName("embed"); |
| for (int j = 0; j < embedNL.getLength(); j++) { |
| Element tmpE = (Element) embedNL.item(j); |
| if (FLASH_TYPE.equalsIgnoreCase(tmpE.getAttribute("type"))) { |
| // TODO check PLUGINSPAGE |
| // TODO get width hight align ... loop quality... |
| |
| String src2 = tmpE.getAttribute("src"); |
| if (src2 != null && src2.length() > 0) { |
| if (src.equalsIgnoreCase(src2)) { |
| flashD.setWithEmbed(true); |
| } else { |
| // the embed refers to a different movie: record it |
| // with its own src (not the src of the object) |
| pageData.addFlashData(new FlashData(tmpE, src2, false)); |
| } |
| // handled here in both cases, so the stand-alone |
| // embed pass must not record it a second time |
| embedInObjectSet.add(tmpE); |
| } |
| } |
| } |
| |
| } |
| |
| } |
| } |
| |
| embed_elements = getElementsArray(target, "embed"); |
| for (int i = 0; i < embed_elements.length; i++) { |
| Element tmpE = embed_elements[i]; |
| // MIME type is compared ignoring case, as in the nested embed check above |
| if (!embedInObjectSet.contains(tmpE) && FLASH_TYPE.equalsIgnoreCase(tmpE.getAttribute("type"))) { |
| // TODO check PLUGINSPAGE |
| // TODO get width hight align ... loop quality... |
| String src = tmpE.getAttribute("src"); |
| if (src != null && src.length() > 0) { |
| pageData.addFlashData(new FlashData(tmpE, src, false)); |
| } |
| } |
| } |
| |
| if (PERFORMANCE_DEBUG) |
| System.out.println("process object\t" + (new Date()).getTime()); |
| |
| // headings h1-h6 together with their text alternative |
| NodeList headingsNL = xpathService.evalForNodeList(EXP2, target); |
| length = headingsNL.getLength(); |
| Vector<HeadingsData> tmpV2 = new Vector<HeadingsData>(); |
| headings = new Element[length]; |
| for (int i = 0; i < length; i++) { |
| Element tmpE = (Element) headingsNL.item(i); |
| headings[i] = tmpE; |
| tmpV2.add(new HeadingsData(tmpE, getTextAltDescendant(tmpE))); |
| } |
| pageData.setHeadingsData(tmpV2); |
| |
| if (PERFORMANCE_DEBUG) |
| System.out.println("process headins\t" + (new Date()).getTime()); |
| |
| // relies on the aWithHref_* arrays filled above |
| collectScriptElements(); |
| if (PERFORMANCE_DEBUG) |
| System.out.println("collectScriptElements\t" + (new Date()).getTime()); |
| } |
| |
| /** |
| * Collects all descendant elements of the document that have the given tag |
| * name into an array, in the order reported by |
| * {@link Document#getElementsByTagName(String)}. |
| * |
| * @param target |
| * document to search |
| * @param tagName |
| * tag name to look for |
| * @return array of matching elements (empty if there is no match) |
| */ |
| private Element[] getElementsArray(Document target, String tagName) { |
| NodeList matches = target.getElementsByTagName(tagName); |
| Element[] elements = new Element[matches.getLength()]; |
| for (int idx = 0; idx < elements.length; idx++) { |
| elements[idx] = (Element) matches.item(idx); |
| } |
| return elements; |
| } |
| |
| /** |
| * Utility function similar to getElementsByTagName() that returns a List |
| * instance instead of a NodeList instance. |
| * |
| * @param el |
| * an Element or Document instance |
| * @param tagName |
| * the name of element which you want look for |
| * @param tagNames |
| * optional list of element names which you want look for |
| * @return {@link List} of elements with given tag name that are descendants |
| * of the node. |
| */ |
| // for new JIS |
| public List<Element> getElementsList(Node node, String tagName, String... tagNames) { |
| List<Element> nodes = new ArrayList<Element>(); |
| NodeList nl = null; |
| if (node instanceof Document) |
| nl = ((Document) node).getElementsByTagName(tagName); |
| else if (node instanceof Element) |
| nl = ((Element) node).getElementsByTagName(tagName); |
| for (int i = 0; i < nl.getLength(); i++) { |
| nodes.add((Element) nl.item(i)); |
| } |
| |
| // variable argument |
| if (tagNames.length > 0) { |
| for (int i = 0; i < tagNames.length; i++) { |
| nodes.addAll(getElementsList(node, tagNames[i])); |
| } |
| } |
| return nodes; |
| } |
| |
| /** |
| * Evaluates the XPath expression against the document and returns the |
| * resulting nodes as an array of {@link Element}. |
| * |
| * @param target |
| * document used as evaluation context |
| * @param xpath |
| * XPath expression that selects elements |
| * @return array of selected elements (empty if nothing is selected) |
| */ |
| private Element[] getElementsArrayByXPath(Document target, String xpath) { |
| NodeList selected = xpathService.evalPathForNodeList(xpath, target); |
| Element[] elements = new Element[selected.getLength()]; |
| for (int idx = 0; idx < elements.length; idx++) { |
| elements[idx] = (Element) selected.item(idx); |
| } |
| return elements; |
| } |
| |
| /** |
| * Evaluates the XPath expression against the document and returns the |
| * resulting nodes as a new, modifiable {@link List} of {@link Element}. |
| * |
| * @param target |
| * document used as evaluation context |
| * @param xpath |
| * XPath expression that selects elements |
| * @return list of selected elements (empty if nothing is selected) |
| */ |
| private List<Element> getElementsListByXPath(Document target, String xpath) { |
| NodeList selected = xpathService.evalPathForNodeList(xpath, target); |
| int count = selected.getLength(); |
| List<Element> result = new ArrayList<Element>(count); |
| int idx = 0; |
| while (idx < count) { |
| result.add((Element) selected.item(idx)); |
| idx++; |
| } |
| return result; |
| } |
| |
| @SuppressWarnings("nls") |
| private void collectScriptElements() { |
| script_elements = getElementsArray(target, "script"); |
| |
| javascript_elements = getElementsArrayByXPath(target, "//script[@type=\"text/javascript\"]"); |
| |
| // allEventElements = getElementsArrayByXPath( |
| // target, |
| // "//*[@onclick or @ondblclick or @onmouseup or @onmousedown or |
| // @onmouseover or @onmouseout or @onmousemove or |
| // @onkeydown or @onkeyup or @onkeypress or @onload or @onunload or |
| // @onabort or @onerror or @onresize or @onmove |
| // or @ondragdrop or @onfocus or @onblur or @onselect]"); |
| |
| eventMouseButtonElements = getElementsArrayByXPath(target, |
| "//*[@onclick or @ondblclick or @onmouseup or @onmousedown]"); |
| eventMouseFocusElements = getElementsArrayByXPath(target, "//*[@onmouseover or @onmouseout or @onmousemove]"); |
| eventOnKeyElements = getElementsArrayByXPath(target, "//*[@onkeydown or @onkeyup or @onkeypress]"); |
| eventLoadElements = getElementsArrayByXPath(target, "//*[@onload or @onunload or @onabort or @onerror]"); |
| eventWindowElements = getElementsArrayByXPath(target, "//*[@onresize or @onmove or @ondragdrop]"); |
| eventFocusElements = getElementsArrayByXPath(target, "//*[@onfocus or @onblur or @onselect]"); |
| |
| Vector<Element> tmpV1 = new Vector<Element>(); |
| Vector<String> tmpV2 = new Vector<String>(); |
| Vector<String> tmpV3 = new Vector<String>(); |
| for (int i = 0; i < aWithHref_hrefs.length; i++) { |
| if (aWithHref_hrefs[i].startsWith("javascript:")) { |
| tmpV1.add(aWithHref_elements[i]); |
| tmpV2.add(aWithHref_hrefs[i]); |
| tmpV3.add(aWithHref_strings[i]); |
| } |
| } |
| |
| int size = tmpV1.size(); |
| javascriptHref_elements = new Element[size]; |
| javascriptHref_hrefs = new String[size]; |
| javascriptHref_strings = new String[size]; |
| tmpV1.toArray(javascriptHref_elements); |
| tmpV2.toArray(javascriptHref_hrefs); |
| tmpV3.toArray(javascriptHref_strings); |
| |
| int javascriptNum = javascript_elements.length + eventFocusElements.length + eventLoadElements.length |
| + eventMouseButtonElements.length + eventMouseFocusElements.length + eventOnKeyElements.length |
| + eventWindowElements.length + javascriptHref_hrefs.length; |
| |
| hasJavascript = (javascriptNum > 0); |
| pageData.setHasJavascript(hasJavascript); |
| } |
| |
| // private void calcDomDifference() { |
| // |
| // if (EvaluationUtil.isOriginalDOM()) { |
| // // target = orig DOM |
| // if (isLiveDom || null == liveDom) { |
| // // parse error |
| // return; |
| // } |
| // |
| // TreeSet<String> existSet = new TreeSet<String>( |
| // Arrays.asList(aWithHref_hrefs)); |
| // // trim()? |
| // |
| // for (String href : aWithHref_hrefs) { |
| // if (!href.startsWith("http://") && !href.startsWith("https://")) { |
| // try { |
| // existSet.add(new URL(baseUrl, href).toString()); |
| // // System.out.println(href +" : "+new |
| // // URL(baseUrl,href)); |
| // } catch (MalformedURLException e) { |
| // } |
| // } |
| // } |
| // |
| // /* |
| // * NodeList ieNL = xpathService.evalForNodeList(EXP1, liveDom); int |
| // * size = ieNL.getLength(); |
| // */ |
| // |
| // NodeList ieNL = liveDom.getElementsByTagName("a"); |
| // int size = ieNL.getLength(); |
| // |
| // for (int i = 0; i < size; i++) { |
| // Element tmpE = (Element) ieNL.item(i); |
| // if (!tmpE.hasAttribute(ATTR_HREF)) { |
| // continue; |
| // } |
| // String tmpS = tmpE.getAttribute(ATTR_HREF); |
| // if (!existSet.contains(tmpS)) { |
| // // System.out.println("ie:"+tmpS); |
| // notExistHrefSet.add(tmpS); |
| // } |
| // } |
| // } else { |
| // // target = IE DOM |
| // NodeList orgNL = xpathService.evalForNodeList(EXP1, srcDom); |
| // int size = orgNL.getLength(); |
| // TreeSet<String> existSet = new TreeSet<String>(); |
| // for (int i = 0; i < size; i++) { |
| // existSet.add(((Element) orgNL.item(i)).getAttribute(ATTR_HREF)); |
| // // System.out.println("Src:"+((Element) |
| // // orgNL.item(i)).getAttribute(ATTR_HREF)); |
| // } |
| // |
| // size = aWithHref_hrefs.length; |
| // for (int i = 0; i < size; i++) { |
| // if (!existSet.contains(aWithHref_hrefs[i])) { |
| // notExistHrefSet.add(aWithHref_hrefs[i]); |
| // } |
| // // System.out.println("IE:"+aWithHref_hrefs[i]); |
| // } |
| // |
| // } |
| // |
| // } |
| |
| /** |
| * Checks whether the table has at most one row, or has no row that contains |
| * more than one cell (th and td descendants counted together). |
| * |
| * @param el |
| * table element to check |
| * @return <code>true</code> if the table has at most one tr descendant or |
| * none of its rows has more than one th/td descendant |
| */ |
| private boolean is1Row1ColTable(Element el) { |
| NodeList rows = el.getElementsByTagName("tr"); //$NON-NLS-1$ |
| int rowCount = rows.getLength(); |
| if (rowCount <= 1) { |
| return true; |
| } |
| for (int r = 0; r < rowCount; r++) { |
| Element row = (Element) rows.item(r); |
| int headerCells = row.getElementsByTagName("th").getLength(); //$NON-NLS-1$ |
| int dataCells = row.getElementsByTagName("td").getLength(); //$NON-NLS-1$ |
| if (headerCells + dataCells > 1) { |
| // a row with several cells: the table has more than one column |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Decides whether a table is a data table or a layout table. A table is |
| * treated as data table when it contains no form control, has at least one |
| * td descendant, and every td and th descendant passes |
| * <code>isDataCell</code>. |
| * |
| * @param el |
| * table element to check |
| * @return <code>true</code> for a data table, <code>false</code> for a |
| * layout table |
| */ |
| private boolean isDataTable(Element el) { |
| if (hasFormControl(el)) { |
| return false; |
| } |
| |
| NodeList dataCells = el.getElementsByTagName("td"); //$NON-NLS-1$ |
| int dataCellCount = dataCells.getLength(); |
| if (dataCellCount == 0) { |
| return false; |
| } |
| for (int c = 0; c < dataCellCount; c++) { |
| if (!isDataCell((Element) dataCells.item(c))) { |
| return false; |
| } |
| } |
| |
| NodeList headerCells = el.getElementsByTagName("th"); //$NON-NLS-1$ |
| int headerCellCount = headerCells.getLength(); |
| for (int c = 0; c < headerCellCount; c++) { |
| if (!isDataCell((Element) headerCells.item(c))) { |
| return false; |
| } |
| } // image? |
| |
| return true; |
| } |
| |
| private boolean hasFormControl(Element formEl) { |
| NodeList nl = formEl.getElementsByTagName("form"); //$NON-NLS-1$ |
| if (nl.getLength() > 0) |
| return true; |
| nl = formEl.getElementsByTagName("input"); //$NON-NLS-1$ |
| if (nl.getLength() > 0) |
| return true; |
| nl = formEl.getElementsByTagName("select"); //$NON-NLS-1$ |
| if (nl.getLength() > 0) |
| return true; |
| nl = formEl.getElementsByTagName("textarea"); //$NON-NLS-1$ |
| if (nl.getLength() > 0) |
| return true; |
| nl = formEl.getElementsByTagName("html:text"); //$NON-NLS-1$ |
| if (nl.getLength() > 0) |
| return true; |
| nl = formEl.getElementsByTagName("html:radio"); //$NON-NLS-1$ |
| if (nl.getLength() > 0) |
| return true; |
| return false; |
| } |
| |
| /** |
| * Checks whether a table cell looks like a data cell: its text alternative |
| * is not longer than <code>LONG_TEXT_NUM</code> characters and it contains |
| * at most 10 img descendants. |
| * |
| * @param el |
| * td or th element to check |
| * @return <code>true</code> if both limits are respected |
| */ |
| private boolean isDataCell(Element el) { |
| // the text check comes first; images are only counted when it passes |
| boolean shortText = getTextAltDescendant(el).length() <= LONG_TEXT_NUM; |
| return shortText && el.getElementsByTagName("img").getLength() <= 10; //$NON-NLS-1$ |
| } |
| |
| /** |
| * Get heading level as int |
| * |
| * @param strNodeName |
| * target Heading tag name (H1, H2,..., H6), compared ignoring |
| * case; may be <code>null</code> |
| * @return heading level as int (1, 2,..., 6), or 0 if the name is |
| * <code>null</code> or not a heading tag name |
| */ |
| public int getHeadingLevel(String strNodeName) { |
| for (int i = 0; i < HEADING_LEVEL.length; i++) { |
| // constant on the left keeps the comparison null-safe |
| if (HEADING_LEVEL[i].equalsIgnoreCase(strNodeName)) { |
| // HEADING_LEVEL[i] is "h" + (i + 1) |
| return i + 1; |
| } |
| } |
| return 0; |
| } |
| |
| /** |
| * Returns every element of the target document that carries an accesskey |
| * attribute. The list is built on first use and then reused. |
| * |
| * @return list of elements having an accesskey attribute |
| */ |
| public List<Element> getAccessKeyElements() { |
| if (accessKeyList != null) { |
| return accessKeyList; |
| } |
| accessKeyList = getElementsListByXPath(target, "//*[@accesskey]"); |
| return accessKeyList; |
| } |
| |
| /** |
| * Returns every element of the target document that carries a style |
| * attribute. The list is built on first use and then reused. |
| * |
| * @return list of elements having a style attribute |
| */ |
| public List<Element> getElementsWithStyle() { |
| if (styleList != null) { |
| return styleList; |
| } |
| styleList = getElementsListByXPath(target, "//*[@style]"); |
| return styleList; |
| } |
| |
| /** |
| * Returns every element of the target document that carries an id |
| * attribute. The list is built on first use and then reused. |
| * |
| * @return list of elements having an id attribute |
| */ |
| public List<Element> getElementsWithId() { |
| if (idElementList != null) { |
| return idElementList; |
| } |
| idElementList = getElementsListByXPath(target, "//*[@id]"); |
| return idElementList; |
| } |
| |
| /** |
| * Returns all style elements of the target document. The list is built on |
| * first use and then reused. |
| * |
| * @return list of style elements |
| */ |
| public List<Element> getStyleElements() { |
| if (styleElementList != null) { |
| return styleElementList; |
| } |
| // copy into a list owned by this instance |
| styleElementList = new ArrayList<Element>(getElementsList(target, "style")); |
| return styleElementList; |
| } |
| |
| /** |
| * Returns all applet elements of the target document. The list is built on |
| * first use and then reused. |
| * |
| * @return list of applet elements |
| */ |
| public List<Element> getAppletElements() { |
| if (appletList != null) { |
| return appletList; |
| } |
| appletList = getElementsList(target, "applet"); |
| return appletList; |
| } |
| |
| /** |
| * Returns all area elements of the target document. The list is built on |
| * first use and then reused. |
| * |
| * @return list of area elements |
| */ |
| public List<Element> getAreaElements() { |
| if (areaList != null) { |
| return areaList; |
| } |
| // copy into a list owned by this instance |
| areaList = new ArrayList<Element>(getElementsList(target, "area")); |
| return areaList; |
| } |
| |
| /** |
| * Returns the anchor elements with an href attribute that were collected |
| * from the target document by the constructor. The internal array is |
| * returned without copying. |
| * |
| * @return array of anchor {@link Element} having an href attribute |
| */ |
| public Element[] getAWithHref_elements() { |
| return this.aWithHref_elements; |
| } |
| |
| /** |
| * Returns the href attribute values of the collected anchor elements. |
| * Entry <code>i</code> belongs to element <code>i</code> of |
| * {@link #getAWithHref_elements()}. |
| * |
| * @return href values of the anchors |
| */ |
| public String[] getAWithHref_hrefs() { |
| return this.aWithHref_hrefs; |
| } |
| |
| /** |
| * Returns the text alternatives of the collected anchor elements. Entry |
| * <code>i</code> belongs to element <code>i</code> of |
| * {@link #getAWithHref_elements()}. |
| * |
| * @return text descendant of each anchor {@link Element} that has an href |
| * attribute |
| * |
| * @see HtmlTagUtil#getTextAltDescendant(Node) |
| */ |
| public String[] getAWithHref_strings() { |
| return this.aWithHref_strings; |
| } |
| |
| /** |
| * Returns the base URL parsed from the page URL given to the constructor. |
| * |
| * @return base URL, or <code>null</code> if the page URL could not be |
| * parsed |
| */ |
| public URL getBaseUrl() { |
| return this.baseUrl; |
| } |
| |
| /** |
| * Returns the body elements collected from the target document by the |
| * constructor. The internal array is returned without copying. |
| * |
| * @return array of body {@link Element} |
| */ |
| public Element[] getBody_elements() { |
| return this.body_elements; |
| } |
| |
| /** |
| * Returns the tables without nested table that have at most one row or no |
| * row with more than one cell. |
| * |
| * @return table {@link Element} array classified as 1 row / 1 column |
| */ |
| public Element[] getBottom_1row1col_tables() { |
| return this.bottom_1row1col_tables; |
| } |
| |
| /** |
| * Returns the tables without nested table that were classified as data |
| * tables (and not as 1 row / 1 column tables). |
| * |
| * @return bottom data tables in the nested table |
| */ |
| public Element[] getBottom_data_tables() { |
| return this.bottom_data_tables; |
| } |
| |
| /** |
| * Returns the tables without nested table that were classified neither as |
| * 1 row / 1 column table nor as data table. |
| * |
| * @return bottom tables (not data table) in the nested table |
| */ |
| public Element[] getBottom_notdata_tables() { |
| return this.bottom_notdata_tables; |
| } |
| |
| /** |
| * Returns the page URL exactly as it was passed to the constructor. |
| * |
| * @return URL of the page |
| */ |
| public String getUrl() { |
| return this.curUrl; |
| } |
| |
| /** |
| * Returns the map between {@link Node} and ACTF_ID that was passed to the |
| * constructor. The map itself is returned, not a copy. |
| * |
| * @return map between {@link Node} and ACTF_ID |
| */ |
| public Map<Node, Integer> getDocument2IdMap() { |
| return this.document2IdMap; |
| } |
| |
| /** |
| * Returns the frame elements collected from the target document by the |
| * constructor. |
| * |
| * @return array of frame {@link Element} |
| */ |
| public Element[] getFrame_elements() { |
| return this.frame_elements; |
| } |
| |
| /** |
| * Tells whether the target document contains at least one anchor with an |
| * href attribute, i.e. {@code <a href="...">}. |
| * |
| * @return true if the page has anchor with href attribute |
| */ |
| public boolean isHasAwithHref() { |
| return this.hasAwithHref; |
| } |
| |
| /** |
| * Tells whether JavaScript usage was detected in the page: a script element |
| * typed <code>text/javascript</code>, an event handler attribute, or an |
| * anchor whose href uses the <code>javascript:</code> scheme. |
| * |
| * @return true if the page uses JavaScript |
| */ |
| public boolean isHasJavascript() { |
| return this.hasJavascript; |
| } |
| |
| /** |
| * Returns the heading elements (h1 to h6) collected from the target |
| * document by the constructor. |
| * |
| * @return array of all heading {@link Element} |
| */ |
| public Element[] getHeadings() { |
| return this.headings; |
| } |
| |
| /** |
| * Returns the live {@link Document} that was passed to the constructor. |
| * |
| * @return live DOM |
| */ |
| public Document getLiveDom() { |
| return this.liveDom; |
| } |
| |
| /** |
| * Returns the iframe elements collected from the target document by the |
| * constructor. |
| * |
| * @return array of all iframe {@link Element} |
| */ |
| public Element[] getIframe_elements() { |
| return this.iframe_elements; |
| } |
| |
| /** |
| * Returns the img elements collected from the target document by the |
| * constructor. |
| * |
| * @return array of all img elements as {@link HTMLImageElement} |
| */ |
| public HTMLImageElement[] getImg_elements() { |
| return this.img_elements; |
| } |
| |
| /** |
| * Get all image button (input elements whose type is "image"). The type |
| * value is compared ignoring case, so <code>type="IMAGE"</code> is found as |
| * well. The list is built on first use and then reused. |
| * |
| * @return list of input elements whose type is "image" |
| */ |
| // for new JIS |
| public List<Element> getImageButtons() { |
| if (imageButtonList != null) |
| return imageButtonList; |
| |
| imageButtonList = new ArrayList<Element>(); |
| for (Element input : getElementsList(target, "input")) { |
| // the keywords of the type attribute are case-insensitive in HTML |
| if ("image".equalsIgnoreCase(input.getAttribute("type"))) |
| imageButtonList.add(input); |
| } |
| return imageButtonList; |
| } |
| |
| /** |
| * Get all text-based button. |
| * |
| * @return |
| */ |
| // TODO treat button elements... |
| // |
| public List<Element> getTextButtons() { |
| if (textButtonList == null) { |
| textButtonList = new ArrayList<Element>(); |
| for (Element input : getElementsList(target, "input")) { |
| if (input.getAttribute("type").matches("button|submit|reset")) |
| textButtonList.add(input); |
| } |
| } |
| return textButtonList; |
| } |
| |
| /** |
| * Returns the invalid link ratio of the page (number of target URLs under |
| * invisible links divided by the number of all target URLs). |
| * <p> |
| * NOTE(review): the constructor sets this value to 0 and no later update is |
| * visible in this part of the file; confirm whether it is still computed. |
| * |
| * @return invalid link ratio |
| */ |
| public double getInvalidLinkRatio() { |
| return this.invalidLinkRatio; |
| } |
| |
| /** |
| * Get number of invisible {@link Element} |
| * |
| * @return number of invisible elements; this implementation always returns |
| * 0 |
| * @deprecated the invisible element count is no longer stored, so the |
| * result is always 0 |
| */ |
| @Deprecated |
| public int getInvisibleElementCount() { |
| // return invisibleElementCount; |
| return 0; |
| } |
| |
| /** |
| * Get array of link target urls of invisible anchor {@link Element} |
| * |
| * @return array of link target urls of invisible anchor elements; this |
| * implementation always returns a new empty array |
| * @deprecated the invisible link strings are no longer stored, so the |
| * result is always empty |
| */ |
| @Deprecated |
| public String[] getInvisibleLinkStrings() { |
| // return invisibleLinkStrings; |
| return new String[0]; |
| } |
| |
| /** |
| * Tells whether the target page uses DBCS, as stated by the flag passed to |
| * the constructor. |
| * |
| * @return true if the target page uses DBCS |
| */ |
| public boolean isDBCS() { |
| return this.isDBCS; |
| } |
| |
| /** |
| * Tells whether the target DOM is the live DOM, as stated by the flag |
| * passed to the constructor. |
| * |
| * @return true if the target DOM is live DOM |
| */ |
| public boolean isLiveDom() { |
| return this.isLiveDom; |
| } |
| |
| /** |
| * Get Set of target URL that are not included in source DOM but exist in |
| * live DOM. (might be inaccessible without JavaScript) |
| * |
| * @return set of such target URLs; this implementation always returns a |
| * new empty set |
| * @deprecated the difference between source DOM and live DOM is no longer |
| * calculated, so the result is always empty |
| */ |
| @Deprecated |
| public HashSet<String> getNotExistHrefSet() { |
| // return notExistHrefSet; |
| return new HashSet<String>(); |
| } |
| |
| /** |
| * Returns the object elements collected from the target document by the |
| * constructor. |
| * |
| * @return array of all object {@link Element} |
| */ |
| public Element[] getObject_elements() { |
| return this.object_elements; |
| } |
| |
| /** |
| * Get source DOM |
| * |
| * @return source DOM |
| */ |
| public Document getSrcDom() { |
| return srcDom; |
| } |
| |
| /** |
| * Get target page information as {@link PageData} |
| * |
| * @return target page information |
| */ |
| public PageData getPageData() { |
| return pageData; |
| } |
| |
| /** |
| * Get array of parent table elements of nested tables. |
| * |
| * @return array of parent table elements of nested tables |
| */ |
| public Element[] getParent_table_elements() { |
| return parent_table_elements; |
| } |
| |
| /** |
| * Get visualization result {@link Document} |
| * |
| * @return result {@link Document} |
| */ |
| public Document getResult() { |
| return resultDoc; |
| } |
| |
| /** |
| * Get all table {@link Element} in the page |
| * |
| * @return all table elements |
| */ |
| public Element[] getTable_elements() { |
| return table_elements; |
| } |
| |
| /** |
| * Get target {@link Document} |
| * |
| * @return target {@link Document} |
| */ |
| public Document getTarget() { |
| return target; |
| } |
| |
| /** |
| * Get all embed {@link Element} in the page |
| * |
| * @return all embed elements |
| */ |
| public Element[] getEmbed_elements() { |
| return embed_elements; |
| } |
| |
| /** |
| * Get all |
| * |
| * <pre> |
| * <a href="javascript:..." |
| * </pre> |
| * |
| * elements |
| * |
| * @return all anchor elements for JavaScript |
| */ |
| public Element[] getJavascriptHref_elements() { |
| return javascriptHref_elements; |
| } |
| |
| /** |
| * Get all href Strings of |
| * |
| * <pre> |
| * <a href="javascript:..." |
| * </pre> |
| * |
| * elements |
| * |
| * @return all target arguments of anchor elements for JavaScript |
| */ |
| public String[] getJavascriptHref_hrefs() { |
| return javascriptHref_hrefs; |
| } |
| |
| /** |
| * Get text descendant of anchor {@link Element} for JavaScript |
| * |
| * @return text descendant of anchor {@link Element} for JavaScript |
| * |
| * @see HtmlTagUtil#getTextAltDescendant(Node) |
| */ |
| public String[] getJavascriptHref_strings() { |
| return javascriptHref_strings; |
| } |
| |
| /** |
| * Get array of {@link Element} that has event handler (onload, onunload, |
| * onabort or onerror) |
| * |
| * @return array of elements that have event handler for onload, onunload, |
| * onabort or onerror |
| */ |
| public Element[] getEventLoadElements() { |
| return eventLoadElements; |
| } |
| |
| /** |
| * Get array of {@link Element} that has mouse event handler (onclick, |
| * ondblclick, onmouseup or onmousedown) |
| * |
| * @return array of elements that have mouse event handler |
| */ |
| public Element[] getEventMouseButtonElements() { |
| return eventMouseButtonElements; |
| } |
| |
| /** |
| * Get array of {@link Element} that has onmouse event handler (onmouseover, |
| * onmouseout or onmousemove) |
| * |
| * @return array of elements that have onmouse event handler |
| */ |
| public Element[] getEventOnMouseElements() { |
| return eventMouseFocusElements; |
| } |
| |
| /** |
| * Get array of {@link Element} that has onkey event handler (onkeydown, |
| * onkeyup or onkeypress) |
| * |
| * @return array of elements that have onkey event handler |
| */ |
| public Element[] getEventOnKeyElements() { |
| return eventOnKeyElements; |
| } |
| |
| /** |
| * Get all script {@link Element} |
| * |
| * @return all script elements |
| */ |
| public Element[] getScript_elements() { |
| return script_elements; |
| } |
| |
| /** |
| * Get array of {@link Element} that has window event handler (onresize, |
| * onmove or ondragdrop) |
| * |
| * @return array of elements that have window event handler |
| */ |
| public Element[] getEventWindowElements() { |
| return eventWindowElements; |
| } |
| |
| /** |
| * Get array of {@link Element} that has focus event handler (onfocus, |
| * onblur or onselect) |
| * |
| * @return array of elements that have focus event handler |
| */ |
| public Element[] getEventFocusElements() { |
| return eventFocusElements; |
| } |
| |
| /** |
| * Get evaluation target HTML file |
| * |
| * @return evaluation target HTML file |
| */ |
| public File getTargetFile() { |
| return targetFile; |
| } |
| |
| /** |
| * Set evaluation target HTML file |
| * |
| */ |
| public void setTargetFile(File targetFile) { |
| this.targetFile = targetFile; |
| } |
| |
| /** |
| * Get source HTML file |
| * |
| * @return source HTML file |
| */ |
| public File getSrcFile() { |
| return srcFile; |
| } |
| |
| /** |
| * Set source HTML file |
| * |
| */ |
| public void setSrcFile(File srcFile) { |
| this.srcFile = srcFile; |
| } |
| |
| /** |
| * Get HTML file that represents live DOM |
| * |
| * @return live DOM as HTML file |
| */ |
| public File getLiveFile() { |
| return liveFile; |
| } |
| |
| /** |
| * Set HTML file that represents live DOM |
| * |
| */ |
| public void setLiveFile(File liveFile) { |
| this.liveFile = liveFile; |
| } |
| |
| /** |
| * Append an error icon to an element in the blind view. If it is already |
| * appended or it failed to append, returns false. |
| * |
| * @param pitem |
| * @param original |
| * @return |
| */ |
| public boolean appendErrorIcon(IProblemItem pitem, Element original) { |
| try { |
| String id = document2IdMap.get(original).toString(); |
| Element tmpE = resultDoc.getElementById("id" + id); |
| if (tmpE != null && "area".equalsIgnoreCase(tmpE.getTagName())) { |
| tmpE = resultDoc.getElementById("id" + id + "-span"); |
| } |
| if (tmpE != null && tmpE.getElementsByTagName("img").getLength() == 0) { |
| Element errorImg = resultDoc.createElement("img"); |
| errorImg.setAttribute(ATTR_ALT, "error icon"); |
| errorImg.setAttribute(ATTR_SRC, "img/exclawhite21.gif"); |
| errorImg.setAttribute(ATTR_TITLE, pitem.getDescription()); |
| |
| String comment = pitem.getDescription(); |
| StringBuffer comment_sb = new StringBuffer(); |
| // if (871 == curInfo.getId()) { |
| for (int x = 0; x < comment.length(); x++) { |
| if (comment.charAt(x) == '\"') { |
| comment_sb.append("\\"); |
| } |
| if (comment.charAt(x) == '\'') { |
| comment_sb.append('\\'); |
| } |
| comment_sb.append(comment.charAt(x)); |
| } |
| |
| String tmpS = comment_sb.toString().replaceAll("\n", "").replaceAll("\r", ""); |
| |
| errorImg.setAttribute("onmouseover", "updateBaloon2(\"id" + id + "\",\"" + tmpS + "\");"); |
| tmpE.appendChild(errorImg); |
| return true; |
| } else { |
| return false; |
| } |
| } catch (Exception e) { |
| return false; |
| } |
| } |
| } |