blob: 1cb61c8986b148878fd1d71df485538f17d895e3 [file] [log] [blame]
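/*****************************************************************************
 * Copyright (c) 2010 Atos Origin.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *  Caroline Bourdeu d'Aguerre (Atos Origin) - Initial API and implementation
 *  Anne Haugommard (Atos) - improvement in clean process
 *****************************************************************************/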
package org.eclipse.gendoc.documents;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.namespace.NamespaceContext;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.eclipse.core.runtime.IStatus;
import org.eclipse.gendoc.document.parser.documents.Document;
import org.eclipse.gendoc.document.parser.documents.helper.XMLHelper;
import org.eclipse.gendoc.tags.parsers.TagParserConfig;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Document service for XML document types (e.g. ODT and DOCX are based on XML documents).
 */
public abstract class XMLDocumentService implements IDocumentService
* Character that is the start of a tag in XML language (<code>&lt;</code>)
public static String XML_TAG_START = "<";
* Character that is the end of a tag in XML language (<code>&gt;</code>)
public static String XML_TAG_END = ">";
public static String XML_AUTOCLOSING_TAG_END = "/>";
private Transformer trans;
private final TransformerFactory transFactory = TransformerFactory.newInstance();
private final Pattern pattern = Pattern.compile("<?.*?>");
private Document document;
* Regex that matched a tag &lt;myTag&gt;
private final Pattern patternTag = Pattern.compile("<[^<>]+>");
* Regex that matched a closing tag &lt;/myTag&gt;
private final Pattern patternTagClose = Pattern.compile("</[^<>]+>");
* Regex that matched a single tag <myTag/>
private final Pattern patternTagSingle = Pattern.compile("<[^<>]+/>");
* Regex that not contains &lt;
private final String NO_LT_OR_GT_INSIDE = "(?:(?!&lt;)(?!&gt;).)*";
* Line feed special character
public static final String LINE_FEED="&#xA;";
* Carriage return special character
public static final String CARRIAGE_RETURN="&#xD;";
* Tabulation special character
public static final String TABULATION="&#x9;";
/**
 * Create a new XML Document service with no document attached.
 */
public XMLDocumentService() {
    // NOTE(review): the constructor body was missing from this copy. init() is restored because
    // it is the only place where the shared transformer is created - confirm against upstream.
    init();
}
/**
 * Create a new XML Document service bound to the given document.
 *
 * @param document the document this service works on
 */
public XMLDocumentService(Document document) {
    // NOTE(review): init() call restored (it was missing from this copy) so that the
    // transformer exists before any conversion method is used - confirm against upstream.
    init();
    this.document = document;
}
private void init()
ILogger logger = GendocServices.getDefault().getService(ILogger.class);
// Initialize transformer
trans = transFactory.newTransformer();
catch (TransformerConfigurationException e)
logger.log(e.getMessageAndLocation(), IStatus.ERROR);
* Get the nameSpaceContext of this document type
* @return the nameSpaceContext
public abstract NamespaceContext getNameSpaceContext();
* Get the name space URL
* @return the URL of the specific name space
public abstract String getNamingSpaceURL();
public void clear()
// do nothing
* Transform the given text in a NEW Document Node
* @param text to transform
* @return the created node
public Node asNode(String text) throws InvalidContentException
DOMResult outputTarget = null;
outputTarget = new DOMResult();
trans.transform(new StreamSource(new StringReader(text)), outputTarget);
catch (TransformerException e)
throw new InvalidContentException(text, e);
return outputTarget.getNode();
* Transform the given node as text.
* @param nodeBegin the node begin
* @return the string
* @throws InvalidContentException
* @throws TransformerFactoryConfigurationError the transformer factory configuration error
public String asText(Node nodeBegin) throws InvalidContentException
StringWriter stringOut = new StringWriter();
BufferedWriter writer = new BufferedWriter(stringOut);
String xml = "";
trans.transform(new DOMSource(nodeBegin), new StreamResult(writer));
xml = pattern.matcher(stringOut.toString()).replaceFirst("");
xml = Pattern.compile(" " + getNamingSpaceURL()).matcher(xml).replaceAll("");
catch (TransformerException e)
throw new InvalidContentException("Node " + nodeBegin.getLocalName() + " can not be transform", e);
finally {
try {
} catch (IOException e) {
return xml;
/**
 * Text style provided by the concrete document type.
 * NOTE(review): no caller is visible in this file; the exact meaning of the returned value
 * should be confirmed against the implementations.
 *
 * @return the text style
 */
public abstract String getTextStyle();
/**
 * Tells whether the given label satisfies the "para" test of the concrete document type.
 * NOTE(review): presumably "is a paragraph tag" - confirm against the implementations.
 *
 * @param label the label to test
 * @return the result of the test
 */
public abstract boolean isPara(String label);
public String addNamingSpaces(String nodes)
String firstNodeName = null;
String newNodes = nodes;
Matcher m = Pattern.compile("<[^ >]*( |>)").matcher(nodes);
if (m.find())
firstNodeName ="<| |>", "");
newNodes = newNodes.replaceAll("<" + firstNodeName + " ", "<" + firstNodeName + " " + getNamingSpaceURL() + " ");
newNodes = newNodes.replaceAll("<" + firstNodeName + ">", "<" + firstNodeName + " " + getNamingSpaceURL() + ">");
for (String textNode : getTextTagLabels())
if (firstNodeName != null && !firstNodeName.equals(textNode))
newNodes = newNodes.replaceAll("<" + textNode + " ", "<" + textNode + " " + getNamingSpaceURL() + " ");
newNodes = newNodes.replaceAll("<" + textNode + ">", "<" + textNode + " " + getNamingSpaceURL() + ">");
return newNodes;
public String addNamingSpaces(String nodes, String label)
String newNodes = nodes;
newNodes = newNodes.replaceAll("<" + label + " ", "<" + label + " " + getNamingSpaceURL() + " ");
newNodes = newNodes.replaceAll("<" + label + ">", "<" + label + " " + getNamingSpaceURL() + ">");
return newNodes;
/**
 * Labels of the tags that addNamingSpaces(String) completes with the name space declaration,
 * in addition to the first tag of the text.
 *
 * @return the tag labels
 */
public abstract String[] getTextTagLabels();
* (non-Javadoc)
* @see, java.lang.String)
public Node injectNode(Node nodeToBeReplaced, String nodeContent) throws InvalidContentException
ILogger logger = GendocServices.getDefault().getService(ILogger.class);
nodeContent = addNamingSpaces(nodeContent, "document");
StringBuffer newNodeContentBuf = new StringBuffer();
newNodeContentBuf.append("<document " + getNamingSpaceURL() + ">");
newNodeContentBuf = cleanTextTag(newNodeContentBuf.toString());
// logger.log("INJECT Node :" + newNodeContent, ILogger.DEBUG);
// Transform "newNodeContent" text into a DOMResult
DOMResult outputTarget = null;
outputTarget = new DOMResult(nodeToBeReplaced);
trans.transform(new StreamSource(new StringReader(newNodeContentBuf.toString())), outputTarget);
catch (TransformerException e)
throw new InvalidContentException("Your text seems to contain special characters. Try using method 'clean' from external bundle 'commons' for the different model elements found", e);
// nodeToReplace has a new child (lastChild) containing newNodeContent content
NodeList nodes = nodeToBeReplaced.getLastChild().getChildNodes();
List<Node> inserted = new ArrayList<Node>();
for (int i = 0; i < nodes.getLength();/*
* No modification of the i value : nodes are dynamically removed from
* nodeList when inserted
inserted.add(nodeToBeReplaced.getParentNode().insertBefore(nodes.item(i), nodeToBeReplaced));
return inserted.get(inserted.size() - 1);
/**
 * Gives access to the additional resource service of the concrete document type.
 * NOTE(review): not used in the visible part of this file; see IAdditionalResourceService
 * for its contract.
 *
 * @return the additional resource service
 */
public abstract IAdditionalResourceService getAdditionalResourceService();
* The result must be "&lt;gendoc&gt;CONTENT&lt;/gendoc&gt;" or
* "&lt;context model='$incose' element='INCOSE/System' importedBundles='uml;sysml' /&gt;"
* @param currentNode
* @param tagLabels
* @throws InvalidContentException
public Node cleanTags(Node currentNode, List<String> tagLabels) throws InvalidContentException
ILogger logger = GendocServices.getDefault().getService(ILogger.class);
// 1. Find next node with starting tag character
Node firstNode = findNodeWithOneTag(currentNode);
Node next = cleanTags(currentNode, tagLabels, firstNode);
if (currentNode == null || currentNode.getParentNode() == null)
logger.log("XMLDocumentService.cleanTags : currentNode has been replaced.", ILogger.DEBUG);
currentNode = next;
Node nextStartNode;
// Do the same thing until end of tag is found.
while (next != null)
nextStartNode = findNodeWithStartTag(next, currentNode);
next = cleanTags(currentNode, tagLabels, nextStartNode);
if (currentNode == null || currentNode.getParentNode() == null)
logger.log("XMLDocumentService.cleanTags : currentNode has been replaced.", ILogger.DEBUG);
currentNode = next;
return currentNode;
public String cleanTagContent(String text, List<String> labels)
if (labels == null || labels.isEmpty())
return text;
// Get list of tag patterns
List<Pattern> patternsToClean = new ArrayList<Pattern>(labels.size() * 2);
List<Pattern> patternsToMatch = new ArrayList<Pattern>(labels.size() * 2);
for (String label : labels)
patternsToClean.add(Pattern.compile("(&lt;" + label + NO_LT_OR_GT_INSIDE + "&gt;[^&]&lt;/" + label + NO_LT_OR_GT_INSIDE + "&gt;)"));
patternsToClean.add(Pattern.compile("&lt;" + label + NO_LT_OR_GT_INSIDE + "/&gt;"));
patternsToMatch.add(Pattern.compile("(&lt;" + label + NO_LT_OR_GT_INSIDE + "&gt;|&lt;/" + label + NO_LT_OR_GT_INSIDE + "&gt;)"));
patternsToMatch.add(Pattern.compile("&lt;" + label + NO_LT_OR_GT_INSIDE + "/&gt;"));
// Get a full regex with all patterns
String patternToMatch = "";
for (Pattern p : patternsToMatch)
patternToMatch += "(" + p + ")|";
patternToMatch = patternToMatch.substring(0, patternToMatch.length() - 1);
// Clean all values contained between &lt; &gt ;
Pattern globalTagPattern = Pattern.compile("&lt;" + NO_LT_OR_GT_INSIDE + "&gt;");
StringBuffer resultBuffer = new StringBuffer();
Matcher m = globalTagPattern.matcher(text);
int index = 0;
while (m.find())
resultBuffer.append(text.substring(index, m.start()));
// Keep Original text
String originalText = text.substring(m.start(), m.end());
// Try to remove XML tags inside original text.
// Check if cleaned Tag corresponds to at least one of the patterns
// If not, keep Original value
String cleanedTag = removeXMLTag(originalText);
cleanedTag = originalText;
index = m.end();
// Clean content inside effective gendoc tags.
String result = cleanContent(resultBuffer.toString(), patternsToClean);
// Replace invalid characters
result = cleanXMLContent(result);
return result;
/**
 * Walks the content character by character and builds a new string, with one branch per
 * character family: TagParserConfig.INVALID_QUOTES, INVALID_DOUBLE_QUOTES, INVALID_SPACES,
 * and a final branch for every other character.
 *
 * NOTE(review): in this copy the body of each branch and all closing braces are missing, so
 * nothing is ever appended to the result. Presumably each branch appended the valid
 * replacement character (the last one the character itself) - restore from upstream.
 *
 * @param content text to clean
 * @return the content of the result buffer
 */
protected String cleanXMLContent(String content)
StringBuffer result = new StringBuffer();
for (int i = 0 ; i < content.length() ; i++){
char c = content.charAt(i);
// One branch per family of characters; the statements of each branch are missing (see above)
if (TagParserConfig.INVALID_QUOTES.contains(c)){
else if (TagParserConfig.INVALID_DOUBLE_QUOTES.contains(c)){
else if (TagParserConfig.INVALID_SPACES.contains(c)){
else {
return result.toString();
public String cleanContent(String text, List<Pattern> patternsToClean)
StringBuffer result = new StringBuffer();
String patterns = "";
for (Pattern p : patternsToClean)
patterns += "(" + p + ")|";
patterns = patterns.substring(0, patterns.length() - 1);
Pattern p = Pattern.compile(patterns);
Matcher m = p.matcher(text);
int index = 0;
while (m.find())
int start = m.start();
int end = m.end();
result.append(text.substring(index, start));
result.append(removeXMLTag(text.substring(start, end)));
index = end;
return result.toString();
/* (non-Javadoc)
* @see org.eclipse.gendoc.documents.IDocumentService#removeXMLTag(java.lang.String)
private String removeXMLTag(String text)
StringBuffer result = new StringBuffer();
Matcher m = patternTag.matcher(text);
int index = 0;
while (m.find())
result.append(text.substring(index, m.start()));
index = m.end();
if (index < text.length())
return result.toString();
/**
 * Document-type specific cleaning step used by cleanTags(Node, List).
 * It is called once with the node returned by findNodeWithOneTag and then repeatedly with the
 * node returned by findNodeWithStartTag, until it returns null.
 *
 * @param currentNode node from which the whole cleaning started
 * @param tagLabels labels of the tags to clean
 * @param baseNode node found to contain a tag start (may be null)
 * @return the node from which to search the next tag, or null to stop
 * @throws InvalidContentException declared for implementations that convert nodes to or from text
 */
protected abstract Node cleanTags(Node currentNode, List<String> tagLabels, Node baseNode) throws InvalidContentException;
/**
 * Checks if the given string contains one of the valid tags completely (either opening tag, or closing tag or
 * autoclosed tag)
 * <p/>
 * Ex : either <code>&lt;tagLabel ...&gt;</code> or <code>&lt;/tagLabel&gt;</code> or
 * <code>&lt;tagLabel ... /&gt;</code>
 *
 * @param toCheck string to check
 * @param tagLabels list of valid tag labels
 * @return true if string toCheck contains at least a full and valid tagLabel
 */
// Decides whether toCheck holds at least one complete tag carrying one of tagLabels.
// NOTE(review): this copy has lost every brace and several statements (flagged below);
// restore them from upstream before changing the logic.
public boolean containsFullTags(String toCheck, List<String> tagLabels)
if (toCheck == null)
return false;
// If the text after the last end-of-tag marker looks like the start of another tag,
// that tag is unfinished
int index = toCheck.lastIndexOf(TagParserConfig.SUP);
if (index > -1 && containsOneOf(tagLabels, toCheck.substring(index)))
return false;
List<String> labelsToFind = new ArrayList<String>();
boolean result = false;
for (String tagLabel : tagLabels)
// NOTE(review): the bodies of both branches are missing; presumably the first one adds
// tagLabel to labelsToFind - confirm
if (toCheck.matches(".*" + TagParserConfig.INF + "(/|)" + tagLabel + ".*"))
else if (tagLabel.startsWith(toCheck.replace(TagParserConfig.INF, "")))
if (!labelsToFind.isEmpty())
result = true;
// Replace invalid quotes by valid ones
String invalidQuoteRegex= "(";
// NOTE(review): the loop body is missing (presumably it appends each quote and a separator),
// and so may be a statement closing the group after the substring call below
for (char quote : TagParserConfig.INVALID_QUOTES){
invalidQuoteRegex=invalidQuoteRegex.substring(0, invalidQuoteRegex.length()-1);
toCheck = toCheck.replaceAll(invalidQuoteRegex, String.valueOf(TagParserConfig.VALID_QUOTE));
// The valid quote is escaped for use inside the regular expressions below
String quote ="\\"+TagParserConfig.VALID_QUOTE;
// "limited" is toCheck without its complete attribute values (="...")
String limited = toCheck.replaceAll("="+quote+"[^="+quote+"]*"+quote+"", "");
String unfinishedAttributeValue="[^=]*="+quote+"([^"+quote+"]*|([^"+quote+"]*"+quote+"[^"+quote+"]*"+quote+"[^"+quote+"]*)*)";
for (String tagLabel : labelsToFind)
// Every label must be in a finished tag, and the text must not end inside an attribute value
boolean unfinishedAttribute = limited.matches(unfinishedAttributeValue);
boolean finishedTag = limited.matches(".*" + TagParserConfig.INF + "(/|)" + tagLabel + ".*(/|)" + TagParserConfig.SUP + "[^"+TagParserConfig.SUP+"]*");
result = result && (!unfinishedAttribute)&& (finishedTag);
return result;
* Checks if String to check contains the start of one of the labels
* @param toCheck String to check
* @param labels list of labels to compare with
* @return true if at least one label start matches the string to check
protected boolean containsOneOf(List<String> labels, String toCheck)
if (toCheck == null)
return false;
if (toCheck.endsWith(TagParserConfig.INF) || toCheck.endsWith(TagParserConfig.INF + TagParserConfig.SLASH))
return true;
String stringToCheck = toCheck.substring(toCheck.indexOf(TagParserConfig.INF) + TagParserConfig.INF.length());
if (stringToCheck.startsWith(String.valueOf(TagParserConfig.SLASH)))
stringToCheck = stringToCheck.substring(String.valueOf(TagParserConfig.SLASH).length(), stringToCheck.length() - 1);
for (String label : labels)
if (label.startsWith(stringToCheck) || stringToCheck.startsWith(label) || (("/" + label).startsWith(stringToCheck)))
return true;
return false;
* Extract the text value of a node
* @param node an XML node
* @return the value contained between node tags
* @throws InvalidContentException
public String extractNodeTextValue(Node node) throws InvalidContentException
if (node == null)
return null;
// StringBuffer result = new StringBuffer();
// if(node.getNodeValue() != null){
// result.append(node.getNodeValue());
// }
// if( node.hasChildNodes()){
// for (int i=0; i<node.getChildNodes().getLength();i++) {
// result.append(asText(node.getChildNodes().item(i)));
// }
// }
String result = null;
String nodeValue = asText(node);
if (nodeValue.endsWith(XML_AUTOCLOSING_TAG_END))
result = "";
int endOfStartingTag = nodeValue.indexOf(XML_TAG_END);
int startOfEndingTag = nodeValue.lastIndexOf(XML_TAG_START);
if (endOfStartingTag == -1 || startOfEndingTag == -1 || endOfStartingTag > startOfEndingTag)
result = nodeValue;
result = nodeValue.substring(endOfStartingTag + 1, startOfEndingTag);
return result;
// String[] elements = asText(node).split(XML_TAG_START + "/|/" + XML_TAG_END + "|" + XML_TAG_START + "|" +
// for (String element : elements)
// {
// if (element != null && !element.startsWith(node.getNodeName()))
// {
// result.append(element);
// }
// }
// return result.toString();
/**
 * Find next node with a starting tag (<code>TagConstants.INF</code>)
 *
 * @param currentNode node from which to start the searching
 * @return the node found (its parent when the match is a text node), or null if there is none
 * @throws InvalidContentException
 */
// Searches, starting at currentNode, for a node whose text value contains TagParserConfig.INF
// with no XML_TAG_START before it.
// NOTE(review): this copy has lost every brace and the right-hand side of the two "node =;"
// assignments (the expression that moves to the next node); restore them from upstream.
public Node findNodeWithOneTag(Node currentNode) throws InvalidContentException
ILogger logger = GendocServices.getDefault().getService(ILogger.class);
// FIXME Node node = currentNode.cloneNode(true);
Node node = currentNode;
String currentNodeInternalContent = "";
String nodeContent = null;
StringBuffer tagContent = new StringBuffer();
Node startNode = null;
while (startNode == null && node != null)
// NOTE(review): truncated statement - the expression giving the next node is missing
node =;
currentNodeInternalContent = extractNodeTextValue(node);
// True when there is no content (null node), or when the content contains the tag start
// marker and no XML tag start before it
boolean nullOrContainsStartTag = currentNodeInternalContent == null
|| ((currentNodeInternalContent.indexOf(TagParserConfig.INF) != -1) && !(currentNodeInternalContent.substring(0, currentNodeInternalContent.indexOf(TagParserConfig.INF)).contains(XML_TAG_START)));
// Skip nodes until one satisfies the condition above
while (node != null && (!nullOrContainsStartTag))
// NOTE(review): truncated statement - same missing expression as above
node =;
currentNodeInternalContent = extractNodeTextValue(node);
nullOrContainsStartTag = currentNodeInternalContent == null
|| (currentNodeInternalContent.indexOf(TagParserConfig.INF) != -1 && !(currentNodeInternalContent.substring(0, currentNodeInternalContent.indexOf(TagParserConfig.INF)).contains(XML_TAG_START)));
if (node == null)
return null;
nodeContent = asText(node); // Full content of the node
// tagContent is filled here but not read again in this method
tagContent.append(nodeContent.substring(0, nodeContent.indexOf(TagParserConfig.INF)));
startNode = node;
// A text node is replaced by its parent
if (Node.TEXT_NODE == startNode.getNodeType())
logger.log("XMLDocumentService.findNodeWithOneTag returned a text node :" + startNode.getNodeValue(), ILogger.DEBUG);
startNode = node.getParentNode();
return startNode;
return null;
protected Node findNodeWithStartTag(Node currentNode, Node ancestorNode)
String label = currentNode.getNodeName();
String path = getRelativePath(ancestorNode, currentNode);
if (path == null)
return null;
if (!"".equals(path))
path += "/";
path += "following::" + label + "[contains(.,'" + XML_TAG_START + "')]";
Node result = getNodeFromXPath(ancestorNode, path);
if (getRelativePath(ancestorNode, result) == null)
return null;
if( currentNode.equals(result)){
return null;
return result;
* Get list of next nodes with the node label
* @param startNode node from which to start the searching
* @param nodeLabel label of the node to search
* @return the list of nodes following startNode and with nodeName = nodeLabel
public NodeList getNextNodes(Node startNode, String nodeLabel)
ILogger logger = GendocServices.getDefault().getService(ILogger.class);
XPathFactory factory = XPathFactory.newInstance();
XPath xpath = factory.newXPath();
XPathExpression expr = xpath.compile("following::" + nodeLabel);
Object result = expr.evaluate(startNode, XPathConstants.NODESET);
if (result != null && result instanceof NodeList)
return (NodeList) result;
catch (XPathExpressionException e)
logger.log(e.getStackTrace().toString(), IStatus.ERROR);
// Return null in case of problem => do nothing here
return null;
* Find the best ascending node from baseNode until a direct child of high Ascendant
* @param higherAscendant Node that must be a direct parent of the result
* @param baseNode node from which to start the ascension
* @return a direct child of higherAscendant that is a parent of baseNode
protected Node getBestAscendantUntil(Node higherAscendant, Node baseNode)
Node parent = baseNode.getParentNode();
if (parent == null)
return null;
if (parent.equals(higherAscendant))
return baseNode;
return getBestAscendantUntil(higherAscendant, parent);
* Find the Xpath to get currentNode from ancestorNode
* @param ancestorNode ancestor that defines the subtree for the search
* @param currentNode node to search in that subtree
* @return the path to get currentNode from ancestorNode, or null if not found
private String getRelativePath(Node ancestorNode, Node currentNode)
if (currentNode == null)
return null;
if (currentNode.equals(ancestorNode))
return "";
// Store path until parent in a buffer
StringBuffer path = new StringBuffer(currentNode.getNodeName());
// Recursively get parent and fill path
Node parent = currentNode.getParentNode();
Node node = currentNode;
if (node.getPreviousSibling() != null)
// Count position
int index = 1;
while (node != null)
String label = node.getNodeName();
node = node.getPreviousSibling();
if (node != null && label.equals(node.getNodeName()))
if (index > 0)
path.append("[" + index + "]");
while (parent != null && !parent.equals(ancestorNode))
path.insert(0, "/");
currentNode = parent;
parent = currentNode.getParentNode();
String label = currentNode.getNodeName();
if (currentNode.getPreviousSibling() != null)
// Count position
int index = 1;
while (currentNode != null)
currentNode = currentNode.getPreviousSibling();
if (currentNode != null && label.equals(currentNode.getNodeName()))
if (index > 0)
path.insert(0, "[" + index + "]");
path.insert(0, label);
if (parent == null)
return null;
return path.toString();
public NodeList getNodeListFromXPath(Node start, String expression)
XPathFactory factory = XPathFactory.newInstance();
XPath xpath = factory.newXPath();
XPathExpression expr = xpath.compile(expression);
Object result = expr.evaluate(start, XPathConstants.NODESET);
if (result != null && result instanceof NodeList)
return (NodeList) result;
catch (XPathExpressionException e)
return null;
public Node getNodeFromXPath(Node start, String expression)
ILogger logger = GendocServices.getDefault().getService(ILogger.class);
XPathFactory factory = XPathFactory.newInstance();
XPath xpath = factory.newXPath();
XPathExpression expr = xpath.compile(expression);
Object result = expr.evaluate(start, XPathConstants.NODE);
if (result != null && result instanceof Node)
return (Node) result;
catch (XPathExpressionException e)
logger.log("Invalid XPath expression : "+expression+"\n"+e.getStackTrace().toString(), IStatus.ERROR);
return null;
public Document getDocument()
return document;
public void setDocument(Document document)
this.document = document;
* Clean a text in order to obtain valid XML tags
* @param textTag the text tag to clean
* @return a node content
public StringBuffer cleanTextTag(String textTag)
StringBuffer cleaned = new StringBuffer("");
int index = 0;
Stack<String> tagStack = new Stack<String>();
/** Match all tags */
Matcher m = patternTag.matcher(textTag);
while (m.find())
String matchedTag = textTag.substring(m.start(), m.end());
String tagName = getTagName(matchedTag);
// If it is an close tag
if (isCloseTag(matchedTag))
// If the tag close the last opened tag
if (!tagStack.empty() && tagName.equals(tagStack.peek()))
// Pop opened tag
// Save text before tag + closing tag
cleaned.append(textTag.substring(index, m.end()));
else if (!tagStack.empty() && areSimilarTags(tagName, tagStack.peek()))
String toAppend = matchedTag.replace(tagName, tagStack.peek());
// Pop opened tag
// Save text before tag + closing tag
// If the tag close a tag push earlier
if (tagStack.contains(tagName))
// Save text before tag
cleaned.append(textTag.substring(index, m.start()));
// Create and add all the closing tag for tag opened after the current tag
while (!tagStack.peek().equals(tagName))
cleaned.append("</" + tagStack.pop() + ">");
// Pop opened tag
// Save the closing tag
String similarTag = containsSimilarTag(tagStack, tagName);
if (similarTag != null)
// Save text before tag
cleaned.append(textTag.substring(index, m.start()));
// Create and add all the closing tag for tag opened after the current tag
while (!tagStack.peek().equals(similarTag))
cleaned.append("</" + tagStack.pop() + ">");
// Pop opened tag
// Save the closing tag
cleaned.append(matchedTag.replace(tagName, similarTag));
// Save text before tag
cleaned.append(textTag.substring(index, m.start()));
else if (!isSingleTag(matchedTag))// If it is an open tag
// Push the open tag
// Save the text before the tag + the opened tag
cleaned.append(textTag.substring(index, m.end()));
// Save the text before the tag + the tag
cleaned.append(textTag.substring(index, m.end()));
// If the tag has never been opened
// remove the useless closing tag
// Update index
index = m.end();
// Close all unclosed tags
while (!tagStack.empty())
cleaned.append("</" + tagStack.pop() + ">");
return cleaned;
* return the tag name
* @param tag
* @return the tag name
protected String getTagName(String tag)
int begin = tag.indexOf('<') + 1;
if ('/' == tag.charAt(begin))
int end;
if (tag.contains(" "))
end = tag.indexOf(' ');
end = tag.indexOf('>');
return tag.substring(begin, end);
* Check if the given tag is a closing tag &lt;/mayTag&gt;
* @param matchedTag
* @return true if it is a closing tag else false
protected boolean isCloseTag(String matchedTag)
return patternTagClose.matcher(matchedTag).matches();
* Check if the given tag is a single tag <mayTag/>
* @param matchedTag
* @return true if it is a single tag else false
protected boolean isSingleTag(String matchedTag)
return patternTagSingle.matcher(matchedTag).matches();
protected Pattern getPatternTag()
return patternTag;
/**
 * Tells whether two tag names can be considered as the same tag.
 * cleanTextTag uses it to let a closing tag close the last opened tag although their names
 * differ; the closing tag is then rewritten with the name of the opened tag.
 *
 * @param tagName1 name of the closing tag
 * @param tagName2 name of the last opened tag
 * @return true if both names designate similar tags
 */
protected abstract boolean areSimilarTags(String tagName1, String tagName2);
/**
 * Searches the stack of opened tags for a tag similar to the given name.
 * cleanTextTag closes every tag opened after the returned one, then the returned one.
 *
 * @param tagStack names of the currently opened tags
 * @param tagName name of the closing tag
 * @return the name of the similar opened tag, or null when there is none
 */
protected abstract String containsSimilarTag(Stack<String> tagStack, String tagName);
protected Pattern getPatternTagClose()
return patternTagClose;
protected Pattern getPatternTagSingle()
return patternTagSingle;
* Remove the file f from disk. If f is a directory, remove all sub files
* @param f
public void clean(File f)
if (f != null)
File[] thefiles = f.listFiles();
if (thefiles != null)
for (File f2 : thefiles)
if (f2.isDirectory())