blob: accf1ace7684bc958f4e15d709a2c9fef04a0644 [file] [log] [blame]
/*****************************************************************************
* Copyright (c) 2010 Atos Origin.
*
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Anne Haugommard (Atos Origin) anne.haugommard@atosorigin.com - Initial API and implementation
* Antonio Campesino Robles (Ericsson) - Bug 478883
*
*****************************************************************************/
package org.eclipse.gendoc.services.odt;
import java.io.File;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.namespace.NamespaceContext;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IConfigurationElement;
import org.eclipse.core.runtime.IExecutableExtension;
import org.eclipse.gendoc.document.parser.documents.Document;
import org.eclipse.gendoc.document.parser.documents.odt.ODTDocument;
import org.eclipse.gendoc.document.parser.documents.odt.ODTNamespaceContext;
import org.eclipse.gendoc.documents.IAdditionalResourceService;
import org.eclipse.gendoc.documents.ITableService;
import org.eclipse.gendoc.documents.XMLDocumentService;
import org.eclipse.gendoc.services.exception.DocumentServiceException;
import org.eclipse.gendoc.services.exception.InvalidContentException;
import org.eclipse.gendoc.tags.handlers.impl.RegisteredTags;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* Specific service for ODT document management
*/
public class ODTDocumentService extends XMLDocumentService implements IExecutableExtension
{
/** Declaration binding the <code>office</code> prefix to the OpenDocument office namespace. */
private static final String XMLNS_OFFICE = "xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\"";
/** Declaration binding the <code>table</code> prefix to the OpenDocument table namespace. */
private static final String XMLNS_TABLE = "xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\"";
/** Declaration binding the <code>text</code> prefix to the OpenDocument text namespace. */
private static final String XMLNS_TEXT = "xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\"";
/** Declaration binding the <code>fo</code> prefix to the OpenDocument XSL-FO compatible namespace. */
private static final String XMLNS_FO = "xmlns:fo=\"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0\"";
/** Declaration binding the <code>style</code> prefix to the OpenDocument style namespace. */
private static final String XMLNS_STYLE = "xmlns:style=\"urn:oasis:names:tc:opendocument:xmlns:style:1.0\"";
/** Service for additional resources */
IAdditionalResourceService additionalResourceService;
/** Qualified name of the ODT table element. A constant value, hence shared by all instances instead of stored per instance. */
private static final String TAG_TABLE = "table:table";
/** Identifier of this service. */
private String serviceId;
/** Service dedicated to table handling. */
private ITableService tableService;
/**
 * Creates a service that is not bound to a document yet, together with its ODT-specific additional resource
 * and table services.
 */
public ODTDocumentService()
{
    // The no-argument super constructor is invoked implicitly.
    this.additionalResourceService = new ODTAdditionalResourceService();
    this.tableService = new ODTTableService();
}
/**
 * Creates a service for the given document, together with its ODT-specific additional resource and table
 * services.
 *
 * @param document the document handed over to the super constructor
 */
public ODTDocumentService(Document document)
{
    super(document);
    this.additionalResourceService = new ODTAdditionalResourceService();
    this.tableService = new ODTTableService();
}
/**
 * Tells whether the given label is the ODT list element name.
 *
 * @param label element name to check, may be <code>null</code>
 * @return <code>true</code> only for <code>text:list</code>
 */
public boolean isList(String label)
{
    if (label == null)
    {
        return false;
    }
    return label.equals("text:list");
}
/**
 * Tells whether the given label is the ODT list item element name.
 *
 * @param label element name to check, may be <code>null</code>
 * @return <code>true</code> only for <code>text:list-item</code>
 */
public boolean isListItem(String label)
{
    if (label == null)
    {
        return false;
    }
    return label.equals("text:list-item");
}
/**
 * Tells whether the given label names an ODT paragraph-like element.
 *
 * @param label element name to check, may be <code>null</code>
 * @return <code>true</code> for <code>text:p</code> and <code>text:h</code>, <code>false</code> otherwise
 */
public boolean isPara(String label)
{
    if ("text:p".equals(label))
    {
        return true;
    }
    return "text:h".equals(label);
}
/**
 * Tells whether the given label is the ODT table element name.
 *
 * @param label element name to check, may be <code>null</code>
 * @return <code>true</code> only for <code>table:table</code>
 */
public boolean isTable(String label)
{
    if (label == null)
    {
        return false;
    }
    return label.equals("table:table");
}
/**
 * Tells whether the given label is the ODT table row element name.
 *
 * @param label element name to check, may be <code>null</code>
 * @return <code>true</code> only for <code>table:table-row</code>
 */
public boolean isRow(String label)
{
    if (label == null)
    {
        return false;
    }
    return label.equals("table:table-row");
}
/**
 * Tells whether the given label is the ODT table cell element name.
 *
 * @param label element name to check, may be <code>null</code>
 * @return <code>true</code> only for <code>table:table-cell</code>
 */
public boolean isCell(String label)
{
    if (label == null)
    {
        return false;
    }
    return label.equals("table:table-cell");
}
/**
 * Gives the regular-expression alternation of the element names carrying text in an ODT document.
 *
 * @return the alternation of <code>text:p</code>, <code>text:h</code> and <code>text:span</code>
 */
public String getTextStyle()
{
    final String textElements = "(text:p|text:h|text:span)";
    return textElements;
}
/**
 * Gives the element names handled as text tags.
 *
 * @return a new array holding <code>text:p</code>, <code>text:h</code>, <code>text:span</code> and
 *         <code>table:table</code>, in that order
 */
public String[] getTextTagLabels()
{
    String[] labels = new String[4];
    labels[0] = "text:p";
    labels[1] = "text:h";
    labels[2] = "text:span";
    labels[3] = "table:table";
    return labels;
}
/**
 * Gives the namespace declarations to put on an element, separated by single spaces.
 *
 * @return the text, table, office, fo and style declarations, in that order
 */
public String getNamingSpaceURL()
{
    StringBuilder declarations = new StringBuilder();
    declarations.append(XMLNS_TEXT).append(" ");
    declarations.append(XMLNS_TABLE).append(" ");
    declarations.append(XMLNS_OFFICE).append(" ");
    declarations.append(XMLNS_FO).append(" ");
    declarations.append(XMLNS_STYLE);
    return declarations.toString();
}
/**
 * Gives the namespace context used for ODT documents.
 *
 * @return a new {@link ODTNamespaceContext} on every call
 */
public NamespaceContext getNameSpaceContext()
{
    NamespaceContext odtContext = new ODTNamespaceContext();
    return odtContext;
}
/**
 * Writes the XML content of the document back to its files, then zips the document to the given location.
 *
 * @param document the document to save; it has to be an {@link ODTDocument}
 * @param path location handed over to <code>zipToLocation</code>
 * @throws DocumentServiceException if the document is not an {@link ODTDocument}
 */
public void saveDocument(Document document, String path) throws DocumentServiceException
{
    if (document instanceof ODTDocument)
    {
        ODTDocument odtDocument = (ODTDocument) document;
        insertDocumentInFile(odtDocument);
        odtDocument.zipToLocation(path);
        return;
    }
    throw new DocumentServiceException("Document is not a valid ODT document.");
}
/**
 * Serializes the DOM of every XML file of the document into the unzipped document directory.
 * <p>
 * The document is rewound with <code>jumpToStart</code> and walked with <code>jumpToNextFile</code>. Each DOM is
 * written in UTF-8, without indentation, to a file of the unzip directory bearing the name of the parsed XML
 * file.
 * <p>
 * NOTE(review): a {@link TransformerException} is only printed on the error stream; the caller then carries on as
 * if all files had been written. Consider reporting the failure to the caller.
 *
 * @param document the ODT document whose XML files are written
 * @throws TransformerFactoryConfigurationError if no transformer factory can be instantiated
 */
private void insertDocumentInFile(ODTDocument document)
{
    try
    {
        document.jumpToStart();
        // Looking up the factory does not depend on the file being written: do it once instead of once per file.
        TransformerFactory factory = TransformerFactory.newInstance();
        do
        {
            DOMSource domSource = new DOMSource(document.getXMLParser().getDocument());
            // Parent/child constructor instead of concatenating the path by hand with "/".
            File target = new File(document.getUnzipLocationDocumentFile().getAbsolutePath(), document.getXMLParser().getXmlFile().getName());
            StreamResult destination = new StreamResult(target);
            // One identity transformer per file.
            Transformer identity = factory.newTransformer();
            // DO NOT INDENT : it causes problems for <span> content, that take spaces into account
            identity.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            identity.transform(domSource, destination);
        }
        while (document.jumpToNextFile());
    }
    catch (TransformerException e2)
    {
        e2.printStackTrace();
    }
}
/**
 * Merges the text of a tag that is spread over several nodes into one node injected in place of the base node.
 * <p>
 * Outline, as far as this method shows:
 * <ol>
 * <li>starting from <code>baseNode</code>, look (through <code>findNodeWithStartTag</code>) for a node whose text
 * value contains one of <code>tagLabels</code>;</li>
 * <li>if that text does not hold full tags yet (<code>containsFullTags</code>), append text pieces of
 * <code>currentNode</code> and the text of the following nodes bearing the same node name until it does, and
 * remove the absorbed nodes that are direct children of <code>currentNode</code>;</li>
 * <li>surround the merged text with markup taken from the base node, pass it through
 * <code>cleanXMLContent</code> and inject it with <code>injectNode</code>; the base node is then removed;</li>
 * <li>when text pieces of <code>currentNode</code> were absorbed, inject a second time with those pieces filtered
 * out by <code>clearContents</code>.</li>
 * </ol>
 * The helpers named above are inherited and not visible in this file: their exact semantics are to be confirmed
 * there.
 *
 * @param currentNode subtree in which the clean is done.
 * @param tagLabels list of known tag labels
 * @param baseNode Node on which to start
 * @return the node produced by the last <code>injectNode</code> call, or <code>null</code> when
 *         <code>baseNode</code> is <code>null</code> or no node containing one of the tag labels is found
 * @throws InvalidContentException declared by this method; presumably raised by the inherited text helpers (to be
 *         confirmed)
 */
protected Node cleanTags(Node currentNode, List<String> tagLabels, Node baseNode) throws InvalidContentException
{
// 1. Nothing to start from
if (baseNode == null)
{
return null;
}
// 2. Move on until a node whose text value contains one of the tag labels is reached
StringBuffer newNodeContent = new StringBuffer(extractNodeTextValue(baseNode));
while (baseNode != null && !containsOneOf(tagLabels, newNodeContent.toString()))
{
baseNode = findNodeWithStartTag(baseNode, currentNode);
if (baseNode != null)
{
newNodeContent = new StringBuffer(extractNodeTextValue(baseNode));
}
}
// The search ran out of nodes without meeting any tag label
if (baseNode == null)
{
return null;
}
// 3. Base node is found AND matches a valid tag => Check tag closure
boolean isCompleteTag = containsFullTags(newNodeContent.toString(), tagLabels);
String[] currentNodeText = new String[0];
// Text pieces of currentNode that get merged into the new content, and must be filtered out afterwards
List<String> partsToRemove = new LinkedList<String>();
String currentNodeContent = extractNodeTextValue(currentNode);
// 4. If tag not closed :
if (!isCompleteTag)
{
// Split the text value on markup: a delimiter is either a run of tags followed by text and a closing tag,
// or a single tag. What is left are the text pieces lying between those delimiters.
currentNodeText = currentNodeContent.split("((<[^>]*>)*[^<>]*</[^<>]*>|(<[^>]*>))");
// System.out.println(currentNodeContent+"\n -- > "+Arrays.toString(currentNodeText));
// Find all nodes matching the base node label
NodeList followingNodes = getNextNodes(baseNode, baseNode.getNodeName());
List<Node> nodesToRemove = new ArrayList<Node>();
// The first text piece is always merged
if (currentNodeText.length > 0)
{
newNodeContent.append(currentNodeText[0]);
partsToRemove.add(currentNodeText[0]);
}
if (followingNodes != null)
{
// Append text values of all these nodes until tag closure is found
for (int i = 0; i < followingNodes.getLength(); i++)
{
// Text piece number i + 1 is merged before the text of following node number i
if (currentNodeText.length > i + 1)
{
newNodeContent.append(currentNodeText[i + 1]);
partsToRemove.add(currentNodeText[i + 1]);
}
String textValue = extractNodeTextValue(followingNodes.item(i));
Node nodeToRemove = getBestAscendantUntil(currentNode, followingNodes.item(i));
if (nodeToRemove != null)
{
newNodeContent.append(textValue);
// Several following nodes may lead to the same node to remove: register it once
if (!nodesToRemove.contains(nodeToRemove))
{
nodesToRemove.add(nodeToRemove);
}
if (containsFullTags(newNodeContent.toString(), tagLabels))
{
isCompleteTag = true;
break;
}
}
else
{
// getBestAscendantUntil gave nothing for this node: stop merging
break;
}
}
// Also merge the text piece at index (number of following nodes + 1), when the split produced that many pieces
if (currentNodeText.length > followingNodes.getLength() + 1)
{
newNodeContent.append(currentNodeText[followingNodes.getLength() + 1]);
partsToRemove.add(currentNodeText[followingNodes.getLength() + 1]);
}
// Remove all nodes that are not useful anymore from initial current Node
// (only those that are direct children of currentNode)
for (Node nodeToRemove : nodesToRemove)
{
if (nodeToRemove != null && currentNode.equals(nodeToRemove.getParentNode()))
{
currentNode.removeChild(nodeToRemove);
}
}
}
}
// Surround the merged text with markup of the base node: the serialized base node is split on the
// XML_TAG_START / XML_TAG_END delimiters, piece 1 is put in front and the last piece at the end.
// Presumably these are the opening and the closing tag of the base node - confirm against the inherited constants.
String[] separated = asText(baseNode).split(XML_TAG_START + "|" + XML_TAG_END);
if (separated != null && separated.length > 1)
{
newNodeContent.insert(0, XML_TAG_START + separated[1] + XML_TAG_END);
newNodeContent.append(XML_TAG_START + separated[separated.length - 1] + XML_TAG_END);
}
else
{
// No delimiter found in the serialized base node: append it as a whole
newNodeContent.append(asText(baseNode));
}
// Replace invalid characters
String nodeContent = cleanXMLContent(newNodeContent.toString());
// String nodeContent = newNodeContent.toString().replaceAll("" + TagConstants.INVALID_QUOTE, "" + TagConstants.QUOTE);
// Replace base node by the value of the buffer
Node result = injectNode(baseNode, nodeContent);
baseNode.getParentNode().removeChild(baseNode);
// If parentNode had text content
if (!partsToRemove.isEmpty())
{
if (currentNode.getParentNode() == null)
{
// currentNode has no parent: rebuild from the injected result.
// Keep the leading part of nodeContent up to the first XML_TAG_END and its trailing part from the last
// XML_TAG_START, with the filtered content of the result in between.
StringBuffer finalString = new StringBuffer();
finalString.append(nodeContent.substring(0, nodeContent.indexOf(XML_TAG_END) + 1));
finalString.append(clearContents(result, partsToRemove, nodeContent));
finalString.append(nodeContent.substring(nodeContent.lastIndexOf(XML_TAG_START)));
Node newNode = injectNode(result, finalString.toString());
result.getParentNode().removeChild(result);
result = newNode;
}
else
{
// currentNode has a parent: rebuild currentNode itself the same way, from its own serialized form,
// then remove the former currentNode from its parent.
StringBuffer finalString = new StringBuffer();
String currentNodeContentString = asText(currentNode);
finalString.append(currentNodeContentString.substring(0, currentNodeContentString.indexOf(XML_TAG_END) + 1));
finalString.append(clearContents(currentNode, partsToRemove, nodeContent));
finalString.append(currentNodeContentString.substring(currentNodeContentString.lastIndexOf(XML_TAG_START)));
result = injectNode(currentNode, finalString.toString());
currentNode.getParentNode().removeChild(currentNode);
}
}
return result;
}
/**
 * Adds the namespace declarations of {@link #getNamingSpaceURL()} to the opening tags of the given XML text.
 * <p>
 * Only opening tags of the elements listed by {@link #getTextTagLabels()} are changed, whether they already have
 * attributes or not.
 *
 * @param nodes XML text to complete
 * @return the XML text with the declarations added
 */
public String addNamingSpaces(String nodes)
{
    String completed = nodes;
    String[] labels = getTextTagLabels();
    for (int i = 0; i < labels.length; i++)
    {
        String tagStart = "<" + labels[i];
        // Opening tag followed by attributes
        completed = completed.replaceAll(tagStart + " ", tagStart + " " + getNamingSpaceURL() + " ");
        // Opening tag without any attribute
        completed = completed.replaceAll(tagStart + ">", tagStart + " " + getNamingSpaceURL() + ">");
    }
    return completed;
}
/**
 * Rebuilds the text value of <code>currentNode</code> without the top-level text parts listed in
 * <code>partsToRemove</code>.
 * <p>
 * The text value is split on simple elements (an opening tag, text without markup, a closing tag) and on
 * self-closing tags. Each resulting part is kept unless it is one of <code>partsToRemove</code> or equals their
 * concatenation. After each part, the next simple element found in the text value is copied as is.
 * <p>
 * NOTE(review): self-closing tags are delimiters for the split but are not looked up by the matcher, so parts and
 * matches may not line up when the text value holds self-closing tags - confirm this is intended.
 *
 * @param currentNode node whose text value, as given by <code>extractNodeTextValue</code>, is cleaned
 * @param partsToRemove parts to remove
 * @param nodeContent not read by this method
 * @return The text value of <code>currentNode</code> cleaned
 * @throws InvalidContentException declared by this method; presumably raised by <code>extractNodeTextValue</code>
 *         (to be confirmed)
 */
private String clearContents(Node currentNode, List<String> partsToRemove, String nodeContent) throws InvalidContentException
{
String currentNodeContent = extractNodeTextValue(currentNode);
// Simple elements, copied back between the parts that are kept
Matcher m = Pattern.compile("<[^>]*>[^<]*</[^>]*>").matcher(currentNodeContent);
// Parts lying between simple elements or self-closing tags
String[] currentNodeParts = currentNodeContent.split("(<[^>]*>[^<]*</[^>]*>)|<[^>]*/>");
StringBuffer content = new StringBuffer();
// Concatenation of all the parts to remove: a part equal to it is dropped as well
StringBuffer toRemove = new StringBuffer();
for (String partToRemove : partsToRemove)
{
toRemove.append(partToRemove);
}
for (int i = 0; i < currentNodeParts.length; i++)
{
if (!partsToRemove.contains(currentNodeParts[i]) && !toRemove.toString().equals(currentNodeParts[i]))
{
content.append(currentNodeParts[i]);
}
// Whether the part was kept or not, copy the next simple element, if there is one left
if (m.find())
{
content.append(currentNodeContent.substring(m.start(), m.end()));
}
}
return content.toString();
}
/**
 * Gives the service handling additional resources.
 *
 * @return the service held by this instance
 */
public IAdditionalResourceService getAdditionalResourceService()
{
    return this.additionalResourceService;
}
/**
 * Gives the name of the table element.
 *
 * @return the value of <code>TAG_TABLE</code>
 */
public String getTableLabel()
{
    final String tableLabel = TAG_TABLE;
    return tableLabel;
}
/**
 * Tells whether two different tag names are considered similar. Only the pair made of &lt;text:p&gt; and
 * &lt;text:h&gt;, in either order, is.
 *
 * @param tagName1 first tag name, may be <code>null</code>
 * @param tagName2 second tag name, may be <code>null</code>
 * @return <code>true</code> if one name is <code>text:p</code> and the other one is <code>text:h</code>
 * @see org.eclipse.gendoc.documents.XMLDocumentService
 */
protected boolean areSimilarTags(String tagName1, String tagName2)
{
    boolean paragraphThenHeading = "text:p".equals(tagName1) && "text:h".equals(tagName2);
    if (paragraphThenHeading)
    {
        return true;
    }
    return "text:p".equals(tagName2) && "text:h".equals(tagName1);
}
/**
 * Looks in the stack for the tag name that is similar to the given one: <code>text:h</code> for
 * <code>text:p</code> and the other way round.
 *
 * @param tagStack stack of tag names to look into
 * @param tagName tag name whose similar counterpart is searched
 * @return the similar tag name if the stack contains it, <code>null</code> otherwise
 */
protected String containsSimilarTag (Stack<String> tagStack, String tagName)
{
    String counterpart = null;
    if ("text:p".equals(tagName))
    {
        counterpart = "text:h";
    }
    else if ("text:h".equals(tagName))
    {
        counterpart = "text:p";
    }
    // The stack is only consulted for the two names that have a counterpart
    if (counterpart != null && tagStack.contains(counterpart))
    {
        return counterpart;
    }
    return null;
}
/**
 * Callback of {@link IExecutableExtension}. None of the given arguments is read.
 *
 * @param config ignored
 * @param propertyName ignored
 * @param data ignored
 * @throws CoreException declared by the interface method; never thrown by this implementation
 */
public void setInitializationData (IConfigurationElement config, String propertyName, Object data) throws CoreException
{
    // Intentionally left empty
}
/**
 * Gives the identifier of this service.
 *
 * @return the identifier last given to {@link #setServiceId(String)}, <code>null</code> if none was given
 */
public String getServiceId ()
{
    return serviceId;
}
/**
 * Stores the identifier of this service.
 *
 * @param serviceId the identifier later returned by {@link #getServiceId()}
 */
public void setServiceId (String serviceId)
{
    // "this" is required: the parameter hides the field
    this.serviceId = serviceId;
}
/**
 * Gives the name of the list element.
 *
 * @return <code>text:list</code>
 */
public String getListLabel()
{
    final String listLabel = "text:list";
    return listLabel;
}
/**
 * Reads the <code>xml:id</code> attribute of a node.
 *
 * @param n node to read
 * @return the value of the attribute, <code>null</code> if the node has no attributes or no <code>xml:id</code>
 */
public String getListId(Node n)
{
    NamedNodeMap attributes = n.getAttributes();
    if (attributes == null)
    {
        return null;
    }
    Node identifier = attributes.getNamedItem("xml:id");
    if (identifier == null)
    {
        return null;
    }
    return identifier.getNodeValue();
}
/**
 * Matches, in that order: the escaped self-closing tag named by <code>RegisteredTags.NOBR</code>
 * (<code>&amp;lt;...&#47;&amp;gt;</code>), the closest closing tag starting with <code>text:p</code> or
 * <code>text:h</code>, and the closest opening tag starting with <code>text:p</code> or <code>text:h</code> after
 * it. <code>DOTALL</code> lets the match run over line breaks.
 * <p>
 * The element letter is matched with the character class <code>[ph]</code>. A bar inside a character class is a
 * literal character, not an alternation, so it must not be part of the class.
 */
protected static Pattern NOBR_REPLACE_PATTERN = Pattern.compile("(?:&lt;\\s*" + RegisteredTags.NOBR + "\\s*/\\s*&gt;)(?:.*?)(?:</text:[ph][^>]*>)(?:.*?)(?:<text:[ph][^>]*>)", Pattern.DOTALL | Pattern.MULTILINE);
/**
 * Gives the pattern used to find the places where a paragraph break has to be dropped.
 *
 * @return {@link #NOBR_REPLACE_PATTERN}
 */
public Pattern getNobrReplacePattern()
{
    return NOBR_REPLACE_PATTERN;
}
/**
 * Serializes the given node, adding a <code>text:continue-list</code> attribute that refers to
 * <code>idList</code> when the node does not have one.
 * <p>
 * The attribute is added to every <code>&lt;text:list </code> opening tag of the serialized node. The identifier
 * is inserted literally: characters such as <code>$</code> or <code>\</code> are not interpreted as regular
 * expression replacement syntax.
 *
 * @param currentNode node to serialize; a node without attribute map is handled like a node without the
 *        attribute
 * @param idList identifier of the list to continue
 * @return the serialized node, as given by <code>asText</code>, with the attribute added when it was missing
 * @throws InvalidContentException declared by this method; presumably raised by <code>asText</code> (to be
 *         confirmed)
 */
public String getContinueList(Node currentNode, String idList) throws InvalidContentException
{
    String curNodeString = this.asText(currentNode);
    NamedNodeMap attributes = currentNode.getAttributes();
    // getAttributes() is null for nodes that are not elements
    boolean alreadyContinued = attributes != null && attributes.getNamedItem("text:continue-list") != null;
    if (alreadyContinued)
    {
        return curNodeString;
    }
    // Literal replacement: the searched text holds no regular expression syntax and the identifier must not be
    // read as a group reference
    return curNodeString.replace("<text:list ", "<text:list text:continue-list=\"" + idList + "\" ");
}
/**
 * Turns line ends and tabulations of the input into their ODT elements: <code>&lt;text:line-break/&gt;</code> and
 * <code>&lt;text:tab/&gt;</code>.
 * <p>
 * Both the inherited constants <code>CARRIAGE_RETURN</code>, <code>LINE_FEED</code> and <code>TABULATION</code>
 * (their values are not visible in this file) and the control characters themselves are replaced. Pairs are
 * handled before single characters so that a pair gives one line break only.
 *
 * @param input text to format
 * @return the formatted text
 * @see org.eclipse.gendoc.documents.IDocumentService#format(java.lang.String)
 */
public String format(String input) {
    // Line ends do not become paragraph breaks ("</text:p><text:p>"): styles would be lost
    final String lineBreak = "<text:line-break/>";
    final String tab = "<text:tab/>";
    String formatted = input;
    // carriage return mixed with line feed
    formatted = formatted.replace(CARRIAGE_RETURN+"\n", lineBreak);
    formatted = formatted.replace("\r\n", lineBreak);
    // carriage return alone
    formatted = formatted.replace(CARRIAGE_RETURN, lineBreak);
    formatted = formatted.replace("\r", lineBreak);
    // line feed alone
    formatted = formatted.replace(LINE_FEED, lineBreak);
    formatted = formatted.replace("\n", lineBreak);
    // tabulation
    formatted = formatted.replace(TABULATION, tab);
    formatted = formatted.replace("\t", tab);
    return formatted;
}
/**
 * Gives the name of the table row element.
 *
 * @return <code>table:table-row</code>
 */
@Override
public String getRowLabel() {
    final String rowLabel = "table:table-row";
    return rowLabel;
}
/**
 * Gives the name of the table cell element.
 *
 * @return <code>table:table-cell</code>
 */
@Override
public String getCellLabel() {
    final String cellLabel = "table:table-cell";
    return cellLabel;
}
/**
 * Gives the service dedicated to table handling.
 *
 * @return the table service held by this instance
 */
@Override
public ITableService getTableService() {
    return this.tableService;
}
}