| /******************************************************************************* |
| * Copyright (c) 2007, 2008 IBM Corporation and Others |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Tatsuya ISHIHARA - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.actf.model.dom.odf.util.converter.impl; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.Writer; |
| import java.util.Iterator; |
| |
| import org.eclipse.actf.model.dom.odf.ODFParser; |
| import org.eclipse.actf.model.dom.odf.base.DrawingObjectBaseElement; |
| import org.eclipse.actf.model.dom.odf.base.EmbedDrawingObjectElement; |
| import org.eclipse.actf.model.dom.odf.base.ODFDocument; |
| import org.eclipse.actf.model.dom.odf.base.ODFElement; |
| import org.eclipse.actf.model.dom.odf.content.IEditable; |
| import org.eclipse.actf.model.dom.odf.dr3d.SceneElement; |
| import org.eclipse.actf.model.dom.odf.draw.CaptionElement; |
| import org.eclipse.actf.model.dom.odf.draw.CircleElement; |
| import org.eclipse.actf.model.dom.odf.draw.ConnectorElement; |
| import org.eclipse.actf.model.dom.odf.draw.ControlElement; |
| import org.eclipse.actf.model.dom.odf.draw.CustomShapeElement; |
| import org.eclipse.actf.model.dom.odf.draw.EllipseElement; |
| import org.eclipse.actf.model.dom.odf.draw.FrameElement; |
| import org.eclipse.actf.model.dom.odf.draw.GElement; |
| import org.eclipse.actf.model.dom.odf.draw.ImageElement; |
| import org.eclipse.actf.model.dom.odf.draw.ImageMapAreaElement; |
| import org.eclipse.actf.model.dom.odf.draw.ImageMapElement; |
| import org.eclipse.actf.model.dom.odf.draw.LineElement; |
| import org.eclipse.actf.model.dom.odf.draw.ObjectElement; |
| import org.eclipse.actf.model.dom.odf.draw.PageThumbnailElement; |
| import org.eclipse.actf.model.dom.odf.draw.PolygonElement; |
| import org.eclipse.actf.model.dom.odf.draw.PolylineElement; |
| import org.eclipse.actf.model.dom.odf.draw.RectElement; |
| import org.eclipse.actf.model.dom.odf.draw.RegularPolygonElement; |
| import org.eclipse.actf.model.dom.odf.draw.TextBoxElement; |
| import org.eclipse.actf.model.dom.odf.form.FixedTextElement; |
| import org.eclipse.actf.model.dom.odf.form.FormControlElement; |
| import org.eclipse.actf.model.dom.odf.table.TableCellElement; |
| import org.eclipse.actf.model.dom.odf.text.PElement; |
| import org.eclipse.actf.model.dom.odf.text.PageNumberElement; |
| import org.eclipse.actf.model.dom.odf.text.SElement; |
| import org.eclipse.actf.model.dom.odf.text.TabElement; |
| import org.eclipse.actf.model.dom.odf.text.TextConstants; |
| import org.eclipse.actf.model.dom.odf.text.TrackedChangesElement; |
| import org.eclipse.actf.model.dom.odf.util.converter.ODFConverterUtils; |
| import org.eclipse.actf.model.dom.odf.util.converter.TextExtractor; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| import org.w3c.dom.Text; |
| |
| |
| public class TextExtractorImpl implements TextExtractor { |
| public static final String IMAGE_NO_ALT = "[image]"; |
| |
| public static final String IMAGEMAP_NO_ALT = "[imagemap]"; |
| |
| public static final String OBJECT_NO_ALT = "[object]"; |
| |
| public static final String OBJECT_TABLE = "[table]"; |
| |
| public static final String TEXTBOX_NO_ALT = "[text box]"; |
| |
| public static final String CAPTION_NO_ALT = "[caption]"; |
| |
| public static final String CIRCLE_NO_ALT = "[circle]"; |
| |
| public static final String CONNECTOR_NO_ALT = "[connector]"; |
| |
| public static final String CUSTOMSHAPE_NO_ALT = "[custom shape]"; |
| |
| public static final String ELLIPSE_NO_ALT = "[ellipse]"; |
| |
| public static final String FRAME_NO_ALT = "[frame]"; |
| |
| public static final String G_NO_ALT = "[group]"; |
| |
| public static final String IMAGEMAPAREA_NO_ALT = "[imagemap area]"; |
| |
| public static final String LINE_NO_ALT = "[line]"; |
| |
| public static final String POLYGON_NO_ALT = "[polygon]"; |
| |
| public static final String POLYLINE_NO_ALT = "[polygon line]"; |
| |
| public static final String RECT_NO_ALT = "[rectangle]"; |
| |
| public static final String REGULARPOLYGON_NO_ALT = "[polygon]"; |
| |
| public static final String SCENE_NO_ALT = "[3D shape]"; |
| |
| public static final String UNKNOWNSHAPE_NO_ALT = "[graphic shape]"; |
| |
| private TextExtractor converter; |
| |
| private double odfVersion = -1.0; |
| |
| private ODFElement curElem = null; |
| |
| public void setDocument(ODFDocument document, TextExtractor converter) { |
| this.curElem = (ODFElement) document.getDocumentElement(); |
| this.converter = converter; |
| } |
| |
| // for Notes8, Notes8 save document by ODF 1.1 schema, |
| // but odf:version is set as 1.0 |
| // if specify odf:version by this function, |
| // do not use odf:version defined in each ODF file |
| public void setOdfVersion(double odfVersion) { |
| this.odfVersion = odfVersion; |
| } |
| |
| private boolean writeControlElementContent(Writer writer, |
| ControlElement elem) { |
| FormControlElement form = ((ControlElement) elem) |
| .getFormControlElement(); |
| if (form != null) { |
| try { |
| if (!(form instanceof FixedTextElement)) { |
| writer.write("[form control]"); |
| } |
| |
| String formLabel = form.getAttrFormLabel(); |
| if (formLabel != null) { |
| if (!(form instanceof FixedTextElement)) { |
| writer.write(" "); |
| } |
| writer.write(formLabel); |
| } |
| return true; |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| return false; |
| } |
| |
| private boolean writeFrameElementContent(Writer writer, File dir, |
| FrameElement elem, boolean enableStyle) { |
| boolean addedTextContent = false; |
| Iterator iter = ((FrameElement) elem).getChildIterator(); |
| if (iter.hasNext()) { |
| ODFElement firstContent = (ODFElement) iter.next(); |
| if (firstContent != null) { |
| addedTextContent |= converter.extractContent(writer, dir, |
| firstContent, enableStyle); |
| if (iter.hasNext()) { |
| // if this frame has image map |
| ODFElement secondContent = (ODFElement) iter.next(); |
| if ((secondContent != null) |
| && (secondContent instanceof ImageMapElement)) { |
| addedTextContent |= converter.extractContent(writer, |
| dir, secondContent, enableStyle); |
| } |
| } |
| } |
| } |
| return addedTextContent; |
| } |
| |
| private boolean writeImageMapElementContent(Writer writer, File dir, |
| ImageMapElement elem, boolean enableStyle) { |
| boolean addedTextContent = false; |
| NodeList nl = ((ImageMapElement) elem).getAreaElements(); |
| for (int i = 0; i < nl.getLength(); i++) { |
| addedTextContent |= converter.extractContent(writer, dir, |
| (ImageMapAreaElement) nl.item(i), enableStyle); |
| } |
| return addedTextContent; |
| } |
| |
| private boolean writeTextBoxElementContent(Writer writer, File dir, |
| TextBoxElement elem, boolean enableStyle) { |
| boolean addedTextContent = false; |
| NodeList children = elem.getChildNodes(); |
| for (int i = 0; i < children.getLength(); i++) { |
| Node child = children.item(i); |
| if (child instanceof Text) { |
| String str = ((Text) child).getData().trim(); |
| if (str.length() != 0) { |
| str = ODFConverterUtils.convertXMLCharacter(str); |
| try { |
| writer.write(str); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| addedTextContent = true; |
| } |
| } else if (child instanceof ODFElement) { |
| addedTextContent |= converter.extractContent(writer, dir, |
| (ODFElement) child, enableStyle); |
| } |
| } |
| return addedTextContent; |
| } |
| |
| private boolean writeTabElementContent(Writer writer, TabElement elem) { |
| try { |
| writer.write("\t"); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| return true; |
| } |
| |
| private boolean writeSElementContent(Writer writer, SElement elem) { |
| if (elem.hasAttributeNS(TextConstants.TEXT_NAMESPACE_URI, |
| TextConstants.ATTR_C)) { |
| int c = ((SElement) elem).getAttrTextC(); |
| if (c > 0) { |
| String value = ""; |
| for (int i = 0; i < c; i++) |
| value += " "; |
| try { |
| writer.write(value); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| } else { |
| try { |
| writer.write(" "); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| return true; |
| } |
| |
| private boolean writeCustomShapeElementContent(Writer writer, File dir, |
| CustomShapeElement elem, boolean enableStyle) { |
| boolean addedTextContent = false; |
| NodeList pList = elem.getElementsByTagNameNS( |
| TextConstants.TEXT_NAMESPACE_URI, TextConstants.ELEMENT_P); |
| if ((pList != null) && (pList.getLength() != 0)) { // if this custom |
| // shape is caption |
| for (int i = 0; i < pList.getLength(); i++) { |
| Node pElem = pList.item(i); |
| String content = pElem.getTextContent(); |
| if ((content != null) && (content.length() > 0)) { |
| addedTextContent |= converter.extractContent(writer, dir, |
| (ODFElement) pList.item(i), enableStyle); |
| } |
| } |
| } |
| |
| if (!addedTextContent) { |
| IEditable shortDesc = null; |
| if (odfVersion != -1.0) { |
| shortDesc = (IEditable) ((DrawingObjectBaseElement) elem) |
| .getShortDescElement(odfVersion); |
| } else { |
| shortDesc = (IEditable) ((DrawingObjectBaseElement) elem) |
| .getShortDescElement(); |
| } |
| |
| if (shortDesc == null) { |
| ODFDocument doc = (ODFDocument) elem.getOwnerDocument(); |
| if ((doc.getODFVersion() > 1.0) |
| || ((odfVersion != -1.0) && (odfVersion > 1.0))) { |
| try { |
| writer.write(CUSTOMSHAPE_NO_ALT); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| addedTextContent = true; |
| } else { |
| String str = (String) shortDesc.getValue(); |
| if (str != null) { |
| try { |
| writer.write(str); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| addedTextContent = true; |
| } |
| } |
| } |
| return addedTextContent; |
| } |
| |
| private boolean writeDrawingObjectBaseElementContent(Writer writer, |
| File dir, DrawingObjectBaseElement elem, boolean enableStyle) { |
| boolean addedTextContent = false; |
| if (elem instanceof ImageElement) |
| return false; |
| if (elem instanceof PageThumbnailElement) |
| return false; |
| |
| NodeList pList = elem.getElementsByTagNameNS( |
| TextConstants.TEXT_NAMESPACE_URI, TextConstants.ELEMENT_P); |
| if ((pList != null) && (pList.getLength() != 0)) { // if this custom |
| // shape is caption |
| for (int i = 0; i < pList.getLength(); i++) { |
| Node pElem = pList.item(i); |
| String content = pElem.getTextContent(); |
| if ((content != null) && (content.length() > 0)) { |
| addedTextContent |= converter.extractContent(writer, dir, |
| (ODFElement) pList.item(i), enableStyle); |
| } |
| } |
| } |
| |
| if (!addedTextContent) { |
| IEditable shortDesc = null; |
| if (odfVersion != -1.0) { |
| shortDesc = (IEditable) ((DrawingObjectBaseElement) elem) |
| .getShortDescElement(odfVersion); |
| } else { |
| shortDesc = (IEditable) ((DrawingObjectBaseElement) elem) |
| .getShortDescElement(); |
| } |
| |
| if (shortDesc == null) { |
| ODFDocument doc = (ODFDocument) elem.getOwnerDocument(); |
| if ((elem instanceof EmbedDrawingObjectElement) |
| || ((doc.getODFVersion() > 1.0) || ((odfVersion != -1.0) && (odfVersion > 1.0)))) { |
| try { |
| if (elem instanceof ImageElement) |
| writer.write(IMAGE_NO_ALT); |
| else if (elem instanceof ImageMapElement) |
| writer.write(IMAGEMAP_NO_ALT); |
| else if (elem instanceof ObjectElement) { |
| if (((ObjectElement) elem).isPresentationTable()) |
| writer.write(OBJECT_TABLE); |
| else |
| writer.write(OBJECT_NO_ALT); |
| } else if (elem instanceof CaptionElement) |
| writer.write(CAPTION_NO_ALT); |
| else if (elem instanceof TextBoxElement) |
| writer.write(TEXTBOX_NO_ALT); |
| else if (elem instanceof CircleElement) |
| writer.write(CIRCLE_NO_ALT); |
| else if (elem instanceof ConnectorElement) |
| writer.write(CONNECTOR_NO_ALT); |
| else if (elem instanceof CustomShapeElement) |
| writer.write(CUSTOMSHAPE_NO_ALT); |
| else if (elem instanceof EllipseElement) |
| writer.write(ELLIPSE_NO_ALT); |
| else if (elem instanceof FrameElement) |
| writer.write(FRAME_NO_ALT); |
| else if (elem instanceof GElement) |
| writer.write(G_NO_ALT); |
| else if (elem instanceof ImageMapAreaElement) |
| writer.write(IMAGEMAPAREA_NO_ALT); |
| else if (elem instanceof LineElement) |
| writer.write(LINE_NO_ALT); |
| else if (elem instanceof PolygonElement) |
| writer.write(POLYGON_NO_ALT); |
| else if (elem instanceof PolylineElement) |
| writer.write(POLYLINE_NO_ALT); |
| else if (elem instanceof RectElement) |
| writer.write(RECT_NO_ALT); |
| else if (elem instanceof RegularPolygonElement) |
| writer.write(REGULARPOLYGON_NO_ALT); |
| else if (elem instanceof SceneElement) |
| writer.write(SCENE_NO_ALT); |
| else |
| writer.write(UNKNOWNSHAPE_NO_ALT); |
| |
| writer.write(System.getProperty("line.separator")); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| addedTextContent = true; |
| } |
| } else { |
| String str = (String) shortDesc.getValue(); |
| if (str != null) { |
| try { |
| writer.write(str); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| addedTextContent = true; |
| } |
| } |
| } |
| return addedTextContent; |
| } |
| |
| private boolean writeTableCellElementContent(Writer writer, File dir, |
| TableCellElement elem, boolean enableStyle) { |
| boolean addedToCell = writeODFElementContent(writer, dir, |
| (ODFElement) elem, enableStyle); |
| if (!addedToCell) { |
| try { |
| writer.write(" "); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| return true; |
| } |
| |
| private boolean writeODFElementContent(Writer writer, File dir, |
| ODFElement elem, boolean enableStyle) { |
| boolean addedTextContent = false; |
| NodeList children = elem.getChildNodes(); |
| for (int i = 0; i < children.getLength(); i++) { |
| Node child = children.item(i); |
| if (child instanceof Text) { |
| String str = ((Text) child).getData().trim(); |
| if (str.length() != 0) { |
| str = ODFConverterUtils.convertXMLCharacter(str); |
| try { |
| writer.write(str); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| addedTextContent = true; |
| } |
| } else if (child instanceof ODFElement) { |
| addedTextContent |= converter.extractContent(writer, dir, |
| (ODFElement) child, enableStyle); |
| } |
| } |
| return addedTextContent; |
| } |
| |
| public void extractEmbedFile(ODFElement elem, File dir, String href) { |
| if (dir == null) |
| return; |
| |
| String outputFileName = dir.getAbsolutePath() |
| + System.getProperty("file.separator") + href; |
| File outputFile = new File(outputFileName); |
| |
| String outputDirName = outputFile.getParent(); |
| File outputDir = new File(outputDirName); |
| if (!outputDir.exists()) |
| outputDir.mkdirs(); |
| |
| ODFParser parser = new ODFParser(); |
| Document doc = curElem.getOwnerDocument(); |
| if (doc instanceof ODFDocument) { |
| parser.extractFile(((ODFDocument) doc).getURL(), href, |
| outputFileName); |
| } |
| } |
| |
| public boolean extractContent(Writer writer, File dir, ODFElement elem, |
| boolean enableStyle) { |
| // write odf content |
| boolean addedTextContent = false; |
| if (elem instanceof TabElement) { |
| addedTextContent |= writeTabElementContent(writer, |
| (TabElement) elem); |
| } else if (elem instanceof SElement) { |
| addedTextContent |= writeSElementContent(writer, (SElement) elem); |
| } else if (elem instanceof TableCellElement) { |
| addedTextContent |= writeTableCellElementContent(writer, dir, |
| (TableCellElement) elem, enableStyle); |
| } else if ((elem instanceof TrackedChangesElement) |
| || (elem instanceof PageThumbnailElement) |
| || (elem instanceof ImageElement) |
| || (elem instanceof PageNumberElement)) { |
| // nothing to write |
| } else if (elem instanceof DrawingObjectBaseElement) { |
| if (elem instanceof ControlElement) { |
| addedTextContent |= writeControlElementContent(writer, |
| (ControlElement) elem); |
| } else if (elem instanceof FrameElement) { |
| addedTextContent |= writeFrameElementContent(writer, dir, |
| (FrameElement) elem, enableStyle); |
| } else if (elem instanceof ImageMapElement) { |
| addedTextContent |= writeImageMapElementContent(writer, dir, |
| (ImageMapElement) elem, enableStyle); |
| } else if (elem instanceof TextBoxElement) { |
| addedTextContent |= writeTextBoxElementContent(writer, dir, |
| (TextBoxElement) elem, enableStyle); |
| } else if (elem instanceof CustomShapeElement) { |
| addedTextContent |= writeCustomShapeElementContent(writer, dir, |
| (CustomShapeElement) elem, enableStyle); |
| } else if (elem instanceof GElement) { |
| addedTextContent |= writeODFElementContent(writer, dir, |
| (ODFElement) elem, enableStyle); |
| } else { |
| addedTextContent |= writeDrawingObjectBaseElementContent( |
| writer, dir, (DrawingObjectBaseElement) elem, |
| enableStyle); |
| } |
| } else if (elem instanceof PElement) { |
| addedTextContent |= writeODFElementContent(writer, dir, |
| (ODFElement) elem, enableStyle); |
| if (addedTextContent) { |
| try { |
| writer.write(System.getProperty("line.separator")); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| } else { |
| addedTextContent |= writeODFElementContent(writer, dir, |
| (ODFElement) elem, enableStyle); |
| } |
| |
| return addedTextContent; |
| } |
| } |