org.eclipse.capra.ui.office/src/org/eclipse/capra/ui/office/objects/CapraWordRequirement.java - capra/org.eclipse.capra - Git at Google

 /*******************************************************************************
  * Copyright (c) 2016 Chalmers | University of Gothenburg, rt-labs and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  * 	   Chalmers | University of Gothenburg and rt-labs - initial API and implementation and/or initial documentation
  *******************************************************************************/

 package org.eclipse.capra.ui.office.objects;

 import java.io.File;
 import java.io.StringReader;
 import java.util.Arrays;

 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;

 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.w3c.dom.Document;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;

 /**
  * This class extends the CapraOfficeObject and provides an object to describe a
  * single MS Word requirement, which is defined with a specific field.
  *
  * @author Dusan Kalanj
  *
  */
 public class CapraWordRequirement extends CapraOfficeObject {

 	/**
 	 * Regular expression matching runs of tabs, newlines, carriage returns and
 	 * characters of the Unicode "Other" category (which covers invisible
 	 * control characters). Every such run is replaced with a single
 	 * white-space before the text is shown in the Office View.
 	 */
 	private static final String LINE_BREAKS_AND_CONTROL_REGEX = "[\r\n\t\\p{C}]+";

 	/**
 	 * Regular expression of the delimiters used to split the contents of a
 	 * field: either a double quote or a backslash immediately followed by an
 	 * asterisk.
 	 */
 	private static final String WORD_FIELD_SPLIT_DELIMITERS = "(\")|(\\\\\\*)";

 	/**
 	 * Name of the XML element that holds the MS Word field command.
 	 */
 	private static final String FIELD_TAG = "w:instrText";

 	/**
 	 * Xerces feature that makes the parser reject any document containing a
 	 * DOCTYPE declaration, which rules out external-entity (XXE) attacks.
 	 */
 	private static final String DISALLOW_DOCTYPE_FEATURE = "http://apache.org/xml/features/disallow-doctype-decl";

 	/**
 	 * Creates a new CapraWordRequirement from a Word paragraph. The paragraph
 	 * is serialised to XML and searched for the first field command; if that
 	 * command mentions the provided field name, the paragraph text and the
 	 * requirement ID taken from the field are stored as the data and the URI
 	 * of this object. If the paragraph cannot be parsed, contains no matching
 	 * field or has no text, the data and the URI are left untouched.
 	 *
 	 * @param officeFile
 	 *            the file that contains the paragraph
 	 * @param paragraph
 	 *            a Word paragraph
 	 * @param fieldName
 	 *            the name of the field that denotes the data that is to be
 	 *            extracted from the paragraph
 	 */
 	public CapraWordRequirement(File officeFile, XWPFParagraph paragraph, String fieldName) {
 		// TODO This solution assumes that there is only one requirement per
 		// paragraph. Should it be different?
 		super();

 		String rText = "";
 		String rId = "";

 		CTP pCtp = paragraph.getCTP();
 		Document doc;
 		try {
 			doc = loadXMLFromString(pCtp.toString());
 		} catch (Exception e) {
 			// Best effort: a paragraph that cannot be parsed simply yields an
 			// object without data and URI.
 			e.printStackTrace();
 			return;
 		}

 		NodeList nodeList = doc.getElementsByTagName(FIELD_TAG);
 		if (nodeList.getLength() > 0) {
 			// TODO Use a for loop if the solution needs to parse multiple
 			// requirements in a single paragraph. In that case,
 			// paragraph.getText() should be replaced with something from the
 			// org.w3c.dom.Document class.
 			String[] parts = nodeList.item(0).getTextContent().split(WORD_FIELD_SPLIT_DELIMITERS);
 			// The third fragment of the split field command is used as the
 			// requirement ID, hence at least three fragments are required.
 			if (Arrays.asList(parts).contains(fieldName) && parts.length > 2) {
 				rText = paragraph.getText();
 				rId = parts[2].trim();
 			}
 		}

 		rText = rText.replaceAll(LINE_BREAKS_AND_CONTROL_REGEX, " ").trim();
 		if (!rText.isEmpty()) {
 			rText = "ID " + rId + ": " + rText;
 			String pUri = CapraOfficeObject.createUri(officeFile, rId);

 			this.setData(rText);
 			this.setUri(pUri);
 		}
 	}

 	/**
 	 * Parses the provided XML string into a DOM document. The parser is
 	 * configured to reject DOCTYPE declarations so that content originating
 	 * from an office file cannot trigger external entity resolution.
 	 *
 	 * @param xml
 	 *            the XML to parse
 	 * @return the parsed DOM document
 	 * @throws Exception
 	 *             if the parser cannot be created or the XML cannot be parsed
 	 */
 	private Document loadXMLFromString(String xml) throws Exception {
 		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
 		try {
 			factory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
 		} catch (javax.xml.parsers.ParserConfigurationException unsupported) {
 			// The parser does not know the Xerces feature. Keep working, but
 			// at least leave entity references unexpanded.
 			factory.setExpandEntityReferences(false);
 		}
 		DocumentBuilder builder = factory.newDocumentBuilder();
 		InputSource is = new InputSource(new StringReader(xml));
 		return builder.parse(is);
 	}
 }
	/*******************************************************************************
	* Copyright (c) 2016 Chalmers | University of Gothenburg, rt-labs and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* Chalmers | University of Gothenburg and rt-labs - initial API and implementation and/or initial documentation
	*******************************************************************************/

	package org.eclipse.capra.ui.office.objects;

	import java.io.File;
	import java.io.StringReader;
	import java.util.Arrays;

	import javax.xml.parsers.DocumentBuilder;
	import javax.xml.parsers.DocumentBuilderFactory;

	import org.apache.poi.xwpf.usermodel.XWPFParagraph;
	import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
	import org.w3c.dom.Document;
	import org.w3c.dom.NodeList;
	import org.xml.sax.InputSource;

	/**
	* This class extends the CapraOfficeObject and provides an object to describe a
	* single MS Word requirement, which is defined with a specific field.
	*
	* @author Dusan Kalanj
	*
	*/
	public class CapraWordRequirement extends CapraOfficeObject {

	/**
	* RegEx of characters (tabs, newlines, carriage returns and invisible
	* control characters) to be replaced with white-spaces in the Office View.
	*/
	private static final String LINE_BREAKS_AND_CONTROL_REGEX = "[\r\n\t\\p{C}]+";

	/**
	* Regex of characters to be used as delimiters when splitting the field
	* contents.
	*/
	private static final String WORD_FIELD_SPLIT_DELIMITERS = "(\")\|(\\\\\\*)";

	/**
	* Start and end XML tags of MS Word field commands
	*/
	private static final String FIELD_TAG = "w:instrText";

	/**
	* A constructor that generates a new instance of CapraWordRequirement where
	* the parent properties are extracted from the provided paragraph, the file
	* that contains the paragraph and the id (name) of the field that denotes
	* the data that is to be extracted.
	*
	* @param officeFile
	* the file that contains the paragraph
	* @param paragraph
	* a Word paragraph
	* @param fieldName
	* the name of the field that denotes the data that is to be
	* extracted from the paragraph
	*/
	public CapraWordRequirement(File officeFile, XWPFParagraph paragraph, String fieldName) {
	// TODO This solution assumes that there is only one requirement per
	// paragraph. Should it be different?
	super();

	String rText = "";
	String rId = "";

	CTP pCtp = paragraph.getCTP();
	Document doc;
	try {
	doc = loadXMLFromString(pCtp.toString());
	} catch (Exception e) {
	e.printStackTrace();
	return;
	}

	NodeList nodeList = doc.getElementsByTagName(FIELD_TAG);
	if (nodeList.getLength() > 0) {
	// TODO Use a for loop if the solution needs to parse multiple
	// requirements in a single paragraph. In that case,
	// paragraph.getText() should be replaced with something from the
	// org.w3c.dom.Document class.
	String[] parts = nodeList.item(0).getTextContent().split(WORD_FIELD_SPLIT_DELIMITERS);
	if (Arrays.asList(parts).contains(fieldName) && parts.length > 2) {
	rText = paragraph.getText();
	rId = parts[2].trim();
	}
	}

	rText = rText.replaceAll(LINE_BREAKS_AND_CONTROL_REGEX, " ").trim();
	if (!rText.isEmpty()) {
	rText = "ID " + rId + ": " + rText;
	String pUri = CapraOfficeObject.createUri(officeFile, rId);

	this.setData(rText);
	this.setUri(pUri);
	}
	}

	private Document loadXMLFromString(String xml) throws Exception {
	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	DocumentBuilder builder = factory.newDocumentBuilder();
	InputSource is = new InputSource(new StringReader(xml));
	return builder.parse(is);
	}
	}