bundles/org.eclipse.capra.ui.office/src/org/eclipse/capra/ui/office/model/CapraWordRequirement.java - capra/org.eclipse.capra - Git at Google

 /*******************************************************************************
  * Copyright (c) 2016, 2019 Chalmers | University of Gothenburg, rt-labs and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v2.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v20.html
  *
  * SPDX-License-Identifier: EPL-2.0
  *
  * Contributors:
  *      Chalmers | University of Gothenburg and rt-labs - initial API and implementation and/or initial documentation
  *      Chalmers | University of Gothenburg - additional features, updated API
  *******************************************************************************/

 package org.eclipse.capra.ui.office.model;

 import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;

 import javax.xml.parsers.ParserConfigurationException;

 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.eclipse.capra.ui.office.utils.CapraOfficeUtils;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;

 /**
  * This class extends the CapraOfficeObject and provides an object to describe a
  * single MS Word requirement, which is defined with a specific field.
  *
  * @author Dusan Kalanj
  *
  */
 public class CapraWordRequirement extends CapraOfficeObject {

 	private static final Logger LOG = LoggerFactory.getLogger(CapraWordRequirement.class);

 	/**
 	 * RegEx of characters (tabs, newlines, carriage returns and invisible
 	 * control characters) to be replaced with white-spaces in the Office View.
 	 */
 	private static final String LINE_BREAKS_AND_CONTROL_REGEX = "[\r\n\t\\p{C}]+";

 	/**
 	 * Regex of characters to be used as delimiters when splitting the field
 	 * contents.
 	 */
 	private static final String WORD_FIELD_SPLIT_DELIMITERS = "(\")|(\\\\\\*)";

 	/**
 	 * Start and end XML tags of MS Word field commands
 	 */
 	private static final String FIELD_TAG = "w:instrText";

 	/**
 	 * A constructor that generates a new instance of CapraWordRequirement where
 	 * the parent properties are extracted from the provided paragraph, the file
 	 * that contains the paragraph and the id (name) of the field that denotes
 	 * the data that is to be extracted.
 	 *
 	 * @param officeFile
 	 *            the file that contains the paragraph
 	 * @param paragraph
 	 *            a Word paragraph
 	 * @param fieldName
 	 *            the name of the field that denotes the data that is to be
 	 *            extracted from the paragraph
 	 */
 	public CapraWordRequirement(File officeFile, XWPFParagraph paragraph, String fieldName) {
 		// TODO This solution assumes that there is only one requirement per
 		// paragraph. Should it be different?
 		super();

 		String rText = "";
 		String rId = "";

 		CTP pCtp = paragraph.getCTP();
 		Document doc;
 		try {
 			doc = CapraOfficeUtils.createDOMDocument(pCtp.toString());
 		} catch (IOException e) {
 			LOG.info("Could not create DOM document: error reading file.", e);
 			return;
 		} catch (ParserConfigurationException e) {
 			LOG.info("Could not create DOM document: parser not configured properly.", e);
 			return;
 		} catch (SAXException e) {
 			LOG.info("Could not create DOM document: malformed XML.", e);
 			return;
 		}

 		// Get all nodes from the paragraph (there should be just one node if
 		// the TODO bellow isn't implemented)
 		NodeList nodeList = doc.getElementsByTagName(FIELD_TAG);
 		if (nodeList.getLength() > 0) {
 			// TODO Use a for loop if the solution needs to parse multiple
 			// requirements in a single paragraph. In that case,
 			// paragraph.getText() should be replaced with something from the
 			// org.w3c.dom.Document class.
 			String[] parts = nodeList.item(0).getTextContent().split(WORD_FIELD_SPLIT_DELIMITERS);
 			// Extract text from the paragraph and the ID of the requirement.
 			if (Arrays.asList(parts).contains(fieldName) && parts.length > 2) {
 				rText = paragraph.getText();
 				rId = parts[2].trim();
 			}
 		}

 		rText = rText.replaceAll(LINE_BREAKS_AND_CONTROL_REGEX, " ").trim();
 		// Set the data and uri properties of the CapraOfficeObject
 		if (!rText.isEmpty()) {
 			rText = "ID " + rId + ": " + rText;
 			String pUri = createUri(officeFile.getAbsolutePath(), rId);

 			this.setData(rText);
 			this.setUri(pUri);
 		}
 	}
 }
	/*******************************************************************************
	* Copyright (c) 2016, 2019 Chalmers \| University of Gothenburg, rt-labs and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v2.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v20.html
	*
	* SPDX-License-Identifier: EPL-2.0
	*
	* Contributors:
	* Chalmers \| University of Gothenburg and rt-labs - initial API and implementation and/or initial documentation
	* Chalmers \| University of Gothenburg - additional features, updated API
	*******************************************************************************/

	package org.eclipse.capra.ui.office.model;

	import java.io.File;
	import java.io.IOException;
	import java.util.Arrays;

	import javax.xml.parsers.ParserConfigurationException;

	import org.apache.poi.xwpf.usermodel.XWPFParagraph;
	import org.eclipse.capra.ui.office.utils.CapraOfficeUtils;
	import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;
	import org.w3c.dom.Document;
	import org.w3c.dom.NodeList;
	import org.xml.sax.SAXException;

	/**
	* This class extends the CapraOfficeObject and provides an object to describe a
	* single MS Word requirement, which is defined with a specific field.
	*
	* @author Dusan Kalanj
	*
	*/
	public class CapraWordRequirement extends CapraOfficeObject {

	private static final Logger LOG = LoggerFactory.getLogger(CapraWordRequirement.class);

	/**
	* RegEx of characters (tabs, newlines, carriage returns and invisible
	* control characters) to be replaced with white-spaces in the Office View.
	*/
	private static final String LINE_BREAKS_AND_CONTROL_REGEX = "[\r\n\t\\p{C}]+";

	/**
	* Regex of characters to be used as delimiters when splitting the field
	* contents.
	*/
	private static final String WORD_FIELD_SPLIT_DELIMITERS = "(\")\|(\\\\\\*)";

	/**
	* Start and end XML tags of MS Word field commands
	*/
	private static final String FIELD_TAG = "w:instrText";

	/**
	* A constructor that generates a new instance of CapraWordRequirement where
	* the parent properties are extracted from the provided paragraph, the file
	* that contains the paragraph and the id (name) of the field that denotes
	* the data that is to be extracted.
	*
	* @param officeFile
	* the file that contains the paragraph
	* @param paragraph
	* a Word paragraph
	* @param fieldName
	* the name of the field that denotes the data that is to be
	* extracted from the paragraph
	*/
	public CapraWordRequirement(File officeFile, XWPFParagraph paragraph, String fieldName) {
	// TODO This solution assumes that there is only one requirement per
	// paragraph. Should it be different?
	super();

	String rText = "";
	String rId = "";

	CTP pCtp = paragraph.getCTP();
	Document doc;
	try {
	doc = CapraOfficeUtils.createDOMDocument(pCtp.toString());
	} catch (IOException e) {
	LOG.info("Could not create DOM document: error reading file.", e);
	return;
	} catch (ParserConfigurationException e) {
	LOG.info("Could not create DOM document: parser not configured properly.", e);
	return;
	} catch (SAXException e) {
	LOG.info("Could not create DOM document: malformed XML.", e);
	return;
	}

	// Get all nodes from the paragraph (there should be just one node if
	// the TODO bellow isn't implemented)
	NodeList nodeList = doc.getElementsByTagName(FIELD_TAG);
	if (nodeList.getLength() > 0) {
	// TODO Use a for loop if the solution needs to parse multiple
	// requirements in a single paragraph. In that case,
	// paragraph.getText() should be replaced with something from the
	// org.w3c.dom.Document class.
	String[] parts = nodeList.item(0).getTextContent().split(WORD_FIELD_SPLIT_DELIMITERS);
	// Extract text from the paragraph and the ID of the requirement.
	if (Arrays.asList(parts).contains(fieldName) && parts.length > 2) {
	rText = paragraph.getText();
	rId = parts[2].trim();
	}
	}

	rText = rText.replaceAll(LINE_BREAKS_AND_CONTROL_REGEX, " ").trim();
	// Set the data and uri properties of the CapraOfficeObject
	if (!rText.isEmpty()) {
	rText = "ID " + rId + ": " + rText;
	String pUri = createUri(officeFile.getAbsolutePath(), rId);

	this.setData(rText);
	this.setUri(pUri);
	}
	}
	}