bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/parser/XMLSourceParser.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2001, 2004 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *     Jens Lukowski/Innoopract - initial renaming/restructuring
  *
  *******************************************************************************/
 package org.eclipse.wst.xml.core.internal.parser;


 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.List;

 import org.eclipse.jface.text.BadLocationException;
 import org.eclipse.jface.text.IDocument;
 import org.eclipse.wst.sse.core.internal.document.DocumentReader;
 import org.eclipse.wst.sse.core.internal.ltk.parser.BlockMarker;
 import org.eclipse.wst.sse.core.internal.ltk.parser.BlockTagParser;
 import org.eclipse.wst.sse.core.internal.ltk.parser.BlockTokenizer;
 import org.eclipse.wst.sse.core.internal.ltk.parser.RegionParser;
 import org.eclipse.wst.sse.core.internal.ltk.parser.StructuredDocumentRegionHandler;
 import org.eclipse.wst.sse.core.internal.ltk.parser.StructuredDocumentRegionParser;
 import org.eclipse.wst.sse.core.internal.ltk.parser.StructuredDocumentRegionParserExtension;
 import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
 import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
 import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionContainer;
 import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
 import org.eclipse.wst.sse.core.internal.text.CharSequenceReader;
 import org.eclipse.wst.sse.core.internal.text.IRegionComparible;
 import org.eclipse.wst.sse.core.internal.util.Debug;
 import org.eclipse.wst.xml.core.internal.Logger;
 import org.eclipse.wst.xml.core.internal.regions.DOMRegionContext;


 /**
  * Takes the ITextRegions produced by a BlockTokenizer (an XMLTokenizer
  * unless another tokenizer has been set) and groups them into a linked list
  * of IStructuredDocumentRegions. Registered
  * StructuredDocumentRegionHandlers are notified of each completed
  * IStructuredDocumentRegion while parsing proceeds.
  */
 public class XMLSourceParser implements RegionParser, BlockTagParser, StructuredDocumentRegionParser, IRegionComparible, StructuredDocumentRegionParserExtension {
 	// made public to aid access from inner classes in hierarchy.
 	// TODO: in future, figure out how to solve without exposing data.
 	/** Current input when it is available as a CharSequence, else null. */
 	public CharSequence fCharSequenceSource = null;
 	/** Current input when it is an IDocument that is not a CharSequence. */
 	private IDocument fDocumentInput;
 	/**
 	 * Position handed to reset(Reader, int); added to the requested offset
 	 * when getText reads from fCharSequenceSource.
 	 */
 	protected int fOffset = 0;
 	// DMW: 2/12/03. Some per-parse state (current node, node list, region
 	// list, raw input object) was removed since it was not really needed and
 	// added a lot of overhead given how many regions are created.
 	/** Current input when it was supplied as a String, else null. */
 	protected String fStringInput = null;
 	/** Handlers notified of each parsed IStructuredDocumentRegion. */
 	protected List fStructuredDocumentRegionHandlers;

 	/** Lazily created by getTokenizer(). */
 	protected BlockTokenizer fTokenizer = null;
 	// timing values, only written when Debug.perfTest is set
 	protected long startTime;
 	protected long stopTime;

 	/**
 	 * Creates a parser with an empty handler list. The tokenizer is created
 	 * on first use.
 	 */
 	public XMLSourceParser() {
 		super();
 		fStructuredDocumentRegionHandlers = new ArrayList();
 	}

 	/**
 	 * This is a simple utility to count nodes. Used only for debug
 	 * statements.
 	 * 
 	 * @param nodes
 	 *            first node of the chain to count, may be null
 	 * @return number of nodes reachable from nodes through getNext()
 	 */
 	protected int _countNodes(IStructuredDocumentRegion nodes) {
 		int result = 0;
 		IStructuredDocumentRegion countNode = nodes;
 		while (countNode != null) {
 			result++;
 			countNode = countNode.getNext();
 		}
 		return result;
 	}

 	/**
 	 * Registers a block marker with the tokenizer.
 	 */
 	public void addBlockMarker(BlockMarker marker) {
 		getTokenizer().addBlockMarker(marker);
 	}

 	/**
 	 * Adds a handler to be notified of each parsed
 	 * IStructuredDocumentRegion. The handler list is created if it was
 	 * previously set to null.
 	 */
 	public void addStructuredDocumentRegionHandler(StructuredDocumentRegionHandler handler) {
 		if (fStructuredDocumentRegionHandlers == null)
 			fStructuredDocumentRegionHandlers = new ArrayList();
 		fStructuredDocumentRegionHandlers.add(handler);
 	}

 	/**
 	 * Tells the tokenizer to begin a block tag scan for the given tag name.
 	 */
 	public void beginBlockScan(String newTagName) {
 		getTokenizer().beginBlockTagScan(newTagName);
 	}

 	/**
 	 * Creates the IStructuredDocumentRegion that will hold regions of the
 	 * given type: a block region for BLOCK_TEXT, a plain XML region for
 	 * everything else.
 	 * 
 	 * @param type
 	 *            region type constant from DOMRegionContext
 	 * @return IStructuredDocumentRegion
 	 */
 	protected IStructuredDocumentRegion createStructuredDocumentRegion(String type) {
 		// region types are compared by identity throughout this class; the
 		// type is expected to be the DOMRegionContext constant itself
 		if (type == DOMRegionContext.BLOCK_TEXT)
 			return XMLStructuredRegionFactory.createRegion(XMLStructuredRegionFactory.XML_BLOCK);
 		return XMLStructuredRegionFactory.createRegion(XMLStructuredRegionFactory.XML);
 	}

 	/**
 	 * Notifies every registered handler that the given node has been
 	 * parsed. Does nothing when the node or the handler list is null.
 	 * 
 	 * @param fCurrentNode
 	 *            the node that was just completed, may be null
 	 */
 	protected void fireNodeParsed(IStructuredDocumentRegion fCurrentNode) {
 		if (fCurrentNode == null || fStructuredDocumentRegionHandlers == null)
 			return;
 		for (int i = 0; i < fStructuredDocumentRegionHandlers.size(); i++) {
 			// never let an Exception from foreign code interfere with
 			// completion of parsing. To get an exception here is definitely
 			// a program error somewhere, but we can't afford to interrupt
 			// the flow of control, or backwards typing can result! Each
 			// handler is guarded on its own so that one failing handler does
 			// not keep the remaining handlers from seeing the node.
 			try {
 				((StructuredDocumentRegionHandler) fStructuredDocumentRegionHandlers.get(i)).nodeParsed(fCurrentNode);
 			}
 			catch (Exception e) {
 				// log the exception itself so the stack trace is not lost
 				Logger.logException(getClass().getName() + ": exception from a StructuredDocumentRegionHandler while reporting a parsed node", e); //$NON-NLS-1$
 			}
 		}
 	}

 	/**
 	 * Finds the tokenizer's block marker for a tag name, honoring each
 	 * marker's own case sensitivity.
 	 * 
 	 * @param tagName
 	 *            tag name to look up
 	 * @return the first matching marker, or null if none matches
 	 */
 	public BlockMarker getBlockMarker(String tagName) {
 		List markers = getTokenizer().getBlockMarkers();
 		for (int i = 0; i < markers.size(); i++) {
 			BlockMarker marker = (BlockMarker) markers.get(i);
 			boolean matches;
 			if (marker.isCaseSensitive())
 				matches = marker.getTagName().equals(tagName);
 			else
 				matches = marker.getTagName().equalsIgnoreCase(tagName);
 			if (matches)
 				return marker;
 		}
 		return null;
 	}

 	/**
 	 * @return the tokenizer's list of block markers
 	 */
 	public List getBlockMarkers() {
 		return getTokenizer().getBlockMarkers();
 	}

 	/**
 	 * Parses the current input and returns the first node of the resulting
 	 * chain. When Debug.perfTest is set, timing information is printed to
 	 * System.out.
 	 * 
 	 * @return IStructuredDocumentRegion the head node, or null if no region
 	 *         was produced
 	 */
 	public IStructuredDocumentRegion getDocumentRegions() {
 		if (Debug.perfTest) {
 			startTime = System.currentTimeMillis();
 		}
 		IStructuredDocumentRegion headnode = parseNodes();
 		if (Debug.perfTest) {
 			stopTime = System.currentTimeMillis();
 			System.out.println(" -- creating nodes of IStructuredDocument -- "); //$NON-NLS-1$
 			System.out.println(" Time parse and init all regions: " + (stopTime - startTime) + " (msecs)"); //$NON-NLS-2$//$NON-NLS-1$
 			System.out.println("      and " + _countNodes(headnode) + " Nodes"); //$NON-NLS-2$//$NON-NLS-1$
 		}
 		return headnode;
 	}

 	/**
 	 * Fetches the next token from the tokenizer.
 	 * 
 	 * @return the next region, or null when the tokenizer returned null or
 	 *         threw an Exception (which is logged); a StackOverflowError is
 	 *         logged and rethrown
 	 */
 	protected ITextRegion getNextRegion() {
 		try {
 			return getTokenizer().getNextToken();
 		}
 		catch (StackOverflowError e) {
 			Logger.logException(getClass().getName() + ": input could not be parsed correctly at position " + getTokenizer().getOffset(), e); //$NON-NLS-1$
 			throw e;
 		}
 		catch (Exception e) {
 			Logger.logException(getClass().getName() + ": input could not be parsed correctly at position " + getTokenizer().getOffset() + " (" + e.getLocalizedMessage() + ")", e); //$NON-NLS-3$//$NON-NLS-2$//$NON-NLS-1$
 		}
 		return null;
 	}

 	/**
 	 * Return the full list of known regions. Typically getDocumentRegions
 	 * should be used instead of this method. The input is parsed first
 	 * unless the tokenizer already reports EOF, in which case the list is
 	 * empty. The parser is reset before returning.
 	 */
 	public List getRegions() {
 		IStructuredDocumentRegion headNode = null;
 		if (!getTokenizer().isEOF()) {
 			headNode = getDocumentRegions();
 		}
 		// for memory recovery, we assume if someone
 		// requests all regions, we can reset our big
 		// memory consuming objects
 		// but the new "getRegions" method is then more expensive.
 		// I don't think its used much, though.
 		List localRegionsList = getRegions(headNode);
 		primReset();
 		return localRegionsList;
 	}

 	/**
 	 * Flattens a chain of nodes into a single list of their regions, in
 	 * order.
 	 * 
 	 * @param headNode
 	 *            first node of the chain, may be null
 	 * @return List of ITextRegion, empty when headNode is null
 	 */
 	protected List getRegions(IStructuredDocumentRegion headNode) {
 		List allRegions = new ArrayList();
 		for (IStructuredDocumentRegion node = headNode; node != null; node = node.getNext()) {
 			ITextRegionList nodeRegions = node.getRegions();
 			for (int i = 0; i < nodeRegions.size(); i++) {
 				allRegions.add(nodeRegions.get(i));
 			}
 		}
 		return allRegions;
 	}

 	/**
 	 * Returns the live handler list, creating an empty one if it is
 	 * currently null.
 	 * 
 	 * @return java.util.List
 	 */
 	public List getStructuredDocumentRegionHandlers() {
 		if (fStructuredDocumentRegionHandlers == null) {
 			fStructuredDocumentRegionHandlers = new ArrayList(0);
 		}
 		return fStructuredDocumentRegionHandlers;
 	}

 	/**
 	 * Returns text from the current input. Text is only valid before
 	 * parsing has completed (the input references are cleared by
 	 * primReset) and only when a raw String, a CharSequence-backed reader or
 	 * a DocumentReader is given as the input.
 	 * 
 	 * @param offset
 	 *            start of the requested text; for a CharSequence source the
 	 *            reset position (fOffset) is added to it
 	 * @param length
 	 *            number of characters requested
 	 * @return the requested text, or an empty string when the range is not
 	 *         valid for the current input or no input is set
 	 */
 	public String getText(int offset, int length) {
 		String text = null;
 		if (fCharSequenceSource != null) {
 			int start = fOffset + offset;
 			// same "empty string on a bad range" contract as the other two
 			// input kinds, instead of an IndexOutOfBoundsException
 			if (start < 0 || length < 0 || length > fCharSequenceSource.length() - start) {
 				text = ""; //$NON-NLS-1$
 			}
 			else {
 				text = fCharSequenceSource.subSequence(start, start + length).toString();
 			}
 		}
 		else if (fDocumentInput != null) {
 			try {
 				text = fDocumentInput.get(offset, length);
 			}
 			catch (BadLocationException e) {
 				text = ""; //$NON-NLS-1$
 			}
 		}
 		else {
 			if (fStringInput == null || offset < 0 || length < 0 || length > fStringInput.length() - offset) {
 				text = ""; //$NON-NLS-1$
 			}
 			else {
 				// offset is entirely valid during parsing as the parse
 				// numbers haven't been adjusted.
 				text = fStringInput.substring(offset, offset + length);
 			}
 		}
 		return text;
 	}

 	/**
 	 * @return the tokenizer in use; an XMLTokenizer is created on first
 	 *         access if none has been set
 	 */
 	protected BlockTokenizer getTokenizer() {
 		if (fTokenizer == null) {
 			fTokenizer = new XMLTokenizer();
 		}
 		return fTokenizer;
 	}


 	/**
 	 * Creates a new XMLSourceParser whose tokenizer is a new instance
 	 * obtained from this parser's tokenizer. Handlers are not copied.
 	 */
 	public RegionParser newInstance() {
 		XMLSourceParser newInstance = new XMLSourceParser();
 		newInstance.setTokenizer(getTokenizer().newInstance());
 		return newInstance;
 	}

 	/**
 	 * Pulls regions from the tokenizer until none remain and assembles them
 	 * into a doubly linked chain of IStructuredDocumentRegions, firing
 	 * nodeParsed for each node once the following node is opened (and for
 	 * the last node at the end). The parser is reset before returning.
 	 * 
 	 * NOTE(review): the branches that only add to or close a node, and the
 	 * WHITE_SPACE branch, dereference currentNode without a null check;
 	 * presumably the tokenizer never emits those types as the very first
 	 * region -- confirm before relying on it.
 	 * 
 	 * @return the first node of the chain, or null if no region was read
 	 */
 	protected IStructuredDocumentRegion parseNodes() {
 		// regions are initially reported as complete offsets within the
 		// scanned input
 		// they are adjusted here to be indexes from the currentNode's start
 		// offset
 		IStructuredDocumentRegion headNode = null;
 		IStructuredDocumentRegion lastNode = null;
 		ITextRegion region = null;
 		IStructuredDocumentRegion currentNode = null;
 		String type = null;

 		while ((region = getNextRegion()) != null) {
 			type = region.getType();
 			// these types (might) demand a IStructuredDocumentRegion for each
 			// of them
 			if (type == DOMRegionContext.BLOCK_TEXT) {
 				if (currentNode != null && currentNode.getLastRegion().getType() == DOMRegionContext.BLOCK_TEXT) {
 					// multiple block texts indicated embedded containers; no
 					// new IStructuredDocumentRegion
 					currentNode.addRegion(region);
 					currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
 					region.adjustStart(-currentNode.getStart());
 				}
 				else {
 					// not continuing a IStructuredDocumentRegion
 					if (currentNode != null) {
 						// ensure that any existing node is at least
 						// terminated
 						if (!currentNode.isEnded()) {
 							currentNode.setLength(region.getStart() - currentNode.getStart());
 						}
 						lastNode = currentNode;
 					}
 					fireNodeParsed(currentNode);
 					currentNode = createStructuredDocumentRegion(type);
 					if (lastNode != null) {
 						lastNode.setNext(currentNode);
 					}
 					currentNode.setPrevious(lastNode);
 					currentNode.setStart(region.getStart());
 					currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
 					// a block text node is complete as soon as it is created
 					currentNode.setEnded(true);
 					region.adjustStart(-currentNode.getStart());
 					currentNode.addRegion(region);
 				}
 			}
 			// the following contexts OPEN new StructuredDocumentRegions
 			// (as does any region arriving after the current node has ended)
 			else if ((currentNode != null && currentNode.isEnded()) || (type == DOMRegionContext.XML_CONTENT) || (type == DOMRegionContext.XML_CHAR_REFERENCE) || (type == DOMRegionContext.XML_ENTITY_REFERENCE) || (type == DOMRegionContext.XML_PI_OPEN) || (type == DOMRegionContext.XML_TAG_OPEN) || (type == DOMRegionContext.XML_END_TAG_OPEN) || (type == DOMRegionContext.XML_COMMENT_OPEN) || (type == DOMRegionContext.XML_CDATA_OPEN) || (type == DOMRegionContext.XML_DECLARATION_OPEN)) {
 				if (currentNode != null) {
 					// ensure that any existing node is at least terminated
 					if (!currentNode.isEnded()) {
 						currentNode.setLength(region.getStart() - currentNode.getStart());
 					}
 					lastNode = currentNode;
 				}
 				fireNodeParsed(currentNode);
 				currentNode = createStructuredDocumentRegion(type);
 				if (lastNode != null) {
 					lastNode.setNext(currentNode);
 				}
 				currentNode.setPrevious(lastNode);
 				currentNode.setStart(region.getStart());
 				currentNode.addRegion(region);
 				currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
 				region.adjustStart(-currentNode.getStart());
 			}
 			// the following contexts neither open nor close
 			// StructuredDocumentRegions; just add to them
 			else if ((type == DOMRegionContext.XML_TAG_NAME) || (type == DOMRegionContext.XML_TAG_ATTRIBUTE_NAME) || (type == DOMRegionContext.XML_TAG_ATTRIBUTE_EQUALS) || (type == DOMRegionContext.XML_TAG_ATTRIBUTE_VALUE) || (type == DOMRegionContext.XML_COMMENT_TEXT) || (type == DOMRegionContext.XML_PI_CONTENT) || (type == DOMRegionContext.XML_DOCTYPE_INTERNAL_SUBSET)) {
 				currentNode.addRegion(region);
 				currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
 				region.adjustStart(-currentNode.getStart());
 			}
 			// the following contexts close off StructuredDocumentRegions
 			// cleanly
 			else if ((type == DOMRegionContext.XML_PI_CLOSE) || (type == DOMRegionContext.XML_TAG_CLOSE) || (type == DOMRegionContext.XML_EMPTY_TAG_CLOSE) || (type == DOMRegionContext.XML_COMMENT_CLOSE) || (type == DOMRegionContext.XML_DECLARATION_CLOSE) || (type == DOMRegionContext.XML_CDATA_CLOSE)) {
 				currentNode.setEnded(true);
 				currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
 				currentNode.addRegion(region);
 				region.adjustStart(-currentNode.getStart());
 			}
 			// this is extremely rare, but valid
 			else if (type == DOMRegionContext.WHITE_SPACE) {
 				ITextRegion lastRegion = currentNode.getLastRegion();
 				// pack the embedded container with this region
 				if (lastRegion instanceof ITextRegionContainer) {
 					ITextRegionContainer container = (ITextRegionContainer) lastRegion;
 					container.getRegions().add(region);
 					// containers must have parent set ...
 					// setting for EACH subregion is redundant, but not sure
 					// where else to do, so will do here for now.
 					container.setParent(currentNode);
 					// make the region's start relative to the container: it
 					// becomes the container's current length
 					region.adjustStart(container.getLength() - region.getStart());
 				}
 				// the white space is absorbed into the last region, so both
 				// that region and the node grow by its length
 				currentNode.getLastRegion().adjustLength(region.getLength());
 				currentNode.adjustLength(region.getLength());
 			}
 			else if (type == DOMRegionContext.UNDEFINED && currentNode != null) {
 				// skip on a very-first region situation as the default
 				// behavior is good enough
 				// combine with previous if also undefined
 				if (currentNode.getLastRegion() != null && currentNode.getLastRegion().getType() == DOMRegionContext.UNDEFINED) {
 					currentNode.getLastRegion().adjustLength(region.getLength());
 					currentNode.adjustLength(region.getLength());
 				}
 				// previous wasn't undefined
 				else {
 					currentNode.addRegion(region);
 					currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
 					region.adjustStart(-currentNode.getStart());
 				}
 			}
 			else {
 				// if an unknown type is the first region in the document,
 				// ensure that a node exists
 				if (currentNode == null) {
 					currentNode = createStructuredDocumentRegion(type);
 					currentNode.setStart(region.getStart());
 				}
 				currentNode.addRegion(region);
 				currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
 				region.adjustStart(-currentNode.getStart());
 				if (Debug.debugTokenizer)
 					System.out.println(getClass().getName() + " found region of not specifically handled type " + region.getType() + " @ " + region.getStart() + "[" + region.getLength() + "]"); //$NON-NLS-4$//$NON-NLS-3$//$NON-NLS-2$//$NON-NLS-1$
 			}

 			// these regions also get their own node, so close them cleanly
 			// NOTE: these regions have new StructuredDocumentRegions created
 			// for them above; it may
 			// be more readable if that is handled here as well, but the
 			// current layout
 			// ensures that they open StructuredDocumentRegions the same way
 			if ((type == DOMRegionContext.XML_CONTENT) || (type == DOMRegionContext.XML_CHAR_REFERENCE) || (type == DOMRegionContext.XML_ENTITY_REFERENCE)) {
 				currentNode.setEnded(true);
 			}
 			// remember the first node ever created
 			if (headNode == null && currentNode != null) {
 				headNode = currentNode;
 			}
 		}
 		// the last node has no successor to trigger its notification
 		if (currentNode != null) {
 			fireNodeParsed(currentNode);
 			currentNode.setPrevious(lastNode);
 		}
 		primReset();
 		return headNode;
 	}

 	/**
 	 * Drops all references to the current input, zeroes the offset and
 	 * resets the tokenizer with an empty buffer.
 	 */
 	protected void primReset() {
 		fStringInput = null;
 		fCharSequenceSource = null;
 		fDocumentInput = null;
 		fOffset = 0;
 		// DMW: also reset tokenizer so it doesn't hold on
 		// to large arrays
 		getTokenizer().reset(new char[0]);
 	}

 	/**
 	 * Extracts the text used by the fallback paths of regionMatches and
 	 * regionMatchesIgnoreCase. Unlike getText, the offset is used as given
 	 * (fOffset is not added) and an IDocument input is not consulted.
 	 * 
 	 * @return the text at [offset, offset + length) of the CharSequence
 	 *         source or, failing that, the String input; null when neither
 	 *         is set or the range lies outside the input
 	 */
 	private String primGetComparisonText(int offset, int length) {
 		if (offset < 0 || length < 0)
 			return null;
 		if (fCharSequenceSource != null) {
 			if (length > fCharSequenceSource.length() - offset)
 				return null;
 			return fCharSequenceSource.subSequence(offset, offset + length).toString();
 		}
 		if (fStringInput != null) {
 			if (length > fStringInput.length() - offset)
 				return null;
 			return fStringInput.substring(offset, offset + length);
 		}
 		return null;
 	}

 	/*
 	 * (non-Javadoc)
 	 *
 	 * @see org.eclipse.wst.sse.core.internal.text.IRegionComparible#regionMatches(int,
 	 *      int, java.lang.String)
 	 */
 	public boolean regionMatches(int offset, int length, String stringToCompare) {
 		// by definition
 		if (stringToCompare == null)
 			return false;

 		// let the source do the comparison itself when it knows how
 		if (fCharSequenceSource instanceof IRegionComparible) {
 			return ((IRegionComparible) fCharSequenceSource).regionMatches(offset, length, stringToCompare);
 		}
 		// old fashioned ways; a null text (no input, or range out of
 		// bounds) never matches
 		return stringToCompare.equals(primGetComparisonText(offset, length));
 	}

 	/**
 	 * Case-insensitive variant of regionMatches.
 	 * 
 	 * @return true if the text at the given range equals stringToCompare,
 	 *         ignoring case; false when stringToCompare is null, no input is
 	 *         available or the range lies outside the input
 	 */
 	public boolean regionMatchesIgnoreCase(int offset, int length, String stringToCompare) {
 		// by definition
 		if (stringToCompare == null)
 			return false;

 		// let the source do the comparison itself when it knows how
 		if (fCharSequenceSource instanceof IRegionComparible) {
 			return ((IRegionComparible) fCharSequenceSource).regionMatchesIgnoreCase(offset, length, stringToCompare);
 		}
 		// old fashioned ways; a null text (no input, or range out of
 		// bounds) never matches
 		return stringToCompare.equalsIgnoreCase(primGetComparisonText(offset, length));
 	}

 	/**
 	 * Removes the given block marker from the tokenizer.
 	 */
 	public void removeBlockMarker(BlockMarker marker) {
 		getTokenizer().removeBlockMarker(marker);
 	}

 	/**
 	 * Removes the block marker for the given tag name from the tokenizer.
 	 */
 	public void removeBlockMarker(String tagName) {
 		getTokenizer().removeBlockMarker(tagName);
 	}

 	/**
 	 * Removes a previously added handler; does nothing if the handler is
 	 * not registered or the handler list is null.
 	 */
 	public void removeStructuredDocumentRegionHandler(StructuredDocumentRegionHandler handler) {
 		if (fStructuredDocumentRegionHandlers == null)
 			return;
 		// List.remove(Object) is already a no-op when the element is absent
 		fStructuredDocumentRegionHandlers.remove(handler);
 	}

 	/**
 	 * Resets the input to the given stream. Text retrieval through getText
 	 * is not available for this kind of input.
 	 */
 	public void reset(java.io.FileInputStream instream) {
 		primReset();
 		getTokenizer().reset(instream);
 	}

 	/**
 	 * Resets the input to the given reader, starting at position 0.
 	 */
 	public void reset(java.io.Reader reader) {
 		reset(reader, 0);
 	}

 	/**
 	 * Resets the input to the given reader and position. When the reader is
 	 * a DocumentReader or a CharSequenceReader, its underlying source is
 	 * remembered so that text can be retrieved during parsing.
 	 */
 	public void reset(java.io.Reader reader, int position) {
 		primReset();
 		fOffset = position;
 		getTokenizer().reset(reader, position);
 		if (reader instanceof DocumentReader) {
 			IDocument doc = ((DocumentReader) reader).getDocument();
 			if (doc instanceof CharSequence) {
 				fCharSequenceSource = (CharSequence) doc;
 			}
 			else {
 				// old fashioned IDocument
 				fDocumentInput = doc;
 			}
 		}
 		else if (reader instanceof CharSequenceReader) {
 			fCharSequenceSource = ((CharSequenceReader) reader).getOriginalSource();
 		}
 	}

 	/**
 	 * Resets the input. Use this version to allow text to be retrieved
 	 * <em>during</em> parsing, such as by the
 	 * StructuredDocumentRegionHandler.
 	 */
 	public void reset(String sourceString) {
 		reset(new StringReader(sourceString));
 		// must be assigned after the reader reset, which clears it
 		fStringInput = sourceString;
 	}

 	/**
 	 * Resets the input. Use this version to allow text to be retrieved
 	 * <em>during</em> parsing, such as by the
 	 * StructuredDocumentRegionHandler.
 	 */
 	public void reset(String sourceString, int position) {
 		StringReader reader = new StringReader(sourceString);
 		reset(reader, position);
 		// must be assigned after the reader reset, which clears it
 		fStringInput = sourceString;
 	}

 	/**
 	 * Calls resetNodes() on every registered handler.
 	 */
 	public void resetHandlers() {
 		if (fStructuredDocumentRegionHandlers != null) {
 			int size = fStructuredDocumentRegionHandlers.size();
 			for (int i = 0; i < size; i++)
 				((StructuredDocumentRegionHandler) fStructuredDocumentRegionHandlers.get(i)).resetNodes();
 		}
 	}

 	/**
 	 * Replaces the handler list with the given list; the list is stored as
 	 * is, not copied.
 	 * 
 	 * @param newStructuredDocumentRegionHandlers
 	 *            list of StructuredDocumentRegionHandler, may be null
 	 */
 	public void setStructuredDocumentRegionHandlers(List newStructuredDocumentRegionHandlers) {
 		fStructuredDocumentRegionHandlers = newStructuredDocumentRegionHandlers;
 	}

 	/**
 	 * Sets the tokenizer to use.
 	 */
 	protected void setTokenizer(BlockTokenizer newTokenizer) {
 		// DMW: changed from private to protected, so subclass could use in
 		// creation of 'newInstance'.
 		fTokenizer = newTokenizer;
 	}
 }