ant/org.eclipse.ant.ui/Ant Editor/org/eclipse/ant/internal/ui/dtd/schema/NfmParser.java - platform/eclipse.platform - Git at Google

 /*******************************************************************************
  * Copyright (c) 2002, 2005 Object Factory Inc.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *		Object Factory Inc. - Initial implementation
  *******************************************************************************/
 package org.eclipse.ant.internal.ui.dtd.schema;

 import java.text.MessageFormat;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;

 import org.eclipse.ant.internal.ui.dtd.ParseError;
 import org.eclipse.ant.internal.ui.dtd.util.SortedMap;
 import org.eclipse.ant.internal.ui.dtd.util.SortedMapFactory;

 /**
  * NfmParser parses an NFA and returns an equivalent DFA if it can do so in
  * linear time and space (in terms of the original NFA).<p>
  *
  * As used here, NfmParser is called lazily when someone actually asks for a
  * Dfm. This is for performance reasons. Why go to the work of calculating all
  * those Dfms if nobody ever uses them?
  *
  * Well-formed errors in content models have already been detected. The only
  * error that can arise in NfmParser is an ambiguity error.
  *
  * Bruggemann-Klein showed that if an NFA is 1-unambiguous, an epsilon-free NFA
  * constructed from it in linear time is actually a DFA. This is obvious
  * in NfmParser. The algorithm works by removing all ambiguous transitions as
  * the graph is constructed, then proving that the reduced graph is equivalent
  * to the original in time linear in the number of ambiguous transitions.
  *
  * An effort is made to keep the DFA small, but there is no optimization
  * step, as DFAs are small anyway, with some linear inflation around *.
  * In a pathological case, like the classical (a*,a*,a*,..., a*) the number
  * of transitions in the DFA can be quadratic but this algorithm will not blow
  * up exponentially.
  *
  * @author Bob Foster
  */
 public class NfmParser {

 	public Dfm parse(Nfm nfm) throws ParseError {

 		// Parse nfm into dfm

 		Dfm dfm = parseStart(nfm.getStart(), nfm.getStop());

 		// Make list of dfms in graph

 		ArrayList dfms = new ArrayList();
 		collect(dfm, dfms);

 		// Detect accept conflicts

 		HashMap duplicates = new HashMap();
 		detect(dfms, duplicates);

 		// Replace duplicate dfms in graph

 		replace(dfms, duplicates);

 		// Allow nfm memory to be re-used

 		Nfm.free(nfm);
 		NfmNode.freeAll();

 		return dfm;
 	}

 	private void reportError(String name) throws ParseError {
 		throw new ParseError(MessageFormat.format(AntDTDSchemaMessages.NfmParser_Ambiguous, new String[]{name}));
 	}

 	public static void collect(Dfm dfm, List dfms) {
 		dfms.add(dfm);
 		collect1(dfm, dfms);
 	}

 	private static void collect1(Dfm dfm, List dfms) {
 		Object[] follows = dfm.getValues();
 		if (follows != null) {
 			for (int i = 0; i < follows.length; i++) {
 				Dfm follow = (Dfm) follows[i];
 				if (!dfms.contains(follow)) {
 					dfms.add(follow);
 					collect1(follow, dfms);
 				}
 			}
 		}
 	}


 	/**
 	 * Replace duplicate dfms found during conflict resolution.
 	 */
 	private void replace(ArrayList dfms, HashMap removed) {
 		for (int i = 0; i < dfms.size(); i++) {
 			Dfm dfm = (Dfm) dfms.get(i);
 			Object[] follows = dfm.getValues();
 			if (follows != null) {
 				for (int j = 0; j < follows.length; j++) {
 					Dfm replacement, follow = (Dfm) follows[j];
 					while ((replacement = (Dfm) removed.get(follow)) != null)
 						follow = replacement;
 					follows[j] = follow;
 				}
 			}
 		}

 		// release dfms so can be re-used
 		Iterator rit = removed.keySet().iterator();
 		while (rit.hasNext())
 			Dfm.free((Dfm)rit.next());
 	}


 	/**
 	 * Detect conflicts in each state. Two transitions are a potential conflict
 	 * if they accept the same string value. They are an actual conflict if
 	 * their follow dfms are not identical and they are an actual ambiguity if
 	 * the transition atoms of the follow dfms are not pairwise identical.
 	 * This is derived from the rule of Bruggemann-Klein, which determines
 	 * that (a|a)b is not ambiguous, but both (a,b)|(a,c) and (a,b)|(a,b) are.
 	 * The latter might be surprising, but that's committee work for you.
 	 * If two transitions are not ambiguous, one can be removed without
 	 * affecting the language accepted, and thus we have converted our
 	 * epsilon-free NFA into a DFA. If any two transitions are ambiguous,
 	 * we report an error and our responsibility ends. Note that no transitions
 	 * can be removed until all have been checked; this might disguise the
 	 * ambiguity in, e.g., ((a|a),b,(a|a))*.
 	 */
 	private void detect(ArrayList dfms, HashMap duplicates) throws ParseError {
 		for (Iterator iter = dfms.iterator(); iter.hasNext();) {
 			Dfm dfm = (Dfm) iter.next();

 			Object[] accepts = dfm.getKeys();
 			Object[] follows = dfm.getValues();
 			if (accepts != null) {
 				String last = null;
 				for (int i = 0, lasti = -1; i < accepts.length; i++) {
 					String accept = accepts[i].toString();
 					// accepts strings are interned allowing identity comparison

 					if (last != null && last == accept) {
 						if (follows[i] != follows[lasti])
 							checkConflict(new Conflict(accept, (Dfm)follows[lasti], (Dfm)follows[i]));
 					}
 					else {
 						last = accept;
 						lasti = i;
 					}
 				}
 			}
 		}

 		// once more for removal

 		for (Iterator iter = dfms.iterator(); iter.hasNext();) {
 			Dfm dfm = (Dfm) iter.next();

 			// record conflicts
 			Object[] accepts = dfm.getKeys();
 			Object[] follows = dfm.getValues();
 			boolean remove = false;
 			if (accepts != null) {
 				boolean[] removes = new boolean[accepts.length];
 				String last = null;
 				for (int i = 0, lasti = -1; i < accepts.length; i++) {
 					String accept = accepts[i].toString();

 					if (last != null && last == accept) {
 						remove = true;
 						removes[i] = true;
 						if (follows[i] != follows[lasti]) {
 							Dfm dfmhi = (Dfm) follows[i];
 							Dfm dfmlo = (Dfm) follows[lasti];
 							if (dfmhi.id < dfmlo.id) {
 								Dfm tmp = dfmhi;
 								dfmhi = dfmlo;
 								dfmlo = tmp;
 							}
 							Dfm dup = (Dfm) duplicates.get(dfmhi);
 							if (dup == null || dfmlo.id < dup.id) {
 								duplicates.put(dfmhi, dfmlo);
 							} else {
 								duplicates.put(dfmlo, dup);
 							}
 						}
 					}
 					else {
 						last = accept;
 						lasti = i;
 					}
 				}

 				if (remove) {
 					SortedMap map = dfm.getMap();
 					int i = 0;
 					for (Iterator iterator = map.keyIterator(); iterator.hasNext(); i++) {
 						iterator.next();
 						if (removes[i])
 							iterator.remove();
 					}
 					SortedMapFactory.freeMap(map);
 				}
 			}
 		}
 	}

 	/**
 	 * Check conflict and report ambiguity.
 	 * @param conflict Potential ambiguity
 	 */
 	private void checkConflict(Conflict conflict) throws ParseError {
 		if (conflict.dfm1.accepting != conflict.dfm2.accepting) {
 			reportError(conflict.name);
 		}
 		Object[] accept1 = conflict.dfm1.getKeys();
 		Object[] accept2 = conflict.dfm2.getKeys();
 		if ((accept1 == null) != (accept2 == null)) {
 			reportError(conflict.name);
 		}
 		if (accept1 != null) {
 			if (accept1.length != accept2.length) {
 				reportError(conflict.name);
 			}
 			for (int j = 0; j < accept2.length; j++) {
 				if (accept1[j] != accept2[j]) {
 					reportError(conflict.name);
 				}
 			}
 		}
 	}

 	/**
 	 * Recursive parse that visits every node reachable from
 	 * the start symbol.
 	 */
 	private Dfm parseStart(NfmNode start, NfmNode accept) {
 		// mark the start node
 		Dfm result = Dfm.dfm(false);
 		start.dfm = result;

 		// we can minimize alias dfms by marking all starting transfer links
 		while (start.next1 != null && start.next2 == null && start.symbol == null) {
 			start = start.next1;
 			start.dfm = result;
 		}

 		Dfm parsed = parse(1, start, accept);
 		result.merge(parsed);

 		Dfm.free(parsed);

 		return result;
 	}

 	private void parseNext(int mark, Dfm result, NfmNode start, NfmNode accept) {
 		Dfm parsed = parse(mark+1, start, accept);
 		result.merge(parsed);

 		Dfm.free(parsed);
 	}

 	/**
 	 * Recursive parse that visits every node reachable from
 	 * the start symbol.
 	 */
 	private Dfm parse(int mark, NfmNode start, NfmNode accept) {

 		// eliminate useless recursion (note that accept node has no branches)
 		while (start.next1 != null && start.next2 == null && start.symbol == null)
 			start = start.next1;

 		// if we reached the accept node, return an empty dfm that accepts
 		if (start == accept)
 			return Dfm.dfm(true);

 		// for a symbol, construct a dfm that accepts the symbol
 		if (start.symbol != null) {
 			Dfm nextdfm = null;
 			NfmNode next = start.next1, snext = next;
 			while (snext.dfm == null && snext.next1 != null && snext.next2 == null && snext.symbol == null)
 				snext = snext.next1;
 			if (snext.dfm != null) {
 				for (NfmNode n = next; n != snext; n = n.next1)
 					n.dfm = snext.dfm;
 				nextdfm = snext.dfm;
 			}
 			else {
 				nextdfm = Dfm.dfm(false);
 				snext.dfm = nextdfm;
 				for (NfmNode n = next; n != snext; n = n.next1)
 					n.dfm = nextdfm;
 				parseNext(mark, nextdfm, snext, accept);
 			}
 			Dfm dfm = Dfm.dfm(start.symbol, nextdfm);
 			return dfm;
 		}

 		// otherwise, follow both branches and return the combined result
 		Dfm dfm1 = null, dfm2 = null;
 		int saveMark;
 		if (start.next1 != null && start.next1.mark != mark) {
 			saveMark = start.next1.mark;
 			start.next1.mark = mark;
 			dfm1 = parse(mark, start.next1, accept);
 			start.next1.mark = saveMark;
 		}
 		if (start.next2 != null && start.next2.mark != mark) {
 			saveMark = start.next2.mark;
 			start.next2.mark = mark;
 			dfm2 = parse(mark, start.next2, accept);
 			start.next2.mark = saveMark;
 		}

 		if (dfm2 != null) {
 			if (dfm1 != null)
 				dfm1.merge(dfm2);
 			else
 				dfm1 = dfm2;
 		}
 		return dfm1;
 	}

 	private static class Conflict {
 		public String name;
 		public Dfm dfm1, dfm2;
 		public Conflict(String name, Dfm dfm1, Dfm dfm2) {
 			this.name = name;
 			this.dfm1 = dfm1;
 			this.dfm2 = dfm2;
 		}
 		public int hashCode() {
 			return dfm1.hashCode() + dfm2.hashCode();
 		}
 		public boolean equals(Object o) {
 			if (o == this)
 				return true;
 			if (!(o instanceof Conflict))
 				return false;
 			Conflict other = (Conflict) o;
 			return (dfm1 == other.dfm1 && dfm2 == other.dfm2)
 				|| (dfm1 == other.dfm2 && dfm2 == other.dfm1);
 		}
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2002, 2005 Object Factory Inc.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* Object Factory Inc. - Initial implementation
	*******************************************************************************/
	package org.eclipse.ant.internal.ui.dtd.schema;

	import java.text.MessageFormat;
	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.Iterator;
	import java.util.List;

	import org.eclipse.ant.internal.ui.dtd.ParseError;
	import org.eclipse.ant.internal.ui.dtd.util.SortedMap;
	import org.eclipse.ant.internal.ui.dtd.util.SortedMapFactory;

	/**
	* NfmParser parses an NFA and returns an equivalent DFA if it can do so in
	* linear time and space (in terms of the original NFA).<p>
	*
	* As used here, NfmParser is called lazily when someone actually asks for a
	* Dfm. This is for performance reasons. Why go to the work of calculating all
	* those Dfms if nobody ever uses them?
	*
	* Well-formed errors in content models have already been detected. The only
	* error that can arise in NfmParser is an ambiguity error.
	*
	* Bruggemann-Klein showed that if an NFA is 1-unambiguous, an epsilon-free NFA
	* constructed from it in linear time is actually a DFA. This is obvious
	* in NfmParser. The algorithm works by removing all ambiguous transitions as
	* the graph is constructed, then proving that the reduced graph is equivalent
	* to the original in time linear in the number of ambiguous transitions.
	*
	* An effort is made to keep the DFA small, but there is no optimization
	* step, as DFAs are small anyway, with some linear inflation around *.
	* In a pathological case, like the classical (a,a,a,..., a) the number
	* of transitions in the DFA can be quadratic but this algorithm will not blow
	* up exponentially.
	*
	* @author Bob Foster
	*/
	public class NfmParser {

	public Dfm parse(Nfm nfm) throws ParseError {

	// Parse nfm into dfm

	Dfm dfm = parseStart(nfm.getStart(), nfm.getStop());

	// Make list of dfms in graph

	ArrayList dfms = new ArrayList();
	collect(dfm, dfms);

	// Detect accept conflicts

	HashMap duplicates = new HashMap();
	detect(dfms, duplicates);

	// Replace duplicate dfms in graph

	replace(dfms, duplicates);

	// Allow nfm memory to be re-used

	Nfm.free(nfm);
	NfmNode.freeAll();

	return dfm;
	}

	private void reportError(String name) throws ParseError {
	throw new ParseError(MessageFormat.format(AntDTDSchemaMessages.NfmParser_Ambiguous, new String[]{name}));
	}

	public static void collect(Dfm dfm, List dfms) {
	dfms.add(dfm);
	collect1(dfm, dfms);
	}

	private static void collect1(Dfm dfm, List dfms) {
	Object[] follows = dfm.getValues();
	if (follows != null) {
	for (int i = 0; i < follows.length; i++) {
	Dfm follow = (Dfm) follows[i];
	if (!dfms.contains(follow)) {
	dfms.add(follow);
	collect1(follow, dfms);
	}
	}
	}
	}


	/**
	* Replace duplicate dfms found during conflict resolution.
	*/
	private void replace(ArrayList dfms, HashMap removed) {
	for (int i = 0; i < dfms.size(); i++) {
	Dfm dfm = (Dfm) dfms.get(i);
	Object[] follows = dfm.getValues();
	if (follows != null) {
	for (int j = 0; j < follows.length; j++) {
	Dfm replacement, follow = (Dfm) follows[j];
	while ((replacement = (Dfm) removed.get(follow)) != null)
	follow = replacement;
	follows[j] = follow;
	}
	}
	}

	// release dfms so can be re-used
	Iterator rit = removed.keySet().iterator();
	while (rit.hasNext())
	Dfm.free((Dfm)rit.next());
	}


	/**
	* Detect conflicts in each state. Two transitions are a potential conflict
	* if they accept the same string value. They are an actual conflict if
	* their follow dfms are not identical and they are an actual ambiguity if
	* the transition atoms of the follow dfms are not pairwise identical.
	* This is derived from the rule of Bruggemann-Klein, which determines
	* that (a\|a)b is not ambiguous, but both (a,b)\|(a,c) and (a,b)\|(a,b) are.
	* The latter might be surprising, but that's committee work for you.
	* If two transitions are not ambiguous, one can be removed without
	* affecting the language accepted, and thus we have converted our
	* epsilon-free NFA into a DFA. If any two transitions are ambiguous,
	* we report an error and our responsibility ends. Note that no transitions
	* can be removed until all have been checked; this might disguise the
	* ambiguity in, e.g., ((a\|a),b,(a\|a))*.
	*/
	private void detect(ArrayList dfms, HashMap duplicates) throws ParseError {
	for (Iterator iter = dfms.iterator(); iter.hasNext();) {
	Dfm dfm = (Dfm) iter.next();

	Object[] accepts = dfm.getKeys();
	Object[] follows = dfm.getValues();
	if (accepts != null) {
	String last = null;
	for (int i = 0, lasti = -1; i < accepts.length; i++) {
	String accept = accepts[i].toString();
	// accepts strings are interned allowing identity comparison

	if (last != null && last == accept) {
	if (follows[i] != follows[lasti])
	checkConflict(new Conflict(accept, (Dfm)follows[lasti], (Dfm)follows[i]));
	}
	else {
	last = accept;
	lasti = i;
	}
	}
	}
	}

	// once more for removal

	for (Iterator iter = dfms.iterator(); iter.hasNext();) {
	Dfm dfm = (Dfm) iter.next();

	// record conflicts
	Object[] accepts = dfm.getKeys();
	Object[] follows = dfm.getValues();
	boolean remove = false;
	if (accepts != null) {
	boolean[] removes = new boolean[accepts.length];
	String last = null;
	for (int i = 0, lasti = -1; i < accepts.length; i++) {
	String accept = accepts[i].toString();

	if (last != null && last == accept) {
	remove = true;
	removes[i] = true;
	if (follows[i] != follows[lasti]) {
	Dfm dfmhi = (Dfm) follows[i];
	Dfm dfmlo = (Dfm) follows[lasti];
	if (dfmhi.id < dfmlo.id) {
	Dfm tmp = dfmhi;
	dfmhi = dfmlo;
	dfmlo = tmp;
	}
	Dfm dup = (Dfm) duplicates.get(dfmhi);
	if (dup == null \|\| dfmlo.id < dup.id) {
	duplicates.put(dfmhi, dfmlo);
	} else {
	duplicates.put(dfmlo, dup);
	}
	}
	}
	else {
	last = accept;
	lasti = i;
	}
	}

	if (remove) {
	SortedMap map = dfm.getMap();
	int i = 0;
	for (Iterator iterator = map.keyIterator(); iterator.hasNext(); i++) {
	iterator.next();
	if (removes[i])
	iterator.remove();
	}
	SortedMapFactory.freeMap(map);
	}
	}
	}
	}

	/**
	* Check conflict and report ambiguity.
	* @param conflict Potential ambiguity
	*/
	private void checkConflict(Conflict conflict) throws ParseError {
	if (conflict.dfm1.accepting != conflict.dfm2.accepting) {
	reportError(conflict.name);
	}
	Object[] accept1 = conflict.dfm1.getKeys();
	Object[] accept2 = conflict.dfm2.getKeys();
	if ((accept1 == null) != (accept2 == null)) {
	reportError(conflict.name);
	}
	if (accept1 != null) {
	if (accept1.length != accept2.length) {
	reportError(conflict.name);
	}
	for (int j = 0; j < accept2.length; j++) {
	if (accept1[j] != accept2[j]) {
	reportError(conflict.name);
	}
	}
	}
	}

	/**
	* Recursive parse that visits every node reachable from
	* the start symbol.
	*/
	private Dfm parseStart(NfmNode start, NfmNode accept) {
	// mark the start node
	Dfm result = Dfm.dfm(false);
	start.dfm = result;

	// we can minimize alias dfms by marking all starting transfer links
	while (start.next1 != null && start.next2 == null && start.symbol == null) {
	start = start.next1;
	start.dfm = result;
	}

	Dfm parsed = parse(1, start, accept);
	result.merge(parsed);

	Dfm.free(parsed);

	return result;
	}

	private void parseNext(int mark, Dfm result, NfmNode start, NfmNode accept) {
	Dfm parsed = parse(mark+1, start, accept);
	result.merge(parsed);

	Dfm.free(parsed);
	}

	/**
	* Recursive parse that visits every node reachable from
	* the start symbol.
	*/
	private Dfm parse(int mark, NfmNode start, NfmNode accept) {

	// eliminate useless recursion (note that accept node has no branches)
	while (start.next1 != null && start.next2 == null && start.symbol == null)
	start = start.next1;

	// if we reached the accept node, return an empty dfm that accepts
	if (start == accept)
	return Dfm.dfm(true);

	// for a symbol, construct a dfm that accepts the symbol
	if (start.symbol != null) {
	Dfm nextdfm = null;
	NfmNode next = start.next1, snext = next;
	while (snext.dfm == null && snext.next1 != null && snext.next2 == null && snext.symbol == null)
	snext = snext.next1;
	if (snext.dfm != null) {
	for (NfmNode n = next; n != snext; n = n.next1)
	n.dfm = snext.dfm;
	nextdfm = snext.dfm;
	}
	else {
	nextdfm = Dfm.dfm(false);
	snext.dfm = nextdfm;
	for (NfmNode n = next; n != snext; n = n.next1)
	n.dfm = nextdfm;
	parseNext(mark, nextdfm, snext, accept);
	}
	Dfm dfm = Dfm.dfm(start.symbol, nextdfm);
	return dfm;
	}

	// otherwise, follow both branches and return the combined result
	Dfm dfm1 = null, dfm2 = null;
	int saveMark;
	if (start.next1 != null && start.next1.mark != mark) {
	saveMark = start.next1.mark;
	start.next1.mark = mark;
	dfm1 = parse(mark, start.next1, accept);
	start.next1.mark = saveMark;
	}
	if (start.next2 != null && start.next2.mark != mark) {
	saveMark = start.next2.mark;
	start.next2.mark = mark;
	dfm2 = parse(mark, start.next2, accept);
	start.next2.mark = saveMark;
	}

	if (dfm2 != null) {
	if (dfm1 != null)
	dfm1.merge(dfm2);
	else
	dfm1 = dfm2;
	}
	return dfm1;
	}

	private static class Conflict {
	public String name;
	public Dfm dfm1, dfm2;
	public Conflict(String name, Dfm dfm1, Dfm dfm2) {
	this.name = name;
	this.dfm1 = dfm1;
	this.dfm2 = dfm2;
	}
	public int hashCode() {
	return dfm1.hashCode() + dfm2.hashCode();
	}
	public boolean equals(Object o) {
	if (o == this)
	return true;
	if (!(o instanceof Conflict))
	return false;
	Conflict other = (Conflict) o;
	return (dfm1 == other.dfm1 && dfm2 == other.dfm2)
	\|\| (dfm1 == other.dfm2 && dfm2 == other.dfm1);
	}
	}

	}