ant/org.eclipse.ant.ui/Ant Editor/org/eclipse/ant/internal/ui/dtd/schema/NfmParser.java - platform/eclipse.platform - Git at Google

 /*******************************************************************************
  * Copyright (c) 2002, 2013 Object Factory Inc.
  *
  * This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License 2.0
  * which accompanies this distribution, and is available at
  * https://www.eclipse.org/legal/epl-2.0/
  *
  * SPDX-License-Identifier: EPL-2.0
  *
  * Contributors:
  *		Object Factory Inc. - Initial implementation
  *******************************************************************************/
 package org.eclipse.ant.internal.ui.dtd.schema;

 import com.ibm.icu.text.MessageFormat;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;

 import org.eclipse.ant.internal.ui.dtd.ParseError;
 import org.eclipse.ant.internal.ui.dtd.util.SortedMap;
 import org.eclipse.ant.internal.ui.dtd.util.SortedMapFactory;

 /**
  * NfmParser parses an NFA and returns an equivalent DFA if it can do so in linear time and space (in terms of the original NFA).
  * <p>
  *
  * As used here, NfmParser is called lazily when someone actually asks for a Dfm. This is for performance reasons. Why go to the work of calculating
  * all those Dfms if nobody ever uses them?
  *
  * Well-formed errors in content models have already been detected. The only error that can arise in NfmParser is an ambiguity error.
  *
  * Bruggemann-Klein showed that if an NFA is 1-unambiguous, an epsilon-free NFA constructed from it in linear time is actually a DFA. This is obvious
  * in NfmParser. The algorithm works by removing all ambiguous transitions as the graph is constructed, then proving that the reduced graph is
  * equivalent to the original in time linear in the number of ambiguous transitions.
  *
  * An effort is made to keep the DFA small, but there is no optimization step, as DFAs are small anyway, with some linear inflation around *. In a
  * pathological case, like the classical (a*,a*,a*,..., a*) the number of transitions in the DFA can be quadratic but this algorithm will not blow up
  * exponentially.
  *
  * @author Bob Foster
  */
 public class NfmParser {

 	public Dfm parse(Nfm nfm) throws ParseError {

 		// Parse nfm into dfm

 		Dfm dfm = parseStart(nfm.getStart(), nfm.getStop());

 		// Make list of dfms in graph

 		ArrayList<Dfm> dfms = new ArrayList<>();
 		collect(dfm, dfms);

 		// Detect accept conflicts

 		HashMap<Dfm, Dfm> duplicates = new HashMap<>();
 		detect(dfms, duplicates);

 		// Replace duplicate dfms in graph

 		replace(dfms, duplicates);

 		// Allow nfm memory to be re-used

 		Nfm.free(nfm);
 		NfmNode.freeAll();

 		return dfm;
 	}

 	private void reportError(String name) throws ParseError {
 		throw new ParseError(MessageFormat.format(AntDTDSchemaMessages.NfmParser_Ambiguous, new Object[] { name }));
 	}

 	public static void collect(Dfm dfm, List<Dfm> dfms) {
 		dfms.add(dfm);
 		collect1(dfm, dfms);
 	}

 	private static void collect1(Dfm dfm, List<Dfm> dfms) {
 		Object[] follows = dfm.getValues();
 		if (follows != null) {
 			for (int i = 0; i < follows.length; i++) {
 				Dfm follow = (Dfm) follows[i];
 				if (!dfms.contains(follow)) {
 					dfms.add(follow);
 					collect1(follow, dfms);
 				}
 			}
 		}
 	}

 	/**
 	 * Replace duplicate dfms found during conflict resolution.
 	 */
 	private void replace(ArrayList<Dfm> dfms, HashMap<Dfm, Dfm> removed) {
 		for (Dfm dfm : dfms) {
 			Object[] follows = dfm.getValues();
 			if (follows != null) {
 				for (int j = 0; j < follows.length; j++) {
 					Dfm replacement, follow = (Dfm) follows[j];
 					while ((replacement = removed.get(follow)) != null)
 						follow = replacement;
 					follows[j] = follow;
 				}
 			}
 		}
 		// release dfms so can be re-used
 		for (Dfm dfm : removed.keySet()) {
 			Dfm.free(dfm);
 		}
 	}

 	/**
 	 * Detect conflicts in each state. Two transitions are a potential conflict if they accept the same string value. They are an actual conflict if
 	 * their follow dfms are not identical and they are an actual ambiguity if the transition atoms of the follow dfms are not pairwise identical.
 	 * This is derived from the rule of Bruggemann-Klein, which determines that (a|a)b is not ambiguous, but both (a,b)|(a,c) and (a,b)|(a,b) are. The
 	 * latter might be surprising, but that's committee work for you. If two transitions are not ambiguous, one can be removed without affecting the
 	 * language accepted, and thus we have converted our epsilon-free NFA into a DFA. If any two transitions are ambiguous, we report an error and our
 	 * responsibility ends. Note that no transitions can be removed until all have been checked; this might disguise the ambiguity in, e.g.,
 	 * ((a|a),b,(a|a))*.
 	 */
 	private void detect(ArrayList<Dfm> dfms, HashMap<Dfm, Dfm> duplicates) throws ParseError {
 		for (Iterator<Dfm> iter = dfms.iterator(); iter.hasNext();) {
 			Dfm dfm = iter.next();

 			Object[] accepts = dfm.getKeys();
 			Object[] follows = dfm.getValues();
 			if (accepts != null) {
 				String last = null;
 				for (int i = 0, lasti = -1; i < accepts.length; i++) {
 					String accept = accepts[i].toString();
 					// accepts strings are interned allowing identity comparison

 					if (last != null && last == accept) {
 						if (follows[i] != follows[lasti])
 							checkConflict(new Conflict(accept, (Dfm) follows[lasti], (Dfm) follows[i]));
 					} else {
 						last = accept;
 						lasti = i;
 					}
 				}
 			}
 		}

 		// once more for removal

 		for (Iterator<Dfm> iter = dfms.iterator(); iter.hasNext();) {
 			Dfm dfm = iter.next();

 			// record conflicts
 			Object[] accepts = dfm.getKeys();
 			Object[] follows = dfm.getValues();
 			boolean remove = false;
 			if (accepts != null) {
 				boolean[] removes = new boolean[accepts.length];
 				String last = null;
 				for (int i = 0, lasti = -1; i < accepts.length; i++) {
 					String accept = accepts[i].toString();

 					if (last != null && last == accept) {
 						remove = true;
 						removes[i] = true;
 						if (follows[i] != follows[lasti]) {
 							Dfm dfmhi = (Dfm) follows[i];
 							Dfm dfmlo = (Dfm) follows[lasti];
 							if (dfmhi.id < dfmlo.id) {
 								Dfm tmp = dfmhi;
 								dfmhi = dfmlo;
 								dfmlo = tmp;
 							}
 							Dfm dup = duplicates.get(dfmhi);
 							if (dup == null || dfmlo.id < dup.id) {
 								duplicates.put(dfmhi, dfmlo);
 							} else {
 								duplicates.put(dfmlo, dup);
 							}
 						}
 					} else {
 						last = accept;
 						lasti = i;
 					}
 				}

 				if (remove) {
 					SortedMap map = dfm.getMap();
 					int i = 0;
 					for (Iterator<?> iterator = map.keyIterator(); iterator.hasNext(); i++) {
 						iterator.next();
 						if (removes[i])
 							iterator.remove();
 					}
 					SortedMapFactory.freeMap(map);
 				}
 			}
 		}
 	}

 	/**
 	 * Check conflict and report ambiguity.
 	 *
 	 * @param conflict
 	 *            Potential ambiguity
 	 */
 	private void checkConflict(Conflict conflict) throws ParseError {
 		if (conflict.dfm1.accepting != conflict.dfm2.accepting) {
 			reportError(conflict.name);
 		}
 		Object[] accept1 = conflict.dfm1.getKeys();
 		Object[] accept2 = conflict.dfm2.getKeys();
 		if ((accept1 == null) != (accept2 == null)) {
 			reportError(conflict.name);
 		}
 		if (accept1 != null) {
 			if (accept1.length != accept2.length) {
 				reportError(conflict.name);
 			}
 			for (int j = 0; j < accept2.length; j++) {
 				if (accept1[j] != accept2[j]) {
 					reportError(conflict.name);
 				}
 			}
 		}
 	}

 	/**
 	 * Recursive parse that visits every node reachable from the start symbol.
 	 */
 	private Dfm parseStart(NfmNode start, NfmNode accept) {
 		// mark the start node
 		Dfm result = Dfm.dfm(false);
 		start.dfm = result;

 		// we can minimize alias dfms by marking all starting transfer links
 		while (start.next1 != null && start.next2 == null && start.symbol == null) {
 			start = start.next1;
 			start.dfm = result;
 		}

 		Dfm parsed = parse(1, start, accept);
 		result.merge(parsed);

 		Dfm.free(parsed);

 		return result;
 	}

 	private void parseNext(int mark, Dfm result, NfmNode start, NfmNode accept) {
 		Dfm parsed = parse(mark + 1, start, accept);
 		result.merge(parsed);

 		Dfm.free(parsed);
 	}

 	/**
 	 * Recursive parse that visits every node reachable from the start symbol.
 	 */
 	private Dfm parse(int mark, NfmNode start, NfmNode accept) {

 		// eliminate useless recursion (note that accept node has no branches)
 		while (start.next1 != null && start.next2 == null && start.symbol == null)
 			start = start.next1;

 		// if we reached the accept node, return an empty dfm that accepts
 		if (start == accept)
 			return Dfm.dfm(true);

 		// for a symbol, construct a dfm that accepts the symbol
 		if (start.symbol != null) {
 			Dfm nextdfm = null;
 			NfmNode next = start.next1, snext = next;
 			while (snext.dfm == null && snext.next1 != null && snext.next2 == null && snext.symbol == null)
 				snext = snext.next1;
 			if (snext.dfm != null) {
 				for (NfmNode n = next; n != snext; n = n.next1)
 					n.dfm = snext.dfm;
 				nextdfm = snext.dfm;
 			} else {
 				nextdfm = Dfm.dfm(false);
 				snext.dfm = nextdfm;
 				for (NfmNode n = next; n != snext; n = n.next1)
 					n.dfm = nextdfm;
 				parseNext(mark, nextdfm, snext, accept);
 			}
 			Dfm dfm = Dfm.dfm(start.symbol, nextdfm);
 			return dfm;
 		}

 		// otherwise, follow both branches and return the combined result
 		Dfm dfm1 = null, dfm2 = null;
 		int saveMark;
 		if (start.next1 != null && start.next1.mark != mark) {
 			saveMark = start.next1.mark;
 			start.next1.mark = mark;
 			dfm1 = parse(mark, start.next1, accept);
 			start.next1.mark = saveMark;
 		}
 		if (start.next2 != null && start.next2.mark != mark) {
 			saveMark = start.next2.mark;
 			start.next2.mark = mark;
 			dfm2 = parse(mark, start.next2, accept);
 			start.next2.mark = saveMark;
 		}

 		if (dfm2 != null) {
 			if (dfm1 != null)
 				dfm1.merge(dfm2);
 			else
 				dfm1 = dfm2;
 		}
 		return dfm1;
 	}

 	private static class Conflict {
 		public String name;
 		public Dfm dfm1, dfm2;

 		public Conflict(String name, Dfm dfm1, Dfm dfm2) {
 			this.name = name;
 			this.dfm1 = dfm1;
 			this.dfm2 = dfm2;
 		}

 		@Override
 		public int hashCode() {
 			return dfm1.hashCode() + dfm2.hashCode();
 		}

 		@Override
 		public boolean equals(Object o) {
 			if (o == this)
 				return true;
 			if (!(o instanceof Conflict))
 				return false;
 			Conflict other = (Conflict) o;
 			return (dfm1 == other.dfm1 && dfm2 == other.dfm2) || (dfm1 == other.dfm2 && dfm2 == other.dfm1);
 		}
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2002, 2013 Object Factory Inc.
	*
	* This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License 2.0
	* which accompanies this distribution, and is available at
	* https://www.eclipse.org/legal/epl-2.0/
	*
	* SPDX-License-Identifier: EPL-2.0
	*
	* Contributors:
	* Object Factory Inc. - Initial implementation
	*******************************************************************************/
	package org.eclipse.ant.internal.ui.dtd.schema;

	import com.ibm.icu.text.MessageFormat;
	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.Iterator;
	import java.util.List;

	import org.eclipse.ant.internal.ui.dtd.ParseError;
	import org.eclipse.ant.internal.ui.dtd.util.SortedMap;
	import org.eclipse.ant.internal.ui.dtd.util.SortedMapFactory;

	/**
	* NfmParser parses an NFA and returns an equivalent DFA if it can do so in linear time and space (in terms of the original NFA).
	* <p>
	*
	* As used here, NfmParser is called lazily when someone actually asks for a Dfm. This is for performance reasons. Why go to the work of calculating
	* all those Dfms if nobody ever uses them?
	*
	* Well-formed errors in content models have already been detected. The only error that can arise in NfmParser is an ambiguity error.
	*
	* Bruggemann-Klein showed that if an NFA is 1-unambiguous, an epsilon-free NFA constructed from it in linear time is actually a DFA. This is obvious
	* in NfmParser. The algorithm works by removing all ambiguous transitions as the graph is constructed, then proving that the reduced graph is
	* equivalent to the original in time linear in the number of ambiguous transitions.
	*
	* An effort is made to keep the DFA small, but there is no optimization step, as DFAs are small anyway, with some linear inflation around *. In a
	* pathological case, like the classical (a,a,a,..., a) the number of transitions in the DFA can be quadratic but this algorithm will not blow up
	* exponentially.
	*
	* @author Bob Foster
	*/
	public class NfmParser {

	public Dfm parse(Nfm nfm) throws ParseError {

	// Parse nfm into dfm

	Dfm dfm = parseStart(nfm.getStart(), nfm.getStop());

	// Make list of dfms in graph

	ArrayList<Dfm> dfms = new ArrayList<>();
	collect(dfm, dfms);

	// Detect accept conflicts

	HashMap<Dfm, Dfm> duplicates = new HashMap<>();
	detect(dfms, duplicates);

	// Replace duplicate dfms in graph

	replace(dfms, duplicates);

	// Allow nfm memory to be re-used

	Nfm.free(nfm);
	NfmNode.freeAll();

	return dfm;
	}

	private void reportError(String name) throws ParseError {
	throw new ParseError(MessageFormat.format(AntDTDSchemaMessages.NfmParser_Ambiguous, new Object[] { name }));
	}

	public static void collect(Dfm dfm, List<Dfm> dfms) {
	dfms.add(dfm);
	collect1(dfm, dfms);
	}

	private static void collect1(Dfm dfm, List<Dfm> dfms) {
	Object[] follows = dfm.getValues();
	if (follows != null) {
	for (int i = 0; i < follows.length; i++) {
	Dfm follow = (Dfm) follows[i];
	if (!dfms.contains(follow)) {
	dfms.add(follow);
	collect1(follow, dfms);
	}
	}
	}
	}

	/**
	* Replace duplicate dfms found during conflict resolution.
	*/
	private void replace(ArrayList<Dfm> dfms, HashMap<Dfm, Dfm> removed) {
	for (Dfm dfm : dfms) {
	Object[] follows = dfm.getValues();
	if (follows != null) {
	for (int j = 0; j < follows.length; j++) {
	Dfm replacement, follow = (Dfm) follows[j];
	while ((replacement = removed.get(follow)) != null)
	follow = replacement;
	follows[j] = follow;
	}
	}
	}
	// release dfms so can be re-used
	for (Dfm dfm : removed.keySet()) {
	Dfm.free(dfm);
	}
	}

	/**
	* Detect conflicts in each state. Two transitions are a potential conflict if they accept the same string value. They are an actual conflict if
	* their follow dfms are not identical and they are an actual ambiguity if the transition atoms of the follow dfms are not pairwise identical.
	* This is derived from the rule of Bruggemann-Klein, which determines that (a\|a)b is not ambiguous, but both (a,b)\|(a,c) and (a,b)\|(a,b) are. The
	* latter might be surprising, but that's committee work for you. If two transitions are not ambiguous, one can be removed without affecting the
	* language accepted, and thus we have converted our epsilon-free NFA into a DFA. If any two transitions are ambiguous, we report an error and our
	* responsibility ends. Note that no transitions can be removed until all have been checked; this might disguise the ambiguity in, e.g.,
	* ((a\|a),b,(a\|a))*.
	*/
	private void detect(ArrayList<Dfm> dfms, HashMap<Dfm, Dfm> duplicates) throws ParseError {
	for (Iterator<Dfm> iter = dfms.iterator(); iter.hasNext();) {
	Dfm dfm = iter.next();

	Object[] accepts = dfm.getKeys();
	Object[] follows = dfm.getValues();
	if (accepts != null) {
	String last = null;
	for (int i = 0, lasti = -1; i < accepts.length; i++) {
	String accept = accepts[i].toString();
	// accepts strings are interned allowing identity comparison

	if (last != null && last == accept) {
	if (follows[i] != follows[lasti])
	checkConflict(new Conflict(accept, (Dfm) follows[lasti], (Dfm) follows[i]));
	} else {
	last = accept;
	lasti = i;
	}
	}
	}
	}

	// once more for removal

	for (Iterator<Dfm> iter = dfms.iterator(); iter.hasNext();) {
	Dfm dfm = iter.next();

	// record conflicts
	Object[] accepts = dfm.getKeys();
	Object[] follows = dfm.getValues();
	boolean remove = false;
	if (accepts != null) {
	boolean[] removes = new boolean[accepts.length];
	String last = null;
	for (int i = 0, lasti = -1; i < accepts.length; i++) {
	String accept = accepts[i].toString();

	if (last != null && last == accept) {
	remove = true;
	removes[i] = true;
	if (follows[i] != follows[lasti]) {
	Dfm dfmhi = (Dfm) follows[i];
	Dfm dfmlo = (Dfm) follows[lasti];
	if (dfmhi.id < dfmlo.id) {
	Dfm tmp = dfmhi;
	dfmhi = dfmlo;
	dfmlo = tmp;
	}
	Dfm dup = duplicates.get(dfmhi);
	if (dup == null \|\| dfmlo.id < dup.id) {
	duplicates.put(dfmhi, dfmlo);
	} else {
	duplicates.put(dfmlo, dup);
	}
	}
	} else {
	last = accept;
	lasti = i;
	}
	}

	if (remove) {
	SortedMap map = dfm.getMap();
	int i = 0;
	for (Iterator<?> iterator = map.keyIterator(); iterator.hasNext(); i++) {
	iterator.next();
	if (removes[i])
	iterator.remove();
	}
	SortedMapFactory.freeMap(map);
	}
	}
	}
	}

	/**
	* Check conflict and report ambiguity.
	*
	* @param conflict
	* Potential ambiguity
	*/
	private void checkConflict(Conflict conflict) throws ParseError {
	if (conflict.dfm1.accepting != conflict.dfm2.accepting) {
	reportError(conflict.name);
	}
	Object[] accept1 = conflict.dfm1.getKeys();
	Object[] accept2 = conflict.dfm2.getKeys();
	if ((accept1 == null) != (accept2 == null)) {
	reportError(conflict.name);
	}
	if (accept1 != null) {
	if (accept1.length != accept2.length) {
	reportError(conflict.name);
	}
	for (int j = 0; j < accept2.length; j++) {
	if (accept1[j] != accept2[j]) {
	reportError(conflict.name);
	}
	}
	}
	}

	/**
	* Recursive parse that visits every node reachable from the start symbol.
	*/
	private Dfm parseStart(NfmNode start, NfmNode accept) {
	// mark the start node
	Dfm result = Dfm.dfm(false);
	start.dfm = result;

	// we can minimize alias dfms by marking all starting transfer links
	while (start.next1 != null && start.next2 == null && start.symbol == null) {
	start = start.next1;
	start.dfm = result;
	}

	Dfm parsed = parse(1, start, accept);
	result.merge(parsed);

	Dfm.free(parsed);

	return result;
	}

	private void parseNext(int mark, Dfm result, NfmNode start, NfmNode accept) {
	Dfm parsed = parse(mark + 1, start, accept);
	result.merge(parsed);

	Dfm.free(parsed);
	}

	/**
	* Recursive parse that visits every node reachable from the start symbol.
	*/
	private Dfm parse(int mark, NfmNode start, NfmNode accept) {

	// eliminate useless recursion (note that accept node has no branches)
	while (start.next1 != null && start.next2 == null && start.symbol == null)
	start = start.next1;

	// if we reached the accept node, return an empty dfm that accepts
	if (start == accept)
	return Dfm.dfm(true);

	// for a symbol, construct a dfm that accepts the symbol
	if (start.symbol != null) {
	Dfm nextdfm = null;
	NfmNode next = start.next1, snext = next;
	while (snext.dfm == null && snext.next1 != null && snext.next2 == null && snext.symbol == null)
	snext = snext.next1;
	if (snext.dfm != null) {
	for (NfmNode n = next; n != snext; n = n.next1)
	n.dfm = snext.dfm;
	nextdfm = snext.dfm;
	} else {
	nextdfm = Dfm.dfm(false);
	snext.dfm = nextdfm;
	for (NfmNode n = next; n != snext; n = n.next1)
	n.dfm = nextdfm;
	parseNext(mark, nextdfm, snext, accept);
	}
	Dfm dfm = Dfm.dfm(start.symbol, nextdfm);
	return dfm;
	}

	// otherwise, follow both branches and return the combined result
	Dfm dfm1 = null, dfm2 = null;
	int saveMark;
	if (start.next1 != null && start.next1.mark != mark) {
	saveMark = start.next1.mark;
	start.next1.mark = mark;
	dfm1 = parse(mark, start.next1, accept);
	start.next1.mark = saveMark;
	}
	if (start.next2 != null && start.next2.mark != mark) {
	saveMark = start.next2.mark;
	start.next2.mark = mark;
	dfm2 = parse(mark, start.next2, accept);
	start.next2.mark = saveMark;
	}

	if (dfm2 != null) {
	if (dfm1 != null)
	dfm1.merge(dfm2);
	else
	dfm1 = dfm2;
	}
	return dfm1;
	}

	private static class Conflict {
	public String name;
	public Dfm dfm1, dfm2;

	public Conflict(String name, Dfm dfm1, Dfm dfm2) {
	this.name = name;
	this.dfm1 = dfm1;
	this.dfm2 = dfm2;
	}

	@Override
	public int hashCode() {
	return dfm1.hashCode() + dfm2.hashCode();
	}

	@Override
	public boolean equals(Object o) {
	if (o == this)
	return true;
	if (!(o instanceof Conflict))
	return false;
	Conflict other = (Conflict) o;
	return (dfm1 == other.dfm1 && dfm2 == other.dfm2) \|\| (dfm1 == other.dfm2 && dfm2 == other.dfm1);
	}
	}

	}