blob: e4f422af176844b6d05ee94e1cf4f5d7811c206b [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2002, 2005 Object Factory Inc.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Object Factory Inc. - Initial implementation
*******************************************************************************/
package org.eclipse.ant.internal.ui.dtd.schema;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.eclipse.ant.internal.ui.dtd.ParseError;
import org.eclipse.ant.internal.ui.dtd.util.SortedMap;
import org.eclipse.ant.internal.ui.dtd.util.SortedMapFactory;
/**
* NfmParser parses an NFA and returns an equivalent DFA if it can do so in
* linear time and space (in terms of the original NFA).<p>
*
* As used here, NfmParser is called lazily when someone actually asks for a
* Dfm. This is for performance reasons. Why go to the work of calculating all
* those Dfms if nobody ever uses them?
*
* Well-formed errors in content models have already been detected. The only
* error that can arise in NfmParser is an ambiguity error.
*
* Bruggemann-Klein showed that if an NFA is 1-unambiguous, an epsilon-free NFA
* constructed from it in linear time is actually a DFA. This is obvious
* in NfmParser. The algorithm works by removing all ambiguous transitions as
* the graph is constructed, then proving that the reduced graph is equivalent
* to the original in time linear in the number of ambiguous transitions.
*
* An effort is made to keep the DFA small, but there is no optimization
* step, as DFAs are small anyway, with some linear inflation around *.
* In a pathological case, like the classical (a*,a*,a*,..., a*) the number
* of transitions in the DFA can be quadratic but this algorithm will not blow
* up exponentially.
*
* @author Bob Foster
*/
public class NfmParser {
public Dfm parse(Nfm nfm) throws ParseError {
// Parse nfm into dfm
Dfm dfm = parseStart(nfm.getStart(), nfm.getStop());
// Make list of dfms in graph
ArrayList dfms = new ArrayList();
collect(dfm, dfms);
// Detect accept conflicts
HashMap duplicates = new HashMap();
detect(dfms, duplicates);
// Replace duplicate dfms in graph
replace(dfms, duplicates);
// Allow nfm memory to be re-used
Nfm.free(nfm);
NfmNode.freeAll();
return dfm;
}
private void reportError(String name) throws ParseError {
throw new ParseError(MessageFormat.format(AntDTDSchemaMessages.NfmParser_Ambiguous, new String[]{name}));
}
public static void collect(Dfm dfm, List dfms) {
dfms.add(dfm);
collect1(dfm, dfms);
}
private static void collect1(Dfm dfm, List dfms) {
Object[] follows = dfm.getValues();
if (follows != null) {
for (int i = 0; i < follows.length; i++) {
Dfm follow = (Dfm) follows[i];
if (!dfms.contains(follow)) {
dfms.add(follow);
collect1(follow, dfms);
}
}
}
}
/**
* Replace duplicate dfms found during conflict resolution.
*/
private void replace(ArrayList dfms, HashMap removed) {
for (int i = 0; i < dfms.size(); i++) {
Dfm dfm = (Dfm) dfms.get(i);
Object[] follows = dfm.getValues();
if (follows != null) {
for (int j = 0; j < follows.length; j++) {
Dfm replacement, follow = (Dfm) follows[j];
while ((replacement = (Dfm) removed.get(follow)) != null)
follow = replacement;
follows[j] = follow;
}
}
}
// release dfms so can be re-used
Iterator rit = removed.keySet().iterator();
while (rit.hasNext())
Dfm.free((Dfm)rit.next());
}
/**
* Detect conflicts in each state. Two transitions are a potential conflict
* if they accept the same string value. They are an actual conflict if
* their follow dfms are not identical and they are an actual ambiguity if
* the transition atoms of the follow dfms are not pairwise identical.
* This is derived from the rule of Bruggemann-Klein, which determines
* that (a|a)b is not ambiguous, but both (a,b)|(a,c) and (a,b)|(a,b) are.
* The latter might be surprising, but that's committee work for you.
* If two transitions are not ambiguous, one can be removed without
* affecting the language accepted, and thus we have converted our
* epsilon-free NFA into a DFA. If any two transitions are ambiguous,
* we report an error and our responsibility ends. Note that no transitions
* can be removed until all have been checked; this might disguise the
* ambiguity in, e.g., ((a|a),b,(a|a))*.
*/
private void detect(ArrayList dfms, HashMap duplicates) throws ParseError {
for (Iterator iter = dfms.iterator(); iter.hasNext();) {
Dfm dfm = (Dfm) iter.next();
Object[] accepts = dfm.getKeys();
Object[] follows = dfm.getValues();
if (accepts != null) {
String last = null;
for (int i = 0, lasti = -1; i < accepts.length; i++) {
String accept = accepts[i].toString();
// accepts strings are interned allowing identity comparison
if (last != null && last == accept) {
if (follows[i] != follows[lasti])
checkConflict(new Conflict(accept, (Dfm)follows[lasti], (Dfm)follows[i]));
}
else {
last = accept;
lasti = i;
}
}
}
}
// once more for removal
for (Iterator iter = dfms.iterator(); iter.hasNext();) {
Dfm dfm = (Dfm) iter.next();
// record conflicts
Object[] accepts = dfm.getKeys();
Object[] follows = dfm.getValues();
boolean remove = false;
if (accepts != null) {
boolean[] removes = new boolean[accepts.length];
String last = null;
for (int i = 0, lasti = -1; i < accepts.length; i++) {
String accept = accepts[i].toString();
if (last != null && last == accept) {
remove = true;
removes[i] = true;
if (follows[i] != follows[lasti]) {
Dfm dfmhi = (Dfm) follows[i];
Dfm dfmlo = (Dfm) follows[lasti];
if (dfmhi.id < dfmlo.id) {
Dfm tmp = dfmhi;
dfmhi = dfmlo;
dfmlo = tmp;
}
Dfm dup = (Dfm) duplicates.get(dfmhi);
if (dup == null || dfmlo.id < dup.id) {
duplicates.put(dfmhi, dfmlo);
} else {
duplicates.put(dfmlo, dup);
}
}
}
else {
last = accept;
lasti = i;
}
}
if (remove) {
SortedMap map = dfm.getMap();
int i = 0;
for (Iterator iterator = map.keyIterator(); iterator.hasNext(); i++) {
iterator.next();
if (removes[i])
iterator.remove();
}
SortedMapFactory.freeMap(map);
}
}
}
}
/**
* Check conflict and report ambiguity.
* @param conflict Potential ambiguity
*/
private void checkConflict(Conflict conflict) throws ParseError {
if (conflict.dfm1.accepting != conflict.dfm2.accepting) {
reportError(conflict.name);
}
Object[] accept1 = conflict.dfm1.getKeys();
Object[] accept2 = conflict.dfm2.getKeys();
if ((accept1 == null) != (accept2 == null)) {
reportError(conflict.name);
}
if (accept1 != null) {
if (accept1.length != accept2.length) {
reportError(conflict.name);
}
for (int j = 0; j < accept2.length; j++) {
if (accept1[j] != accept2[j]) {
reportError(conflict.name);
}
}
}
}
/**
* Recursive parse that visits every node reachable from
* the start symbol.
*/
private Dfm parseStart(NfmNode start, NfmNode accept) {
// mark the start node
Dfm result = Dfm.dfm(false);
start.dfm = result;
// we can minimize alias dfms by marking all starting transfer links
while (start.next1 != null && start.next2 == null && start.symbol == null) {
start = start.next1;
start.dfm = result;
}
Dfm parsed = parse(1, start, accept);
result.merge(parsed);
Dfm.free(parsed);
return result;
}
private void parseNext(int mark, Dfm result, NfmNode start, NfmNode accept) {
Dfm parsed = parse(mark+1, start, accept);
result.merge(parsed);
Dfm.free(parsed);
}
/**
* Recursive parse that visits every node reachable from
* the start symbol.
*/
private Dfm parse(int mark, NfmNode start, NfmNode accept) {
// eliminate useless recursion (note that accept node has no branches)
while (start.next1 != null && start.next2 == null && start.symbol == null)
start = start.next1;
// if we reached the accept node, return an empty dfm that accepts
if (start == accept)
return Dfm.dfm(true);
// for a symbol, construct a dfm that accepts the symbol
if (start.symbol != null) {
Dfm nextdfm = null;
NfmNode next = start.next1, snext = next;
while (snext.dfm == null && snext.next1 != null && snext.next2 == null && snext.symbol == null)
snext = snext.next1;
if (snext.dfm != null) {
for (NfmNode n = next; n != snext; n = n.next1)
n.dfm = snext.dfm;
nextdfm = snext.dfm;
}
else {
nextdfm = Dfm.dfm(false);
snext.dfm = nextdfm;
for (NfmNode n = next; n != snext; n = n.next1)
n.dfm = nextdfm;
parseNext(mark, nextdfm, snext, accept);
}
Dfm dfm = Dfm.dfm(start.symbol, nextdfm);
return dfm;
}
// otherwise, follow both branches and return the combined result
Dfm dfm1 = null, dfm2 = null;
int saveMark;
if (start.next1 != null && start.next1.mark != mark) {
saveMark = start.next1.mark;
start.next1.mark = mark;
dfm1 = parse(mark, start.next1, accept);
start.next1.mark = saveMark;
}
if (start.next2 != null && start.next2.mark != mark) {
saveMark = start.next2.mark;
start.next2.mark = mark;
dfm2 = parse(mark, start.next2, accept);
start.next2.mark = saveMark;
}
if (dfm2 != null) {
if (dfm1 != null)
dfm1.merge(dfm2);
else
dfm1 = dfm2;
}
return dfm1;
}
private static class Conflict {
public String name;
public Dfm dfm1, dfm2;
public Conflict(String name, Dfm dfm1, Dfm dfm2) {
this.name = name;
this.dfm1 = dfm1;
this.dfm2 = dfm2;
}
public int hashCode() {
return dfm1.hashCode() + dfm2.hashCode();
}
public boolean equals(Object o) {
if (o == this)
return true;
if (!(o instanceof Conflict))
return false;
Conflict other = (Conflict) o;
return (dfm1 == other.dfm1 && dfm2 == other.dfm2)
|| (dfm1 == other.dfm2 && dfm2 == other.dfm1);
}
}
}