blob: 43d7bb1c68b55a728880592a692d6ec6120ac9a4 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2000, 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Common Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/cpl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.compare.examples.xml;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.ListIterator;
import java.util.Vector;
import org.eclipse.core.runtime.IProgressMonitor;
/** This class is used to find a mapping between the nodes of two xml parse trees
* When an identifier for a specific node is known, it will be used.
* Otherwise a min-cost bipartite matching must be solved.
* This algorithm uses the algorithm described in the paper
* "X-Diff: A Fast Change Detection Algorithm for XML Documents"
*/
public class GeneralMatching extends AbstractMatching {
HungarianMethod fH;
boolean fUseOrdered;
String[] fOrdered;
boolean fIgnoreStartsWith;
public GeneralMatching() {
fOrdered= null;
fUseOrdered= false;
fIgnoreStartsWith= false;
}
public GeneralMatching(ArrayList ordered) {
if (ordered != null && !ordered.isEmpty()) {
fUseOrdered= true;
fOrdered= new String[ordered.size()];
int i= 0;
for (Iterator iter= ordered.iterator(); iter.hasNext(); i++) {
fOrdered[i]= (String) iter.next();
}
} else {
fUseOrdered= false;
fOrdered= null;
}
//fOrderedElements= XMLPlugin.getDefault().getOrderedElements();
}
//x and y have children xc_orig and yc_orig, respectively
protected int unorderedMatch(
XMLNode x,
XMLNode y,
Object[] xc_orig,
Object[] yc_orig) {
ArrayList DTMatching= new ArrayList(); //Mathing Entry in fDT_Matchings
int distance= 0; //distance
Vector xc_vect= new Vector();
Vector yc_vect= new Vector();
for (int i= 0; i < xc_orig.length; i++) {
if (((XMLNode) xc_orig[i]).usesIDMAP()) {
int j;
for (j= 0;
j < yc_orig.length
&& !((XMLNode) yc_orig[j]).getOrigId().equals(
((XMLNode) xc_orig[i]).getOrigId());
j++);
if (j < yc_orig.length) {
/* not calculating their distance and adding it to variable "distance" to save time,
* as matching of subtrees is not performed.
* but better result might be achieved if this were done.
*/
//int d= dist( (XMLNode)xc_orig[i], (XMLNode)yc_orig[j] );
//distance += d;
//fDT[indexOfLN((XMLNode)xc_orig[i])][indexOfRN((XMLNode)yc_orig[j])]= d;
DTMatching.add(
new Match((XMLNode) xc_orig[i], (XMLNode) yc_orig[j]));
}
} else
xc_vect.add(xc_orig[i]);
}
XMLNode[] xc= (XMLNode[]) xc_vect.toArray(new XMLNode[xc_vect.size()]);
for (int j= 0; j < yc_orig.length; j++) {
if (!((XMLNode) yc_orig[j]).usesIDMAP())
yc_vect.add(yc_orig[j]);
}
XMLNode[] yc= (XMLNode[]) yc_vect.toArray(new XMLNode[yc_vect.size()]);
if (xc.length == 0 || yc.length == 0) {
if (xc.length == 0) {
for (int j= 0; j < yc.length; j++) {
distance += countNodes(yc[j]);
}
} else { //yc.length == 0
for (int i= 0; i < xc.length; i++) {
distance += countNodes(xc[i]);
}
}
} else {
for (int i= 0; i < xc.length; i++) {
for (int j= 0; j < yc.length; j++) {
if (fDT[indexOfLN(xc[i])][indexOfRN(yc[j])] == NO_ENTRY)
dist(xc[i], yc[j]);
}
}
/* look for Wmin (p.11)
* xc and yc are the two partitions that have to be mapped.
* But, they may not have same number of nodes
* HungarianMethod.java solves weighted matching only on complete bipatite graphs
* We must add nodes and edges to make graph complete
*/
final int array_size=
(xc.length > yc.length) ? xc.length : yc.length;
final int array_rowsize= array_size + 1;
final int array_colsize= array_size + 2;
int[][] A= new int[array_rowsize][array_colsize];
for (int j= 0; j < array_colsize; j++) {
A[0][j]= 0;
}
/* now: A[0] = new int[] {0,0,0, ... ,0}. This first row is not used by HungarianMethod
* (Fortran77 counts Array index from 1)
*/
for (int i= 1; i < array_rowsize; i++) {
A[i][0]= 0;
for (int j= 1; j < array_colsize - 1; j++) {
A[i][j]= -1;
}
A[i][array_colsize - 1]= 0;
}
/* now A = 0, 0, 0, ... 0,0
* 0,-1,-1, ... -1,0
* 0,-1,-1, ... -1,0
* ...
* 0,-1,-1, ... -1,0
*/
for (int i_xc= 0; i_xc < xc.length; i_xc++) {
for (int i_yc= 0; i_yc < yc.length; i_yc++) {
A[i_xc + 1][i_yc + 1]=
fDT[indexOfLN(xc[i_xc])][indexOfRN(yc[i_yc])];
}
}
int dummyCost= 0;
/* cost of dummy nodes not associated with a node in Tree, but needed
* to have a complete bipartite graph
*/
//set dummyCost to larger than any cost in A
if (xc.length > yc.length) {
for (int i= 1; i < array_rowsize; i++) {
for (int j= 1; j <= yc.length; j++)
if (A[i][j] > dummyCost)
dummyCost= A[i][j];
}
} else if (xc.length < yc.length) {
for (int i= 1; i <= xc.length; i++) {
for (int j= 1; j < array_colsize - 1; j++)
if (A[i][j] > dummyCost)
dummyCost= A[i][j];
}
} else { //xc.length == yc.length
dummyCost= Integer.MAX_VALUE - 1;
}
dummyCost += 1;
if (xc.length > yc.length) {
for (int i= 1; i < array_rowsize; i++) {
for (int j= yc.length + 1; j < array_colsize - 1; j++) {
A[i][j]= dummyCost;
}
}
} else if (xc.length < yc.length) {
for (int j= 1; j < array_colsize - 1; j++) {
for (int i= xc.length + 1; i < array_rowsize; i++) {
A[i][j]= dummyCost;
}
}
}
//A is built. Now perform matching
int[] Matching= new int[array_rowsize];
int[][] A2= new int[array_rowsize][array_colsize];
for (int i= 0; i < array_rowsize; i++) {
for (int j= 0; j < array_colsize; j++)
A2[i][j]= A[i][j];
}
fH.solve(A2, Matching);
//now Matching contains the min-cost matching of A
for (int m= 1; m < Matching.length; m++) {
if (A[Matching[m]][m] == dummyCost) {
if (xc.length > yc.length) {
distance += countNodes(xc[Matching[m] - 1]);
//added here
DTMatching.add(new Match(xc[Matching[m] - 1], null));
} else if (xc.length < yc.length) {
distance += countNodes(yc[m - 1]);
//added here
DTMatching.add(new Match(null, yc[m - 1]));
}
} else {
int index_x= indexOfLN(xc[Matching[m] - 1]);
int index_y= indexOfRN(yc[m - 1]);
distance += fDT[index_x][index_y];
if ((xc[Matching[m] - 1])
.getSignature()
.equals((yc[m - 1]).getSignature()))
DTMatching.add(
new Match(xc[Matching[m] - 1], yc[m - 1]));
else {
DTMatching.add(new Match(xc[Matching[m] - 1], null));
DTMatching.add(new Match(null, yc[m - 1]));
}
}
}
}
fDT[indexOfLN(x)][indexOfRN(y)]= distance;
fDT_Matchings[indexOfLN(x)][indexOfRN(y)]= DTMatching;
return distance;
}
protected int orderedMath(XMLNode x, XMLNode y) {
//assumes x and y have children
boolean old_isw= fIgnoreStartsWith;
fIgnoreStartsWith= true;
//both x and y have children
Object[] xc= x.getChildren();
Object[] yc= y.getChildren();
ArrayList xc_elementsAL= new ArrayList();
ArrayList xc_attrsAL= new ArrayList();
ArrayList yc_elementsAL= new ArrayList();
ArrayList yc_attrsAL= new ArrayList();
//find attributes and elements and put them in xc_elementsAL and xc_attrsAL, respectively
for (int i= 0; i < xc.length; i++) {
XMLNode x_i= (XMLNode) xc[i];
if (x_i.getXMLType().equals(XMLStructureCreator.TYPE_ELEMENT)) {
xc_elementsAL.add(x_i);
} else if (
x_i.getXMLType().equals(XMLStructureCreator.TYPE_ATTRIBUTE)) {
xc_attrsAL.add(x_i);
}
}
//do the same for yc
for (int i= 0; i < yc.length; i++) {
XMLNode y_i= (XMLNode) yc[i];
if (y_i.getXMLType().equals(XMLStructureCreator.TYPE_ELEMENT)) {
yc_elementsAL.add(y_i);
} else if (
y_i.getXMLType().equals(XMLStructureCreator.TYPE_ATTRIBUTE)) {
yc_attrsAL.add(y_i);
}
}
Object[] xc_elements= xc_elementsAL.toArray();
Object[] yc_elements= yc_elementsAL.toArray();
Object[] xc_attrs= xc_attrsAL.toArray();
Object[] yc_attrs= yc_attrsAL.toArray();
ArrayList DTMatching= null;
//Mathing to be added to Entry in fDT_Matchings
int distance= 0; //distance to be added to entry in fDT
//perform unordered matching on attributes
//this updates fDT and fDT_Matchings
if (xc_attrs.length > 0 || yc_attrs.length > 0) {
if (xc_attrs.length == 0)
distance += yc_attrs.length;
else if (yc_attrs.length == 0)
distance += xc_attrs.length;
else {
unorderedMatch(x, y, xc_attrs, yc_attrs);
distance += fDT[indexOfLN(x)][indexOfRN(y)];
DTMatching= fDT_Matchings[indexOfLN(x)][indexOfRN(y)];
}
}
if (DTMatching == null)
DTMatching= new ArrayList();
//perform ordered matching on element children, i.e. number them in order of appearance
/* start new */
distance=
handleRangeDifferencer(
xc_elements,
yc_elements,
DTMatching,
distance);
/* end new */
/* start: Naive ordered compare /*
// int minlength= (xc_elements.length > yc_elements.length)?yc_elements.length:xc_elements.length;
// for (int i= 0; i < minlength; i++) {
// distance += dist((XMLNode) xc_elements[i], (XMLNode) yc_elements[i]);
// DTMatching.add(new Match( (XMLNode)xc_elements[i], (XMLNode)yc_elements[i]));
// }
// if (xc_elements.length > yc_elements.length) {
// for (int i= minlength; i < xc_elements.length; i++) {
// distance += countNodes((XMLNode) xc_elements[i]);
// }
// } else if (xc_elements.length < yc_elements.length) {
// for (int i= minlength; i < yc_elements.length; i++) {
// distance += countNodes((XMLNode) yc_elements[i]);
// }
// }
/* end: Naive ordered compare */
fIgnoreStartsWith= old_isw;
fDT[indexOfLN(x)][indexOfRN(y)]= distance;
fDT_Matchings[indexOfLN(x)][indexOfRN(y)]= DTMatching;
return distance;
}
/* matches two trees according to paper "X-Diff", p. 16 */
public void match(
XMLNode LeftTree,
XMLNode RightTree,
boolean rightTreeIsAncestor,
IProgressMonitor monitor)
throws InterruptedException {
//if (monitor != null) monitor.beginTask("",10);
fH= new HungarianMethod();
fNLeft= new Vector();
//numbering LeftTree: Mapping nodes in LeftTree to numbers to be used as array indexes
fNRight= new Vector();
//numbering RightTree: Mapping nodes in RightTree to numbers to be used as array indexes
numberNodes(LeftTree, fNLeft);
numberNodes(RightTree, fNRight);
fDT= new int[fNLeft.size()][fNRight.size()];
fDT_Matchings= new ArrayList[fNLeft.size()][fNRight.size()];
for (int i= 0; i < fDT.length; i++) {
fDT[i]= new int[fNRight.size()];
for (int j= 0; j < fDT[0].length; j++) {
fDT[i][j]= NO_ENTRY;
}
}
ArrayList NLeft= new ArrayList();
ArrayList NRight= new ArrayList();
findLeaves(LeftTree, NLeft);
findLeaves(RightTree, NRight);
/* Matching Algorithm */
/* Step 1: Compute editing distance for (LeftTree -> RightTree)*/
while (!NLeft.isEmpty() || !NRight.isEmpty()) {
for (ListIterator itNLeft= NLeft.listIterator();
itNLeft.hasNext();
) {
XMLNode x= (XMLNode) itNLeft.next();
for (ListIterator itNRight= NRight.listIterator();
itNRight.hasNext();
) {
XMLNode y= (XMLNode) itNRight.next();
if (x.getSignature().equals(y.getSignature())) {
// System.out.println("x: "+x.getName());
// System.out.println("y: "+y.getName());
if (monitor != null && monitor.isCanceled()) {
// throw new OperationCanceledException();
throw new InterruptedException();
// return;
}
//if signature starts with root>project
//do not calculate dist
//if signature is root>project
//do ordered search on children
//do unordered search on childrenb
dist(x, y);
}
}
}
ArrayList NLeft_new= new ArrayList();
ArrayList NRight_new= new ArrayList();
for (ListIterator itNLeft= NLeft.listIterator();
itNLeft.hasNext();
) {
XMLNode node= (XMLNode) itNLeft.next();
if (node.getParent() != null
&& !NLeft_new.contains(node.getParent()))
NLeft_new.add(node.getParent());
}
for (ListIterator itNRight= NRight.listIterator();
itNRight.hasNext();
) {
XMLNode node= (XMLNode) itNRight.next();
if (node.getParent() != null
&& !NRight_new.contains(node.getParent()))
NRight_new.add(node.getParent());
}
NLeft= NLeft_new;
NRight= NRight_new;
}
//end of Step1
/* Step 2: mark matchings on LeftTree and RightTree */
fMatches= new Vector();
if (!LeftTree.getSignature().equals(RightTree.getSignature())) {
//matching is empty
} else {
fMatches.add(new Match(LeftTree, RightTree));
for (int i_M= 0; i_M < fMatches.size(); i_M++) {
Match m= (Match) fMatches.elementAt(i_M);
if (!isLeaf(m.fx) && !isLeaf(m.fy)) {
// if (fDT_Matchings[ indexOfLN(m.fx) ][ indexOfRN(m.fy) ] == null)
// System.out.println("Error: ID not unique for " + m.fx.getId());
// else
// fMatches.addAll(fDT_Matchings[ indexOfLN(m.fx) ][ indexOfRN(m.fy) ]);
if (fDT_Matchings[indexOfLN(m.fx)][indexOfRN(m.fy)]
!= null)
fMatches.addAll(
fDT_Matchings[indexOfLN(m.fx)][indexOfRN(m.fy)]);
}
}
}
//end of Step2
/* Renumber Id of Nodes to follow Matches. Or for ancestor, copy over Id to ancestor */
if (rightTreeIsAncestor) {
for (ListIterator it_M= fMatches.listIterator(); it_M.hasNext();) {
Match m= (Match) it_M.next();
if (m.fx != null && m.fy != null)
m.fy.setId(m.fx.getId());
}
} else {
int newId= 0;
for (ListIterator it_M= fMatches.listIterator();
it_M.hasNext();
newId++) {
Match m= (Match) it_M.next();
if (m.fx != null)
m.fx.setId(Integer.toString(newId));
if (m.fy != null)
m.fy.setId(Integer.toString(newId));
// System.out.println("Matching: "+ ((m.fx != null)?m.fx.getOrigId():"null")+" -> "+((m.fx != null)?m.fx.getId():"null")+" , "+((m.fy != null)?m.fy.getOrigId():"null")+" -> "+((m.fy != null)?m.fy.getId():"null")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
}
}
//if (monitor != null) monitor.done();
}
protected int handleXandYnotLeaves(XMLNode x, XMLNode y) {
int ret= NO_ENTRY;
Object[] xc_orig= x.getChildren();
Object[] yc_orig= y.getChildren();
/* handle ordered entries */
if (fUseOrdered) {
boolean starts_with_sig= false;
boolean equals_sig= false;
String x_sig= x.getSignature();
String y_sig= y.getSignature();
int i_ordered;
if (!fIgnoreStartsWith) {
/* Normal case when algorithm runs.
* Algorithm runs bottom up from leaves to root.
* check x_sig.startsWith(fOrdered[i_ordered]) || y_sig.startsWith(fOrdered[i_ordered])
* because if this is the case and
* !(x_sig.equals(fOrdered[j_ordered]+SIGN_ELEMENT) && y_sig.equals(fOrdered[j_ordered]+SIGN_ELEMENT))
* i.e. the nodes are not marked for an ordered compare but x and/or y has an ancestor that is,
* then nodes x and/or y will be handled by that ancestor in orderedMatch(), which is a top-down algorithm.
* Thus, we exit the procedure dist() if this is the case.
*/
for (i_ordered= 0; i_ordered < fOrdered.length; i_ordered++) {
if (x_sig.startsWith(fOrdered[i_ordered])
|| y_sig.startsWith(fOrdered[i_ordered])) {
starts_with_sig= true;
if (x_sig.equals(y_sig)) {
for (int j_ordered= i_ordered;
j_ordered < fOrdered.length;
j_ordered++) {
if (x_sig
.equals(
fOrdered[j_ordered] + SIGN_ELEMENT)) {
equals_sig= true;
break;
}
break;
}
}
}
}
if (starts_with_sig) {
if (equals_sig) {
return orderedMath(x, y);
} else {
return ret;
}
}
} else {
/* when inside orderedMatch(x, y), algorithm runs recursively from a node to the leaves of the
* subtree rooted at this node.
* In this case we do not check x_sig.startsWith(fOrdered[i_ordered]) || y_sig.startsWith(fOrdered[i_ordered])
*/
if (x_sig.equals(y_sig)) {
for (i_ordered= 0;
i_ordered < fOrdered.length;
i_ordered++) {
if (x_sig.equals(fOrdered[i_ordered] + SIGN_ELEMENT)) {
equals_sig= true;
break;
}
}
}
if (equals_sig) {
return orderedMath(x, y);
}
}
}
/* end of handle ordered entries */
return unorderedMatch(x, y, xc_orig, yc_orig);
}
}