blob: 09dd6e91b7eff0443fdc7c91e2547fde47949ef7 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2012 Obeo.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Obeo - initial API and implementation
*******************************************************************************/
package org.eclipse.emf.compare.match.eobject.internal;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import org.eclipse.emf.compare.Comparison;
import org.eclipse.emf.compare.match.eobject.EObjectIndex;
import org.eclipse.emf.compare.match.eobject.ProximityEObjectMatcher;
import org.eclipse.emf.compare.match.eobject.ScopeQuery;
import org.eclipse.emf.ecore.EObject;
/**
* This class is responsible for holding several version of EObjects from sides left, right or origins and
* provides the ability to return the closest instance (from a difference side) of a given EObject. The
* implementation expects that the queried EObjects have all been indexed before a query is done. The
* implementation also expects that when you're done with an EObject and if you don't want to get it back in
* the result of subsequent queries, the EObject is removed. <br>
* The scalability of this class will highly depend on the given distance function, especially with calls
* having a max distance of 0. <br>
* It is also based on the assumption that it is <b> very likely that the EObject has another version with e
* distance of value 0</b> in the other versions sets.
*
* @author <a href="mailto:cedric.brun@obeo.fr">Cedric Brun</a>
*/
public class ProximityIndex implements EObjectIndex, MatchAheadOfTime {
/**
* The number of elements until which the index will provide elements to match ahead of time. This is done
* to avoid algorithm complexity explosion. This is clearly a tradeoff as matching ahead of time means
* some "better match" might not be found.
*/
private static final int SEARCH_WINDOW = 1000;
/**
* The distance function used to compare the Objects.
*/
private ProximityEObjectMatcher.DistanceFunction meter;
/**
* the left objects still present in the index.
*/
private Set<EObject> lefts;
/**
* the right objects still present in the index.
*/
private Set<EObject> rights;
/**
* the origin objects still present in the index.
*/
private Set<EObject> origins;
/**
* An object able to tell us whether an object is in the scope or not.
*/
private ScopeQuery scope;
/**
* An object to gather statistics while matching.
*/
private ProximityMatchStats stats = new ProximityMatchStats();
/**
* Create a new {@link ProximityIndex} using the given distance function.
*
* @param meter
* the distance function to use to compare the EObjects.
* @param matcher
* the object used to know if an instance is in the scope or not.
*/
public ProximityIndex(ProximityEObjectMatcher.DistanceFunction meter, ScopeQuery matcher) {
this.meter = meter;
this.lefts = Sets.newLinkedHashSet();
this.rights = Sets.newLinkedHashSet();
this.origins = Sets.newLinkedHashSet();
this.scope = matcher;
}
/**
* {@inheritDoc}
*/
public Map<Side, EObject> findClosests(Comparison inProgress, EObject eObj, Side passedObjectSide) {
if (!readyForThisTest(inProgress, eObj)) {
return null;
}
Map<Side, EObject> result = new HashMap<EObjectIndex.Side, EObject>(3);
result.put(passedObjectSide, eObj);
if (passedObjectSide == Side.LEFT) {
EObject closestRight = findTheClosest(inProgress, eObj, Side.LEFT, Side.RIGHT, true);
EObject closestOrigin = findTheClosest(inProgress, eObj, Side.LEFT, Side.ORIGIN, true);
result.put(Side.RIGHT, closestRight);
result.put(Side.ORIGIN, closestOrigin);
} else if (passedObjectSide == Side.RIGHT) {
EObject closestLeft = findTheClosest(inProgress, eObj, Side.RIGHT, Side.LEFT, true);
EObject closestOrigin = findTheClosest(inProgress, eObj, Side.RIGHT, Side.ORIGIN, true);
result.put(Side.LEFT, closestLeft);
result.put(Side.ORIGIN, closestOrigin);
} else if (passedObjectSide == Side.ORIGIN) {
EObject closestLeft = findTheClosest(inProgress, eObj, Side.ORIGIN, Side.LEFT, true);
EObject closestRight = findTheClosest(inProgress, eObj, Side.ORIGIN, Side.RIGHT, true);
result.put(Side.LEFT, closestLeft);
result.put(Side.RIGHT, closestRight);
}
return result;
}
/**
* return the closest EObject of the passed one found in the sideToFind storage.
*
* @param inProgress
* the comparison under match.
* @param eObj
* the base EObject.
* @param originalSide
* the side of the base EObject.
* @param sideToFind
* the side to search in.
* @param shouldDoubleCheck
* true if we should make sure that the found EObject has the inverse relationship with the
* base one.
* @return the closest EObject of the passed one found in the sideToFind storage.
*/
private EObject findTheClosest(Comparison inProgress, final EObject eObj, final Side originalSide,
final Side sideToFind, boolean shouldDoubleCheck) {
Set<EObject> storageToSearchFor = lefts;
switch (sideToFind) {
case RIGHT:
storageToSearchFor = rights;
break;
case LEFT:
storageToSearchFor = lefts;
break;
case ORIGIN:
storageToSearchFor = origins;
break;
default:
break;
}
/*
* We are starting by looking for EObject having a distance of 0. It means we'll iterate two times in
* the worst case but it is very likely that the EObject has another version with a distance of 0. It
* is also based on the assumption that calling distance() with a 0 max distance triggers shortcuts
* and is faster than calling the same distance() method with a max_distance > 0.
*/
Candidate best = findIdenticMatch(inProgress, eObj, storageToSearchFor);
if (best.some()) {
return best.eObject;
}
SortedMap<Double, EObject> candidates = Maps.newTreeMap();
/*
* We could not find an EObject which is identical, let's search again and find the closest EObject.
*/
Iterator<EObject> it = storageToSearchFor.iterator();
while (best.distance != 0 && it.hasNext()) {
EObject potentialClosest = it.next();
double dist = meter.distance(inProgress, eObj, potentialClosest);
stats.similarityCompare();
if (dist < best.distance) {
if (shouldDoubleCheck) {
// We need to double check the currentlyDigging has the same object as the closest !
candidates.put(Double.valueOf(dist), potentialClosest);
} else {
best.distance = dist;
best.eObject = potentialClosest;
}
}
}
if (shouldDoubleCheck) {
for (Entry<Double, EObject> entry : candidates.entrySet()) {
EObject doubleCheck = findTheClosest(inProgress, entry.getValue(), sideToFind, originalSide,
false);
stats.doubleCheck();
if (doubleCheck == eObj) {
stats.similaritySuccess();
best.eObject = entry.getValue();
best.distance = entry.getKey().doubleValue();
break;
} else {
stats.failedDoubleCheck();
}
}
}
if (!best.some()) {
stats.noMatch();
}
return best.eObject;
}
/**
* Look for a perfect match (identic content) in the given list of candidates.
*
* @param inProgress
* the comparison being matched.
* @param eObj
* the object
* @param candidates
* the list of possible matches.
* @return a candidate instance wrapping the found match Object (if found)
*/
private Candidate findIdenticMatch(Comparison inProgress, final EObject eObj, Set<EObject> candidates) {
Iterator<EObject> it = candidates.iterator();
Candidate best = new Candidate();
while (it.hasNext() && !best.some()) {
EObject fastCheck = it.next();
if (!readyForThisTest(inProgress, fastCheck)) {
stats.backtrack();
} else {
stats.identicCompare();
if (meter.areIdentic(inProgress, eObj, fastCheck)) {
stats.identicSuccess();
best.eObject = fastCheck;
best.distance = 0;
}
}
}
return best;
}
/**
* Check whether we have all the required information to search for this object matches.
*
* @param inProgress
* the Comparison which is being matched.
* @param fastCheck
* the Object we are trying to match.
* @return true if we have all the required information, false otherwise.
*/
private boolean readyForThisTest(Comparison inProgress, EObject fastCheck) {
EObject eContainer = fastCheck.eContainer();
if (eContainer != null && scope.isInScope(eContainer)) {
return inProgress.getMatch(eContainer) != null;
}
return true;
}
/**
* {@inheritDoc}
*/
public void remove(EObject obj, Side side) {
switch (side) {
case RIGHT:
rights.remove(obj);
break;
case LEFT:
lefts.remove(obj);
break;
case ORIGIN:
origins.remove(obj);
break;
default:
break;
}
}
/**
* {@inheritDoc}
*/
public void index(EObject eObject, Side side) {
switch (side) {
case RIGHT:
rights.add(eObject);
break;
case LEFT:
lefts.add(eObject);
break;
case ORIGIN:
origins.add(eObject);
break;
default:
break;
}
}
/**
* {@inheritDoc}
*/
public Collection<EObject> getValuesStillThere(final Side side) {
Collection<EObject> result = Collections.emptyList();
switch (side) {
case RIGHT:
result = ImmutableList.copyOf(rights);
break;
case LEFT:
result = ImmutableList.copyOf(lefts);
break;
case ORIGIN:
result = ImmutableList.copyOf(origins);
break;
default:
break;
}
return result;
}
/**
* A Wrapper class to keep an {@link EObject} aside with a distance.
*
* @author <a href="mailto:cedric.brun@obeo.fr">Cedric Brun</a>
*/
private static class Candidate {
/**
* an EObject.
*/
protected EObject eObject;
/**
* A distance.
*/
protected double distance = Double.MAX_VALUE;
/**
* return true of the candidate has an {@link EObject}.
*
* @return true of the candidate has an {@link EObject}.
*/
public boolean some() {
return eObject != null;
}
}
/**
* {@inheritDoc}
*/
public Iterable<EObject> getValuesToMatchAhead(Side left) {
Collection<EObject> indexedValues = getValuesStillThere(left);
if (indexedValues.size() > SEARCH_WINDOW) {
return indexedValues;
}
return Collections.emptyList();
}
}