/******************************************************************************* | |
* Copyright (c) 2011-2019 The University of York, Aston University, and others | |
* | |
* This program and the accompanying materials are made available under the | |
* terms of the Eclipse Public License 2.0 which is available at | |
* http://www.eclipse.org/legal/epl-2.0. | |
* | |
* This Source Code may also be made available under the following Secondary | |
* Licenses when the conditions for such availability set forth in the Eclipse | |
* Public License, v. 2.0 are satisfied: GNU General Public License, version 3. | |
* | |
* SPDX-License-Identifier: EPL-2.0 OR GPL-3.0 | |
* | |
* Contributors: | |
* Konstantinos Barmpis - initial API and implementation | |
* Antonio Garcia-Dominguez - use Java 7 Path instead of File+string processing | |
* Horacio Hoyos Rodriguez - Add proper Git support (with code review from Antonio) | |
******************************************************************************/ | |
package org.hawk.git; | |
import java.io.File; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.net.URI; | |
import java.net.URISyntaxException; | |
import java.nio.file.Path; | |
import java.nio.file.Paths; | |
import java.time.Duration; | |
import java.time.Instant; | |
import java.util.ArrayList; | |
import java.util.Collection; | |
import java.util.Date; | |
import java.util.List; | |
import java.util.ListIterator; | |
import org.apache.http.NameValuePair; | |
import org.apache.http.client.utils.URLEncodedUtils; | |
import org.eclipse.jgit.api.Git; | |
import org.eclipse.jgit.diff.DiffEntry; | |
import org.eclipse.jgit.diff.DiffFormatter; | |
import org.eclipse.jgit.errors.AmbiguousObjectException; | |
import org.eclipse.jgit.errors.IncorrectObjectTypeException; | |
import org.eclipse.jgit.errors.RevisionSyntaxException; | |
import org.eclipse.jgit.lib.ObjectId; | |
import org.eclipse.jgit.lib.ObjectLoader; | |
import org.eclipse.jgit.lib.ObjectReader; | |
import org.eclipse.jgit.lib.PersonIdent; | |
import org.eclipse.jgit.lib.Repository; | |
import org.eclipse.jgit.revwalk.RevCommit; | |
import org.eclipse.jgit.revwalk.RevSort; | |
import org.eclipse.jgit.revwalk.RevWalk; | |
import org.eclipse.jgit.storage.file.FileRepositoryBuilder; | |
import org.eclipse.jgit.treewalk.TreeWalk; | |
import org.eclipse.jgit.util.io.NullOutputStream; | |
import org.hawk.core.ICredentialsStore; | |
import org.hawk.core.IModelIndexer; | |
import org.hawk.core.IVcsManager; | |
import org.hawk.core.VcsChangeType; | |
import org.hawk.core.VcsCommit; | |
import org.hawk.core.VcsCommitItem; | |
import org.hawk.core.VcsRepositoryDelta; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
/** | |
* <p> | |
* An implementation of {@link IVcsManager} that supports Git repositories. | |
* </p> | |
* <p> | |
* This implementation relies on JGit to provide its functionality. The Git tree
* and history are used to provide version information and to generate file
* contents.
* </p> | |
*/ | |
public class JGitRepository implements IVcsManager { | |
/**
 * Signals that a revision could not be located in the repository.
 */
public class UnableToFindRevisionException extends Exception {

    private static final long serialVersionUID = -7277359120689923918L;

    /**
     * @param message description of the lookup that failed
     * @param cause   underlying failure, handed to {@link Exception#Exception(String, Throwable)}
     */
    public UnableToFindRevisionException(final String message, final Throwable cause) {
        super(message, cause);
    }
}
/**
 * Signals that a delta was requested between two revisions that are not part
 * of the same ancestry.
 */
public class UnmergedAncestryException extends Exception {

    private static final long serialVersionUID = -1805916162822065083L;

    /**
     * @param message description of the offending pair of revisions
     */
    public UnmergedAncestryException(final String message) {
        super(message);
    }
}
/** | |
* Name of the query parameter that can be used to specify an alternate branch. | |
*/ | |
static final String BRANCH_QPARAM = "branch"; | |
private static final Logger LOG = LoggerFactory.getLogger(JGitRepository.class); | |
private String branch; | |
private Repository repository; | |
private String firstRevision; | |
private Path rootLocation; | |
private boolean active; | |
private boolean isFrozen; | |
@Override | |
public String getCurrentRevision() throws Exception { | |
try { | |
final ObjectId oid = repository.resolve(branch); | |
return ObjectId.toString(oid); | |
} catch (RevisionSyntaxException | AmbiguousObjectException | IncorrectObjectTypeException e) { | |
throw new IllegalStateException("Unexpected exception", e); | |
} catch (IOException e) { | |
throw new UnableToFindRevisionException("Unable to resolve the tip of branch " + branch, e); | |
} | |
} | |
@Override | |
public synchronized String getFirstRevision() throws Exception { | |
if (firstRevision == null) { | |
repository.scanForRepoChanges(); | |
try (RevWalk walk = new RevWalk(repository)) { | |
walk.markStart(walk.parseCommit(repository.resolve(branch))); | |
walk.sort(RevSort.COMMIT_TIME_DESC, true); | |
walk.sort(RevSort.REVERSE, true); | |
RevCommit commit = walk.next(); | |
firstRevision = ObjectId.toString(commit.getId()); | |
} catch (IOException e) { | |
throw new UnableToFindRevisionException("Unable to resolve the first revision", e); | |
} | |
} | |
return firstRevision; | |
} | |
@Override | |
public List<VcsCommitItem> getDelta(String startRevision) throws Exception { | |
if (startRevision != null && startRevision.startsWith("-")) { | |
startRevision = null; | |
} | |
return getDelta(startRevision, getCurrentRevision()).getCompactedCommitItems(); | |
} | |
@Override | |
public VcsRepositoryDelta getDelta(String startRevision, String endRevision) throws Exception { | |
if (startRevision == null) { | |
// nothing to do here! | |
} else if (startRevision.startsWith("-")) { | |
startRevision = null; | |
} else { | |
checkRangeAncestry(startRevision, endRevision); | |
} | |
VcsRepositoryDelta delta = new VcsRepositoryDelta(); | |
delta.setManager(this); | |
try (Git git = Git.open(rootLocation.toFile())) { | |
Iterable<RevCommit> revs; | |
if (startRevision == null) { | |
revs = git.log().add(ObjectId.fromString(endRevision)).call(); | |
} else { | |
revs = git.log().addRange( | |
ObjectId.fromString(startRevision), | |
ObjectId.fromString(endRevision) | |
).call(); | |
} | |
// Generate deltas between revisions | |
List<VcsCommit> commits = new ArrayList<>(); | |
RevCommit previous = null; | |
for (RevCommit rev : revs) { | |
if (previous == null && rev.getParentCount() > 0) { | |
previous = rev.getParent(0); | |
} | |
diff(rev, previous).stream() | |
.findFirst().ifPresent(c -> commits.add(c.getCommit())); | |
previous = rev; | |
} | |
/* | |
* 'git log' returns revisions from newest to oldest, but the Hawk core expects | |
* them from oldest to newest. | |
*/ | |
for (ListIterator<VcsCommit> itCommit = commits.listIterator(commits.size()); itCommit.hasPrevious(); ) { | |
final VcsCommit commit = itCommit.previous(); | |
commit.setDelta(delta); | |
delta.getCommits().add(commit); | |
} | |
} | |
return delta; | |
} | |
private void checkRangeAncestry(String startRevision, String endRevision) | |
throws UnmergedAncestryException, UnableToFindRevisionException { | |
try (RevWalk walk = new RevWalk(repository)) { | |
RevCommit startCommit = walk.parseCommit(repository.resolve(startRevision)); | |
RevCommit endCommit = walk.parseCommit(repository.resolve(endRevision)); | |
if (!walk.isMergedInto(startCommit, endCommit)) { | |
throw new UnmergedAncestryException( | |
String.format("Revision %s (end) is in a branch not merged " | |
+ "into the branch that contains revision %s (start).", endRevision, startRevision)); | |
} | |
} catch (IOException e) { | |
throw new UnableToFindRevisionException("Unable to resolve the delta of revision", e); | |
} | |
} | |
@Override | |
public File importFile(String revision, String path, File optionalTemp) { | |
if (path.startsWith("/")) { | |
// Strip out starting / from the paths produced by diff(), if present | |
path = path.substring(1); | |
} | |
// Do a TreeWalk over the tree of the commit pointed by the provided revision | |
try (RevWalk revWalk = new RevWalk(repository)) { | |
try (TreeWalk treeWalk = TreeWalk.forPath( | |
repository, | |
path, | |
revWalk.parseCommit(ObjectId.fromString(revision)).getTree())) { | |
if (treeWalk == null) { | |
LOG.warn("Could not find path '{}' on revision {} of the Git repository at '{}'", path, revision, rootLocation); | |
return null; | |
} | |
ObjectId blobId = treeWalk.getObjectId(0); | |
try (ObjectReader objectReader = repository.newObjectReader()) { | |
ObjectLoader objectLoader = objectReader.open(blobId); | |
byte[] bytes = objectLoader.getBytes(); | |
try (FileOutputStream fOS = new FileOutputStream(optionalTemp)) { | |
fOS.write(bytes); | |
} | |
catch (Exception e) { | |
LOG.error("There was an error writing the contents of the file in the repository into the provided file.", e); | |
} | |
} | |
} | |
catch (Exception e) { | |
LOG.error("There was an error traversing the Git tree to retrieve the file contents.", e); | |
} | |
} catch (Exception e) { | |
LOG.error("There was an error accessing the Git repository to retrieve the file contents.", e); | |
} | |
return optionalTemp; | |
} | |
@Override | |
public boolean isActive() { | |
return active; | |
} | |
/** | |
* Prepares this manager to be run. Always invoked before {@link #run()}. | |
* | |
* @param vcsloc Path or <code>file://</code> URL to the root folder of the | |
* repository. If using a <code>file://</code> URL, the branch to | |
* be indexed can be specified via <code>?branch=BRANCH</code>. | |
* @param indexer Hawk indexer that will manage this VCS. | |
*/ | |
@Override | |
public void init(String vcsloc, IModelIndexer indexer) throws Exception { | |
// Accept both regular paths and file:// URIs | |
Path path; | |
try { | |
final URI uri = new URI(vcsloc); | |
/* | |
* This is needed to remove the query part cleanly (Paths.get will complain otherwise). | |
*/ | |
path = Paths.get(uri.resolve(".")); | |
List<NameValuePair> pairs = URLEncodedUtils.parse(uri, "UTF-8"); | |
for (NameValuePair pair : pairs) { | |
if (BRANCH_QPARAM.equals(pair.getName())) { | |
branch = pair.getValue(); | |
} | |
} | |
} catch (URISyntaxException | IllegalArgumentException ex) { | |
path = Paths.get(vcsloc); | |
} | |
File canonicalFile; | |
try { | |
canonicalFile = path.toFile().getCanonicalFile(); | |
} catch (IOException e) { | |
throw new IllegalArgumentException(String.format("Unable to access '%s' as a local folder", vcsloc)); | |
} | |
if (!canonicalFile.isDirectory()) { | |
throw new IllegalArgumentException(String.format( | |
"The location pointed to by %s is not a folder", vcsloc)); | |
} | |
rootLocation = canonicalFile.toPath(); | |
// Assess if the vcsloc is actually a git repo | |
FileRepositoryBuilder builder = new FileRepositoryBuilder().addCeilingDirectory(canonicalFile.getParentFile()) | |
.findGitDir(canonicalFile); | |
if (builder.getGitDir() == null) { | |
throw new IllegalArgumentException( | |
String.format("The location pointed to by %s doesn't appear to be a valid Git " | |
+ "repository (.git folder not found or invalid).", vcsloc)); | |
} | |
repository = builder.setMustExist(true).build(); | |
if (branch == null) { | |
/* | |
* If the user has not specified a branch by using | |
* <code>file:///path/to/root?branch=BRANCH</code>, then we will use the | |
* currently checked out branch. | |
*/ | |
branch = repository.getFullBranch(); | |
} | |
} | |
@Override | |
public void run() { | |
active = true; | |
} | |
@Override | |
public void shutdown() { | |
rootLocation = null; | |
repository.close(); | |
active = false; | |
} | |
@Override | |
public String getLocation() { | |
return rootLocation.toString(); | |
} | |
/** | |
* Git repositories do <b>NOT</b> support authentication. Calling any of the authentication related | |
* methods will throw an {@link UnsupportedOperationException} | |
* | |
* @see #getUsername() | |
* @see #getPassword() | |
* @see #setCredentials(String, String, ICredentialsStore) | |
*/ | |
@Override | |
public boolean isAuthSupported() { | |
return false; | |
} | |
/** | |
* Git repositories do <b>NOT</b> support authentication. This method always throws {@link UnsupportedOperationException} | |
* @see #isAuthSupported() | |
*/ | |
@Override | |
public String getUsername() { | |
throw new UnsupportedOperationException("Git repository does not support authentication."); | |
} | |
/** | |
* Git repositories do <b>NOT</b> support authentication. This method always throws {@link UnsupportedOperationException} | |
* @see #isAuthSupported() | |
*/ | |
@Override | |
public String getPassword() { | |
throw new UnsupportedOperationException("Git repository does not support authentication."); | |
} | |
/** | |
* Git repositories do <b>NOT</b> support authentication. This method always throws {@link UnsupportedOperationException} | |
* @see #isAuthSupported() | |
*/ | |
@Override | |
public void setCredentials(String username, String password, ICredentialsStore credStore) { | |
throw new UnsupportedOperationException("Git repository does not support authentication."); | |
} | |
@Override | |
public String getHumanReadableName() { | |
return "Git Repository (JGit-based)"; | |
} | |
@Override | |
public boolean isPathLocationAccepted() { | |
return true; | |
} | |
@Override | |
public boolean isURLLocationAccepted() { | |
return false; | |
} | |
@Override | |
public String getRepositoryPath(String rawPath) { | |
return rootLocation.relativize(Paths.get(rawPath)).toString(); | |
} | |
@Override | |
public boolean isFrozen() { | |
return isFrozen; | |
} | |
@Override | |
public void setFrozen(boolean f) { | |
isFrozen = f; | |
} | |
/** | |
* Create the set of VcsCommitItems between the two commits. | |
* <p> | |
* The previous commit can be <b>null</b> in which case the changes will be reported against an | |
* empty tree. This is useful when getting the differences of the first commit. | |
* | |
* @param current the current commit | |
* @param previous the previous commit (can be null) | |
* @return a list of VcsCommitItems | |
* @throws IOException if there is an error accessing the git information | |
*/ | |
private Collection<VcsCommitItem> diff(RevCommit current, RevCommit previous) throws IOException { | |
Collection<VcsCommitItem> result = new ArrayList<VcsCommitItem>(); | |
try ( | |
DiffFormatter diffFmt = new DiffFormatter(NullOutputStream.INSTANCE)) { | |
diffFmt.setRepository(repository); | |
VcsCommit commit = asVcsCommit(current); | |
for (DiffEntry diff : diffFmt.scan( | |
previous == null ? null : previous.getTree(), | |
current.getTree())) { | |
VcsCommitItem item = new VcsCommitItem(); | |
switch (diff.getChangeType()) { | |
case ADD: | |
item.setChangeType(VcsChangeType.ADDED); | |
item.setPath("/" + diff.getNewPath()); | |
break; | |
case DELETE: | |
item.setChangeType(VcsChangeType.DELETED); | |
item.setPath("/" + diff.getOldPath()); | |
break; | |
case MODIFY: | |
item.setChangeType(VcsChangeType.UPDATED); | |
item.setPath("/" + diff.getNewPath()); | |
break; | |
case RENAME: | |
item.setChangeType(VcsChangeType.REPLACED); | |
item.setPath("/" + diff.getNewPath()); | |
break; | |
case COPY: | |
item.setChangeType(VcsChangeType.UNKNOWN); | |
default: | |
break; | |
} | |
item.setCommit(commit); | |
commit.getItems().add(item); | |
result.add(item); | |
} | |
return result; | |
} | |
} | |
private VcsCommit asVcsCommit(RevCommit current) { | |
final PersonIdent authorIdent = current.getAuthorIdent(); | |
/* | |
* Git's commit timestamps have a 1 second resolution, meaning that if we commit | |
* very quickly in sequence (within the same second), the temporal graph may | |
* actually lose versions. | |
* | |
* As a workaround, we can count how many commits before the current one have | |
* the same timestamp, and add that number of milliseconds to the VcsCommit | |
* timestamp. This means we'd only run afoul of this limitation if we managed to | |
* commit more than 1000 times in a second (which is normally very unlikely). | |
*/ | |
Date commitDate = authorIdent.getWhen(); | |
int nCommitsSameTimestamp = 0; | |
if (current.getParentCount() > 0) { | |
RevCommit parent = current.getParent(0); | |
while (parent != null && parent.getAuthorIdent().getWhen().equals(commitDate)) { | |
++nCommitsSameTimestamp; | |
parent = parent.getParentCount() > 0 ? parent.getParent(0) : null; | |
} | |
if (nCommitsSameTimestamp > 0) { | |
Instant newInstant = commitDate.toInstant().plus(Duration.ofMillis(nCommitsSameTimestamp)); | |
commitDate = Date.from(newInstant); | |
} | |
} | |
VcsCommit commit = new VcsCommit(); | |
commit.setAuthor(authorIdent.getName()); | |
commit.setJavaDate(commitDate); | |
commit.setMessage(current.getFullMessage()); | |
commit.setRevision(current.getName()); | |
return commit; | |
} | |
} |