blob: 8ecd6bfe5b3753aa9001d8c67b7e073ec185cec5 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2011-2019 The University of York, Aston University, and others
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* This Source Code may also be made available under the following Secondary
* Licenses when the conditions for such availability set forth in the Eclipse
* Public License, v. 2.0 are satisfied: GNU General Public License, version 3.
*
* SPDX-License-Identifier: EPL-2.0 OR GPL-3.0
*
* Contributors:
* Konstantinos Barmpis - initial API and implementation
* Antonio Garcia-Dominguez - use Java 7 Path instead of File+string processing
* Horacio Hoyos Rodriguez - Add proper Git support (with code review from Antonio)
******************************************************************************/
package org.hawk.git;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.ListIterator;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.DiffFormatter;
import org.eclipse.jgit.errors.AmbiguousObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.RevisionSyntaxException;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.util.io.NullOutputStream;
import org.hawk.core.ICredentialsStore;
import org.hawk.core.IModelIndexer;
import org.hawk.core.IVcsManager;
import org.hawk.core.VcsChangeType;
import org.hawk.core.VcsCommit;
import org.hawk.core.VcsCommitItem;
import org.hawk.core.VcsRepositoryDelta;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* An implementation of {@link IVcsManager} that supports Git repositories.
* </p>
* <p>
* This implementation relies on JGit to provide its functionality. The Git tree
* and history are used to provide version information and to generate file
* contents.
* </p>
*/
public class JGitRepository implements IVcsManager {
/**
 * Signals that a revision could not be located in the repository.
 */
public class UnableToFindRevisionException extends Exception {

    private static final long serialVersionUID = -7277359120689923918L;

    /**
     * Creates the exception from a description and the underlying failure.
     *
     * @param message human-readable description of what could not be resolved
     * @param cause   lower-level exception that triggered this one
     */
    public UnableToFindRevisionException(final String message, final Throwable cause) {
        super(message, cause);
    }
}
/**
 * Signals that a delta was requested between two revisions that do not share
 * the same ancestry.
 */
public class UnmergedAncestryException extends Exception {

    private static final long serialVersionUID = -1805916162822065083L;

    /**
     * Creates the exception with the given description.
     *
     * @param message human-readable description of the offending revisions
     */
    public UnmergedAncestryException(final String message) {
        super(message);
    }
}
/**
 * Name of the query parameter that can be used to specify an alternate branch.
 */
static final String BRANCH_QPARAM = "branch";
/** Logger used by this class. */
private static final Logger LOG = LoggerFactory.getLogger(JGitRepository.class);
/**
 * Revision expression being indexed. Set in init(): taken from the
 * {@code branch} query parameter if present, otherwise from the branch
 * that is currently checked out in the repository.
 */
private String branch;
/** JGit repository handle, built in init() and closed in shutdown(). */
private Repository repository;
/** Cached result of getFirstRevision(); null until it has been computed once. */
private String firstRevision;
/** Canonical root folder given to init(); reset to null by shutdown(). */
private Path rootLocation;
/** Set to true by run() and back to false by shutdown(). */
private boolean active;
/** Value last passed to setFrozen(boolean). */
private boolean isFrozen;
/**
 * Resolves the configured branch and returns the textual form of the
 * resulting object ID.
 *
 * @return object ID of the tip of the configured branch, as a string
 * @throws IllegalStateException         if JGit rejects the branch expression
 *                                       (bad syntax, ambiguous or wrongly typed object)
 * @throws UnableToFindRevisionException if any other I/O error happens while resolving
 */
@Override
public String getCurrentRevision() throws Exception {
    try {
        return ObjectId.toString(repository.resolve(branch));
    } catch (RevisionSyntaxException | AmbiguousObjectException | IncorrectObjectTypeException e) {
        // These should not happen for a branch name that init() accepted
        throw new IllegalStateException("Unexpected exception", e);
    } catch (IOException e) {
        throw new UnableToFindRevisionException("Unable to resolve the tip of branch " + branch, e);
    }
}
/**
 * Returns the ID of the first commit produced by walking the history of the
 * configured branch in reverse commit-time order. The value is computed on the
 * first call and cached for later ones.
 *
 * @return textual object ID of that commit
 * @throws UnableToFindRevisionException if the history walk fails with an I/O error
 */
@Override
public synchronized String getFirstRevision() throws Exception {
    if (firstRevision != null) {
        return firstRevision;
    }

    repository.scanForRepoChanges();
    try (RevWalk history = new RevWalk(repository)) {
        final ObjectId tipId = repository.resolve(branch);
        final RevCommit tip = history.parseCommit(tipId);
        history.markStart(tip);

        // Newest-first by commit time, then reversed: the first result is the oldest commit
        history.sort(RevSort.COMMIT_TIME_DESC, true);
        history.sort(RevSort.REVERSE, true);

        final RevCommit oldest = history.next();
        firstRevision = ObjectId.toString(oldest.getId());
    } catch (IOException e) {
        throw new UnableToFindRevisionException("Unable to resolve the first revision", e);
    }
    return firstRevision;
}
/**
 * Computes the compacted list of changes from the given revision up to the
 * current tip of the configured branch.
 *
 * @param startRevision revision to start from; {@code null} or a value starting
 *                      with {@code "-"} means "from the beginning of the history"
 * @return compacted commit items of the resulting delta
 */
@Override
public List<VcsCommitItem> getDelta(String startRevision) throws Exception {
    final boolean fromBeginning = startRevision == null || startRevision.startsWith("-");
    final String effectiveStart = fromBeginning ? null : startRevision;

    final VcsRepositoryDelta delta = getDelta(effectiveStart, getCurrentRevision());
    return delta.getCompactedCommitItems();
}
/**
 * Computes the changes introduced by every commit in the given range.
 * <p>
 * Each commit is compared against its own first parent, or against the empty
 * tree if it has no parents. Commits are added to the returned delta from
 * oldest to newest. Commits whose diff is empty are left out.
 * </p>
 *
 * @param startRevision exclusive lower bound of the range; {@code null} or a
 *                      value starting with {@code "-"} means that the whole
 *                      history reachable from {@code endRevision} is used
 * @param endRevision   inclusive upper bound of the range
 * @return delta containing one {@link VcsCommit} per commit with changes
 * @throws UnmergedAncestryException     if {@code startRevision} is not merged
 *                                       into {@code endRevision}
 * @throws UnableToFindRevisionException if the ancestry check fails with an I/O error
 */
@Override
public VcsRepositoryDelta getDelta(String startRevision, String endRevision) throws Exception {
    if (startRevision != null) {
        if (startRevision.startsWith("-")) {
            startRevision = null;
        } else {
            checkRangeAncestry(startRevision, endRevision);
        }
    }

    VcsRepositoryDelta delta = new VcsRepositoryDelta();
    delta.setManager(this);

    try (Git git = Git.open(rootLocation.toFile()); RevWalk parentParser = new RevWalk(repository)) {
        Iterable<RevCommit> revs;
        if (startRevision == null) {
            revs = git.log().add(ObjectId.fromString(endRevision)).call();
        } else {
            revs = git.log().addRange(
                ObjectId.fromString(startRevision),
                ObjectId.fromString(endRevision)
            ).call();
        }

        /*
         * Generate the delta of each revision against its own first parent. The
         * log is iterated from newest to oldest, so the previously visited
         * revision is a descendant and must NOT be used as the base of the diff:
         * doing so would report inverted changes under the wrong commit.
         */
        List<VcsCommit> commits = new ArrayList<>();
        for (RevCommit rev : revs) {
            RevCommit parent = null;
            if (rev.getParentCount() > 0) {
                parent = rev.getParent(0);
                if (parent.getTree() == null) {
                    // Headers of the parent were not loaded by the log walk: parse them here
                    parent = parentParser.parseCommit(parent.getId());
                }
            }
            diff(rev, parent).stream()
                .findFirst().ifPresent(c -> commits.add(c.getCommit()));
        }

        /*
         * 'git log' returns revisions from newest to oldest, but the Hawk core expects
         * them from oldest to newest.
         */
        for (ListIterator<VcsCommit> itCommit = commits.listIterator(commits.size()); itCommit.hasPrevious(); ) {
            final VcsCommit commit = itCommit.previous();
            commit.setDelta(delta);
            delta.getCommits().add(commit);
        }
    }
    return delta;
}
/**
 * Verifies that the start revision is merged into the end revision, as
 * reported by {@code RevWalk.isMergedInto(start, end)}.
 *
 * @param startRevision revision expression for the start of the range
 * @param endRevision   revision expression for the end of the range
 * @throws UnmergedAncestryException     if the start is not merged into the end
 * @throws UnableToFindRevisionException if the repository cannot be read
 */
private void checkRangeAncestry(String startRevision, String endRevision)
        throws UnmergedAncestryException, UnableToFindRevisionException {
    try (RevWalk revWalk = new RevWalk(repository)) {
        final RevCommit from = revWalk.parseCommit(repository.resolve(startRevision));
        final RevCommit to = revWalk.parseCommit(repository.resolve(endRevision));

        final boolean merged = revWalk.isMergedInto(from, to);
        if (merged) {
            return;
        }

        final String message = String.format(
            "Revision %s (end) is in a branch not merged into the branch that contains revision %s (start).",
            endRevision, startRevision);
        throw new UnmergedAncestryException(message);
    } catch (IOException e) {
        throw new UnableToFindRevisionException("Unable to resolve the delta of revision", e);
    }
}
/**
 * Writes the contents that a file had at a given revision into a local file.
 * <p>
 * The blob is streamed into the target file rather than loaded into a byte
 * array first, so large files are supported as well.
 * </p>
 *
 * @param revision     full object ID of the commit to read the file from
 * @param path         path of the file inside the repository; a leading
 *                     {@code /} (as produced by the deltas) is ignored
 * @param optionalTemp file that will receive the contents
 * @return {@code optionalTemp}, or {@code null} if the path does not exist in
 *         that revision. Any error is logged and {@code optionalTemp} is still
 *         returned, in which case its contents may be missing or incomplete.
 */
@Override
public File importFile(String revision, String path, File optionalTemp) {
    if (path.startsWith("/")) {
        // Strip out starting / from the paths produced by diff(), if present
        path = path.substring(1);
    }

    // Do a TreeWalk over the tree of the commit pointed by the provided revision
    try (RevWalk revWalk = new RevWalk(repository)) {
        try (TreeWalk treeWalk = TreeWalk.forPath(
                repository,
                path,
                revWalk.parseCommit(ObjectId.fromString(revision)).getTree())) {
            if (treeWalk == null) {
                LOG.warn("Could not find path '{}' on revision {} of the Git repository at '{}'", path, revision, rootLocation);
                return null;
            }

            ObjectId blobId = treeWalk.getObjectId(0);
            try (ObjectReader objectReader = repository.newObjectReader()) {
                ObjectLoader objectLoader = objectReader.open(blobId);
                try (FileOutputStream fOS = new FileOutputStream(optionalTemp)) {
                    // getBytes() refuses objects above the streaming threshold: copy as a stream instead
                    objectLoader.copyTo(fOS);
                } catch (Exception e) {
                    LOG.error("There was an error writing the contents of the file in the repository into the provided file.", e);
                }
            }
        } catch (Exception e) {
            LOG.error("There was an error traversing the Git tree to retrieve the file contents.", e);
        }
    } catch (Exception e) {
        LOG.error("There was an error accessing the Git repository to retrieve the file contents.", e);
    }
    return optionalTemp;
}
/**
 * Tells whether this manager is running.
 *
 * @return {@code true} after {@link #run()} and until {@link #shutdown()}
 */
@Override
public boolean isActive() {
    return this.active;
}
/**
 * Prepares this manager to be run. Always invoked before {@link #run()}.
 *
 * @param vcsloc  Path or <code>file://</code> URL to the root folder of the
 *                repository. If using a <code>file://</code> URL, the branch to
 *                be indexed can be specified via <code>?branch=BRANCH</code>.
 *                The URL may or may not end with a slash.
 * @param indexer Hawk indexer that will manage this VCS.
 * @throws IllegalArgumentException if the location cannot be accessed, is not
 *                                  a folder, or is not a Git repository
 */
@Override
public void init(String vcsloc, IModelIndexer indexer) throws Exception {
    // Accept both regular paths and file:// URIs
    Path path;
    try {
        final URI uri = new URI(vcsloc);

        /*
         * Paths.get will complain about the query part, so rebuild the URI without
         * it. Resolving "." against the URI is not an option: when the path lacks
         * a trailing slash, that would drop its last segment and point to the
         * parent folder instead.
         */
        final URI uriWithoutQuery = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, null);
        path = Paths.get(uriWithoutQuery);

        List<NameValuePair> pairs = URLEncodedUtils.parse(uri, "UTF-8");
        for (NameValuePair pair : pairs) {
            if (BRANCH_QPARAM.equals(pair.getName())) {
                branch = pair.getValue();
            }
        }
    } catch (URISyntaxException | IllegalArgumentException ex) {
        // Not a usable URI: treat the location as a plain filesystem path
        path = Paths.get(vcsloc);
    }

    File canonicalFile;
    try {
        canonicalFile = path.toFile().getCanonicalFile();
    } catch (IOException e) {
        // Keep the original failure as the cause to ease troubleshooting
        throw new IllegalArgumentException(String.format("Unable to access '%s' as a local folder", vcsloc), e);
    }
    if (!canonicalFile.isDirectory()) {
        throw new IllegalArgumentException(String.format(
            "The location pointed to by %s is not a folder", vcsloc));
    }
    rootLocation = canonicalFile.toPath();

    // Assess if the vcsloc is actually a git repo
    FileRepositoryBuilder builder = new FileRepositoryBuilder().addCeilingDirectory(canonicalFile.getParentFile())
        .findGitDir(canonicalFile);
    if (builder.getGitDir() == null) {
        throw new IllegalArgumentException(
            String.format("The location pointed to by %s doesn't appear to be a valid Git "
                + "repository (.git folder not found or invalid).", vcsloc));
    }
    repository = builder.setMustExist(true).build();

    if (branch == null) {
        /*
         * If the user has not specified a branch by using
         * <code>file:///path/to/root?branch=BRANCH</code>, then we will use the
         * currently checked out branch.
         */
        branch = repository.getFullBranch();
    }
}
/**
 * Marks this manager as active.
 */
@Override
public void run() {
    this.active = true;
}
/**
 * Stops this manager: marks it as inactive, forgets the root location and
 * closes the underlying repository, if one was opened.
 * <p>
 * This is safe to call even if {@code init} was never invoked or failed
 * before the repository could be opened.
 * </p>
 */
@Override
public void shutdown() {
    // Flip the flag first, so the manager is reported as inactive even if closing fails
    active = false;
    rootLocation = null;
    if (repository != null) {
        repository.close();
    }
}
/**
 * Returns the root folder of the repository as a string.
 *
 * @return string form of the root location set during initialisation
 */
@Override
public String getLocation() {
    return this.rootLocation.toString();
}
/**
 * Reports that this manager does <b>not</b> support authentication. All the
 * authentication-related methods throw an {@link UnsupportedOperationException}.
 *
 * @return always {@code false}
 * @see #getUsername()
 * @see #getPassword()
 * @see #setCredentials(String, String, ICredentialsStore)
 */
@Override
public boolean isAuthSupported() {
    return false;
}
/**
 * Not available: this manager does <b>not</b> support authentication.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 * @see #isAuthSupported()
 */
@Override
public String getUsername() {
    throw new UnsupportedOperationException("Git repository does not support authentication.");
}
/**
 * Not available: this manager does <b>not</b> support authentication.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 * @see #isAuthSupported()
 */
@Override
public String getPassword() {
    throw new UnsupportedOperationException("Git repository does not support authentication.");
}
/**
 * Not available: this manager does <b>not</b> support authentication.
 *
 * @param username  ignored
 * @param password  ignored
 * @param credStore ignored
 * @throws UnsupportedOperationException always
 * @see #isAuthSupported()
 */
@Override
public void setCredentials(String username, String password, ICredentialsStore credStore) {
    throw new UnsupportedOperationException("Git repository does not support authentication.");
}
/**
 * Returns the display name of this type of VCS manager.
 *
 * @return fixed, human-readable label
 */
@Override
public String getHumanReadableName() {
    return "Git Repository (JGit-based)";
}
/**
 * Reports that locations given as filesystem paths are accepted.
 *
 * @return always {@code true}
 */
@Override
public boolean isPathLocationAccepted() {
    return true;
}
/**
 * Reports that locations given as URLs are not accepted.
 *
 * @return always {@code false}
 */
@Override
public boolean isURLLocationAccepted() {
    return false;
}
/**
 * Converts a raw path into one that is relative to the root of the repository.
 *
 * @param rawPath path to be made relative to the root location
 * @return the relativized path, as a string
 */
@Override
public String getRepositoryPath(String rawPath) {
    final Path target = Paths.get(rawPath);
    final Path relative = rootLocation.relativize(target);
    return relative.toString();
}
/**
 * Returns the frozen flag of this manager.
 *
 * @return value last set through {@link #setFrozen(boolean)}
 */
@Override
public boolean isFrozen() {
    return this.isFrozen;
}
/**
 * Changes the frozen flag of this manager.
 *
 * @param f new value of the flag
 */
@Override
public void setFrozen(boolean f) {
    this.isFrozen = f;
}
/**
 * Create the set of VcsCommitItems between the two commits.
 * <p>
 * The previous commit can be <b>null</b> in which case the changes will be reported against an
 * empty tree. This is useful when getting the differences of the first commit.
 * <p>
 * All the items share a single {@link VcsCommit} built from the current commit, and are
 * also registered in the item list of that commit. Every item has its path set, prefixed
 * with a slash: deletions use the old path, all other changes use the new path.
 *
 * @param current the current commit
 * @param previous the previous commit (can be null)
 * @return a list of VcsCommitItems
 * @throws IOException if there is an error accessing the git information
 */
private Collection<VcsCommitItem> diff(RevCommit current, RevCommit previous) throws IOException {
    final Collection<VcsCommitItem> result = new ArrayList<>();
    try (DiffFormatter diffFmt = new DiffFormatter(NullOutputStream.INSTANCE)) {
        diffFmt.setRepository(repository);
        final VcsCommit commit = asVcsCommit(current);

        for (DiffEntry diff : diffFmt.scan(
                previous == null ? null : previous.getTree(),
                current.getTree())) {
            VcsCommitItem item = new VcsCommitItem();
            switch (diff.getChangeType()) {
            case ADD:
                item.setChangeType(VcsChangeType.ADDED);
                item.setPath("/" + diff.getNewPath());
                break;
            case DELETE:
                // The new path is not meaningful for a deletion
                item.setChangeType(VcsChangeType.DELETED);
                item.setPath("/" + diff.getOldPath());
                break;
            case MODIFY:
                item.setChangeType(VcsChangeType.UPDATED);
                item.setPath("/" + diff.getNewPath());
                break;
            case RENAME:
                item.setChangeType(VcsChangeType.REPLACED);
                item.setPath("/" + diff.getNewPath());
                break;
            case COPY:
                // No dedicated change type for copies: still report where the copy was placed
                item.setChangeType(VcsChangeType.UNKNOWN);
                item.setPath("/" + diff.getNewPath());
                break;
            default:
                break;
            }
            item.setCommit(commit);
            commit.getItems().add(item);
            result.add(item);
        }
        return result;
    }
}
/**
 * Converts a JGit commit into a Hawk {@link VcsCommit}, copying its author
 * name, full message, revision name and (possibly adjusted) author date.
 *
 * @param current commit to be converted
 * @return the equivalent Hawk commit, with no items
 */
private VcsCommit asVcsCommit(RevCommit current) {
    final PersonIdent author = current.getAuthorIdent();
    final Date authoredAt = author.getWhen();

    /*
     * Commit timestamps in Git have a resolution of 1 second. Several commits
     * done within the same second would share a timestamp, and the temporal
     * graph could then lose versions.
     *
     * To work around this, the first-parent chain is followed for as long as
     * the ancestors have the same timestamp as this commit, and one millisecond
     * is added per ancestor found. This only breaks down with more than 1000
     * commits in the same second, which is normally very unlikely.
     */
    int sameTimestampAncestors = 0;
    for (RevCommit ancestor = current.getParentCount() > 0 ? current.getParent(0) : null;
            ancestor != null;
            ancestor = ancestor.getParentCount() > 0 ? ancestor.getParent(0) : null) {
        if (!ancestor.getAuthorIdent().getWhen().equals(authoredAt)) {
            break;
        }
        sameTimestampAncestors++;
    }

    Date effectiveDate = authoredAt;
    if (sameTimestampAncestors > 0) {
        final Instant shifted = authoredAt.toInstant().plus(Duration.ofMillis(sameTimestampAncestors));
        effectiveDate = Date.from(shifted);
    }

    final VcsCommit converted = new VcsCommit();
    converted.setAuthor(author.getName());
    converted.setJavaDate(effectiveDate);
    converted.setMessage(current.getFullMessage());
    converted.setRevision(current.getName());
    return converted;
}
}