| /******************************************************************************* |
| * Copyright (c) 2000, 2014 IBM Corporation and others. |
| * |
| * This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License 2.0 |
| * which accompanies this distribution, and is available at |
| * https://www.eclipse.org/legal/epl-2.0/ |
| * |
| * SPDX-License-Identifier: EPL-2.0 |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.compare.internal.core; |
| |
| import org.eclipse.core.runtime.OperationCanceledException; |
| import org.eclipse.core.runtime.SubMonitor; |
| |
| |
| /* Used to determine the change set responsible for each line */ |
| public abstract class LCS { |
| public static final double TOO_LONG = 100000000.0; // 10^8, the value of N*M when |
| // to start binding the |
| // run time |
| |
| private static final double POW_LIMIT = 1.5; // limit the time to |
| // D^POW_LIMIT |
| |
| private int max_differences; // the maximum number of differences from |
| // each end to consider |
| |
| private int length; |
| |
| /** |
| * Myers' algorithm for longest common subsequence. O((M + N)D) worst case |
| * time, O(M + N + D^2) expected time, O(M + N) space |
| * (http://citeseer.ist.psu.edu/myers86ond.html) |
| * |
| * Note: Beyond implementing the algorithm as described in the paper I have |
| * added diagonal range compression which helps when finding the LCS of a |
| * very long and a very short sequence, also bound the running time to (N + |
| * M)^1.5 when both sequences are very long. |
| * |
| * After this method is called, the longest common subsequence is available |
| * by calling getResult() where result[0] is composed of |
| * entries from l1 and result[1] is composed of entries from l2 |
| * @param subMonitor |
| */ |
| public void longestCommonSubsequence(SubMonitor subMonitor) { |
| int length1 = getLength1(); |
| int length2 = getLength2(); |
| if (length1 == 0 || length2 == 0) { |
| this.length = 0; |
| return; |
| } |
| |
| this.max_differences = (length1 + length2 + 1) / 2; // ceil((N+M)/2) |
| if (!isCappingDisabled() && (double) length1 * (double) length2 > TOO_LONG) { |
| // limit complexity to D^POW_LIMIT for long sequences |
| this.max_differences = (int) Math.pow(this.max_differences, POW_LIMIT - 1.0); |
| } |
| |
| initializeLcs(length1); |
| |
| subMonitor.beginTask(null, length1); |
| |
| /* |
| * The common prefixes and suffixes are always part of some LCS, include |
| * them now to reduce our search space |
| */ |
| int forwardBound; |
| int max = Math.min(length1, length2); |
| for (forwardBound = 0; forwardBound < max |
| && isRangeEqual(forwardBound, forwardBound); forwardBound++) { |
| setLcs(forwardBound, forwardBound); |
| worked(subMonitor, 1); |
| } |
| |
| int backBoundL1 = length1 - 1; |
| int backBoundL2 = length2 - 1; |
| |
| while (backBoundL1 >= forwardBound && backBoundL2 >= forwardBound |
| && isRangeEqual(backBoundL1, backBoundL2)) { |
| setLcs(backBoundL1, backBoundL2); |
| backBoundL1--; |
| backBoundL2--; |
| worked(subMonitor, 1); |
| } |
| |
| this.length = forwardBound |
| + length1 |
| - backBoundL1 |
| - 1 |
| + lcs_rec(forwardBound, backBoundL1, forwardBound, |
| backBoundL2, new int[2][length1 + length2 + 1], |
| new int[3], subMonitor); |
| |
| } |
| |
| private boolean isCappingDisabled() { |
| return CompareSettings.getDefault().isCappingDisabled(); |
| } |
| |
| /** |
| * The recursive helper function for Myers' LCS. Computes the LCS of |
| * l1[bottoml1 .. topl1] and l2[bottoml2 .. topl2] fills in the appropriate |
| * location in lcs and returns the length |
| * |
| * @param l1 The 1st sequence |
| * @param bottoml1 Index in the 1st sequence to start from (inclusive) |
| * @param topl1 Index in the 1st sequence to end on (inclusive) |
| * @param l2 The 2nd sequence |
| * @param bottoml2 Index in the 2nd sequence to start from (inclusive) |
| * @param topl2 Index in the 2nd sequence to end on (inclusive) |
| * @param V should be allocated as int[2][l1.length + l2.length + 1], used |
| * to store furthest reaching D-paths |
| * @param snake should be allocated as int[3], used to store the beginning |
| * x, y coordinates and the length of the latest snake traversed |
| * @param subMonitor |
| * @param lcs should be allocated as TextLine[2][l1.length], used to store |
| * the common points found to be part of the LCS where lcs[0] |
| * references lines of l1 and lcs[1] references lines of l2. |
| * |
| * @return the length of the LCS |
| */ |
| private int lcs_rec( |
| int bottoml1, int topl1, |
| int bottoml2, int topl2, |
| int[][] V, int[] snake, SubMonitor subMonitor) { |
| |
| // check that both sequences are non-empty |
| if (bottoml1 > topl1 || bottoml2 > topl2) { |
| return 0; |
| } |
| |
| int d = find_middle_snake(bottoml1, topl1, bottoml2, topl2, V, snake, subMonitor); |
| // System.out.println(snake[0] + " " + snake[1] + " " + snake[2]); |
| |
| // need to store these so we don't lose them when they're overwritten by |
| // the recursion |
| int len = snake[2]; |
| int startx = snake[0]; |
| int starty = snake[1]; |
| |
| // the middle snake is part of the LCS, store it |
| for (int i = 0; i < len; i++) { |
| setLcs(startx + i, starty + i); |
| worked(subMonitor, 1); |
| } |
| |
| if (d > 1) { |
| return len |
| + lcs_rec(bottoml1, startx - 1, bottoml2, starty - 1, V, snake, subMonitor) |
| + lcs_rec(startx + len, topl1, starty + len, topl2, V, snake, subMonitor); |
| } else if (d == 1) { |
| /* |
| * In this case the sequences differ by exactly 1 line. We have |
| * already saved all the lines after the difference in the for loop |
| * above, now we need to save all the lines before the difference. |
| */ |
| int max = Math.min(startx - bottoml1, starty - bottoml2); |
| for (int i = 0; i < max; i++) { |
| setLcs(bottoml1 + i, bottoml2 + i); |
| worked(subMonitor, 1); |
| } |
| return max + len; |
| } |
| |
| return len; |
| } |
| |
| private void worked(SubMonitor subMonitor, int work) { |
| if (subMonitor.isCanceled()) |
| throw new OperationCanceledException(); |
| subMonitor.worked(work); |
| } |
| |
| /** |
| * Helper function for Myers' LCS algorithm to find the middle snake for |
| * l1[bottoml1..topl1] and l2[bottoml2..topl2] The x, y coordinates of the |
| * start of the middle snake are saved in snake[0], snake[1] respectively |
| * and the length of the snake is saved in s[2]. |
| * |
| * @param l1 The 1st sequence |
| * @param bottoml1 Index in the 1st sequence to start from (inclusive) |
| * @param topl1 Index in the 1st sequence to end on (inclusive) |
| * @param l2 The 2nd sequence |
| * @param bottoml2 Index in the 2nd sequence to start from (inclusive) |
| * @param topl2 Index in the 2nd sequence to end on (inclusive) |
| * @param V should be allocated as int[2][l1.length + l2.length + 1], used |
| * to store furthest reaching D-paths |
| * @param snake should be allocated as int[3], used to store the beginning |
| * x, y coordinates and the length of the middle snake |
| * @param subMonitor |
| * |
| * @return The number of differences (SES) between l1[bottoml1..topl1] and |
| * l2[bottoml2..topl2] |
| */ |
| private int find_middle_snake( |
| int bottoml1, int topl1, |
| int bottoml2, int topl2, |
| int[][] V, int[] snake, |
| SubMonitor subMonitor) { |
| int N = topl1 - bottoml1 + 1; |
| int M = topl2 - bottoml2 + 1; |
| // System.out.println("N: " + N + " M: " + M + " bottom: " + bottoml1 + |
| // ", " + |
| // bottoml2 + " top: " + topl1 + ", " + topl2); |
| int delta = N - M; |
| boolean isEven; |
| if ((delta & 1) == 1) { |
| isEven = false; |
| } else { |
| isEven = true; |
| } |
| |
| int limit = Math.min(this.max_differences, (N + M + 1) / 2); // ceil((N+M)/2) |
| |
| int value_to_add_forward; // a 0 or 1 that we add to the start offset |
| // to make it odd/even |
| if ((M & 1) == 1) { |
| value_to_add_forward = 1; |
| } else { |
| value_to_add_forward = 0; |
| } |
| |
| int value_to_add_backward; |
| if ((N & 1) == 1) { |
| value_to_add_backward = 1; |
| } else { |
| value_to_add_backward = 0; |
| } |
| |
| int start_forward = -M; |
| int end_forward = N; |
| int start_backward = -N; |
| int end_backward = M; |
| |
| V[0][limit + 1] = 0; |
| V[1][limit - 1] = N; |
| for (int d = 0; d <= limit; d++) { |
| |
| int start_diag = Math.max(value_to_add_forward + start_forward, -d); |
| int end_diag = Math.min(end_forward, d); |
| value_to_add_forward = 1 - value_to_add_forward; |
| |
| // compute forward furthest reaching paths |
| for (int k = start_diag; k <= end_diag; k += 2) { |
| int x; |
| if (k == -d |
| || (k < d && V[0][limit + k - 1] < V[0][limit + k + 1])) { |
| x = V[0][limit + k + 1]; |
| } else { |
| x = V[0][limit + k - 1] + 1; |
| } |
| |
| int y = x - k; |
| |
| snake[0] = x + bottoml1; |
| snake[1] = y + bottoml2; |
| snake[2] = 0; |
| // System.out.println("1 x: " + x + " y: " + y + " k: " + k + " |
| // d: " + d ); |
| while (x < N && y < M |
| && isRangeEqual(x + bottoml1, y + bottoml2)) { |
| x++; |
| y++; |
| snake[2]++; |
| } |
| V[0][limit + k] = x; |
| // System.out.println(x + " " + V[1][limit+k -delta] + " " + k + |
| // " " + delta); |
| if (!isEven && k >= delta - d + 1 && k <= delta + d - 1 |
| && x >= V[1][limit + k - delta]) { |
| // System.out.println("Returning: " + (2*d-1)); |
| return 2 * d - 1; |
| } |
| |
| // check to see if we can cut down the diagonal range |
| if (x >= N && end_forward > k - 1) { |
| end_forward = k - 1; |
| } else if (y >= M) { |
| start_forward = k + 1; |
| value_to_add_forward = 0; |
| } |
| } |
| |
| start_diag = Math.max(value_to_add_backward + start_backward, -d); |
| end_diag = Math.min(end_backward, d); |
| value_to_add_backward = 1 - value_to_add_backward; |
| |
| // compute backward furthest reaching paths |
| for (int k = start_diag; k <= end_diag; k += 2) { |
| int x; |
| if (k == d |
| || (k != -d && V[1][limit + k - 1] < V[1][limit + k + 1])) { |
| x = V[1][limit + k - 1]; |
| } else { |
| x = V[1][limit + k + 1] - 1; |
| } |
| |
| int y = x - k - delta; |
| |
| snake[2] = 0; |
| // System.out.println("2 x: " + x + " y: " + y + " k: " + k + " |
| // d: " + d); |
| while (x > 0 && y > 0 |
| && isRangeEqual(x - 1 + bottoml1, y - 1 + bottoml2)) { |
| x--; |
| y--; |
| snake[2]++; |
| } |
| V[1][limit + k] = x; |
| |
| if (isEven && k >= -delta - d && k <= d - delta |
| && x <= V[0][limit + k + delta]) { |
| // System.out.println("Returning: " + 2*d); |
| snake[0] = bottoml1 + x; |
| snake[1] = bottoml2 + y; |
| |
| return 2 * d; |
| } |
| |
| // check to see if we can cut down our diagonal range |
| if (x <= 0) { |
| start_backward = k + 1; |
| value_to_add_backward = 0; |
| } else if (y <= 0 && end_backward > k - 1) { |
| end_backward = k - 1; |
| } |
| } |
| worked(subMonitor, 1); |
| } |
| |
| /* |
| * computing the true LCS is too expensive, instead find the diagonal |
| * with the most progress and pretend a midle snake of length 0 occurs |
| * there. |
| */ |
| |
| int[] most_progress = findMostProgress(M, N, limit, V); |
| |
| snake[0] = bottoml1 + most_progress[0]; |
| snake[1] = bottoml2 + most_progress[1]; |
| snake[2] = 0; |
| return 5; /* |
| * HACK: since we didn't really finish the LCS computation |
| * we don't really know the length of the SES. We don't do |
| * anything with the result anyway, unless it's <=1. We know |
| * for a fact SES > 1 so 5 is as good a number as any to |
| * return here |
| */ |
| } |
| |
| /** |
| * Takes the array with furthest reaching D-paths from an LCS computation |
| * and returns the x,y coordinates and progress made in the middle diagonal |
| * among those with maximum progress, both from the front and from the back. |
| * |
| * @param M the length of the 1st sequence for which LCS is being computed |
| * @param N the length of the 2nd sequence for which LCS is being computed |
| * @param limit the number of steps made in an attempt to find the LCS from |
| * the front and back |
| * @param V the array storing the furthest reaching D-paths for the LCS |
| * computation |
| * @return The result as an array of 3 integers where result[0] is the x |
| * coordinate of the current location in the diagonal with the most |
| * progress, result[1] is the y coordinate of the current location |
| * in the diagonal with the most progress and result[2] is the |
| * amount of progress made in that diagonal |
| */ |
| private static int[] findMostProgress(int M, int N, int limit, int[][] V) { |
| int delta = N - M; |
| |
| int forward_start_diag; |
| if ((M & 1) == (limit & 1)) { |
| forward_start_diag = Math.max(-M, -limit); |
| } else { |
| forward_start_diag = Math.max(1 - M, -limit); |
| } |
| |
| int forward_end_diag = Math.min(N, limit); |
| |
| int backward_start_diag; |
| if ((N & 1) == (limit & 1)) { |
| backward_start_diag = Math.max(-N, -limit); |
| } else { |
| backward_start_diag = Math.max(1 - N, -limit); |
| } |
| |
| int backward_end_diag = Math.min(M, limit); |
| |
| int[][] max_progress = new int[Math.max(forward_end_diag |
| - forward_start_diag, backward_end_diag - backward_start_diag) / 2 + 1][3]; |
| int num_progress = 0; // the 1st entry is current, it is initialized |
| // with 0s |
| |
| // first search the forward diagonals |
| for (int k = forward_start_diag; k <= forward_end_diag; k += 2) { |
| int x = V[0][limit + k]; |
| int y = x - k; |
| if (x > N || y > M) { |
| continue; |
| } |
| |
| int progress = x + y; |
| if (progress > max_progress[0][2]) { |
| num_progress = 0; |
| max_progress[0][0] = x; |
| max_progress[0][1] = y; |
| max_progress[0][2] = progress; |
| } else if (progress == max_progress[0][2]) { |
| num_progress++; |
| max_progress[num_progress][0] = x; |
| max_progress[num_progress][1] = y; |
| max_progress[num_progress][2] = progress; |
| } |
| } |
| |
| boolean max_progress_forward = true; // initially the maximum |
| // progress is in the forward |
| // direction |
| |
| // now search the backward diagonals |
| for (int k = backward_start_diag; k <= backward_end_diag; k += 2) { |
| int x = V[1][limit + k]; |
| int y = x - k - delta; |
| if (x < 0 || y < 0) { |
| continue; |
| } |
| |
| int progress = N - x + M - y; |
| if (progress > max_progress[0][2]) { |
| num_progress = 0; |
| max_progress_forward = false; |
| max_progress[0][0] = x; |
| max_progress[0][1] = y; |
| max_progress[0][2] = progress; |
| } else if (progress == max_progress[0][2] && !max_progress_forward) { |
| num_progress++; |
| max_progress[num_progress][0] = x; |
| max_progress[num_progress][1] = y; |
| max_progress[num_progress][2] = progress; |
| } |
| } |
| |
| // return the middle diagonal with maximal progress. |
| return max_progress[num_progress / 2]; |
| } |
| |
| protected abstract int getLength2(); |
| |
| protected abstract int getLength1(); |
| |
| protected abstract boolean isRangeEqual(int i1, int i2); |
| |
| protected abstract void setLcs(int sl1, int sl2); |
| |
| protected abstract void initializeLcs(int lcsLength); |
| |
| public int getLength() { |
| return this.length; |
| } |
| } |