001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.tools; 003 004//// Taken from http://www.bmsi.com/java/#diff 005 006// http://www.bmsi.com/java/DiffPrint.java could also be useful 007 008/* 009 * $Log: Diff.java,v $ 010 * Revision 1.15 2013/04/01 16:27:31 stuart 011 * Fix DiffPrint unified format with test cases. 012 * Begin porting some diff-2.7 features. 013 * 014 * Revision 1.14 2010/03/03 21:21:25 stuart 015 * Test new direct equivalence API 016 * 017 * Revision 1.13 2009/12/07 17:43:17 stuart 018 * Compute equivMax for int[] ctor 019 * 020 * Revision 1.12 2009/12/07 17:34:46 stuart 021 * Ctor with int[]. 022 * 023 * Revision 1.11 2009/11/15 01:11:54 stuart 024 * Diff doesn't need to be generic 025 * 026 * Revision 1.10 2009/11/15 00:54:03 stuart 027 * Update to Java 5 containers 028 * 029 * Revision 1.7 2009/01/19 03:05:26 stuart 030 * Fix StackOverflow bug with heuristic on reported by Jimmy Han. 031 * 032 * Revision 1.6 2003/03/06 22:51:32 stuart 033 * Convert to CVS 034 * 035 * Revision 1.5 2002/07/19 19:14:40 stuart 036 * fix reverseScript, make change ctor public, update docs 037 * 038 * Revision 1.4 2002/04/09 17:53:39 stuart 039 * More flexible interface for diff() function. 040 * 041 * Revision 1.3 2000/03/03 21:58:03 stuart 042 * move discard_confusing_lines and shift_boundaries to class file_data 043 * 044 * Revision 1.2 2000/03/02 16:37:38 stuart 045 * Add GPL and copyright 046 * 047 */ 048 049import java.util.HashMap; 050import java.util.Map; 051 052/** A class to compare vectors of objects. The result of comparison 053 is a list of <code>change</code> objects which form an 054 edit script. The objects compared are traditionally lines 055 of text from two files. Comparison options such as "ignore 056 whitespace" are implemented by modifying the <code>equals</code> 057 and <code>hashcode</code> methods for the objects compared. 058<p> 059 The basic algorithm is described in: <br> 060 "An O(ND) Difference Algorithm and its Variations", Eugene Myers, 061 Algorithmica Vol. 1 No. 2, 1986, p 251. 062<p> 063 This class outputs different results from GNU diff 1.15 on some 064 inputs. Our results are actually better (smaller change list, smaller 065 total size of changes), but it would be nice to know why. Perhaps 066 there is a memory overwrite bug in GNU diff 1.15. 067 068 @author Stuart D. Gathman, translated from GNU diff 1.15 069 Copyright (C) 2000 Business Management Systems, Inc. 070<p> 071 This program is free software; you can redistribute it and/or modify 072 it under the terms of the GNU General Public License as published by 073 the Free Software Foundation; either version 1, or (at your option) 074 any later version. 075<p> 076 This program is distributed in the hope that it will be useful, 077 but WITHOUT ANY WARRANTY; without even the implied warranty of 078 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 079 GNU General Public License for more details. 080<p> 081 You should have received a copy of the <a href=COPYING.txt> 082 GNU General Public License</a> 083 along with this program; if not, write to the Free Software 084 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 085 */ 086public class Diff { 087 088 /** Prepare to find differences between two arrays. Each element of 089 the arrays is translated to an "equivalence number" based on 090 the result of <code>equals</code>. The original Object arrays 091 are no longer needed for computing the differences. They will 092 be needed again later to print the results of the comparison as 093 an edit script, if desired. 094 * @param a first array 095 * @param b second array 096 */ 097 public Diff(Object[] a, Object[] b) { 098 Map<Object, Integer> h = new HashMap<>(a.length + b.length); 099 filevec = new FileData[] {new FileData(a, h), new FileData(b, h)}; 100 } 101 102 /** 1 more than the maximum equivalence value used for this or its 103 sibling file. */ 104 private int equivMax = 1; 105 106 private int[] xvec, yvec; /* Vectors being compared. */ 107 private int[] fdiag; /* Vector, indexed by diagonal, containing 108 the X coordinate of the point furthest 109 along the given diagonal in the forward 110 search of the edit matrix. */ 111 private int[] bdiag; /* Vector, indexed by diagonal, containing 112 the X coordinate of the point furthest 113 along the given diagonal in the backward 114 search of the edit matrix. */ 115 private int fdiagoff, bdiagoff; 116 private final FileData[] filevec; 117 private int cost; 118 119 /** 120 * Find the midpoint of the shortest edit script for a specified 121 * portion of the two files. 122 * 123 * We scan from the beginnings of the files, and simultaneously from the ends, 124 * doing a breadth-first search through the space of edit-sequence. 125 * When the two searches meet, we have found the midpoint of the shortest 126 * edit sequence. 127 * 128 * The value returned is the number of the diagonal on which the midpoint lies. 129 * The diagonal number equals the number of inserted lines minus the number 130 * of deleted lines (counting only lines before the midpoint). 131 * The edit cost is stored into COST; this is the total number of 132 * lines inserted or deleted (counting only lines before the midpoint). 133 * 134 * This function assumes that the first lines of the specified portions 135 * of the two files do not match, and likewise that the last lines do not 136 * match. The caller must trim matching lines from the beginning and end 137 * of the portions it is going to specify. 138 * 139 * Note that if we return the "wrong" diagonal value, or if 140 * the value of bdiag at that diagonal is "wrong", 141 * the worst this can do is cause suboptimal diff output. 142 * It cannot cause incorrect diff output. 143 * @param xoff xoff 144 * @param xlim xlim 145 * @param yoff yoff 146 * @param ylim ylim 147 * @return midpoint of the shortest edit script 148 */ 149 private int diag(int xoff, int xlim, int yoff, int ylim) { 150 final int[] fd = fdiag; // Give the compiler a chance. 151 final int[] bd = bdiag; // Additional help for the compiler. 152 final int[] xv = xvec; // Still more help for the compiler. 153 final int[] yv = yvec; // And more and more . . . 154 final int dmin = xoff - ylim; // Minimum valid diagonal. 155 final int dmax = xlim - yoff; // Maximum valid diagonal. 156 final int fmid = xoff - yoff; // Center diagonal of top-down search. 157 final int bmid = xlim - ylim; // Center diagonal of bottom-up search. 158 int fmin = fmid, fmax = fmid; // Limits of top-down search. 159 int bmin = bmid, bmax = bmid; // Limits of bottom-up search. 160 // True if southeast corner is on an odd diagonal with respect to the northwest. 161 final boolean odd = (fmid - bmid & 1) != 0; 162 163 fd[fdiagoff + fmid] = xoff; 164 bd[bdiagoff + bmid] = xlim; 165 166 for (int c = 1;; ++c) { 167 int d; /* Active diagonal. */ 168 169 /* Extend the top-down search by an edit step in each diagonal. */ 170 if (fmin > dmin) { 171 fd[fdiagoff + --fmin - 1] = -1; 172 } else { 173 ++fmin; 174 } 175 if (fmax < dmax) { 176 fd[fdiagoff + ++fmax + 1] = -1; 177 } else { 178 --fmax; 179 } 180 for (d = fmax; d >= fmin; d -= 2) { 181 int x, y, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1]; 182 183 if (tlo >= thi) { 184 x = tlo + 1; 185 } else { 186 x = thi; 187 } 188 y = x - d; 189 while (x < xlim && y < ylim && xv[x] == yv[y]) { 190 ++x; ++y; 191 } 192 fd[fdiagoff + d] = x; 193 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) { 194 cost = 2 * c - 1; 195 return d; 196 } 197 } 198 199 /* Similar extend the bottom-up search. */ 200 if (bmin > dmin) { 201 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE; 202 } else { 203 ++bmin; 204 } 205 if (bmax < dmax) { 206 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE; 207 } else { 208 --bmax; 209 } 210 for (d = bmax; d >= bmin; d -= 2) { 211 int x, y, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1]; 212 213 if (tlo < thi) { 214 x = tlo; 215 } else { 216 x = thi - 1; 217 } 218 y = x - d; 219 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) { 220 --x; --y; 221 } 222 bd[bdiagoff + d] = x; 223 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) { 224 cost = 2 * c; 225 return d; 226 } 227 } 228 } 229 } 230 231 /** 232 * Compare in detail contiguous subsequences of the two files 233 * which are known, as a whole, to match each other. 234 * 235 * The results are recorded in the vectors filevec[N].changed_flag, by 236 * storing a 1 in the element for each line that is an insertion or deletion. 237 * 238 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 239 * 240 * Note that XLIM, YLIM are exclusive bounds. 241 * All line numbers are origin-0 and discarded lines are not counted. 242 * @param xoff xoff 243 * @param xlim xlim 244 * @param yoff yoff 245 * @param ylim ylim 246 */ 247 private void compareseq(int xoff, int xlim, int yoff, int ylim) { 248 /* Slide down the bottom initial diagonal. */ 249 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) { 250 ++xoff; ++yoff; 251 } 252 /* Slide up the top initial diagonal. */ 253 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) { 254 --xlim; --ylim; 255 } 256 257 /* Handle simple cases. */ 258 if (xoff == xlim) { 259 while (yoff < ylim) { 260 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true; 261 } 262 } else if (yoff == ylim) { 263 while (xoff < xlim) { 264 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true; 265 } 266 } else { 267 /* Find a point of correspondence in the middle of the files. */ 268 269 int d = diag(xoff, xlim, yoff, ylim); 270 int c = cost; 271 int b = bdiag[bdiagoff + d]; 272 273 if (c == 1) 274 /* This should be impossible, because it implies that 275 one of the two subsequences is empty, 276 and that case was handled above without calling `diag'. 277 Let's verify that this is true. */ 278 throw new IllegalArgumentException("Empty subsequence"); 279 else { 280 /* Use that point to split this problem into two subproblems. */ 281 compareseq(xoff, b, yoff, b - d); 282 /* This used to use f instead of b, 283 but that is incorrect! 284 It is not necessarily the case that diagonal d 285 has a snake from b to f. */ 286 compareseq(b, xlim, b - d, ylim); 287 } 288 } 289 } 290 291 /** Discard lines from one file that have no matches in the other file. 292 */ 293 private void discard_confusing_lines() { 294 filevec[0].discard_confusing_lines(filevec[1]); 295 filevec[1].discard_confusing_lines(filevec[0]); 296 } 297 298 /** 299 * Adjust inserts/deletes of blank lines to join changes as much as possible. 300 */ 301 private void shift_boundaries() { 302 filevec[0].shift_boundaries(filevec[1]); 303 filevec[1].shift_boundaries(filevec[0]); 304 } 305 306 /** 307 * Script builder. 308 */ 309 public interface ScriptBuilder { 310 /** 311 * Scan the tables of which lines are inserted and deleted, producing an edit script. 312 * @param changed0 true for lines in first file which do not match 2nd 313 * @param len0 number of lines in first file 314 * @param changed1 true for lines in 2nd file which do not match 1st 315 * @param len1 number of lines in 2nd file 316 * @return a linked list of changes - or null 317 */ 318 Change build_script( 319 boolean[] changed0, int len0, 320 boolean[] changed1, int len1 321 ); 322 } 323 324 /** Scan the tables of which lines are inserted and deleted, 325 producing an edit script in reverse order. */ 326 327 static class ReverseScript implements ScriptBuilder { 328 @Override 329 public Change build_script( 330 final boolean[] changed0, int len0, 331 final boolean[] changed1, int len1) { 332 Change script = null; 333 int i0 = 0, i1 = 0; 334 while (i0 < len0 || i1 < len1) { 335 if (changed0[1+i0] || changed1[1+i1]) { 336 int line0 = i0, line1 = i1; 337 338 /* Find # lines changed here in each file. */ 339 while (changed0[1+i0]) { 340 ++i0; 341 } 342 while (changed1[1+i1]) { 343 ++i1; 344 } 345 346 /* Record this change. */ 347 script = new Change(line0, line1, i0 - line0, i1 - line1, script); 348 } 349 350 /* We have reached lines in the two files that match each other. */ 351 i0++; i1++; 352 } 353 354 return script; 355 } 356 } 357 358 static class ForwardScript implements ScriptBuilder { 359 /** Scan the tables of which lines are inserted and deleted, 360 producing an edit script in forward order. */ 361 @Override 362 public Change build_script( 363 final boolean[] changed0, int len0, 364 final boolean[] changed1, int len1) { 365 Change script = null; 366 int i0 = len0, i1 = len1; 367 368 while (i0 >= 0 || i1 >= 0) { 369 if (changed0[i0] || changed1[i1]) { 370 int line0 = i0, line1 = i1; 371 372 /* Find # lines changed here in each file. */ 373 while (changed0[i0]) { 374 --i0; 375 } 376 while (changed1[i1]) { 377 --i1; 378 } 379 380 /* Record this change. */ 381 script = new Change(i0, i1, line0 - i0, line1 - i1, script); 382 } 383 384 /* We have reached lines in the two files that match each other. */ 385 i0--; i1--; 386 } 387 388 return script; 389 } 390 } 391 392 /** Standard Forward ScriptBuilder. */ 393 public static final ScriptBuilder forwardScript = new ForwardScript(); 394 /** Standard Reverse ScriptBuilder. */ 395 public static final ScriptBuilder reverseScript = new ReverseScript(); 396 397 /** 398 * Report the differences of two files. DEPTH is the current directory depth. 399 * @param reverse if {@code true} use {@link #reverseScript} else use {@link #forwardScript} 400 * @return the differences of two files 401 */ 402 public final Change diff_2(final boolean reverse) { 403 return diff(reverse ? reverseScript : forwardScript); 404 } 405 406 /** 407 * Get the results of comparison as an edit script. The script 408 * is described by a list of changes. The standard ScriptBuilder 409 * implementations provide for forward and reverse edit scripts. 410 * Alternate implementations could, for instance, list common elements 411 * instead of differences. 412 * @param bld an object to build the script from change flags 413 * @return the head of a list of changes 414 */ 415 public Change diff(final ScriptBuilder bld) { 416 417 // Some lines are obviously insertions or deletions because they don't match anything. 418 // Detect them now, and avoid even thinking about them in the main comparison algorithm. 419 discard_confusing_lines(); 420 421 // Now do the main comparison algorithm, considering just the undiscarded lines. 422 xvec = filevec[0].undiscarded; 423 yvec = filevec[1].undiscarded; 424 425 int diags = filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3; 426 fdiag = new int[diags]; 427 fdiagoff = filevec[1].nondiscardedLines + 1; 428 bdiag = new int[diags]; 429 bdiagoff = filevec[1].nondiscardedLines + 1; 430 431 compareseq(0, filevec[0].nondiscardedLines, 432 0, filevec[1].nondiscardedLines); 433 fdiag = null; 434 bdiag = null; 435 436 // Modify the results slightly to make them prettier in cases where that can validly be done. 437 shift_boundaries(); 438 439 // Get the results of comparison in the form of a chain of `struct change's -- an edit script. 440 return bld.build_script( 441 filevec[0].changedFlag, 442 filevec[0].bufferedLines, 443 filevec[1].changedFlag, 444 filevec[1].bufferedLines 445 ); 446 } 447 448 /** The result of comparison is an "edit script": a chain of change objects. 449 Each change represents one place where some lines are deleted 450 and some are inserted. 451 452 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 453 DELETED is the number of lines deleted here from file 0. 454 INSERTED is the number of lines inserted here in file 1. 455 456 If DELETED is 0 then LINE0 is the number of the line before 457 which the insertion was done; vice versa for INSERTED and LINE1. */ 458 459 public static class Change { 460 /** Previous or next edit command. */ 461 public Change link; 462 /** # lines of file 1 changed here. */ 463 public final int inserted; 464 /** # lines of file 0 changed here. */ 465 public final int deleted; 466 /** Line number of 1st deleted line. */ 467 public final int line0; 468 /** Line number of 1st inserted line. */ 469 public final int line1; 470 471 /** 472 * Cons an additional entry onto the front of an edit script OLD. 473 * LINE0 and LINE1 are the first affected lines in the two files (origin 0). 474 * DELETED is the number of lines deleted here from file 0. 475 * INSERTED is the number of lines inserted here in file 1. 476 * 477 * If DELETED is 0 then LINE0 is the number of the line before 478 * which the insertion was done; vice versa for INSERTED and LINE1. 479 * @param line0 first affected lines in the two files (origin 0) 480 * @param line1 first affected lines in the two files (origin 0) 481 * @param deleted the number of lines deleted here from file 0 482 * @param inserted the number of lines inserted here in file 1 483 * @param old edit script 484 */ 485 public Change(int line0, int line1, int deleted, int inserted, Change old) { 486 this.line0 = line0; 487 this.line1 = line1; 488 this.inserted = inserted; 489 this.deleted = deleted; 490 this.link = old; 491 } 492 493 /** 494 * Returns the number of insertions and deletions of this change as well as 495 * (recursively) the changes linked via {@link #link}. 496 * @return recursive number of insertions and deletions 497 */ 498 public int getTotalNumberOfChanges() { 499 return inserted + deleted + (link != null ? link.getTotalNumberOfChanges() : 0); 500 } 501 502 @Override 503 public String toString() { 504 String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1); 505 return (link != null) ? s + '\n' + link : s; 506 } 507 } 508 509 /** 510 * Data on one input file being compared. 511 */ 512 class FileData { 513 514 /** Allocate changed array for the results of comparison. */ 515 void clear() { 516 // Allocate a flag for each line of each file, saying whether that line is an insertion or deletion. 517 // Allocate an extra element, always zero, at each end of each vector. 518 changedFlag = new boolean[bufferedLines + 2]; 519 } 520 521 /** 522 * Return equiv_count[I] as the number of lines in this file that fall in equivalence class I. 523 * @return the array of equivalence class counts. 524 */ 525 int[] equivCount() { 526 int[] equivCount = new int[equivMax]; 527 for (int i = 0; i < bufferedLines; ++i) { 528 ++equivCount[equivs[i]]; 529 } 530 return equivCount; 531 } 532 533 /** 534 * Discard lines that have no matches in another file. 535 * 536 * A line which is discarded will not be considered by the actual comparison algorithm; 537 * it will be as if that line were not in the file. 538 * The file's `realindexes' table maps virtual line numbers 539 * (which don't count the discarded lines) into real line numbers; 540 * this is how the actual comparison algorithm produces results 541 * that are comprehensible when the discarded lines are counted. 542 * <p> 543 * When we discard a line, we also mark it as a deletion or insertion so that it will be printed in the output. 544 * @param f the other file 545 */ 546 void discard_confusing_lines(FileData f) { 547 clear(); 548 // Set up table of which lines are going to be discarded. 549 final byte[] discarded = discardable(f.equivCount()); 550 551 // Don't really discard the provisional lines except when they occur in a run of discardables, 552 // with nonprovisionals at the beginning and end. 553 filterDiscards(discarded); 554 555 // Actually discard the lines. 556 discard(discarded); 557 } 558 559 /** 560 * Mark to be discarded each line that matches no line of another file. 561 * If a line matches many lines, mark it as provisionally discardable. 562 * @param counts The count of each equivalence number for the other file. 563 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line 564 * @see #equivCount() 565 */ 566 private byte[] discardable(final int[] counts) { 567 final int end = bufferedLines; 568 final byte[] discards = new byte[end]; 569 final int[] equivs = this.equivs; 570 int many = 5; 571 int tem = end / 64; 572 573 /* Multiply MANY by approximate square root of number of lines. 574 That is the threshold for provisionally discardable lines. */ 575 while ((tem = tem >> 2) > 0) { 576 many *= 2; 577 } 578 579 for (int i = 0; i < end; i++) { 580 int nmatch; 581 if (equivs[i] == 0) { 582 continue; 583 } 584 nmatch = counts[equivs[i]]; 585 if (nmatch == 0) { 586 discards[i] = 1; 587 } else if (nmatch > many) { 588 discards[i] = 2; 589 } 590 } 591 return discards; 592 } 593 594 /** 595 * Don't really discard the provisional lines except when they occur 596 * in a run of discardables, with nonprovisionals at the beginning and end. 597 * @param discards discards 598 */ 599 private void filterDiscards(final byte[] discards) { 600 final int end = bufferedLines; 601 602 for (int i = 0; i < end; i++) { 603 /* Cancel provisional discards not in middle of run of discards. */ 604 if (discards[i] == 2) { 605 discards[i] = 0; 606 } else if (discards[i] != 0) { 607 /* We have found a nonprovisional discard. */ 608 int j; 609 int length; 610 int provisional = 0; 611 612 /* Find end of this run of discardable lines. 613 Count how many are provisionally discardable. */ 614 for (j = i; j < end; j++) { 615 if (discards[j] == 0) { 616 break; 617 } 618 if (discards[j] == 2) { 619 ++provisional; 620 } 621 } 622 623 /* Cancel provisional discards at end, and shrink the run. */ 624 while (j > i && discards[j - 1] == 2) { 625 discards[--j] = 0; --provisional; 626 } 627 628 /* Now we have the length of a run of discardable lines 629 whose first and last are not provisional. */ 630 length = j - i; 631 632 /* If 1/4 of the lines in the run are provisional, 633 cancel discarding of all provisional lines in the run. */ 634 if (provisional * 4 > length) { 635 while (j > i) 636 if (discards[--j] == 2) { 637 discards[j] = 0; 638 } 639 } else { 640 int consec; 641 int minimum = 1; 642 int tem = length / 4; 643 644 /* MINIMUM is approximate square root of LENGTH/4. 645 A subrun of two or more provisionals can stand 646 when LENGTH is at least 16. 647 A subrun of 4 or more can stand when LENGTH >= 64. */ 648 while ((tem = tem >> 2) > 0) { 649 minimum *= 2; 650 } 651 minimum++; 652 653 /* Cancel any subrun of MINIMUM or more provisionals 654 within the larger run. */ 655 for (j = 0, consec = 0; j < length; j++) { 656 if (discards[i + j] != 2) { 657 consec = 0; 658 } else if (minimum == ++consec) { 659 /* Back up to start of subrun, to cancel it all. */ 660 j -= consec; 661 } else if (minimum < consec) { 662 discards[i + j] = 0; 663 } 664 } 665 666 /* Scan from beginning of run 667 until we find 3 or more nonprovisionals in a row 668 or until the first nonprovisional at least 8 lines in. 669 Until that point, cancel any provisionals. */ 670 for (j = 0, consec = 0; j < length; j++) { 671 if (j >= 8 && discards[i + j] == 1) { 672 break; 673 } 674 if (discards[i + j] == 2) { 675 consec = 0; discards[i + j] = 0; 676 } else if (discards[i + j] == 0) { 677 consec = 0; 678 } else { 679 consec++; 680 } 681 if (consec == 3) { 682 break; 683 } 684 } 685 686 /* I advances to the last line of the run. */ 687 i += length - 1; 688 689 /* Same thing, from end. */ 690 for (j = 0, consec = 0; j < length; j++) { 691 if (j >= 8 && discards[i - j] == 1) { 692 break; 693 } 694 if (discards[i - j] == 2) { 695 consec = 0; discards[i - j] = 0; 696 } else if (discards[i - j] == 0) { 697 consec = 0; 698 } else { 699 consec++; 700 } 701 if (consec == 3) { 702 break; 703 } 704 } 705 } 706 } 707 } 708 } 709 710 /** Actually discard the lines. 711 @param discards flags lines to be discarded 712 */ 713 private void discard(final byte[] discards) { 714 final int end = bufferedLines; 715 int j = 0; 716 for (int i = 0; i < end; ++i) { 717 if (discards[i] == 0) { 718 undiscarded[j] = equivs[i]; 719 realindexes[j++] = i; 720 } else { 721 changedFlag[1+i] = true; 722 } 723 } 724 nondiscardedLines = j; 725 } 726 727 FileData(int length) { 728 bufferedLines = length; 729 equivs = new int[length]; 730 undiscarded = new int[bufferedLines]; 731 realindexes = new int[bufferedLines]; 732 } 733 734 FileData(Object[] data, Map<Object, Integer> h) { 735 this(data.length); 736 // FIXME: diff 2.7 removes common prefix and common suffix 737 for (int i = 0; i < data.length; ++i) { 738 Integer ir = h.get(data[i]); 739 if (ir == null) { 740 equivs[i] = equivMax++; 741 h.put(data[i], equivs[i]); 742 } else { 743 equivs[i] = ir.intValue(); 744 } 745 } 746 } 747 748 /** Adjust inserts/deletes of blank lines to join changes 749 as much as possible. 750 751 We do something when a run of changed lines include a blank 752 line at one end and have an excluded blank line at the other. 753 We are free to choose which blank line is included. 754 `compareseq' always chooses the one at the beginning, 755 but usually it is cleaner to consider the following blank line 756 to be the "change". The only exception is if the preceding blank line 757 would join this change to other changes. 758 @param f the file being compared against 759 */ 760 void shift_boundaries(FileData f) { 761 final boolean[] changed = changedFlag; 762 final boolean[] otherChanged = f.changedFlag; 763 int i = 0; 764 int j = 0; 765 int iEnd = bufferedLines; 766 int preceding = -1; 767 int otherPreceding = -1; 768 769 for (;;) { 770 int start, end, otherStart; 771 772 /* Scan forwards to find beginning of another run of changes. 773 Also keep track of the corresponding point in the other file. */ 774 775 while (i < iEnd && !changed[1+i]) { 776 while (otherChanged[1+j++]) { 777 /* Non-corresponding lines in the other file 778 will count as the preceding batch of changes. */ 779 otherPreceding = j; 780 } 781 i++; 782 } 783 784 if (i == iEnd) { 785 break; 786 } 787 788 start = i; 789 otherStart = j; 790 791 for (;;) { 792 /* Now find the end of this run of changes. */ 793 794 while (i < iEnd && changed[1+i]) { 795 i++; 796 } 797 end = i; 798 799 /* If the first changed line matches the following unchanged one, 800 and this run does not follow right after a previous run, 801 and there are no lines deleted from the other file here, 802 then classify the first changed line as unchanged 803 and the following line as changed in its place. */ 804 805 /* You might ask, how could this run follow right after another? 806 Only because the previous run was shifted here. */ 807 808 if (end != iEnd && equivs[start] == equivs[end] && !otherChanged[1+j] 809 && !((preceding >= 0 && start == preceding) || (otherPreceding >= 0 && otherStart == otherPreceding))) { 810 changed[1+end++] = true; 811 changed[1+start++] = false; 812 ++i; 813 /* Since one line-that-matches is now before this run 814 instead of after, we must advance in the other file 815 to keep in synch. */ 816 ++j; 817 } else { 818 break; 819 } 820 } 821 822 preceding = i; 823 otherPreceding = j; 824 } 825 } 826 827 /** Number of elements (lines) in this file. */ 828 private final int bufferedLines; 829 830 /** Vector, indexed by line number, containing an equivalence code for each line. 831 * It is this vector that is actually compared with that of another file to generate differences. */ 832 private final int[] equivs; 833 834 /** Vector, like the previous one except that the elements for discarded lines have been squeezed out. */ 835 private final int[] undiscarded; 836 837 /** Vector mapping virtual line numbers (not counting discarded lines) to real ones (counting those lines). 838 * Both are origin-0. */ 839 private final int[] realindexes; 840 841 /** Total number of nondiscarded lines. */ 842 private int nondiscardedLines; 843 844 /** Array, indexed by real origin-1 line number, containing true for a line that is an insertion or a deletion. 845 The results of comparison are stored here. */ 846 private boolean[] changedFlag; 847 } 848}