From 8be6fa32a86aa6b35d8fad47b2aadea666b8e942 Mon Sep 17 00:00:00 2001 From: finnm Date: Sun, 19 Jan 2025 13:09:36 +0100 Subject: [PATCH] Fixed merge order of FinnSort and added some testing methods --- .../powersort/FinnSort/FasterFinnSort.java | 204 +++++++++++------- .../uni_marburg/powersort/FinnSort/Run.java | 8 +- .../powersort/sort/FasterFinnSortTest.java | 71 +++++- 3 files changed, 204 insertions(+), 79 deletions(-) diff --git a/app/src/main/java/de/uni_marburg/powersort/FinnSort/FasterFinnSort.java b/app/src/main/java/de/uni_marburg/powersort/FinnSort/FasterFinnSort.java index 650ed97..9e911d2 100644 --- a/app/src/main/java/de/uni_marburg/powersort/FinnSort/FasterFinnSort.java +++ b/app/src/main/java/de/uni_marburg/powersort/FinnSort/FasterFinnSort.java @@ -26,6 +26,7 @@ package de.uni_marburg.powersort.FinnSort; */ +import java.util.Arrays; import java.util.Comparator; import static java.lang.Math.pow; @@ -68,13 +69,13 @@ public class FasterFinnSort { * This is the minimum sized sequence that will be merged. Shorter * sequences will be lengthened by calling binarySort. If the entire * array is less than this length, no merges will be performed. - * + *

* This constant should be a power of two. It was 64 in Tim Peter's C * implementation, but 32 was empirically determined to work better in * this implementation. In the unlikely event that you set this constant * to be a number that's not a power of two, you'll need to change the * {@link #minRunLength} computation. - * + *

* If you decrease this constant, you must change the stackLen * computation in the TimSort constructor, or you risk an * ArrayOutOfBounds exception. See listsort.txt for a discussion @@ -87,6 +88,7 @@ public class FasterFinnSort { * The array being sorted. */ private final T[] a; + private final int rangeSize; /** * The comparator for this sort. @@ -97,7 +99,7 @@ public class FasterFinnSort { * When we get into galloping mode, we stay there until both runs win less * often than MIN_GALLOP consecutive times. */ - private static final int MIN_GALLOP = 7; + private static final int MIN_GALLOP = 7; /** * This controls when we get *into* galloping mode. It is initialized @@ -128,9 +130,9 @@ public class FasterFinnSort { * A stack of pending runs yet to be merged. Run i starts at * address base[i] and extends for len[i] elements. It's always * true (so long as the indices are in bounds) that: - * - * runBase[i] + runLen[i] == runBase[i + 1] - * + *

+ * runBase[i] + runLen[i] == runBase[i + 1] + *

* so we could cut the storage for this, but it's a minor amount, * and keeping all the info explicit simplifies the code. */ @@ -142,29 +144,28 @@ public class FasterFinnSort { /** * Creates a TimSort instance to maintain the state of an ongoing sort. * - * @param a the array to be sorted - * @param c the comparator to determine the order of the sort - * @param work a workspace array (slice) + * @param a the array to be sorted + * @param c the comparator to determine the order of the sort + * @param work a workspace array (slice) * @param workBase origin of usable space in work array - * @param workLen usable size of work array + * @param workLen usable size of work array */ - private FasterFinnSort(T[] a, Comparator c, T[] work, int workBase, int workLen) { + private FasterFinnSort(T[] a, Comparator c, T[] work, int workBase, int workLen, int rangeSize) { this.a = a; this.c = c; - + this.rangeSize = rangeSize; // Allocate temp storage (which may be increased later if necessary) int len = a.length; int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ? len >>> 1 : INITIAL_TMP_STORAGE_LENGTH; if (work == null || workLen < tlen || workBase + tlen > work.length) { @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"}) - T[] newArray = (T[])java.lang.reflect.Array.newInstance + T[] newArray = (T[]) java.lang.reflect.Array.newInstance (a.getClass().getComponentType(), tlen); tmp = newArray; tmpBase = 0; tmpLen = tlen; - } - else { + } else { tmp = work; tmpBase = workBase; tmpLen = workLen; @@ -204,24 +205,24 @@ public class FasterFinnSort { * any necessary array bounds checks and expanding parameters into * the required forms. * - * @param a the array to be sorted - * @param lo the index of the first element, inclusive, to be sorted - * @param hi the index of the last element, exclusive, to be sorted - * @param c the comparator to use - * @param work a workspace array (slice) + * @param a the array to be sorted + * @param lo the index of the first element, inclusive, to be sorted + * @param hi the index of the last element, exclusive, to be sorted + * @param c the comparator to use + * @param work a workspace array (slice) * @param workBase origin of usable space in work array - * @param workLen usable size of work array + * @param workLen usable size of work array * @since 1.8 */ public static void sort(T[] a, int lo, int hi, Comparator c, - T[] work, int workBase, int workLen) { + T[] work, int workBase, int workLen) { assert c != null && a != null && lo >= 0 && lo <= hi && hi <= a.length; - - int nRemaining = hi - lo; + int nRemaining = hi - lo; if (nRemaining < 2) return; // Arrays of size 0 and 1 are always sorted // If array is small, do a "mini-TimSort" with no merges + if (nRemaining < MIN_MERGE) { int initRunLen = countRunAndMakeAscending(a, lo, hi, c); binarySort(a, lo, hi, lo + initRunLen, c); @@ -233,7 +234,7 @@ public class FasterFinnSort { * extending short natural runs to minRun elements, and merging runs * to maintain stack invariant. */ - FasterFinnSort ts = new FasterFinnSort<>(a, c, work, workBase, workLen); + FasterFinnSort fs = new FasterFinnSort<>(a, c, work, workBase, workLen, hi - lo); int minRun = minRunLength(nRemaining); do { // Identify next run @@ -247,8 +248,8 @@ public class FasterFinnSort { } // Push run onto pending-run stack, and maybe merge - ts.pushRun(lo, runLen, hi - lo); - ts.mergeCollapse(); + fs.pushRun(lo, runLen); + fs.mergeCollapse(); // Advance to find next run lo += runLen; @@ -257,8 +258,8 @@ public class FasterFinnSort { // Merge all remaining runs to complete sort assert lo == hi; - ts.mergeForceCollapse(); - assert ts.stackSize == 1; + fs.mergeForceCollapse(); + assert fs.stackSize == 1; } /** @@ -266,18 +267,18 @@ public class FasterFinnSort { * insertion sort. This is the best method for sorting small numbers * of elements. It requires O(n log n) compares, but O(n^2) data * movement (worst case). - * + *

* If the initial part of the specified range is already sorted, * this method can take advantage of it: the method assumes that the * elements from index {@code lo}, inclusive, to {@code start}, * exclusive are already sorted. * - * @param a the array in which a range is to be sorted - * @param lo the index of the first element in the range to be sorted - * @param hi the index after the last element in the range to be sorted + * @param a the array in which a range is to be sorted + * @param lo the index of the first element in the range to be sorted + * @param hi the index after the last element in the range to be sorted * @param start the index of the first element in the range that is - * not already known to be sorted ({@code lo <= start <= hi}) - * @param c comparator to used for the sort + * not already known to be sorted ({@code lo <= start <= hi}) + * @param c comparator to used for the sort */ @SuppressWarnings("fallthrough") private static void binarySort(T[] a, int lo, int hi, int start, @@ -285,7 +286,7 @@ public class FasterFinnSort { assert lo <= start && start <= hi; if (start == lo) start++; - for ( ; start < hi; start++) { + for (; start < hi; start++) { T pivot = a[start]; // Set left (and right) to the index where a[start] (pivot) belongs @@ -329,26 +330,26 @@ public class FasterFinnSort { * Returns the length of the run beginning at the specified position in * the specified array and reverses the run if it is descending (ensuring * that the run will always be ascending when the method returns). - * + *

* A run is the longest ascending sequence with: - * - * a[lo] <= a[lo + 1] <= a[lo + 2] <= ... - * + *

+ * a[lo] <= a[lo + 1] <= a[lo + 2] <= ... + *

* or the longest descending sequence with: - * - * a[lo] > a[lo + 1] > a[lo + 2] > ... - * + *

+ * a[lo] > a[lo + 1] > a[lo + 2] > ... + *

* For its intended use in a stable mergesort, the strictness of the * definition of "descending" is needed so that the call can safely * reverse a descending sequence without violating stability. * - * @param a the array in which a run is to be counted and possibly reversed + * @param a the array in which a run is to be counted and possibly reversed * @param lo index of the first element in the run * @param hi index after the last element that may be contained in the run. - * It is required that {@code lo < hi}. - * @param c the comparator to used for the sort - * @return the length of the run beginning at the specified position in - * the specified array + * It is required that {@code lo < hi}. + * @param c the comparator to used for the sort + * @return the length of the run beginning at the specified position in + * the specified array */ private static int countRunAndMakeAscending(T[] a, int lo, int hi, Comparator c) { @@ -373,7 +374,7 @@ public class FasterFinnSort { /** * Reverse the specified range of the specified array. * - * @param a the array in which a range is to be reversed + * @param a the array in which a range is to be reversed * @param lo the index of the first element in the range to be reversed * @param hi the index after the last element in the range to be reversed */ @@ -390,14 +391,14 @@ public class FasterFinnSort { * Returns the minimum acceptable run length for an array of the specified * length. Natural runs shorter than this will be extended with * {@link #binarySort}. - * + *

* Roughly speaking, the computation is: - * - * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return MIN_MERGE/2. - * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k - * is close to, but strictly less than, an exact power of 2. - * + *

+ * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff). + * Else if n is an exact power of 2, return MIN_MERGE/2. + * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k + * is close to, but strictly less than, an exact power of 2. + *

* For the rationale, see listsort.txt. * * @param n the length of the array to be sorted @@ -419,24 +420,24 @@ public class FasterFinnSort { * @param runBase index of the first element in the run * @param runLen the number of elements in the run */ - private void pushRun(int runBase, int runLen, int rangeSize) { + private void pushRun(int runBase, int runLen) { this.runBase[stackSize] = runBase; this.runLen[stackSize] = runLen; - this.runPower[stackSize] = power(stackSize, rangeSize); + this.runPower[stackSize] = power(stackSize); stackSize++; } /** * Examines the stack of runs waiting to be merged and merges adjacent runs * until the stack invariants are reestablished: - * - * 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1] - * 2. runLen[i - 2] > runLen[i - 1] - * + *

+ * 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1] + * 2. runLen[i - 2] > runLen[i - 1] + *

* This method is called each time a new run is pushed onto the stack, * so the invariants are guaranteed to hold for i < stackSize upon * entry to the method. - * + *

* Thanks to Stijn de Gouw, Jurriaan Rot, Frank S. de Boer, * Richard Bubel and Reiner Hahnle, this is fixed with respect to * the analysis in "On the Worst-Case Complexity of TimSort" by @@ -444,16 +445,35 @@ public class FasterFinnSort { */ private void mergeCollapse() { while (stackSize > 1) { - int n = stackSize - 2; - if (n > 0 && runPower[n + 1] < runPower[n]) { - mergeAt(n); + if (runPower[stackSize - 1] < runPower[stackSize - 2]) { + mergeAt(stackSize - 3); } else { break; // Invariant is established } } } - - private int power(int stackSize, int rangeSize) { +/* + private int power(int stackSize) { + if (stackSize == 0) { + return 0; + } + // int = (right - left + 1); = RangeSize + long l = runLen[stackSize - 1]; // + (long) runBase[stackSize]; // - ((long) left << 1); // 2*middleA + long r = runLen[stackSize]; // - ((long) left << 1); // 2*middleB + int a = (int) ((l << 30) / rangeSize); // middleA / 2n + int b = (int) ((r << 30) / rangeSize); // middleB / 2n + return Integer.numberOfLeadingZeros(a ^ b); + } +*/ + private int power(int stackSize) { + /* + System.out.println(Arrays.toString(runBase)); + System.out.println(Arrays.toString(runLen)); + System.out.println(Arrays.toString(runPower)); + System.out.println(stackSize); + System.out.println(rangeSize); + System.out.println(); + */ if (stackSize == 0) return 0; @@ -465,18 +485,50 @@ public class FasterFinnSort { int result = 0; - while (b < rangeSize) { + while (true) { ++result; if (a >= rangeSize) { a -= rangeSize; b -= rangeSize; } + if (b >= rangeSize) { + break; + } a <<= 1; b <<= 1; } return result; } +/* + public int power(int stackSize) { + + System.out.println(Arrays.toString(runBase)); + System.out.println(Arrays.toString(runLen)); + System.out.println(Arrays.toString(runPower)); + System.out.println(stackSize); + System.out.println(rangeSize); + System.out.println(); + + + if (stackSize == 0) + return 0; + + int n_1 = this.runLen[stackSize - 1]; + int n_2 = this.runLen[stackSize]; + + double a = ((double) this.runBase[stackSize - 1] + 0.5d * n_1 - 1d) / this.rangeSize; + double b = ((double) this.runBase[stackSize] + 0.5d * n_2 - 1d) / this.rangeSize; + int l = 0; + while ((int) (a * pow(2, l)) == (int) (b * pow(2 ,l))) { + + l++; + } + return l; + } + */ + + /* Backup mergeCollapse() von TimSort: @@ -519,7 +571,7 @@ public class FasterFinnSort { private void mergeAt(int i) { assert stackSize >= 2; assert i >= 0; - assert i == stackSize - 2 || i == stackSize - 3; + //assert i == stackSize - 3; int base1 = runBase[i]; int len1 = runLen[i]; @@ -533,12 +585,14 @@ public class FasterFinnSort { * run now, also slide over the last run (which isn't involved * in this merge). The current run (i+1) goes away in any case. */ - runLen[i] = len1 + len2; - if (i == stackSize - 3) { - runBase[i + 1] = runBase[i + 2]; - runLen[i + 1] = runLen[i + 2]; - } stackSize--; + runLen[i] = len1 + len2; + + // @TODO: Check power before pushing the run + runLen[i + 1] = runLen[i + 2]; + runBase[i + 1] = runBase[i + 2]; + runPower[i + 1] = runPower[i + 2]; + //runPower[i] = power(i); /* * Find where the first element of run2 goes in run1. Prior elements diff --git a/app/src/main/java/de/uni_marburg/powersort/FinnSort/Run.java b/app/src/main/java/de/uni_marburg/powersort/FinnSort/Run.java index eba6fc6..2a23c52 100644 --- a/app/src/main/java/de/uni_marburg/powersort/FinnSort/Run.java +++ b/app/src/main/java/de/uni_marburg/powersort/FinnSort/Run.java @@ -1,14 +1,16 @@ package de.uni_marburg.powersort.FinnSort; -class Run { - int start; - int end; +public class Run { + public int start; + public int end; int power; + public int len; public Run(int i, int j, int p) { start = i; end = j; power = p; + len = end - start; } } diff --git a/app/src/test/java/de/uni_marburg/powersort/sort/FasterFinnSortTest.java b/app/src/test/java/de/uni_marburg/powersort/sort/FasterFinnSortTest.java index d050e33..ec99a0f 100644 --- a/app/src/test/java/de/uni_marburg/powersort/sort/FasterFinnSortTest.java +++ b/app/src/test/java/de/uni_marburg/powersort/sort/FasterFinnSortTest.java @@ -1,8 +1,77 @@ package de.uni_marburg.powersort.sort; +import de.uni_marburg.powersort.FinnSort.FasterFinnSort; +import de.uni_marburg.powersort.FinnSort.Run; +import de.uni_marburg.powersort.benchmark.NaturalOrder; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Method; + +import static java.lang.Math.pow; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class FasterFinnSortTest extends AbstractSortTest { FasterFinnSortTest() { sortAlg = SortEnum.FASTER_FINN_SORT; } -} \ No newline at end of file + @Test + public void testMergeOrder() { + Integer[] input = {24,25,26,27,28,21,22,23,18,19,20,4,5,6,7,8,9,10,11,12,13,14,15,16,17,3,1,2}; + FasterFinnSort.sort(input, 0, input.length, NaturalOrder.INSTANCE,null, 0, 0); + } + @Test + public void powerTest() { + Run run1 = new Run(0, 10, 0); + Run run2 = new Run(10, 20, 0); + + for (int i = 20; i < 10100; i++) { + System.out.println(i); + //assertEquals(integerPower(run1, run2, i), power(run1, run2, i)); + assertEquals(power(run1, run2, i), + power2(run1, run2, i)); + } + + } + private static int integerPower(Run run1, Run run2, int n) { + + int n_1 = run1.len; + int n_2 = run2.len; + + int a = 2 * run1.start + n_1 - 1; + int b = a + n_1 + n_2; + + int result = 0; + + while (a * pow(2, result) == b * pow(2, result)) { + result++; + } + + return result; + } + private static int power(Run run1, Run run2, int n) { + /* + if (run1.start == 0) { + return 0; + } + + */ + int n_1 = run1.end - run1.start; + int n_2 = run2.end - run2.start; + double a = ((double) run1.start + 0.5d * n_1) / n; + double b = ((double) run2.start + 0.5d * n_2) / n; + int l = 1; + while ((int) (a * pow(2, l)) == (int) (b * pow(2 ,l))) { + l++; + } + return l; + } + + private int power2(Run run1, Run run2, int n) { + long l = (long) run1.start + (long) run2.start; // 2*middleA + long r = (long) run2.start + (long) run2.end + 1; // 2*middleB + int a = (int) ((l << 30) / n); // middleA / 2n + int b = (int) ((r << 30) / n); // middleB / 2n + return Integer.numberOfLeadingZeros(a ^ b); + } +}