Fixed merge order of FinnSort and added some testing methods

This commit is contained in:
finnm 2025-01-19 13:09:36 +01:00
parent f851b9784d
commit 8be6fa32a8
3 changed files with 204 additions and 79 deletions

View File

@ -26,6 +26,7 @@ package de.uni_marburg.powersort.FinnSort;
*/ */
import java.util.Arrays;
import java.util.Comparator; import java.util.Comparator;
import static java.lang.Math.pow; import static java.lang.Math.pow;
@ -68,13 +69,13 @@ public class FasterFinnSort<T> {
* This is the minimum sized sequence that will be merged. Shorter * This is the minimum sized sequence that will be merged. Shorter
* sequences will be lengthened by calling binarySort. If the entire * sequences will be lengthened by calling binarySort. If the entire
* array is less than this length, no merges will be performed. * array is less than this length, no merges will be performed.
* * <p>
* This constant should be a power of two. It was 64 in Tim Peter's C * This constant should be a power of two. It was 64 in Tim Peter's C
* implementation, but 32 was empirically determined to work better in * implementation, but 32 was empirically determined to work better in
* this implementation. In the unlikely event that you set this constant * this implementation. In the unlikely event that you set this constant
* to be a number that's not a power of two, you'll need to change the * to be a number that's not a power of two, you'll need to change the
* {@link #minRunLength} computation. * {@link #minRunLength} computation.
* * <p>
* If you decrease this constant, you must change the stackLen * If you decrease this constant, you must change the stackLen
* computation in the TimSort constructor, or you risk an * computation in the TimSort constructor, or you risk an
* ArrayOutOfBounds exception. See listsort.txt for a discussion * ArrayOutOfBounds exception. See listsort.txt for a discussion
@ -87,6 +88,7 @@ public class FasterFinnSort<T> {
* The array being sorted. * The array being sorted.
*/ */
private final T[] a; private final T[] a;
private final int rangeSize;
/** /**
* The comparator for this sort. * The comparator for this sort.
@ -128,9 +130,9 @@ public class FasterFinnSort<T> {
* A stack of pending runs yet to be merged. Run i starts at * A stack of pending runs yet to be merged. Run i starts at
* address base[i] and extends for len[i] elements. It's always * address base[i] and extends for len[i] elements. It's always
* true (so long as the indices are in bounds) that: * true (so long as the indices are in bounds) that:
* * <p>
* runBase[i] + runLen[i] == runBase[i + 1] * runBase[i] + runLen[i] == runBase[i + 1]
* * <p>
* so we could cut the storage for this, but it's a minor amount, * so we could cut the storage for this, but it's a minor amount,
* and keeping all the info explicit simplifies the code. * and keeping all the info explicit simplifies the code.
*/ */
@ -148,23 +150,22 @@ public class FasterFinnSort<T> {
* @param workBase origin of usable space in work array * @param workBase origin of usable space in work array
* @param workLen usable size of work array * @param workLen usable size of work array
*/ */
private FasterFinnSort(T[] a, Comparator<? super T> c, T[] work, int workBase, int workLen) { private FasterFinnSort(T[] a, Comparator<? super T> c, T[] work, int workBase, int workLen, int rangeSize) {
this.a = a; this.a = a;
this.c = c; this.c = c;
this.rangeSize = rangeSize;
// Allocate temp storage (which may be increased later if necessary) // Allocate temp storage (which may be increased later if necessary)
int len = a.length; int len = a.length;
int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ? int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
len >>> 1 : INITIAL_TMP_STORAGE_LENGTH; len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
if (work == null || workLen < tlen || workBase + tlen > work.length) { if (work == null || workLen < tlen || workBase + tlen > work.length) {
@SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"}) @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
T[] newArray = (T[])java.lang.reflect.Array.newInstance T[] newArray = (T[]) java.lang.reflect.Array.newInstance
(a.getClass().getComponentType(), tlen); (a.getClass().getComponentType(), tlen);
tmp = newArray; tmp = newArray;
tmpBase = 0; tmpBase = 0;
tmpLen = tlen; tmpLen = tlen;
} } else {
else {
tmp = work; tmp = work;
tmpBase = workBase; tmpBase = workBase;
tmpLen = workLen; tmpLen = workLen;
@ -216,12 +217,12 @@ public class FasterFinnSort<T> {
public static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c, public static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c,
T[] work, int workBase, int workLen) { T[] work, int workBase, int workLen) {
assert c != null && a != null && lo >= 0 && lo <= hi && hi <= a.length; assert c != null && a != null && lo >= 0 && lo <= hi && hi <= a.length;
int nRemaining = hi - lo; int nRemaining = hi - lo;
if (nRemaining < 2) if (nRemaining < 2)
return; // Arrays of size 0 and 1 are always sorted return; // Arrays of size 0 and 1 are always sorted
// If array is small, do a "mini-TimSort" with no merges // If array is small, do a "mini-TimSort" with no merges
if (nRemaining < MIN_MERGE) { if (nRemaining < MIN_MERGE) {
int initRunLen = countRunAndMakeAscending(a, lo, hi, c); int initRunLen = countRunAndMakeAscending(a, lo, hi, c);
binarySort(a, lo, hi, lo + initRunLen, c); binarySort(a, lo, hi, lo + initRunLen, c);
@ -233,7 +234,7 @@ public class FasterFinnSort<T> {
* extending short natural runs to minRun elements, and merging runs * extending short natural runs to minRun elements, and merging runs
* to maintain stack invariant. * to maintain stack invariant.
*/ */
FasterFinnSort<T> ts = new FasterFinnSort<>(a, c, work, workBase, workLen); FasterFinnSort<T> fs = new FasterFinnSort<>(a, c, work, workBase, workLen, hi - lo);
int minRun = minRunLength(nRemaining); int minRun = minRunLength(nRemaining);
do { do {
// Identify next run // Identify next run
@ -247,8 +248,8 @@ public class FasterFinnSort<T> {
} }
// Push run onto pending-run stack, and maybe merge // Push run onto pending-run stack, and maybe merge
ts.pushRun(lo, runLen, hi - lo); fs.pushRun(lo, runLen);
ts.mergeCollapse(); fs.mergeCollapse();
// Advance to find next run // Advance to find next run
lo += runLen; lo += runLen;
@ -257,8 +258,8 @@ public class FasterFinnSort<T> {
// Merge all remaining runs to complete sort // Merge all remaining runs to complete sort
assert lo == hi; assert lo == hi;
ts.mergeForceCollapse(); fs.mergeForceCollapse();
assert ts.stackSize == 1; assert fs.stackSize == 1;
} }
/** /**
@ -266,7 +267,7 @@ public class FasterFinnSort<T> {
* insertion sort. This is the best method for sorting small numbers * insertion sort. This is the best method for sorting small numbers
* of elements. It requires O(n log n) compares, but O(n^2) data * of elements. It requires O(n log n) compares, but O(n^2) data
* movement (worst case). * movement (worst case).
* * <p>
* If the initial part of the specified range is already sorted, * If the initial part of the specified range is already sorted,
* this method can take advantage of it: the method assumes that the * this method can take advantage of it: the method assumes that the
* elements from index {@code lo}, inclusive, to {@code start}, * elements from index {@code lo}, inclusive, to {@code start},
@ -285,7 +286,7 @@ public class FasterFinnSort<T> {
assert lo <= start && start <= hi; assert lo <= start && start <= hi;
if (start == lo) if (start == lo)
start++; start++;
for ( ; start < hi; start++) { for (; start < hi; start++) {
T pivot = a[start]; T pivot = a[start];
// Set left (and right) to the index where a[start] (pivot) belongs // Set left (and right) to the index where a[start] (pivot) belongs
@ -329,15 +330,15 @@ public class FasterFinnSort<T> {
* Returns the length of the run beginning at the specified position in * Returns the length of the run beginning at the specified position in
* the specified array and reverses the run if it is descending (ensuring * the specified array and reverses the run if it is descending (ensuring
* that the run will always be ascending when the method returns). * that the run will always be ascending when the method returns).
* * <p>
* A run is the longest ascending sequence with: * A run is the longest ascending sequence with:
* * <p>
* a[lo] <= a[lo + 1] <= a[lo + 2] <= ... * a[lo] <= a[lo + 1] <= a[lo + 2] <= ...
* * <p>
* or the longest descending sequence with: * or the longest descending sequence with:
* * <p>
* a[lo] > a[lo + 1] > a[lo + 2] > ... * a[lo] > a[lo + 1] > a[lo + 2] > ...
* * <p>
* For its intended use in a stable mergesort, the strictness of the * For its intended use in a stable mergesort, the strictness of the
* definition of "descending" is needed so that the call can safely * definition of "descending" is needed so that the call can safely
* reverse a descending sequence without violating stability. * reverse a descending sequence without violating stability.
@ -390,14 +391,14 @@ public class FasterFinnSort<T> {
* Returns the minimum acceptable run length for an array of the specified * Returns the minimum acceptable run length for an array of the specified
* length. Natural runs shorter than this will be extended with * length. Natural runs shorter than this will be extended with
* {@link #binarySort}. * {@link #binarySort}.
* * <p>
* Roughly speaking, the computation is: * Roughly speaking, the computation is:
* * <p>
* If n < MIN_MERGE, return n (it's too small to bother with fancy stuff). * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
* Else if n is an exact power of 2, return MIN_MERGE/2. * Else if n is an exact power of 2, return MIN_MERGE/2.
* Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
* is close to, but strictly less than, an exact power of 2. * is close to, but strictly less than, an exact power of 2.
* * <p>
* For the rationale, see listsort.txt. * For the rationale, see listsort.txt.
* *
* @param n the length of the array to be sorted * @param n the length of the array to be sorted
@ -419,24 +420,24 @@ public class FasterFinnSort<T> {
* @param runBase index of the first element in the run * @param runBase index of the first element in the run
* @param runLen the number of elements in the run * @param runLen the number of elements in the run
*/ */
private void pushRun(int runBase, int runLen, int rangeSize) { private void pushRun(int runBase, int runLen) {
this.runBase[stackSize] = runBase; this.runBase[stackSize] = runBase;
this.runLen[stackSize] = runLen; this.runLen[stackSize] = runLen;
this.runPower[stackSize] = power(stackSize, rangeSize); this.runPower[stackSize] = power(stackSize);
stackSize++; stackSize++;
} }
/** /**
* Examines the stack of runs waiting to be merged and merges adjacent runs * Examines the stack of runs waiting to be merged and merges adjacent runs
* until the stack invariants are reestablished: * until the stack invariants are reestablished:
* * <p>
* 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1] * 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1]
* 2. runLen[i - 2] > runLen[i - 1] * 2. runLen[i - 2] > runLen[i - 1]
* * <p>
* This method is called each time a new run is pushed onto the stack, * This method is called each time a new run is pushed onto the stack,
* so the invariants are guaranteed to hold for i < stackSize upon * so the invariants are guaranteed to hold for i < stackSize upon
* entry to the method. * entry to the method.
* * <p>
* Thanks to Stijn de Gouw, Jurriaan Rot, Frank S. de Boer, * Thanks to Stijn de Gouw, Jurriaan Rot, Frank S. de Boer,
* Richard Bubel and Reiner Hahnle, this is fixed with respect to * Richard Bubel and Reiner Hahnle, this is fixed with respect to
* the analysis in "On the Worst-Case Complexity of TimSort" by * the analysis in "On the Worst-Case Complexity of TimSort" by
@ -444,16 +445,35 @@ public class FasterFinnSort<T> {
*/ */
private void mergeCollapse() { private void mergeCollapse() {
while (stackSize > 1) { while (stackSize > 1) {
int n = stackSize - 2; if (runPower[stackSize - 1] < runPower[stackSize - 2]) {
if (n > 0 && runPower[n + 1] < runPower[n]) { mergeAt(stackSize - 3);
mergeAt(n);
} else { } else {
break; // Invariant is established break; // Invariant is established
} }
} }
} }
/*
private int power(int stackSize, int rangeSize) { private int power(int stackSize) {
if (stackSize == 0) {
return 0;
}
// int = (right - left + 1); = RangeSize
long l = runLen[stackSize - 1]; // + (long) runBase[stackSize]; // - ((long) left << 1); // 2*middleA
long r = runLen[stackSize]; // - ((long) left << 1); // 2*middleB
int a = (int) ((l << 30) / rangeSize); // middleA / 2n
int b = (int) ((r << 30) / rangeSize); // middleB / 2n
return Integer.numberOfLeadingZeros(a ^ b);
}
*/
private int power(int stackSize) {
/*
System.out.println(Arrays.toString(runBase));
System.out.println(Arrays.toString(runLen));
System.out.println(Arrays.toString(runPower));
System.out.println(stackSize);
System.out.println(rangeSize);
System.out.println();
*/
if (stackSize == 0) if (stackSize == 0)
return 0; return 0;
@ -465,18 +485,50 @@ public class FasterFinnSort<T> {
int result = 0; int result = 0;
while (b < rangeSize) { while (true) {
++result; ++result;
if (a >= rangeSize) { if (a >= rangeSize) {
a -= rangeSize; a -= rangeSize;
b -= rangeSize; b -= rangeSize;
} }
if (b >= rangeSize) {
break;
}
a <<= 1; a <<= 1;
b <<= 1; b <<= 1;
} }
return result; return result;
} }
/*
public int power(int stackSize) {
System.out.println(Arrays.toString(runBase));
System.out.println(Arrays.toString(runLen));
System.out.println(Arrays.toString(runPower));
System.out.println(stackSize);
System.out.println(rangeSize);
System.out.println();
if (stackSize == 0)
return 0;
int n_1 = this.runLen[stackSize - 1];
int n_2 = this.runLen[stackSize];
double a = ((double) this.runBase[stackSize - 1] + 0.5d * n_1 - 1d) / this.rangeSize;
double b = ((double) this.runBase[stackSize] + 0.5d * n_2 - 1d) / this.rangeSize;
int l = 0;
while ((int) (a * pow(2, l)) == (int) (b * pow(2 ,l))) {
l++;
}
return l;
}
*/
/* /*
Backup mergeCollapse() von TimSort: Backup mergeCollapse() von TimSort:
@ -519,7 +571,7 @@ public class FasterFinnSort<T> {
private void mergeAt(int i) { private void mergeAt(int i) {
assert stackSize >= 2; assert stackSize >= 2;
assert i >= 0; assert i >= 0;
assert i == stackSize - 2 || i == stackSize - 3; //assert i == stackSize - 3;
int base1 = runBase[i]; int base1 = runBase[i];
int len1 = runLen[i]; int len1 = runLen[i];
@ -533,12 +585,14 @@ public class FasterFinnSort<T> {
* run now, also slide over the last run (which isn't involved * run now, also slide over the last run (which isn't involved
* in this merge). The current run (i+1) goes away in any case. * in this merge). The current run (i+1) goes away in any case.
*/ */
runLen[i] = len1 + len2;
if (i == stackSize - 3) {
runBase[i + 1] = runBase[i + 2];
runLen[i + 1] = runLen[i + 2];
}
stackSize--; stackSize--;
runLen[i] = len1 + len2;
// @TODO: Check power before pushing the run
runLen[i + 1] = runLen[i + 2];
runBase[i + 1] = runBase[i + 2];
runPower[i + 1] = runPower[i + 2];
//runPower[i] = power(i);
/* /*
* Find where the first element of run2 goes in run1. Prior elements * Find where the first element of run2 goes in run1. Prior elements

View File

@ -1,14 +1,16 @@
package de.uni_marburg.powersort.FinnSort; package de.uni_marburg.powersort.FinnSort;
class Run { public class Run {
int start; public int start;
int end; public int end;
int power; int power;
public int len;
public Run(int i, int j, int p) { public Run(int i, int j, int p) {
start = i; start = i;
end = j; end = j;
power = p; power = p;
len = end - start;
} }
} }

View File

@ -1,8 +1,77 @@
package de.uni_marburg.powersort.sort; package de.uni_marburg.powersort.sort;
import de.uni_marburg.powersort.FinnSort.FasterFinnSort;
import de.uni_marburg.powersort.FinnSort.Run;
import de.uni_marburg.powersort.benchmark.NaturalOrder;
import org.junit.jupiter.api.Test;
import java.lang.reflect.Method;
import static java.lang.Math.pow;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class FasterFinnSortTest extends AbstractSortTest { public class FasterFinnSortTest extends AbstractSortTest {
FasterFinnSortTest() { FasterFinnSortTest() {
sortAlg = SortEnum.FASTER_FINN_SORT; sortAlg = SortEnum.FASTER_FINN_SORT;
} }
/**
 * Sorts a hand-built input made of several ascending runs of different
 * lengths and verifies that the result is completely sorted.
 * <p>
 * The input is a permutation of 1..28, so after sorting the element at
 * index {@code i} must be {@code i + 1}.
 * <p>
 * NOTE(review): {@code FasterFinnSort.sort} takes a no-merge path for ranges
 * shorter than {@code MIN_MERGE}; confirm that 28 elements actually reach the
 * merge code this test is named after.
 */
@Test
public void testMergeOrder() {
    Integer[] input = {24, 25, 26, 27, 28, 21, 22, 23, 18, 19, 20, 4, 5, 6, 7, 8, 9, 10,
            11, 12, 13, 14, 15, 16, 17, 3, 1, 2};
    FasterFinnSort.sort(input, 0, input.length, NaturalOrder.INSTANCE, null, 0, 0);
    // Without an assertion the test could only fail on an exception.
    for (int i = 0; i < input.length; i++) {
        assertEquals(i + 1, input[i].intValue(), "wrong element at index " + i);
    }
}
/**
 * Cross-checks the two power computations of this test class against each
 * other: the floating-point reference {@code power} and the leading-zero
 * based {@code power2}.
 * <p>
 * Two fixed adjacent runs, [0, 10) and [10, 20), are used while the size of
 * the surrounding range varies from 20 (inclusive) to 10100 (exclusive).
 */
@Test
public void powerTest() {
    Run run1 = new Run(0, 10, 0);
    Run run2 = new Run(10, 20, 0);
    for (int n = 20; n < 10100; n++) {
        // The message names the failing range size, replacing the former
        // per-iteration console output.
        assertEquals(power(run1, run2, n), power2(run1, run2, n),
                "power mismatch for range size " + n);
    }
}
/**
 * Integer-only computation of the power of the boundary between two
 * adjacent runs.
 * <p>
 * The midpoints of both runs, divided by {@code n}, are compared one binary
 * digit at a time; the result is the 1-based position of the first digit in
 * which they differ. Indices are taken relative to a range starting at 0.
 * <p>
 * The previous version compared {@code a * 2^result} with
 * {@code b * 2^result}, which is never equal for distinct midpoints, so it
 * always returned 0 and ignored {@code n}.
 *
 * @param run1 left run; its {@code start} and {@code len} are read
 * @param run2 right run, assumed to begin where {@code run1} ends; only its
 *             {@code len} is read
 * @param n    size of the range containing both runs
 * @return position (starting at 1) of the first binary digit in which the
 *         two normalized midpoints differ
 * @throws IllegalArgumentException if a run is empty, {@code run1} starts
 *         before index 0, or the runs do not fit into a range of size {@code n}
 */
private static int integerPower(Run run1, Run run2, int n) {
    int n_1 = run1.len;
    int n_2 = run2.len;
    // Twice the midpoints, so that everything stays integral.
    long a = 2L * run1.start + n_1;
    long b = a + n_1 + n_2;
    if (n_1 <= 0 || n_2 <= 0 || run1.start < 0 || b >= 2L * n) {
        // Equal or out-of-range midpoints would keep the loop below from terminating.
        throw new IllegalArgumentException(
                "runs must be non-empty and lie inside [0, " + n + ")");
    }

    int result = 0;
    while (true) {
        ++result;
        if (a >= n) {
            // Current digit of a is 1; since b > a, the digit of b is 1 as well.
            a -= n;
            b -= n;
        } else if (b >= n) {
            // Digit of a is 0 but digit of b is 1: first difference found.
            break;
        }
        a <<= 1;
        b <<= 1;
    }
    return result;
}
/**
 * Floating-point reference computation of the power of the boundary between
 * two runs.
 * <p>
 * Each run's midpoint ({@code start} plus half of {@code end - start}) is
 * divided by {@code n}. The result is the smallest level {@code l >= 1} at
 * which the two values, scaled by {@code 2^l} and truncated to {@code int},
 * are no longer equal. No special case is made for a run starting at index 0.
 *
 * @param run1 first run; {@code start} and {@code end} are read
 * @param run2 second run; {@code start} and {@code end} are read
 * @param n    divisor used to normalize the midpoints (the range size)
 * @return the first level at which the truncated scaled midpoints differ
 */
private static int power(Run run1, Run run2, int n) {
    final int leftLength = run1.end - run1.start;
    final int rightLength = run2.end - run2.start;

    final double leftMid = ((double) run1.start + 0.5d * leftLength) / n;
    final double rightMid = ((double) run2.start + 0.5d * rightLength) / n;

    for (int level = 1; ; level++) {
        final double scale = pow(2, level);
        if ((int) (leftMid * scale) != (int) (rightMid * scale)) {
            return level;
        }
    }
}
/**
 * Computes the power of the boundary between two adjacent runs with a
 * division and a leading-zero count instead of a loop.
 * <p>
 * Twice each midpoint is shifted left by 30 bits and divided by {@code n},
 * which yields the normalized midpoint scaled by {@code 2^31}. The number of
 * leading zeros of the XOR of both values is the position of the first
 * binary digit in which the normalized midpoints differ.
 * <p>
 * {@code Run.len} is {@code end - start}, so {@code end} is treated as
 * exclusive here; twice the midpoint of {@code run2} is therefore
 * {@code start + end} with no extra {@code + 1} (that term belongs to the
 * variant of the formula that uses an inclusive end index).
 *
 * @param run1 left run; only its {@code start} is read, its end is taken to
 *             be {@code run2.start}
 * @param run2 right run; {@code start} and (exclusive) {@code end} are read
 * @param n    size of the range containing both runs; both midpoints must be
 *             smaller than {@code n} for the scaled values to fit into an int
 * @return position (starting at 1) of the first differing binary digit
 */
private static int power2(Run run1, Run run2, int n) {
    long l = (long) run1.start + (long) run2.start; // 2 * midpoint of run1
    long r = (long) run2.start + (long) run2.end;   // 2 * midpoint of run2
    int a = (int) ((l << 30) / n); // midpoint of run1 / n, scaled by 2^31
    int b = (int) ((r << 30) / n); // midpoint of run2 / n, scaled by 2^31
    return Integer.numberOfLeadingZeros(a ^ b);
}
} }