Fixed merge order of FinnSort and added some testing methods

This commit is contained in:
finnm 2025-01-19 13:09:36 +01:00
parent f851b9784d
commit 8be6fa32a8
3 changed files with 204 additions and 79 deletions

View File

@ -26,6 +26,7 @@ package de.uni_marburg.powersort.FinnSort;
*/
import java.util.Arrays;
import java.util.Comparator;
import static java.lang.Math.pow;
@ -68,13 +69,13 @@ public class FasterFinnSort<T> {
* This is the minimum sized sequence that will be merged. Shorter
* sequences will be lengthened by calling binarySort. If the entire
* array is less than this length, no merges will be performed.
*
* <p>
* This constant should be a power of two. It was 64 in Tim Peters's C
* implementation, but 32 was empirically determined to work better in
* this implementation. In the unlikely event that you set this constant
* to be a number that's not a power of two, you'll need to change the
* {@link #minRunLength} computation.
*
* <p>
* If you decrease this constant, you must change the stackLen
* computation in the TimSort constructor, or you risk an
* ArrayIndexOutOfBoundsException. See listsort.txt for a discussion
@ -87,6 +88,7 @@ public class FasterFinnSort<T> {
* The array being sorted.
*/
private final T[] a;
private final int rangeSize;
/**
* The comparator for this sort.
@ -97,7 +99,7 @@ public class FasterFinnSort<T> {
* When we get into galloping mode, we stay there until both runs win less
* often than MIN_GALLOP consecutive times.
*/
private static final int MIN_GALLOP = 7;
private static final int MIN_GALLOP = 7;
/**
* This controls when we get *into* galloping mode. It is initialized
@ -128,9 +130,9 @@ public class FasterFinnSort<T> {
* A stack of pending runs yet to be merged. Run i starts at
* address base[i] and extends for len[i] elements. It's always
* true (so long as the indices are in bounds) that:
*
* runBase[i] + runLen[i] == runBase[i + 1]
*
* <p>
* runBase[i] + runLen[i] == runBase[i + 1]
* <p>
* so we could cut the storage for this, but it's a minor amount,
* and keeping all the info explicit simplifies the code.
*/
@ -142,29 +144,28 @@ public class FasterFinnSort<T> {
/**
* Creates a FasterFinnSort instance to maintain the state of an ongoing sort.
*
* @param a the array to be sorted
* @param c the comparator to determine the order of the sort
* @param work a workspace array (slice)
* @param a the array to be sorted
* @param c the comparator to determine the order of the sort
* @param work a workspace array (slice)
* @param workBase origin of usable space in work array
* @param workLen usable size of work array
* @param workLen usable size of work array
*/
private FasterFinnSort(T[] a, Comparator<? super T> c, T[] work, int workBase, int workLen) {
private FasterFinnSort(T[] a, Comparator<? super T> c, T[] work, int workBase, int workLen, int rangeSize) {
this.a = a;
this.c = c;
this.rangeSize = rangeSize;
// Allocate temp storage (which may be increased later if necessary)
int len = a.length;
int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
if (work == null || workLen < tlen || workBase + tlen > work.length) {
@SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
T[] newArray = (T[])java.lang.reflect.Array.newInstance
T[] newArray = (T[]) java.lang.reflect.Array.newInstance
(a.getClass().getComponentType(), tlen);
tmp = newArray;
tmpBase = 0;
tmpLen = tlen;
}
else {
} else {
tmp = work;
tmpBase = workBase;
tmpLen = workLen;
@ -204,24 +205,24 @@ public class FasterFinnSort<T> {
* any necessary array bounds checks and expanding parameters into
* the required forms.
*
* @param a the array to be sorted
* @param lo the index of the first element, inclusive, to be sorted
* @param hi the index of the last element, exclusive, to be sorted
* @param c the comparator to use
* @param work a workspace array (slice)
* @param a the array to be sorted
* @param lo the index of the first element, inclusive, to be sorted
* @param hi the index of the last element, exclusive, to be sorted
* @param c the comparator to use
* @param work a workspace array (slice)
* @param workBase origin of usable space in work array
* @param workLen usable size of work array
* @param workLen usable size of work array
* @since 1.8
*/
public static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c,
T[] work, int workBase, int workLen) {
T[] work, int workBase, int workLen) {
assert c != null && a != null && lo >= 0 && lo <= hi && hi <= a.length;
int nRemaining = hi - lo;
int nRemaining = hi - lo;
if (nRemaining < 2)
return; // Arrays of size 0 and 1 are always sorted
// If array is small, do a "mini-TimSort" with no merges
if (nRemaining < MIN_MERGE) {
int initRunLen = countRunAndMakeAscending(a, lo, hi, c);
binarySort(a, lo, hi, lo + initRunLen, c);
@ -233,7 +234,7 @@ public class FasterFinnSort<T> {
* extending short natural runs to minRun elements, and merging runs
* to maintain stack invariant.
*/
FasterFinnSort<T> ts = new FasterFinnSort<>(a, c, work, workBase, workLen);
FasterFinnSort<T> fs = new FasterFinnSort<>(a, c, work, workBase, workLen, hi - lo);
int minRun = minRunLength(nRemaining);
do {
// Identify next run
@ -247,8 +248,8 @@ public class FasterFinnSort<T> {
}
// Push run onto pending-run stack, and maybe merge
ts.pushRun(lo, runLen, hi - lo);
ts.mergeCollapse();
fs.pushRun(lo, runLen);
fs.mergeCollapse();
// Advance to find next run
lo += runLen;
@ -257,8 +258,8 @@ public class FasterFinnSort<T> {
// Merge all remaining runs to complete sort
assert lo == hi;
ts.mergeForceCollapse();
assert ts.stackSize == 1;
fs.mergeForceCollapse();
assert fs.stackSize == 1;
}
/**
@ -266,18 +267,18 @@ public class FasterFinnSort<T> {
* insertion sort. This is the best method for sorting small numbers
* of elements. It requires O(n log n) compares, but O(n^2) data
* movement (worst case).
*
* <p>
* If the initial part of the specified range is already sorted,
* this method can take advantage of it: the method assumes that the
* elements from index {@code lo}, inclusive, to {@code start},
* exclusive are already sorted.
*
* @param a the array in which a range is to be sorted
* @param lo the index of the first element in the range to be sorted
* @param hi the index after the last element in the range to be sorted
* @param a the array in which a range is to be sorted
* @param lo the index of the first element in the range to be sorted
* @param hi the index after the last element in the range to be sorted
* @param start the index of the first element in the range that is
* not already known to be sorted ({@code lo <= start <= hi})
* @param c comparator to use for the sort
* not already known to be sorted ({@code lo <= start <= hi})
* @param c comparator to use for the sort
*/
@SuppressWarnings("fallthrough")
private static <T> void binarySort(T[] a, int lo, int hi, int start,
@ -285,7 +286,7 @@ public class FasterFinnSort<T> {
assert lo <= start && start <= hi;
if (start == lo)
start++;
for ( ; start < hi; start++) {
for (; start < hi; start++) {
T pivot = a[start];
// Set left (and right) to the index where a[start] (pivot) belongs
@ -329,26 +330,26 @@ public class FasterFinnSort<T> {
* Returns the length of the run beginning at the specified position in
* the specified array and reverses the run if it is descending (ensuring
* that the run will always be ascending when the method returns).
*
* <p>
* A run is the longest ascending sequence with:
*
* a[lo] <= a[lo + 1] <= a[lo + 2] <= ...
*
* <p>
* a[lo] <= a[lo + 1] <= a[lo + 2] <= ...
* <p>
* or the longest descending sequence with:
*
* a[lo] > a[lo + 1] > a[lo + 2] > ...
*
* <p>
* a[lo] > a[lo + 1] > a[lo + 2] > ...
* <p>
* For its intended use in a stable mergesort, the strictness of the
* definition of "descending" is needed so that the call can safely
* reverse a descending sequence without violating stability.
*
* @param a the array in which a run is to be counted and possibly reversed
* @param a the array in which a run is to be counted and possibly reversed
* @param lo index of the first element in the run
* @param hi index after the last element that may be contained in the run.
* It is required that {@code lo < hi}.
* @param c the comparator to use for the sort
* @return the length of the run beginning at the specified position in
* the specified array
* It is required that {@code lo < hi}.
* @param c the comparator to use for the sort
* @return the length of the run beginning at the specified position in
* the specified array
*/
private static <T> int countRunAndMakeAscending(T[] a, int lo, int hi,
Comparator<? super T> c) {
@ -373,7 +374,7 @@ public class FasterFinnSort<T> {
/**
* Reverse the specified range of the specified array.
*
* @param a the array in which a range is to be reversed
* @param a the array in which a range is to be reversed
* @param lo the index of the first element in the range to be reversed
* @param hi the index after the last element in the range to be reversed
*/
@ -390,14 +391,14 @@ public class FasterFinnSort<T> {
* Returns the minimum acceptable run length for an array of the specified
* length. Natural runs shorter than this will be extended with
* {@link #binarySort}.
*
* <p>
* Roughly speaking, the computation is:
*
* If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
* Else if n is an exact power of 2, return MIN_MERGE/2.
* Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
* is close to, but strictly less than, an exact power of 2.
*
* <p>
* If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
* Else if n is an exact power of 2, return MIN_MERGE/2.
* Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
* is close to, but strictly less than, an exact power of 2.
* <p>
* For the rationale, see listsort.txt.
*
* @param n the length of the array to be sorted
@ -419,24 +420,24 @@ public class FasterFinnSort<T> {
* @param runBase index of the first element in the run
* @param runLen the number of elements in the run
*/
private void pushRun(int runBase, int runLen, int rangeSize) {
private void pushRun(int runBase, int runLen) {
this.runBase[stackSize] = runBase;
this.runLen[stackSize] = runLen;
this.runPower[stackSize] = power(stackSize, rangeSize);
this.runPower[stackSize] = power(stackSize);
stackSize++;
}
/**
* Examines the stack of runs waiting to be merged and merges adjacent runs
* until the stack invariants are reestablished:
*
* 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1]
* 2. runLen[i - 2] > runLen[i - 1]
*
* <p>
* 1. runLen[i - 3] > runLen[i - 2] + runLen[i - 1]
* 2. runLen[i - 2] > runLen[i - 1]
* <p>
* This method is called each time a new run is pushed onto the stack,
* so the invariants are guaranteed to hold for i < stackSize upon
* entry to the method.
*
* <p>
* Thanks to Stijn de Gouw, Jurriaan Rot, Frank S. de Boer,
* Richard Bubel and Reiner Hahnle, this is fixed with respect to
* the analysis in "On the Worst-Case Complexity of TimSort" by
@ -444,16 +445,35 @@ public class FasterFinnSort<T> {
*/
private void mergeCollapse() {
while (stackSize > 1) {
int n = stackSize - 2;
if (n > 0 && runPower[n + 1] < runPower[n]) {
mergeAt(n);
if (runPower[stackSize - 1] < runPower[stackSize - 2]) {
mergeAt(stackSize - 3);
} else {
break; // Invariant is established
}
}
}
private int power(int stackSize, int rangeSize) {
/*
private int power(int stackSize) {
if (stackSize == 0) {
return 0;
}
// int = (right - left + 1); = RangeSize
long l = runLen[stackSize - 1]; // + (long) runBase[stackSize]; // - ((long) left << 1); // 2*middleA
long r = runLen[stackSize]; // - ((long) left << 1); // 2*middleB
int a = (int) ((l << 30) / rangeSize); // middleA / 2n
int b = (int) ((r << 30) / rangeSize); // middleB / 2n
return Integer.numberOfLeadingZeros(a ^ b);
}
*/
private int power(int stackSize) {
/*
System.out.println(Arrays.toString(runBase));
System.out.println(Arrays.toString(runLen));
System.out.println(Arrays.toString(runPower));
System.out.println(stackSize);
System.out.println(rangeSize);
System.out.println();
*/
if (stackSize == 0)
return 0;
@ -465,18 +485,50 @@ public class FasterFinnSort<T> {
int result = 0;
while (b < rangeSize) {
while (true) {
++result;
if (a >= rangeSize) {
a -= rangeSize;
b -= rangeSize;
}
if (b >= rangeSize) {
break;
}
a <<= 1;
b <<= 1;
}
return result;
}
/*
public int power(int stackSize) {
System.out.println(Arrays.toString(runBase));
System.out.println(Arrays.toString(runLen));
System.out.println(Arrays.toString(runPower));
System.out.println(stackSize);
System.out.println(rangeSize);
System.out.println();
if (stackSize == 0)
return 0;
int n_1 = this.runLen[stackSize - 1];
int n_2 = this.runLen[stackSize];
double a = ((double) this.runBase[stackSize - 1] + 0.5d * n_1 - 1d) / this.rangeSize;
double b = ((double) this.runBase[stackSize] + 0.5d * n_2 - 1d) / this.rangeSize;
int l = 0;
while ((int) (a * pow(2, l)) == (int) (b * pow(2 ,l))) {
l++;
}
return l;
}
*/
/*
Backup mergeCollapse() von TimSort:
@ -519,7 +571,7 @@ public class FasterFinnSort<T> {
private void mergeAt(int i) {
assert stackSize >= 2;
assert i >= 0;
assert i == stackSize - 2 || i == stackSize - 3;
//assert i == stackSize - 3;
int base1 = runBase[i];
int len1 = runLen[i];
@ -533,12 +585,14 @@ public class FasterFinnSort<T> {
* run now, also slide over the last run (which isn't involved
* in this merge). The current run (i+1) goes away in any case.
*/
runLen[i] = len1 + len2;
if (i == stackSize - 3) {
runBase[i + 1] = runBase[i + 2];
runLen[i + 1] = runLen[i + 2];
}
stackSize--;
runLen[i] = len1 + len2;
// @TODO: Check power before pushing the run
runLen[i + 1] = runLen[i + 2];
runBase[i + 1] = runBase[i + 2];
runPower[i + 1] = runPower[i + 2];
//runPower[i] = power(i);
/*
* Find where the first element of run2 goes in run1. Prior elements

View File

@ -1,14 +1,16 @@
package de.uni_marburg.powersort.FinnSort;
class Run {
int start;
int end;
/**
 * Plain data holder describing one run (a contiguous index range) of an array.
 *
 * <p>The length is derived as {@code end - start} at construction time, i.e. {@code end}
 * is treated as an exclusive bound. The fields are mutable and {@code len} is NOT
 * recomputed when {@code start} or {@code end} are reassigned afterwards.
 */
public class Run {
    /** Index of the first element of the run. */
    public int start;

    /** Index just past the last element of the run (see {@code len}). */
    public int end;

    /** Power value attached to the run (presumably the Powersort node power; confirm with callers). */
    int power;

    /** Number of elements in the run, fixed to {@code end - start} by the constructor. */
    public int len;

    /**
     * Creates a run covering the indices from {@code i} up to, but not including, {@code j}.
     *
     * @param i value stored in {@code start}
     * @param j value stored in {@code end}
     * @param p value stored in {@code power}
     */
    public Run(int i, int j, int p) {
        this.start = i;
        this.end = j;
        this.power = p;
        this.len = j - i;
    }
}

View File

@ -1,8 +1,77 @@
package de.uni_marburg.powersort.sort;
import de.uni_marburg.powersort.FinnSort.FasterFinnSort;
import de.uni_marburg.powersort.FinnSort.Run;
import de.uni_marburg.powersort.benchmark.NaturalOrder;
import org.junit.jupiter.api.Test;
import java.lang.reflect.Method;
import static java.lang.Math.pow;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class FasterFinnSortTest extends AbstractSortTest {
/**
 * Configures this test class to run against {@code SortEnum.FASTER_FINN_SORT} by
 * assigning {@code sortAlg} (presumably a field inherited from AbstractSortTest; confirm).
 */
FasterFinnSortTest() {
    this.sortAlg = SortEnum.FASTER_FINN_SORT;
}
}
/**
 * Sorts a fixed input consisting of several ascending runs of different lengths and
 * verifies that the array ends up fully sorted.
 *
 * <p>The input is a permutation of 1..28, so after sorting index {@code i} must hold
 * {@code i + 1}. This assumes {@code NaturalOrder.INSTANCE} orders Integers ascending
 * (TODO confirm against NaturalOrder).
 *
 * <p>NOTE(review): the input has only 28 elements. If MIN_MERGE in FasterFinnSort is 32,
 * as its Javadoc suggests, this input takes the small-array path and never reaches the
 * merge logic. Confirm, and enlarge the input if the merge order is what should be tested.
 */
@Test
public void testMergeOrder() {
    Integer[] input = {24, 25, 26, 27, 28, 21, 22, 23, 18, 19, 20, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 3, 1, 2};

    FasterFinnSort.sort(input, 0, input.length, NaturalOrder.INSTANCE, null, 0, 0);

    // Without an assertion the test could only fail on an exception; check the result.
    for (int i = 0; i < input.length; i++) {
        assertEquals(i + 1, input[i].intValue(), "unexpected element at index " + i);
    }
}
/**
 * Checks that the floating-point reference {@code power} and the bit-trick variant
 * {@code power2} agree for two adjacent runs [0, 10) and [10, 20) over every total
 * range size {@code n} in [20, 10100).
 */
@Test
public void powerTest() {
    Run run1 = new Run(0, 10, 0);
    Run run2 = new Run(10, 20, 0);

    for (int n = 20; n < 10100; n++) {
        //assertEquals(integerPower(run1, run2, n), power(run1, run2, n));
        // No per-iteration printing: the failure message carries the offending n instead.
        assertEquals(power(run1, run2, n), power2(run1, run2, n),
                "power and power2 disagree for n = " + n);
    }
}
/**
 * Integer-only computation of the node power between two adjacent runs.
 *
 * <p>Returns the smallest {@code l >= 1} at which the normalized midpoints of the two
 * runs, {@code (start + len / 2) / n}, differ in the integer part after scaling by
 * {@code 2^l}. This is the same quantity the floating-point {@code power} helper in
 * this class computes.
 *
 * <p>The previous loop compared {@code a * 2^result == b * 2^result}, which is only true
 * when {@code a == b}; it therefore always returned 0 for non-empty runs and ignored
 * {@code n}. The midpoints also carried a {@code - 1} offset that {@code power} does not
 * use; it is dropped here so both helpers describe the same midpoints.
 *
 * @param run1 the left run
 * @param run2 the right run, expected to start where {@code run1} ends
 * @param n    size of the whole range; both runs are expected to lie within [0, n)
 * @return the node power, always at least 1
 * @throws IllegalArgumentException if {@code n} is not positive or the runs are empty
 *                                  (the midpoints would coincide and the loop would not end)
 */
private static int integerPower(Run run1, Run run2, int n) {
    int n_1 = run1.len;
    int n_2 = run2.len;
    if (n <= 0 || n_1 + n_2 <= 0) {
        throw new IllegalArgumentException(
                "n and the combined run length must be positive: n=" + n
                        + ", n_1=" + n_1 + ", n_2=" + n_2);
    }

    // a and b are twice the midpoints, so comparing against n tests the next binary digit
    // of midpoint / n. long avoids overflow from the repeated doubling.
    long a = 2L * run1.start + n_1;
    long b = a + n_1 + n_2;
    int result = 0;
    while (true) {
        ++result;
        if (a >= n) {
            // Both midpoints have a 1 at this digit (b > a): strip it and continue.
            a -= n;
            b -= n;
        } else if (b >= n) {
            // First digit where the midpoints differ.
            break;
        }
        a <<= 1;
        b <<= 1;
    }
    return result;
}
/**
 * Floating-point reference computation of the power between two runs.
 *
 * <p>Each run's midpoint {@code start + (end - start) / 2} is divided by {@code n}. The
 * result is the smallest {@code level >= 1} for which the two normalized midpoints,
 * scaled by {@code 2^level} and truncated to int, are different. No special case is made
 * for a first run starting at index 0.
 *
 * @param run1 the first run
 * @param run2 the second run
 * @param n    divisor used to normalize the midpoints
 * @return the first level (counting from 1) at which the truncated scaled midpoints differ
 */
private static int power(Run run1, Run run2, int n) {
    int firstLength = run1.end - run1.start;
    int secondLength = run2.end - run2.start;
    double firstMid = ((double) run1.start + 0.5d * firstLength) / n;
    double secondMid = ((double) run2.start + 0.5d * secondLength) / n;

    int level = 1;
    for (;;) {
        double scale = pow(2, level);
        if ((int) (firstMid * scale) != (int) (secondMid * scale)) {
            return level;
        }
        level++;
    }
}
/**
 * Computes the power between two runs with fixed-point arithmetic and a
 * leading-zero count instead of a loop.
 *
 * <p>{@code Run.end} is exclusive ({@code len = end - start}), so twice the midpoint of
 * a run is simply {@code start + end}. The earlier formula ({@code run2.start +
 * run2.end + 1}) is the one for an inclusive end and shifted the second midpoint by half
 * an element; for the runs [0, 10) and [10, 20) with n = 31 it yielded 1 where the
 * reference {@code power} yields 2.
 *
 * @param run1 the left run
 * @param run2 the right run
 * @param n    size of the whole range; both runs are expected to lie within [0, n) so
 *             that the scaled midpoints fit into 31 bits
 * @return the number of leading zeros of the XOR of the two scaled midpoints, i.e. the
 *         1-based position of the first binary digit in which they differ
 */
private int power2(Run run1, Run run2, int n) {
    long l = (long) run1.start + (long) run1.end; // 2 * midpoint of run1
    long r = (long) run2.start + (long) run2.end; // 2 * midpoint of run2
    // (2 * mid << 30) / n == mid / n scaled to 31 fractional bits; bit 31 stays 0.
    int a = (int) ((l << 30) / n);
    int b = (int) ((r << 30) / n);
    return Integer.numberOfLeadingZeros(a ^ b);
}
}