qsort.h (7430B)
1 /* 2 * Copyright (c) 2013, 2017 Alexey Tourbin 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23 /* 24 * This is a traditional Quicksort implementation which mostly follows 25 * [Sedgewick 1978]. Sorting is performed entirely on array indices, 26 * while actual access to the array elements is abstracted out with the 27 * user-defined `LESS` and `SWAP` primitives. 28 * 29 * Synopsis: 30 * QSORT(N, LESS, SWAP); 31 * where 32 * N - the number of elements in A[]; 33 * LESS(i, j) - compares A[i] to A[j]; 34 * SWAP(i, j) - exchanges A[i] with A[j]. 35 */ 36 37 #ifndef QSORT_H 38 #define QSORT_H 39 40 /* Sort 3 elements. */ 41 #define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \ 42 do { \ 43 if (Q_LESS(q_a2, q_a1)) { \ 44 if (Q_LESS(q_a3, q_a2)) \ 45 Q_SWAP(q_a1, q_a3); \ 46 else { \ 47 Q_SWAP(q_a1, q_a2); \ 48 if (Q_LESS(q_a3, q_a2)) \ 49 Q_SWAP(q_a2, q_a3); \ 50 } \ 51 } \ 52 else if (Q_LESS(q_a3, q_a2)) { \ 53 Q_SWAP(q_a2, q_a3); \ 54 if (Q_LESS(q_a2, q_a1)) \ 55 Q_SWAP(q_a1, q_a2); \ 56 } \ 57 } while (0) 58 59 /* Partition [q_l,q_r] around a pivot. After partitioning, 60 * [q_l,q_j] are the elements that are less than or equal to the pivot, 61 * while [q_i,q_r] are the elements greater than or equal to the pivot. */ 62 #define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \ 63 do { \ 64 /* The middle element, not to be confused with the median. */ \ 65 Q_UINT q_m = (q_l) + (((q_r) - (q_l)) >> 1); \ 66 /* Reorder the second, the middle, and the last items. \ 67 * As [Edelkamp Weiss 2016] explain, using the second element \ 68 * instead of the first one helps avoid bad behaviour for \ 69 * decreasingly sorted arrays. This method is used in recent \ 70 * versions of gcc's std::sort, see gcc bug 58437#c13, although \ 71 * the details are somewhat different (cf. #c14). */ \ 72 Q_SORT3((q_l) + 1, q_m, q_r, Q_LESS, Q_SWAP); \ 73 /* Place the median at the beginning. */ \ 74 Q_SWAP(q_l, q_m); \ 75 /* Partition [q_l+2, q_r-1] around the median which is in q_l. \ 76 * q_i and q_j are initially off by one, they get decremented \ 77 * in the do-while loops. */ \ 78 (q_i) = (q_l) + 1; (q_j) = q_r; \ 79 while (1) { \ 80 do (q_i)++; while (Q_LESS(q_i, q_l)); \ 81 do (q_j)--; while (Q_LESS(q_l, q_j)); \ 82 if ((q_i) >= (q_j)) break; /* Sedgewick says "until j < i" */ \ 83 Q_SWAP(q_i, q_j); \ 84 } \ 85 /* Compensate for the i==j case. */ \ 86 (q_i) = (q_j) + 1; \ 87 /* Put the median to its final place. */ \ 88 Q_SWAP(q_l, q_j); \ 89 /* The median is not part of the left subfile. */ \ 90 (q_j)--; \ 91 } while (0) 92 93 /* Insertion sort is applied to small subfiles - this is contrary to 94 * Sedgewick's suggestion to run a separate insertion sort pass after 95 * the partitioning is done. The reason I don't like a separate pass 96 * is that it triggers extra comparisons, because it can't see that the 97 * medians are already in their final positions and need not be rechecked. 98 * Since I do not assume that comparisons are cheap, I also do not try 99 * to eliminate the (q_j > q_l) boundary check. */ 100 #define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \ 101 do { \ 102 Q_UINT q_i, q_j; \ 103 /* For each item starting with the second... */ \ 104 for (q_i = (q_l) + 1; q_i <= (q_r); q_i++) \ 105 /* move it down the array so that the first part is sorted. */ \ 106 for (q_j = q_i; q_j > (q_l) && (Q_LESS(q_j, q_j - 1)); q_j--) \ 107 Q_SWAP(q_j, q_j - 1); \ 108 } while (0) 109 110 /* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to 111 * Q_THRESH, the algorithm performs recursive partitioning. When the size 112 * drops below Q_THRESH, the algorithm switches to insertion sort. 113 * The minimum valid value is probably 5 (with 5 items, the second and 114 * the middle items, the middle itself being rounded down, are distinct). */ 115 #define Q_THRESH 16 116 117 /* The main loop. */ 118 #define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \ 119 do { \ 120 Q_UINT q_l = 0; \ 121 Q_UINT q_r = (Q_N) - 1; \ 122 Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \ 123 struct { Q_UINT q_l, q_r; } \ 124 /* On 32-bit platforms, to sort a "char[3GB+]" array, \ 125 * it may take full 32 stack frames. On 64-bit CPUs, \ 126 * though, the address space is limited to 48 bits. \ 127 * The usage is further reduced if Q_N has a 32-bit type. */ \ 128 q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \ 129 while (1) { \ 130 if (q_r - q_l + 1 >= Q_THRESH) { \ 131 Q_UINT q_i, q_j; \ 132 Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \ 133 /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \ 134 * Dealing with them depends on which one is bigger. */ \ 135 if (q_j - q_l >= q_r - q_i) \ 136 Q_SUBFILES(q_l, q_j, q_i, q_r); \ 137 else \ 138 Q_SUBFILES(q_i, q_r, q_l, q_j); \ 139 } \ 140 else { \ 141 Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \ 142 /* Pop subfiles from the stack, until it gets empty. */ \ 143 if (q_sp == 0) break; \ 144 q_sp--; \ 145 q_l = q_st[q_sp].q_l; \ 146 q_r = q_st[q_sp].q_r; \ 147 } \ 148 } \ 149 } while (0) 150 151 /* The missing part: dealing with subfiles. 152 * Assumes that the first subfile is not smaller than the second. */ 153 #define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \ 154 do { \ 155 /* If the second subfile is only a single element, it needs \ 156 * no further processing. The first subfile will be processed \ 157 * on the next iteration (both subfiles cannot be only a single \ 158 * element, due to Q_THRESH). */ \ 159 if ((q_l2) == (q_r2)) { \ 160 q_l = q_l1; \ 161 q_r = q_r1; \ 162 } \ 163 else { \ 164 /* Otherwise, both subfiles need processing. \ 165 * Push the larger subfile onto the stack. */ \ 166 q_st[q_sp].q_l = q_l1; \ 167 q_st[q_sp].q_r = q_r1; \ 168 q_sp++; \ 169 /* Process the smaller subfile on the next iteration. */ \ 170 q_l = q_l2; \ 171 q_r = q_r2; \ 172 } \ 173 } while (0) 174 175 /* And now, ladies and gentlemen, may I proudly present to you... */ 176 #define QSORT(Q_N, Q_LESS, Q_SWAP) \ 177 do { \ 178 if ((Q_N) > 1) \ 179 /* We could check sizeof(Q_N) and use "unsigned", but at least \ 180 * on x86_64, this has the performance penalty of up to 5%. */ \ 181 Q_LOOP(unsigned long, Q_N, Q_LESS, Q_SWAP); \ 182 } while (0) 183 184 #endif 185 186 /* ex:set ts=8 sts=4 sw=4 noet: */