1/* Copyright (C) 1991-2024 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18/* If you consider tuning this algorithm, you should consult first:
19 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
20 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
21
22#include <errno.h>
23#include <limits.h>
24#include <memswap.h>
25#include <stdlib.h>
26#include <string.h>
27#include <stdbool.h>
28
29/* Swap SIZE bytes between addresses A and B. These helpers are provided
30 along the generic one as an optimization. */
31
32enum swap_type_t
33 {
34 SWAP_WORDS_64,
35 SWAP_WORDS_32,
36 SWAP_VOID_ARG,
37 SWAP_BYTES
38 };
39
40typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
41typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
42
43static inline void
44swap_words_64 (void * restrict a, void * restrict b, size_t n)
45{
46 do
47 {
48 n -= 8;
49 u64_alias_t t = *(u64_alias_t *)(a + n);
50 *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n);
51 *(u64_alias_t *)(b + n) = t;
52 } while (n);
53}
54
55static inline void
56swap_words_32 (void * restrict a, void * restrict b, size_t n)
57{
58 do
59 {
60 n -= 4;
61 u32_alias_t t = *(u32_alias_t *)(a + n);
62 *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n);
63 *(u32_alias_t *)(b + n) = t;
64 } while (n);
65}
66
67/* Replace the indirect call with a serie of if statements. It should help
68 the branch predictor. */
69static void
70do_swap (void * restrict a, void * restrict b, size_t size,
71 enum swap_type_t swap_type)
72{
73 if (swap_type == SWAP_WORDS_64)
74 swap_words_64 (a, b, n: size);
75 else if (swap_type == SWAP_WORDS_32)
76 swap_words_32 (a, b, n: size);
77 else
78 __memswap (p1: a, p2: b, n: size);
79}
80
81/* Establish the heap condition at index K, that is, the key at K will
82 not be less than either of its children, at 2 * K + 1 and 2 * K + 2
83 (if they exist). N is the last valid index. */
84static inline void
85siftdown (void *base, size_t size, size_t k, size_t n,
86 enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg)
87{
88 /* There can only be a heap condition violation if there are
89 children. */
90 while (2 * k + 1 <= n)
91 {
92 /* Left child. */
93 size_t j = 2 * k + 1;
94 /* If the right child is larger, use it. */
95 if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0)
96 j++;
97
98 /* If k is already >= to its children, we are done. */
99 if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0)
100 break;
101
102 /* Heal the violation. */
103 do_swap (a: base + (size * j), b: base + (k * size), size, swap_type);
104
105 /* Swapping with j may have introduced a violation at j. Fix
106 it in the next loop iteration. */
107 k = j;
108 }
109}
110
111/* Establish the heap condition for the indices 0 to N (inclusive). */
112static inline void
113heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
114 __compar_d_fn_t cmp, void *arg)
115{
116 /* If n is odd, k = n / 2 has a left child at n, so this is the
117 largest index that can have a heap condition violation regarding
118 its children. */
119 size_t k = n / 2;
120 while (1)
121 {
122 siftdown (base, size, k, n, swap_type, cmp, arg);
123 if (k-- == 0)
124 break;
125 }
126}
127
128static enum swap_type_t
129get_swap_type (void *const pbase, size_t size)
130{
131 if ((size & (sizeof (uint32_t) - 1)) == 0
132 && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0)
133 {
134 if (size == sizeof (uint32_t))
135 return SWAP_WORDS_32;
136 else if (size == sizeof (uint64_t)
137 && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0)
138 return SWAP_WORDS_64;
139 }
140 return SWAP_BYTES;
141}
142
143
144/* A non-recursive heapsort with worst-case performance of O(nlog n) and
145 worst-case space complexity of O(1). It sorts the array starting at
146 BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
147 function used to swap elements, and CMP is the function used to compare
148 elements. */
149static void
150heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg)
151{
152 if (n == 0)
153 return;
154
155 enum swap_type_t swap_type = get_swap_type (pbase: base, size);
156
157 /* Build the binary heap, largest value at the base[0]. */
158 heapify (base, size, n, swap_type, cmp, arg);
159
160 while (true)
161 {
162 /* Indices 0 .. n contain the binary heap. Extract the largest
163 element put it into the final position in the array. */
164 do_swap (a: base, b: base + (n * size), size, swap_type);
165
166 /* The heap is now one element shorter. */
167 n--;
168 if (n == 0)
169 break;
170
171 /* By swapping in elements 0 and the previous value of n (now at
172 n + 1), we likely introduced a heap condition violation. Fix
173 it for the reduced heap. */
174 siftdown (base, size, k: 0, n, swap_type, cmp, arg);
175 }
176}
177
178/* The maximum size in bytes required by mergesort that will be provided
179 through a buffer allocated in the stack. */
180#define QSORT_STACK_SIZE 1024
181
182/* Elements larger than this value will be sorted through indirect sorting
183 to minimize the need to memory swap calls. */
184#define INDIRECT_SORT_SIZE_THRES 32
185
186struct msort_param
187{
188 size_t s;
189 enum swap_type_t var;
190 __compar_d_fn_t cmp;
191 void *arg;
192 char *t;
193};
194
195static void
196msort_with_tmp (const struct msort_param *p, void *b, size_t n)
197{
198 char *b1, *b2;
199 size_t n1, n2;
200
201 if (n <= 1)
202 return;
203
204 n1 = n / 2;
205 n2 = n - n1;
206 b1 = b;
207 b2 = (char *) b + (n1 * p->s);
208
209 msort_with_tmp (p, b: b1, n: n1);
210 msort_with_tmp (p, b: b2, n: n2);
211
212 char *tmp = p->t;
213 const size_t s = p->s;
214 __compar_d_fn_t cmp = p->cmp;
215 void *arg = p->arg;
216 switch (p->var)
217 {
218 case SWAP_WORDS_32:
219 while (n1 > 0 && n2 > 0)
220 {
221 if (cmp (b1, b2, arg) <= 0)
222 {
223 *(u32_alias_t *) tmp = *(u32_alias_t *) b1;
224 b1 += sizeof (u32_alias_t);
225 --n1;
226 }
227 else
228 {
229 *(u32_alias_t *) tmp = *(u32_alias_t *) b2;
230 b2 += sizeof (u32_alias_t);
231 --n2;
232 }
233 tmp += sizeof (u32_alias_t);
234 }
235 break;
236 case SWAP_WORDS_64:
237 while (n1 > 0 && n2 > 0)
238 {
239 if (cmp (b1, b2, arg) <= 0)
240 {
241 *(u64_alias_t *) tmp = *(u64_alias_t *) b1;
242 b1 += sizeof (u64_alias_t);
243 --n1;
244 }
245 else
246 {
247 *(u64_alias_t *) tmp = *(u64_alias_t *) b2;
248 b2 += sizeof (u64_alias_t);
249 --n2;
250 }
251 tmp += sizeof (u64_alias_t);
252 }
253 break;
254 case SWAP_VOID_ARG:
255 while (n1 > 0 && n2 > 0)
256 {
257 if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
258 {
259 *(void **) tmp = *(void **) b1;
260 b1 += sizeof (void *);
261 --n1;
262 }
263 else
264 {
265 *(void **) tmp = *(void **) b2;
266 b2 += sizeof (void *);
267 --n2;
268 }
269 tmp += sizeof (void *);
270 }
271 break;
272 default:
273 while (n1 > 0 && n2 > 0)
274 {
275 if (cmp (b1, b2, arg) <= 0)
276 {
277 tmp = (char *) __mempcpy (tmp, b1, s);
278 b1 += s;
279 --n1;
280 }
281 else
282 {
283 tmp = (char *) __mempcpy (tmp, b2, s);
284 b2 += s;
285 --n2;
286 }
287 }
288 break;
289 }
290
291 if (n1 > 0)
292 memcpy (tmp, b1, n1 * s);
293 memcpy (b, p->t, (n - n2) * s);
294}
295
296static void
297__attribute_used__
298indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n,
299 size_t s)
300{
301 /* Indirect sorting. */
302 char *ip = (char *) b;
303 void **tp = (void **) (p->t + n * sizeof (void *));
304 void **t = tp;
305 void *tmp_storage = (void *) (tp + n);
306
307 while ((void *) t < tmp_storage)
308 {
309 *t++ = ip;
310 ip += s;
311 }
312 msort_with_tmp (p, b: p->t + n * sizeof (void *), n);
313
314 /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
315 the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
316 char *kp;
317 size_t i;
318 for (i = 0, ip = (char *) b; i < n; i++, ip += s)
319 if ((kp = tp[i]) != ip)
320 {
321 size_t j = i;
322 char *jp = ip;
323 memcpy (tmp_storage, ip, s);
324
325 do
326 {
327 size_t k = (kp - (char *) b) / s;
328 tp[j] = jp;
329 memcpy (jp, kp, s);
330 j = k;
331 jp = kp;
332 kp = tp[k];
333 }
334 while (kp != ip);
335
336 tp[j] = jp;
337 memcpy (jp, tmp_storage, s);
338 }
339}
340
341void
342__qsort_r (void *const pbase, size_t total_elems, size_t size,
343 __compar_d_fn_t cmp, void *arg)
344{
345 if (total_elems <= 1)
346 return;
347
348 /* Align to the maximum size used by the swap optimization. */
349 _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE];
350 size_t total_size = total_elems * size;
351 char *buf;
352
353 if (size > INDIRECT_SORT_SIZE_THRES)
354 total_size = 2 * total_elems * sizeof (void *) + size;
355
356 if (total_size <= sizeof tmp)
357 buf = tmp;
358 else
359 {
360 int save = errno;
361 buf = malloc (size: total_size);
362 __set_errno (save);
363 if (buf == NULL)
364 {
365 /* Fallback to heapsort in case of memory failure. */
366 heapsort_r (base: pbase, n: total_elems - 1, size, cmp, arg);
367 return;
368 }
369 }
370
371 if (size > INDIRECT_SORT_SIZE_THRES)
372 {
373 const struct msort_param msort_param =
374 {
375 .s = sizeof (void *),
376 .cmp = cmp,
377 .arg = arg,
378 .var = SWAP_VOID_ARG,
379 .t = buf,
380 };
381 indirect_msort_with_tmp (p: &msort_param, b: pbase, n: total_elems, s: size);
382 }
383 else
384 {
385 const struct msort_param msort_param =
386 {
387 .s = size,
388 .cmp = cmp,
389 .arg = arg,
390 .var = get_swap_type (pbase, size),
391 .t = buf,
392 };
393 msort_with_tmp (p: &msort_param, b: pbase, n: total_elems);
394 }
395
396 if (buf != tmp)
397 free (ptr: buf);
398}
399libc_hidden_def (__qsort_r)
400weak_alias (__qsort_r, qsort_r)
401
402void
403qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
404{
405 return __qsort_r (pbase: b, total_elems: n, size: s, cmp: (__compar_d_fn_t) cmp, NULL);
406}
407libc_hidden_def (qsort)
408

source code of glibc/stdlib/qsort.c