Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LG_msort1: sort a list of integers
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Timothy A. Davis, Texas A&M University
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // A parallel mergesort of an array of n integers.
19 :
20 : #define LG_FREE_ALL LAGraph_Free ((void **) &W, NULL) ;
21 :
22 : #include "LG_internal.h"
23 :
24 : //------------------------------------------------------------------------------
25 : // prototype only needed for LG_msort1
26 : //------------------------------------------------------------------------------
27 :
28 : void LG_msort_1b_create_merge_tasks
29 : (
30 : // output:
31 : int64_t *LG_RESTRICT L_task, // L_task [t0...t0+ntasks-1] computed
32 : int64_t *LG_RESTRICT L_len, // L_len [t0...t0+ntasks-1] computed
33 : int64_t *LG_RESTRICT R_task, // R_task [t0...t0+ntasks-1] computed
34 : int64_t *LG_RESTRICT R_len, // R_len [t0...t0+ntasks-1] computed
35 : int64_t *LG_RESTRICT S_task, // S_task [t0...t0+ntasks-1] computed
36 : // input:
37 : const int t0, // first task tid to create
38 : const int ntasks, // # of tasks to create
39 : const int64_t pS_start, // merge into S [pS_start...]
40 : const int64_t *LG_RESTRICT L_0, // Left = L [pL_start...pL_end-1]
41 : const int64_t pL_start,
42 : const int64_t pL_end,
43 : const int64_t *LG_RESTRICT R_0, // Right = R [pR_start...pR_end-1]
44 : const int64_t pR_start,
45 : const int64_t pR_end
46 : ) ;
47 :
48 : //------------------------------------------------------------------------------
49 : // LG_msort_1b_binary_search: binary search for the pivot
50 : //------------------------------------------------------------------------------
51 :
52 : // The Pivot value is Y [pivot], and a binary search for the Pivot is made in
53 : // the array X [p_pstart...p_end-1], which is sorted in non-decreasing order on
54 : // input. The return value is pleft, where
55 : //
56 : // X [p_start ... pleft-1] <= Pivot and
57 : // X [pleft ... p_end-1] >= Pivot holds.
58 : //
59 : // pleft is returned in the range p_start to p_end. If pleft is p_start, then
60 : // the Pivot is smaller than all entries in X [p_start...p_end-1], and the left
61 : // list X [p_start...pleft-1] is empty. If pleft is p_end, then the Pivot is
62 : // larger than all entries in X [p_start...p_end-1], and the right list X
63 : // [pleft...p_end-1] is empty.
64 :
65 196 : static int64_t LG_msort_1b_binary_search // return pleft
66 : (
67 : const int64_t *LG_RESTRICT Y_0, // Pivot is Y [pivot]
68 : const int64_t pivot,
69 : const int64_t *LG_RESTRICT X_0, // search in X [p_start..p_end_-1]
70 : const int64_t p_start,
71 : const int64_t p_end
72 : )
73 : {
74 :
75 : //--------------------------------------------------------------------------
76 : // find where the Pivot appears in X
77 : //--------------------------------------------------------------------------
78 :
79 : // binary search of X [p_start...p_end-1] for the Pivot
80 196 : int64_t pleft = p_start ;
81 196 : int64_t pright = p_end - 1 ;
82 2982 : while (pleft < pright)
83 : {
84 2786 : int64_t pmiddle = (pleft + pright) >> 1 ;
85 : // less = (X [pmiddle] < Pivot)
86 2786 : bool less = LG_lt_1 (X_0, pmiddle,
87 : Y_0, pivot) ;
88 2786 : pleft = less ? (pmiddle+1) : pleft ;
89 2786 : pright = less ? pright : pmiddle ;
90 : }
91 :
92 : // binary search is narrowed down to a single item
93 : // or it has found the list is empty:
94 : ASSERT (pleft == pright || pleft == pright + 1) ;
95 :
96 : // If found is true then X [pleft == pright] == Pivot. If duplicates
97 : // appear then X [pleft] is any one of the entries equal to the Pivot
98 : // in the list. If found is false then
99 : // X [p_start ... pleft-1] < Pivot and
100 : // X [pleft+1 ... p_end-1] > Pivot holds.
101 : // The value X [pleft] may be either < or > Pivot.
102 196 : bool found = (pleft == pright) && LG_eq_1 (X_0, pleft,
103 : Y_0, pivot) ;
104 :
105 : // Modify pleft and pright:
106 196 : if (!found && (pleft == pright))
107 : {
108 26 : if (LG_lt_1 (X_0, pleft,
109 : Y_0, pivot))
110 : {
111 2 : pleft++ ;
112 : }
113 : else
114 : {
115 : // pright++ ; // (not needed)
116 : }
117 : }
118 :
119 : //--------------------------------------------------------------------------
120 : // return result
121 : //--------------------------------------------------------------------------
122 :
123 : // If found is false then
124 : // X [p_start ... pleft-1] < Pivot and
125 : // X [pleft ... p_end-1] > Pivot holds,
126 : // and pleft-1 == pright
127 :
128 : // If X has no duplicates, then whether or not Pivot is found,
129 : // X [p_start ... pleft-1] < Pivot and
130 : // X [pleft ... p_end-1] >= Pivot holds.
131 :
132 : // If X has duplicates, then whether or not Pivot is found,
133 : // X [p_start ... pleft-1] <= Pivot and
134 : // X [pleft ... p_end-1] >= Pivot holds.
135 :
136 196 : return (pleft) ;
137 : }
138 :
139 : //------------------------------------------------------------------------------
140 : // LG_msort_1b_create_merge_tasks
141 : //------------------------------------------------------------------------------
142 :
143 : // Recursively constructs ntasks tasks to merge two arrays, Left and Right,
144 : // into Sresult, where Left is L [pL_start...pL_end-1], Right is R
145 : // [pR_start...pR_end-1], and Sresult is S [pS_start...pS_start+total_work-1],
146 : // and where total_work is the total size of Left and Right.
147 : //
148 : // Task tid will merge L [L_task [tid] ... L_task [tid] + L_len [tid] - 1] and
149 : // R [R_task [tid] ... R_task [tid] + R_len [tid] -1] into the merged output
150 : // array S [S_task [tid] ... ]. The task tids created are t0 to
151 : // t0+ntasks-1.
152 :
153 452 : void LG_msort_1b_create_merge_tasks
154 : (
155 : // output:
156 : int64_t *LG_RESTRICT L_task, // L_task [t0...t0+ntasks-1] computed
157 : int64_t *LG_RESTRICT L_len, // L_len [t0...t0+ntasks-1] computed
158 : int64_t *LG_RESTRICT R_task, // R_task [t0...t0+ntasks-1] computed
159 : int64_t *LG_RESTRICT R_len, // R_len [t0...t0+ntasks-1] computed
160 : int64_t *LG_RESTRICT S_task, // S_task [t0...t0+ntasks-1] computed
161 : // input:
162 : const int t0, // first task tid to create
163 : const int ntasks, // # of tasks to create
164 : const int64_t pS_start, // merge into S [pS_start...]
165 : const int64_t *LG_RESTRICT L_0, // Left = L [pL_start...pL_end-1]
166 : const int64_t pL_start,
167 : const int64_t pL_end,
168 : const int64_t *LG_RESTRICT R_0, // Right = R [pR_start...pR_end-1]
169 : const int64_t pR_start,
170 : const int64_t pR_end
171 : )
172 : {
173 :
174 : //--------------------------------------------------------------------------
175 : // get problem size
176 : //--------------------------------------------------------------------------
177 :
178 452 : int64_t nleft = pL_end - pL_start ; // size of Left array
179 452 : int64_t nright = pR_end - pR_start ; // size of Right array
180 452 : int64_t total_work = nleft + nright ; // total work to do
181 : ASSERT (ntasks >= 1) ;
182 : ASSERT (total_work > 0) ;
183 :
184 : //--------------------------------------------------------------------------
185 : // create the tasks
186 : //--------------------------------------------------------------------------
187 :
188 452 : if (ntasks == 1)
189 : {
190 :
191 : //----------------------------------------------------------------------
192 : // a single task will merge all of Left and Right into Sresult
193 : //----------------------------------------------------------------------
194 :
195 256 : L_task [t0] = pL_start ; L_len [t0] = nleft ;
196 256 : R_task [t0] = pR_start ; R_len [t0] = nright ;
197 256 : S_task [t0] = pS_start ;
198 :
199 : }
200 : else
201 : {
202 :
203 : //----------------------------------------------------------------------
204 : // partition the Left and Right arrays for multiple merge tasks
205 : //----------------------------------------------------------------------
206 :
207 : int64_t pleft, pright ;
208 196 : if (nleft >= nright)
209 : {
210 : // split Left in half, and search for its pivot in Right
211 130 : pleft = (pL_end + pL_start) >> 1 ;
212 130 : pright = LG_msort_1b_binary_search (
213 : L_0, pleft,
214 : R_0, pR_start, pR_end) ;
215 : }
216 : else
217 : {
218 : // split Right in half, and search for its pivot in Left
219 66 : pright = (pR_end + pR_start) >> 1 ;
220 66 : pleft = LG_msort_1b_binary_search (
221 : R_0, pright,
222 : L_0, pL_start, pL_end) ;
223 : }
224 :
225 : //----------------------------------------------------------------------
226 : // partition the tasks according to the work of each partition
227 : //----------------------------------------------------------------------
228 :
229 : // work0 is the total work in the first partition
230 196 : int64_t work0 = (pleft - pL_start) + (pright - pR_start) ;
231 196 : int ntasks0 = (int) round ((double) ntasks *
232 196 : (((double) work0) / ((double) total_work))) ;
233 :
234 : // ensure at least one task is assigned to each partition
235 196 : ntasks0 = LAGRAPH_MAX (ntasks0, 1) ;
236 196 : ntasks0 = LAGRAPH_MIN (ntasks0, ntasks-1) ;
237 196 : int ntasks1 = ntasks - ntasks0 ;
238 :
239 : //----------------------------------------------------------------------
240 : // assign ntasks0 to the first half
241 : //----------------------------------------------------------------------
242 :
243 : // ntasks0 tasks merge L [pL_start...pleft-1] and R [pR_start..pright-1]
244 : // into the result S [pS_start...work0-1].
245 :
246 196 : LG_msort_1b_create_merge_tasks (
247 : L_task, L_len, R_task, R_len, S_task, t0, ntasks0, pS_start,
248 : L_0, pL_start, pleft,
249 : R_0, pR_start, pright) ;
250 :
251 : //----------------------------------------------------------------------
252 : // assign ntasks1 to the second half
253 : //----------------------------------------------------------------------
254 :
255 : // ntasks1 tasks merge L [pleft...pL_end-1] and R [pright...pR_end-1]
256 : // into the result S [pS_start+work0...pS_start+total_work].
257 :
258 196 : int t1 = t0 + ntasks0 ; // first task id of the second set of tasks
259 196 : int64_t pS_start1 = pS_start + work0 ; // 2nd set starts here in S
260 196 : LG_msort_1b_create_merge_tasks (
261 : L_task, L_len, R_task, R_len, S_task, t1, ntasks1, pS_start1,
262 : L_0, pleft, pL_end,
263 : R_0, pright, pR_end) ;
264 : }
265 452 : }
266 :
267 : //------------------------------------------------------------------------------
268 : // LG_msort_1b_merge: merge two sorted lists via a single thread
269 : //------------------------------------------------------------------------------
270 :
271 : // merge Left [0..nleft-1] and Right [0..nright-1] into S [0..nleft+nright-1] */
272 :
273 256 : static void LG_msort_1b_merge
274 : (
275 : int64_t *LG_RESTRICT S_0, // output of length nleft + nright
276 : const int64_t *LG_RESTRICT Left_0, // left input of length nleft
277 : const int64_t nleft,
278 : const int64_t *LG_RESTRICT Right_0, // right input of length nright
279 : const int64_t nright
280 : )
281 : {
282 : int64_t p, pleft, pright ;
283 :
284 : // merge the two inputs, Left and Right, while both inputs exist
285 3005484 : for (p = 0, pleft = 0, pright = 0 ; pleft < nleft && pright < nright ; p++)
286 : {
287 3005228 : if (LG_lt_1 (Left_0, pleft,
288 : Right_0, pright))
289 : {
290 : // S [p] = Left [pleft++]
291 1196146 : S_0 [p] = Left_0 [pleft] ;
292 1196146 : pleft++ ;
293 : }
294 : else
295 : {
296 : // S [p] = Right [pright++]
297 1809082 : S_0 [p] = Right_0 [pright] ;
298 1809082 : pright++ ;
299 : }
300 : }
301 :
302 : // either input is exhausted; copy the remaining list into S
303 256 : if (pleft < nleft)
304 : {
305 188 : int64_t nremaining = (nleft - pleft) ;
306 188 : memcpy (S_0 + p, Left_0 + pleft, nremaining * sizeof (int64_t)) ;
307 : }
308 68 : else if (pright < nright)
309 : {
310 68 : int64_t nremaining = (nright - pright) ;
311 68 : memcpy (S_0 + p, Right_0 + pright, nremaining * sizeof (int64_t)) ;
312 : }
313 256 : }
314 :
315 : //------------------------------------------------------------------------------
316 : // LG_msort1: parallel mergesort
317 : //------------------------------------------------------------------------------
318 :
319 10 : int LG_msort1
320 : (
321 : // input/output:
322 : int64_t *A_0, // size n array
323 : // input:
324 : const int64_t n,
325 : char *msg
326 : )
327 : {
328 :
329 : //--------------------------------------------------------------------------
330 : // check inputs
331 : //--------------------------------------------------------------------------
332 :
333 10 : LG_CLEAR_MSG ;
334 10 : int64_t *LG_RESTRICT W = NULL ;
335 10 : LG_ASSERT (A_0 != NULL, GrB_NULL_POINTER) ;
336 :
337 : //--------------------------------------------------------------------------
338 : // handle small problems with a single thread
339 : //--------------------------------------------------------------------------
340 :
341 10 : int nthreads = LG_nthreads_outer * LG_nthreads_inner ; // # threads to use
342 10 : if (nthreads <= 1 || n <= LG_BASECASE)
343 : {
344 : // sequential quicksort
345 4 : LG_qsort_1a (A_0, n) ;
346 4 : return (GrB_SUCCESS) ;
347 : }
348 :
349 : //--------------------------------------------------------------------------
350 : // determine # of tasks
351 : //--------------------------------------------------------------------------
352 :
353 : // determine the number of levels to create, which must always be an
354 : // even number. The # of levels is chosen to ensure that the # of leaves
355 : // of the task tree is between 4*nthreads and 16*nthreads.
356 :
357 : // 2 to 4 threads: 4 levels, 16 qsort leaves
358 : // 5 to 16 threads: 6 levels, 64 qsort leaves
359 : // 17 to 64 threads: 8 levels, 256 qsort leaves
360 : // 65 to 256 threads: 10 levels, 1024 qsort leaves
361 : // 256 to 1024 threads: 12 levels, 4096 qsort leaves
362 : // ...
363 :
364 6 : int k = (int) (2 + 2 * ceil (log2 ((double) nthreads) / 2)) ;
365 6 : int ntasks = 1 << k ;
366 :
367 : //--------------------------------------------------------------------------
368 : // allocate workspace
369 : //--------------------------------------------------------------------------
370 :
371 6 : LG_TRY (LAGraph_Malloc ((void **) &W, n + 6*ntasks + 1, sizeof (int64_t), msg)) ;
372 :
373 4 : int64_t *T = W ;
374 4 : int64_t *LG_RESTRICT W_0 = T ; T += n ;
375 4 : int64_t *LG_RESTRICT L_task = T ; T += ntasks ;
376 4 : int64_t *LG_RESTRICT L_len = T ; T += ntasks ;
377 4 : int64_t *LG_RESTRICT R_task = T ; T += ntasks ;
378 4 : int64_t *LG_RESTRICT R_len = T ; T += ntasks ;
379 4 : int64_t *LG_RESTRICT S_task = T ; T += ntasks ;
380 4 : int64_t *LG_RESTRICT Slice = T ; T += (ntasks+1) ;
381 :
382 : //--------------------------------------------------------------------------
383 : // partition and sort the leaves
384 : //--------------------------------------------------------------------------
385 :
386 4 : LG_eslice (Slice, n, ntasks) ;
387 : int tid ;
388 : #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
389 68 : for (tid = 0 ; tid < ntasks ; tid++)
390 : {
391 64 : int64_t leaf = Slice [tid] ;
392 64 : int64_t leafsize = Slice [tid+1] - leaf ;
393 64 : LG_qsort_1a (A_0 + leaf, leafsize) ;
394 : }
395 :
396 : //--------------------------------------------------------------------------
397 : // merge each level
398 : //--------------------------------------------------------------------------
399 :
400 4 : int nt = 1 ;
401 12 : for ( ; k >= 2 ; k -= 2)
402 : {
403 :
404 : //----------------------------------------------------------------------
405 : // merge level k into level k-1, from A into W
406 : //----------------------------------------------------------------------
407 :
408 : // FUTURE: skip k and k-1 for each group of 4 sublists of A if they are
409 : // already sorted with respect to each other.
410 :
411 : // this could be done in parallel if ntasks was large
412 48 : for (tid = 0 ; tid < ntasks ; tid += 2*nt)
413 : {
414 : // create 2*nt tasks to merge two A sublists into one W sublist
415 40 : LG_msort_1b_create_merge_tasks (
416 40 : L_task, L_len, R_task, R_len, S_task, tid, 2*nt, Slice [tid],
417 40 : A_0, Slice [tid], Slice [tid+nt],
418 40 : A_0, Slice [tid+nt], Slice [tid+2*nt]) ;
419 : }
420 :
421 : #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
422 136 : for (tid = 0 ; tid < ntasks ; tid++)
423 : {
424 : // merge A [pL...pL+nL-1] and A [pR...pR+nR-1] into W [pS..]
425 128 : int64_t pL = L_task [tid], nL = L_len [tid] ;
426 128 : int64_t pR = R_task [tid], nR = R_len [tid] ;
427 128 : int64_t pS = S_task [tid] ;
428 :
429 128 : LG_msort_1b_merge (
430 128 : W_0 + pS,
431 128 : A_0 + pL, nL,
432 128 : A_0 + pR, nR) ;
433 : }
434 8 : nt = 2*nt ;
435 :
436 : //----------------------------------------------------------------------
437 : // merge level k-1 into level k-2, from W into A
438 : //----------------------------------------------------------------------
439 :
440 : // this could be done in parallel if ntasks was large
441 28 : for (tid = 0 ; tid < ntasks ; tid += 2*nt)
442 : {
443 : // create 2*nt tasks to merge two W sublists into one A sublist
444 20 : LG_msort_1b_create_merge_tasks (
445 20 : L_task, L_len, R_task, R_len, S_task, tid, 2*nt, Slice [tid],
446 20 : W_0, Slice [tid], Slice [tid+nt],
447 20 : W_0, Slice [tid+nt], Slice [tid+2*nt]) ;
448 : }
449 :
450 : #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
451 136 : for (tid = 0 ; tid < ntasks ; tid++)
452 : {
453 : // merge A [pL...pL+nL-1] and A [pR...pR+nR-1] into W [pS..]
454 128 : int64_t pL = L_task [tid], nL = L_len [tid] ;
455 128 : int64_t pR = R_task [tid], nR = R_len [tid] ;
456 128 : int64_t pS = S_task [tid] ;
457 128 : LG_msort_1b_merge (
458 128 : A_0 + pS,
459 128 : W_0 + pL, nL,
460 128 : W_0 + pR, nR) ;
461 : }
462 8 : nt = 2*nt ;
463 : }
464 :
465 : //--------------------------------------------------------------------------
466 : // free workspace and return result
467 : //--------------------------------------------------------------------------
468 :
469 4 : LG_FREE_ALL ;
470 4 : return (GrB_SUCCESS) ;
471 : }
|