Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LG_msort2: sort a 2-by-n list of integers, using A[0:1][ ] as the key
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Timothy A. Davis, Texas A&M University
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // A parallel mergesort of an array of 2-by-n integers. Each key
19 : // consists of two integers.
20 :
21 : #define LG_FREE_ALL LAGraph_Free ((void **) &W, NULL) ;
22 :
23 : #include "LG_internal.h"
24 :
25 : //------------------------------------------------------------------------------
26 : // prototype only needed for LG_msort2
27 : //------------------------------------------------------------------------------
28 :
29 : void LG_msort_2b_create_merge_tasks
30 : (
31 : // output:
32 : int64_t *LG_RESTRICT L_task, // L_task [t0...t0+ntasks-1] computed
33 : int64_t *LG_RESTRICT L_len, // L_len [t0...t0+ntasks-1] computed
34 : int64_t *LG_RESTRICT R_task, // R_task [t0...t0+ntasks-1] computed
35 : int64_t *LG_RESTRICT R_len, // R_len [t0...t0+ntasks-1] computed
36 : int64_t *LG_RESTRICT S_task, // S_task [t0...t0+ntasks-1] computed
37 : // input:
38 : const int t0, // first task tid to create
39 : const int ntasks, // # of tasks to create
40 : const int64_t pS_start, // merge into S [pS_start...]
41 : const int64_t *LG_RESTRICT L_0, // Left = L [pL_start...pL_end-1]
42 : const int64_t *LG_RESTRICT L_1,
43 : const int64_t pL_start,
44 : const int64_t pL_end,
45 : const int64_t *LG_RESTRICT R_0, // Right = R [pR_start...pR_end-1]
46 : const int64_t *LG_RESTRICT R_1,
47 : const int64_t pR_start,
48 : const int64_t pR_end
49 : ) ;
50 :
51 : //------------------------------------------------------------------------------
52 : // LG_msort_2b_binary_search: binary search for the pivot
53 : //------------------------------------------------------------------------------
54 :
55 : // The Pivot value is Y [pivot], and a binary search for the Pivot is made in
56 : // the array X [p_pstart...p_end-1], which is sorted in non-decreasing order on
57 : // input. The return value is pleft, where
58 : //
59 : // X [p_start ... pleft-1] <= Pivot and
60 : // X [pleft ... p_end-1] >= Pivot holds.
61 : //
62 : // pleft is returned in the range p_start to p_end. If pleft is p_start, then
63 : // the Pivot is smaller than all entries in X [p_start...p_end-1], and the left
64 : // list X [p_start...pleft-1] is empty. If pleft is p_end, then the Pivot is
65 : // larger than all entries in X [p_start...p_end-1], and the right list X
66 : // [pleft...p_end-1] is empty.
67 :
68 196 : static int64_t LG_msort_2b_binary_search // return pleft
69 : (
70 : const int64_t *LG_RESTRICT Y_0, // Pivot is Y [pivot]
71 : const int64_t *LG_RESTRICT Y_1,
72 : const int64_t pivot,
73 : const int64_t *LG_RESTRICT X_0, // search in X [p_start..p_end_-1]
74 : const int64_t *LG_RESTRICT X_1,
75 : const int64_t p_start,
76 : const int64_t p_end
77 : )
78 : {
79 :
80 : //--------------------------------------------------------------------------
81 : // find where the Pivot appears in X
82 : //--------------------------------------------------------------------------
83 :
84 : // binary search of X [p_start...p_end-1] for the Pivot
85 196 : int64_t pleft = p_start ;
86 196 : int64_t pright = p_end - 1 ;
87 2980 : while (pleft < pright)
88 : {
89 2784 : int64_t pmiddle = (pleft + pright) >> 1 ;
90 : // less = (X [pmiddle] < Pivot)
91 2784 : bool less = LG_lt_2 (X_0, X_1, pmiddle,
92 : Y_0, Y_1, pivot) ;
93 2784 : pleft = less ? (pmiddle+1) : pleft ;
94 2784 : pright = less ? pright : pmiddle ;
95 : }
96 :
97 : // binary search is narrowed down to a single item
98 : // or it has found the list is empty:
99 : ASSERT (pleft == pright || pleft == pright + 1) ;
100 :
101 : // If found is true then X [pleft == pright] == Pivot. If duplicates
102 : // appear then X [pleft] is any one of the entries equal to the Pivot
103 : // in the list. If found is false then
104 : // X [p_start ... pleft-1] < Pivot and
105 : // X [pleft+1 ... p_end-1] > Pivot holds.
106 : // The value X [pleft] may be either < or > Pivot.
107 196 : bool found = (pleft == pright) && LG_eq_2 (X_0, X_1, pleft,
108 : Y_0, Y_1, pivot) ;
109 :
110 : // Modify pleft and pright:
111 196 : if (!found && (pleft == pright))
112 : {
113 100 : if (LG_lt_2 (X_0, X_1, pleft,
114 : Y_0, Y_1, pivot))
115 : {
116 2 : pleft++ ;
117 : }
118 : else
119 : {
120 : // pright++ ; // (not needed)
121 : }
122 : }
123 :
124 : //--------------------------------------------------------------------------
125 : // return result
126 : //--------------------------------------------------------------------------
127 :
128 : // If found is false then
129 : // X [p_start ... pleft-1] < Pivot and
130 : // X [pleft ... p_end-1] > Pivot holds,
131 : // and pleft-1 == pright
132 :
133 : // If X has no duplicates, then whether or not Pivot is found,
134 : // X [p_start ... pleft-1] < Pivot and
135 : // X [pleft ... p_end-1] >= Pivot holds.
136 :
137 : // If X has duplicates, then whether or not Pivot is found,
138 : // X [p_start ... pleft-1] <= Pivot and
139 : // X [pleft ... p_end-1] >= Pivot holds.
140 :
141 196 : return (pleft) ;
142 : }
143 :
144 : //------------------------------------------------------------------------------
145 : // LG_msort_2b_create_merge_tasks
146 : //------------------------------------------------------------------------------
147 :
148 : // Recursively constructs ntasks tasks to merge two arrays, Left and Right,
149 : // into Sresult, where Left is L [pL_start...pL_end-1], Right is R
150 : // [pR_start...pR_end-1], and Sresult is S [pS_start...pS_start+total_work-1],
151 : // and where total_work is the total size of Left and Right.
152 : //
153 : // Task tid will merge L [L_task [tid] ... L_task [tid] + L_len [tid] - 1] and
154 : // R [R_task [tid] ... R_task [tid] + R_len [tid] -1] into the merged output
155 : // array S [S_task [tid] ... ]. The task tids created are t0 to
156 : // t0+ntasks-1.
157 :
158 452 : void LG_msort_2b_create_merge_tasks
159 : (
160 : // output:
161 : int64_t *LG_RESTRICT L_task, // L_task [t0...t0+ntasks-1] computed
162 : int64_t *LG_RESTRICT L_len, // L_len [t0...t0+ntasks-1] computed
163 : int64_t *LG_RESTRICT R_task, // R_task [t0...t0+ntasks-1] computed
164 : int64_t *LG_RESTRICT R_len, // R_len [t0...t0+ntasks-1] computed
165 : int64_t *LG_RESTRICT S_task, // S_task [t0...t0+ntasks-1] computed
166 : // input:
167 : const int t0, // first task tid to create
168 : const int ntasks, // # of tasks to create
169 : const int64_t pS_start, // merge into S [pS_start...]
170 : const int64_t *LG_RESTRICT L_0, // Left = L [pL_start...pL_end-1]
171 : const int64_t *LG_RESTRICT L_1,
172 : const int64_t pL_start,
173 : const int64_t pL_end,
174 : const int64_t *LG_RESTRICT R_0, // Right = R [pR_start...pR_end-1]
175 : const int64_t *LG_RESTRICT R_1,
176 : const int64_t pR_start,
177 : const int64_t pR_end
178 : )
179 : {
180 :
181 : //--------------------------------------------------------------------------
182 : // get problem size
183 : //--------------------------------------------------------------------------
184 :
185 452 : int64_t nleft = pL_end - pL_start ; // size of Left array
186 452 : int64_t nright = pR_end - pR_start ; // size of Right array
187 452 : int64_t total_work = nleft + nright ; // total work to do
188 : ASSERT (ntasks >= 1) ;
189 : ASSERT (total_work > 0) ;
190 :
191 : //--------------------------------------------------------------------------
192 : // create the tasks
193 : //--------------------------------------------------------------------------
194 :
195 452 : if (ntasks == 1)
196 : {
197 :
198 : //----------------------------------------------------------------------
199 : // a single task will merge all of Left and Right into Sresult
200 : //----------------------------------------------------------------------
201 :
202 256 : L_task [t0] = pL_start ; L_len [t0] = nleft ;
203 256 : R_task [t0] = pR_start ; R_len [t0] = nright ;
204 256 : S_task [t0] = pS_start ;
205 :
206 : }
207 : else
208 : {
209 :
210 : //----------------------------------------------------------------------
211 : // partition the Left and Right arrays for multiple merge tasks
212 : //----------------------------------------------------------------------
213 :
214 : int64_t pleft, pright ;
215 196 : if (nleft >= nright)
216 : {
217 : // split Left in half, and search for its pivot in Right
218 134 : pleft = (pL_end + pL_start) >> 1 ;
219 134 : pright = LG_msort_2b_binary_search (
220 : L_0, L_1, pleft,
221 : R_0, R_1, pR_start, pR_end) ;
222 : }
223 : else
224 : {
225 : // split Right in half, and search for its pivot in Left
226 62 : pright = (pR_end + pR_start) >> 1 ;
227 62 : pleft = LG_msort_2b_binary_search (
228 : R_0, R_1, pright,
229 : L_0, L_1, pL_start, pL_end) ;
230 : }
231 :
232 : //----------------------------------------------------------------------
233 : // partition the tasks according to the work of each partition
234 : //----------------------------------------------------------------------
235 :
236 : // work0 is the total work in the first partition
237 196 : int64_t work0 = (pleft - pL_start) + (pright - pR_start) ;
238 196 : int ntasks0 = (int) round ((double) ntasks *
239 196 : (((double) work0) / ((double) total_work))) ;
240 :
241 : // ensure at least one task is assigned to each partition
242 196 : ntasks0 = LAGRAPH_MAX (ntasks0, 1) ;
243 196 : ntasks0 = LAGRAPH_MIN (ntasks0, ntasks-1) ;
244 196 : int ntasks1 = ntasks - ntasks0 ;
245 :
246 : //----------------------------------------------------------------------
247 : // assign ntasks0 to the first half
248 : //----------------------------------------------------------------------
249 :
250 : // ntasks0 tasks merge L [pL_start...pleft-1] and R [pR_start..pright-1]
251 : // into the result S [pS_start...work0-1].
252 :
253 196 : LG_msort_2b_create_merge_tasks (
254 : L_task, L_len, R_task, R_len, S_task, t0, ntasks0, pS_start,
255 : L_0, L_1, pL_start, pleft,
256 : R_0, R_1, pR_start, pright) ;
257 :
258 : //----------------------------------------------------------------------
259 : // assign ntasks1 to the second half
260 : //----------------------------------------------------------------------
261 :
262 : // ntasks1 tasks merge L [pleft...pL_end-1] and R [pright...pR_end-1]
263 : // into the result S [pS_start+work0...pS_start+total_work].
264 :
265 196 : int t1 = t0 + ntasks0 ; // first task id of the second set of tasks
266 196 : int64_t pS_start1 = pS_start + work0 ; // 2nd set starts here in S
267 196 : LG_msort_2b_create_merge_tasks (
268 : L_task, L_len, R_task, R_len, S_task, t1, ntasks1, pS_start1,
269 : L_0, L_1, pleft, pL_end,
270 : R_0, R_1, pright, pR_end) ;
271 : }
272 452 : }
273 :
274 : //------------------------------------------------------------------------------
275 : // LG_msort_2b_merge: merge two sorted lists via a single thread
276 : //------------------------------------------------------------------------------
277 :
278 : // merge Left [0..nleft-1] and Right [0..nright-1] into S [0..nleft+nright-1] */
279 :
280 256 : static void LG_msort_2b_merge
281 : (
282 : int64_t *LG_RESTRICT S_0, // output of length nleft + nright
283 : int64_t *LG_RESTRICT S_1,
284 : const int64_t *LG_RESTRICT Left_0, // left input of length nleft
285 : const int64_t *LG_RESTRICT Left_1,
286 : const int64_t nleft,
287 : const int64_t *LG_RESTRICT Right_0, // right input of length nright
288 : const int64_t *LG_RESTRICT Right_1,
289 : const int64_t nright
290 : )
291 : {
292 : int64_t p, pleft, pright ;
293 :
294 : // merge the two inputs, Left and Right, while both inputs exist
295 3005622 : for (p = 0, pleft = 0, pright = 0 ; pleft < nleft && pright < nright ; p++)
296 : {
297 3005366 : if (LG_lt_2 (Left_0, Left_1, pleft,
298 : Right_0, Right_1, pright))
299 : {
300 : // S [p] = Left [pleft++]
301 1196304 : S_0 [p] = Left_0 [pleft] ;
302 1196304 : S_1 [p] = Left_1 [pleft] ;
303 1196304 : pleft++ ;
304 : }
305 : else
306 : {
307 : // S [p] = Right [pright++]
308 1809062 : S_0 [p] = Right_0 [pright] ;
309 1809062 : S_1 [p] = Right_1 [pright] ;
310 1809062 : pright++ ;
311 : }
312 : }
313 :
314 : // either input is exhausted; copy the remaining list into S
315 256 : if (pleft < nleft)
316 : {
317 170 : int64_t nremaining = (nleft - pleft) ;
318 170 : memcpy (S_0 + p, Left_0 + pleft, nremaining * sizeof (int64_t)) ;
319 170 : memcpy (S_1 + p, Left_1 + pleft, nremaining * sizeof (int64_t)) ;
320 : }
321 86 : else if (pright < nright)
322 : {
323 86 : int64_t nremaining = (nright - pright) ;
324 86 : memcpy (S_0 + p, Right_0 + pright, nremaining * sizeof (int64_t)) ;
325 86 : memcpy (S_1 + p, Right_1 + pright, nremaining * sizeof (int64_t)) ;
326 : }
327 256 : }
328 :
329 : //------------------------------------------------------------------------------
330 : // LG_msort2: parallel mergesort
331 : //------------------------------------------------------------------------------
332 :
333 1663 : int LG_msort2
334 : (
335 : // input/output:
336 : int64_t *A_0, // size n array
337 : int64_t *A_1, // size n array
338 : // input:
339 : const int64_t n,
340 : char *msg
341 : )
342 : {
343 :
344 : //--------------------------------------------------------------------------
345 : // check inputs
346 : //--------------------------------------------------------------------------
347 :
348 1663 : LG_CLEAR_MSG ;
349 1663 : int64_t *LG_RESTRICT W = NULL ;
350 1663 : LG_ASSERT (A_0 != NULL, GrB_NULL_POINTER) ;
351 1663 : LG_ASSERT (A_1 != NULL, GrB_NULL_POINTER) ;
352 :
353 : //--------------------------------------------------------------------------
354 : // handle small problems with a single thread
355 : //--------------------------------------------------------------------------
356 :
357 1663 : int nthreads = LG_nthreads_outer * LG_nthreads_inner ; // # threads to use
358 1663 : if (nthreads <= 1 || n <= LG_BASECASE)
359 : {
360 : // sequential quicksort
361 1657 : LG_qsort_2 (A_0, A_1, n) ;
362 1657 : return (GrB_SUCCESS) ;
363 : }
364 :
365 : //--------------------------------------------------------------------------
366 : // determine # of tasks
367 : //--------------------------------------------------------------------------
368 :
369 : // determine the number of levels to create, which must always be an
370 : // even number. The # of levels is chosen to ensure that the # of leaves
371 : // of the task tree is between 4*nthreads and 16*nthreads.
372 :
373 : // 2 to 4 threads: 4 levels, 16 qsort leaves
374 : // 5 to 16 threads: 6 levels, 64 qsort leaves
375 : // 17 to 64 threads: 8 levels, 256 qsort leaves
376 : // 65 to 256 threads: 10 levels, 1024 qsort leaves
377 : // 256 to 1024 threads: 12 levels, 4096 qsort leaves
378 : // ...
379 :
380 6 : int k = (int) (2 + 2 * ceil (log2 ((double) nthreads) / 2)) ;
381 6 : int ntasks = 1 << k ;
382 :
383 : //--------------------------------------------------------------------------
384 : // allocate workspace
385 : //--------------------------------------------------------------------------
386 :
387 6 : LG_TRY (LAGraph_Malloc ((void **) &W, 2*n + 6*ntasks + 1, sizeof (int64_t),
388 : msg)) ;
389 :
390 4 : int64_t *T = W ;
391 4 : int64_t *LG_RESTRICT W_0 = T ; T += n ;
392 4 : int64_t *LG_RESTRICT W_1 = T ; T += n ;
393 4 : int64_t *LG_RESTRICT L_task = T ; T += ntasks ;
394 4 : int64_t *LG_RESTRICT L_len = T ; T += ntasks ;
395 4 : int64_t *LG_RESTRICT R_task = T ; T += ntasks ;
396 4 : int64_t *LG_RESTRICT R_len = T ; T += ntasks ;
397 4 : int64_t *LG_RESTRICT S_task = T ; T += ntasks ;
398 4 : int64_t *LG_RESTRICT Slice = T ; T += (ntasks+1) ;
399 :
400 : //--------------------------------------------------------------------------
401 : // partition and sort the leaves
402 : //--------------------------------------------------------------------------
403 :
404 4 : LG_eslice (Slice, n, ntasks) ;
405 : int tid ;
406 : #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
407 68 : for (tid = 0 ; tid < ntasks ; tid++)
408 : {
409 64 : int64_t leaf = Slice [tid] ;
410 64 : int64_t leafsize = Slice [tid+1] - leaf ;
411 64 : LG_qsort_2 (A_0 + leaf, A_1 + leaf, leafsize) ;
412 : }
413 :
414 : //--------------------------------------------------------------------------
415 : // merge each level
416 : //--------------------------------------------------------------------------
417 :
418 4 : int nt = 1 ;
419 12 : for ( ; k >= 2 ; k -= 2)
420 : {
421 :
422 : //----------------------------------------------------------------------
423 : // merge level k into level k-1, from A into W
424 : //----------------------------------------------------------------------
425 :
426 : // FUTURE: skip k and k-1 for each group of 4 sublists of A if they are
427 : // already sorted with respect to each other.
428 :
429 : // this could be done in parallel if ntasks was large
430 48 : for (tid = 0 ; tid < ntasks ; tid += 2*nt)
431 : {
432 : // create 2*nt tasks to merge two A sublists into one W sublist
433 40 : LG_msort_2b_create_merge_tasks (
434 40 : L_task, L_len, R_task, R_len, S_task, tid, 2*nt, Slice [tid],
435 40 : A_0, A_1, Slice [tid], Slice [tid+nt],
436 40 : A_0, A_1, Slice [tid+nt], Slice [tid+2*nt]) ;
437 : }
438 :
439 : #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
440 136 : for (tid = 0 ; tid < ntasks ; tid++)
441 : {
442 : // merge A [pL...pL+nL-1] and A [pR...pR+nR-1] into W [pS..]
443 128 : int64_t pL = L_task [tid], nL = L_len [tid] ;
444 128 : int64_t pR = R_task [tid], nR = R_len [tid] ;
445 128 : int64_t pS = S_task [tid] ;
446 :
447 128 : LG_msort_2b_merge (
448 128 : W_0 + pS, W_1 + pS,
449 128 : A_0 + pL, A_1 + pL, nL,
450 128 : A_0 + pR, A_1 + pR, nR) ;
451 : }
452 8 : nt = 2*nt ;
453 :
454 : //----------------------------------------------------------------------
455 : // merge level k-1 into level k-2, from W into A
456 : //----------------------------------------------------------------------
457 :
458 : // this could be done in parallel if ntasks was large
459 28 : for (tid = 0 ; tid < ntasks ; tid += 2*nt)
460 : {
461 : // create 2*nt tasks to merge two W sublists into one A sublist
462 20 : LG_msort_2b_create_merge_tasks (
463 20 : L_task, L_len, R_task, R_len, S_task, tid, 2*nt, Slice [tid],
464 20 : W_0, W_1, Slice [tid], Slice [tid+nt],
465 20 : W_0, W_1, Slice [tid+nt], Slice [tid+2*nt]) ;
466 : }
467 :
468 : #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
469 136 : for (tid = 0 ; tid < ntasks ; tid++)
470 : {
471 : // merge A [pL...pL+nL-1] and A [pR...pR+nR-1] into W [pS..]
472 128 : int64_t pL = L_task [tid], nL = L_len [tid] ;
473 128 : int64_t pR = R_task [tid], nR = R_len [tid] ;
474 128 : int64_t pS = S_task [tid] ;
475 128 : LG_msort_2b_merge (
476 128 : A_0 + pS, A_1 + pS,
477 128 : W_0 + pL, W_1 + pL, nL,
478 128 : W_0 + pR, W_1 + pR, nR) ;
479 : }
480 8 : nt = 2*nt ;
481 : }
482 :
483 : //--------------------------------------------------------------------------
484 : // free workspace and return result
485 : //--------------------------------------------------------------------------
486 :
487 4 : LG_FREE_ALL ;
488 4 : return (GrB_SUCCESS) ;
489 : }
|