LCOV - code coverage report
Current view: top level - src/utility - LG_msort1.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: cc56ed4. Current time (UTC): 2024-08-30T17:14:30Z Lines: 107 107 100.0 %
Date: 2024-08-30 17:16:41 Functions: 4 4 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LG_msort1: sort a list of integers
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Timothy A. Davis, Texas A&M University
      15             : 
      16             : //------------------------------------------------------------------------------
      17             : 
      18             : // A parallel mergesort of an array of n integers.
      19             : 
      20             : #define LG_FREE_ALL LAGraph_Free ((void **) &W, NULL) ;
      21             : 
      22             : #include "LG_internal.h"
      23             : 
      24             : //------------------------------------------------------------------------------
      25             : // prototype only needed for LG_msort1
      26             : //------------------------------------------------------------------------------
      27             : 
      28             : void LG_msort_1b_create_merge_tasks
      29             : (
      30             :     // output:
      31             :     int64_t *LG_RESTRICT L_task,        // L_task [t0...t0+ntasks-1] computed
      32             :     int64_t *LG_RESTRICT L_len,         // L_len  [t0...t0+ntasks-1] computed
      33             :     int64_t *LG_RESTRICT R_task,        // R_task [t0...t0+ntasks-1] computed
      34             :     int64_t *LG_RESTRICT R_len,         // R_len  [t0...t0+ntasks-1] computed
      35             :     int64_t *LG_RESTRICT S_task,        // S_task [t0...t0+ntasks-1] computed
      36             :     // input:
      37             :     const int t0,                       // first task tid to create
      38             :     const int ntasks,                   // # of tasks to create
      39             :     const int64_t pS_start,             // merge into S [pS_start...]
      40             :     const int64_t *LG_RESTRICT L_0,     // Left = L [pL_start...pL_end-1]
      41             :     const int64_t pL_start,
      42             :     const int64_t pL_end,
      43             :     const int64_t *LG_RESTRICT R_0,     // Right = R [pR_start...pR_end-1]
      44             :     const int64_t pR_start,
      45             :     const int64_t pR_end
      46             : ) ;
      47             : 
      48             : //------------------------------------------------------------------------------
      49             : // LG_msort_1b_binary_search: binary search for the pivot
      50             : //------------------------------------------------------------------------------
      51             : 
      52             : // The Pivot value is Y [pivot], and a binary search for the Pivot is made in
      53             : // the array X [p_pstart...p_end-1], which is sorted in non-decreasing order on
      54             : // input.  The return value is pleft, where
      55             : //
      56             : //    X [p_start ... pleft-1] <= Pivot and
      57             : //    X [pleft ... p_end-1] >= Pivot holds.
      58             : //
      59             : // pleft is returned in the range p_start to p_end.  If pleft is p_start, then
      60             : // the Pivot is smaller than all entries in X [p_start...p_end-1], and the left
      61             : // list X [p_start...pleft-1] is empty.  If pleft is p_end, then the Pivot is
      62             : // larger than all entries in X [p_start...p_end-1], and the right list X
      63             : // [pleft...p_end-1] is empty.
      64             : 
      65         196 : static int64_t LG_msort_1b_binary_search    // return pleft
      66             : (
      67             :     const int64_t *LG_RESTRICT Y_0,         // Pivot is Y [pivot]
      68             :     const int64_t pivot,
      69             :     const int64_t *LG_RESTRICT X_0,         // search in X [p_start..p_end_-1]
      70             :     const int64_t p_start,
      71             :     const int64_t p_end
      72             : )
      73             : {
      74             : 
      75             :     //--------------------------------------------------------------------------
      76             :     // find where the Pivot appears in X
      77             :     //--------------------------------------------------------------------------
      78             : 
      79             :     // binary search of X [p_start...p_end-1] for the Pivot
      80         196 :     int64_t pleft = p_start ;
      81         196 :     int64_t pright = p_end - 1 ;
      82        2982 :     while (pleft < pright)
      83             :     {
      84        2786 :         int64_t pmiddle = (pleft + pright) >> 1 ;
      85             :         // less = (X [pmiddle] < Pivot)
      86        2786 :         bool less = LG_lt_1 (X_0, pmiddle,
      87             :                              Y_0, pivot) ;
      88        2786 :         pleft  = less ? (pmiddle+1) : pleft ;
      89        2786 :         pright = less ? pright : pmiddle ;
      90             :     }
      91             : 
      92             :     // binary search is narrowed down to a single item
      93             :     // or it has found the list is empty:
      94             :     ASSERT (pleft == pright || pleft == pright + 1) ;
      95             : 
      96             :     // If found is true then X [pleft == pright] == Pivot.  If duplicates
      97             :     // appear then X [pleft] is any one of the entries equal to the Pivot
      98             :     // in the list.  If found is false then
      99             :     //    X [p_start ... pleft-1] < Pivot and
     100             :     //    X [pleft+1 ... p_end-1] > Pivot holds.
     101             :     //    The value X [pleft] may be either < or > Pivot.
     102         196 :     bool found = (pleft == pright) && LG_eq_1 (X_0, pleft,
     103             :                                                Y_0, pivot) ;
     104             : 
     105             :     // Modify pleft and pright:
     106         196 :     if (!found && (pleft == pright))
     107             :     {
     108          26 :         if (LG_lt_1 (X_0, pleft,
     109             :                      Y_0, pivot))
     110             :         {
     111           2 :             pleft++ ;
     112             :         }
     113             :         else
     114             :         {
     115             : //          pright++ ;  // (not needed)
     116             :         }
     117             :     }
     118             : 
     119             :     //--------------------------------------------------------------------------
     120             :     // return result
     121             :     //--------------------------------------------------------------------------
     122             : 
     123             :     // If found is false then
     124             :     //    X [p_start ... pleft-1] < Pivot and
     125             :     //    X [pleft ... p_end-1] > Pivot holds,
     126             :     //    and pleft-1 == pright
     127             : 
     128             :     // If X has no duplicates, then whether or not Pivot is found,
     129             :     //    X [p_start ... pleft-1] < Pivot and
     130             :     //    X [pleft ... p_end-1] >= Pivot holds.
     131             : 
     132             :     // If X has duplicates, then whether or not Pivot is found,
     133             :     //    X [p_start ... pleft-1] <= Pivot and
     134             :     //    X [pleft ... p_end-1] >= Pivot holds.
     135             : 
     136         196 :     return (pleft) ;
     137             : }
     138             : 
     139             : //------------------------------------------------------------------------------
     140             : // LG_msort_1b_create_merge_tasks
     141             : //------------------------------------------------------------------------------
     142             : 
     143             : // Recursively constructs ntasks tasks to merge two arrays, Left and Right,
     144             : // into Sresult, where Left is L [pL_start...pL_end-1], Right is R
     145             : // [pR_start...pR_end-1], and Sresult is S [pS_start...pS_start+total_work-1],
     146             : // and where total_work is the total size of Left and Right.
     147             : //
     148             : // Task tid will merge L [L_task [tid] ... L_task [tid] + L_len [tid] - 1] and
     149             : // R [R_task [tid] ... R_task [tid] + R_len [tid] -1] into the merged output
     150             : // array S [S_task [tid] ... ].  The task tids created are t0 to
     151             : // t0+ntasks-1.
     152             : 
     153         452 : void LG_msort_1b_create_merge_tasks
     154             : (
     155             :     // output:
     156             :     int64_t *LG_RESTRICT L_task,        // L_task [t0...t0+ntasks-1] computed
     157             :     int64_t *LG_RESTRICT L_len,         // L_len  [t0...t0+ntasks-1] computed
     158             :     int64_t *LG_RESTRICT R_task,        // R_task [t0...t0+ntasks-1] computed
     159             :     int64_t *LG_RESTRICT R_len,         // R_len  [t0...t0+ntasks-1] computed
     160             :     int64_t *LG_RESTRICT S_task,        // S_task [t0...t0+ntasks-1] computed
     161             :     // input:
     162             :     const int t0,                       // first task tid to create
     163             :     const int ntasks,                   // # of tasks to create
     164             :     const int64_t pS_start,             // merge into S [pS_start...]
     165             :     const int64_t *LG_RESTRICT L_0,     // Left = L [pL_start...pL_end-1]
     166             :     const int64_t pL_start,
     167             :     const int64_t pL_end,
     168             :     const int64_t *LG_RESTRICT R_0,     // Right = R [pR_start...pR_end-1]
     169             :     const int64_t pR_start,
     170             :     const int64_t pR_end
     171             : )
     172             : {
     173             : 
     174             :     //--------------------------------------------------------------------------
     175             :     // get problem size
     176             :     //--------------------------------------------------------------------------
     177             : 
     178         452 :     int64_t nleft  = pL_end - pL_start ;        // size of Left array
     179         452 :     int64_t nright = pR_end - pR_start ;        // size of Right array
     180         452 :     int64_t total_work = nleft + nright ;       // total work to do
     181             :     ASSERT (ntasks >= 1) ;
     182             :     ASSERT (total_work > 0) ;
     183             : 
     184             :     //--------------------------------------------------------------------------
     185             :     // create the tasks
     186             :     //--------------------------------------------------------------------------
     187             : 
     188         452 :     if (ntasks == 1)
     189             :     {
     190             : 
     191             :         //----------------------------------------------------------------------
     192             :         // a single task will merge all of Left and Right into Sresult
     193             :         //----------------------------------------------------------------------
     194             : 
     195         256 :         L_task [t0] = pL_start ; L_len [t0] = nleft ;
     196         256 :         R_task [t0] = pR_start ; R_len [t0] = nright ;
     197         256 :         S_task [t0] = pS_start ;
     198             : 
     199             :     }
     200             :     else
     201             :     {
     202             : 
     203             :         //----------------------------------------------------------------------
     204             :         // partition the Left and Right arrays for multiple merge tasks
     205             :         //----------------------------------------------------------------------
     206             : 
     207             :         int64_t pleft, pright ;
     208         196 :         if (nleft >= nright)
     209             :         {
     210             :             // split Left in half, and search for its pivot in Right
     211         130 :             pleft = (pL_end + pL_start) >> 1 ;
     212         130 :             pright = LG_msort_1b_binary_search (
     213             :                         L_0, pleft,
     214             :                         R_0, pR_start, pR_end) ;
     215             :         }
     216             :         else
     217             :         {
     218             :             // split Right in half, and search for its pivot in Left
     219          66 :             pright = (pR_end + pR_start) >> 1 ;
     220          66 :             pleft = LG_msort_1b_binary_search (
     221             :                         R_0, pright,
     222             :                         L_0, pL_start, pL_end) ;
     223             :         }
     224             : 
     225             :         //----------------------------------------------------------------------
     226             :         // partition the tasks according to the work of each partition
     227             :         //----------------------------------------------------------------------
     228             : 
     229             :         // work0 is the total work in the first partition
     230         196 :         int64_t work0 = (pleft - pL_start) + (pright - pR_start) ;
     231         196 :         int ntasks0 = (int) round ((double) ntasks *
     232         196 :             (((double) work0) / ((double) total_work))) ;
     233             : 
     234             :         // ensure at least one task is assigned to each partition
     235         196 :         ntasks0 = LAGRAPH_MAX (ntasks0, 1) ;
     236         196 :         ntasks0 = LAGRAPH_MIN (ntasks0, ntasks-1) ;
     237         196 :         int ntasks1 = ntasks - ntasks0 ;
     238             : 
     239             :         //----------------------------------------------------------------------
     240             :         // assign ntasks0 to the first half
     241             :         //----------------------------------------------------------------------
     242             : 
     243             :         // ntasks0 tasks merge L [pL_start...pleft-1] and R [pR_start..pright-1]
     244             :         // into the result S [pS_start...work0-1].
     245             : 
     246         196 :         LG_msort_1b_create_merge_tasks (
     247             :             L_task, L_len, R_task, R_len, S_task, t0, ntasks0, pS_start,
     248             :             L_0, pL_start, pleft,
     249             :             R_0, pR_start, pright) ;
     250             : 
     251             :         //----------------------------------------------------------------------
     252             :         // assign ntasks1 to the second half
     253             :         //----------------------------------------------------------------------
     254             : 
     255             :         // ntasks1 tasks merge L [pleft...pL_end-1] and R [pright...pR_end-1]
     256             :         // into the result S [pS_start+work0...pS_start+total_work].
     257             : 
     258         196 :         int t1 = t0 + ntasks0 ;     // first task id of the second set of tasks
     259         196 :         int64_t pS_start1 = pS_start + work0 ;  // 2nd set starts here in S
     260         196 :         LG_msort_1b_create_merge_tasks (
     261             :             L_task, L_len, R_task, R_len, S_task, t1, ntasks1, pS_start1,
     262             :             L_0, pleft,  pL_end,
     263             :             R_0, pright, pR_end) ;
     264             :     }
     265         452 : }
     266             : 
     267             : //------------------------------------------------------------------------------
     268             : // LG_msort_1b_merge: merge two sorted lists via a single thread
     269             : //------------------------------------------------------------------------------
     270             : 
     271             : // merge Left [0..nleft-1] and Right [0..nright-1] into S [0..nleft+nright-1] */
     272             : 
     273         256 : static void LG_msort_1b_merge
     274             : (
     275             :     int64_t *LG_RESTRICT S_0,              // output of length nleft + nright
     276             :     const int64_t *LG_RESTRICT Left_0,     // left input of length nleft
     277             :     const int64_t nleft,
     278             :     const int64_t *LG_RESTRICT Right_0,    // right input of length nright
     279             :     const int64_t nright
     280             : )
     281             : {
     282             :     int64_t p, pleft, pright ;
     283             : 
     284             :     // merge the two inputs, Left and Right, while both inputs exist
     285     3005484 :     for (p = 0, pleft = 0, pright = 0 ; pleft < nleft && pright < nright ; p++)
     286             :     {
     287     3005228 :         if (LG_lt_1 (Left_0,  pleft,
     288             :                      Right_0, pright))
     289             :         {
     290             :             // S [p] = Left [pleft++]
     291     1196146 :             S_0 [p] = Left_0 [pleft] ;
     292     1196146 :             pleft++ ;
     293             :         }
     294             :         else
     295             :         {
     296             :             // S [p] = Right [pright++]
     297     1809082 :             S_0 [p] = Right_0 [pright] ;
     298     1809082 :             pright++ ;
     299             :         }
     300             :     }
     301             : 
     302             :     // either input is exhausted; copy the remaining list into S
     303         256 :     if (pleft < nleft)
     304             :     {
     305         188 :         int64_t nremaining = (nleft - pleft) ;
     306         188 :         memcpy (S_0 + p, Left_0 + pleft, nremaining * sizeof (int64_t)) ;
     307             :     }
     308          68 :     else if (pright < nright)
     309             :     {
     310          68 :         int64_t nremaining = (nright - pright) ;
     311          68 :         memcpy (S_0 + p, Right_0 + pright, nremaining * sizeof (int64_t)) ;
     312             :     }
     313         256 : }
     314             : 
     315             : //------------------------------------------------------------------------------
     316             : // LG_msort1: parallel mergesort
     317             : //------------------------------------------------------------------------------
     318             : 
     319          10 : int LG_msort1
     320             : (
     321             :     // input/output:
     322             :     int64_t *A_0,       // size n array
     323             :     // input:
     324             :     const int64_t n,
     325             :     char *msg
     326             : )
     327             : {
     328             : 
     329             :     //--------------------------------------------------------------------------
     330             :     // check inputs
     331             :     //--------------------------------------------------------------------------
     332             : 
     333          10 :     LG_CLEAR_MSG ;
     334          10 :     int64_t *LG_RESTRICT W = NULL ;
     335          10 :     LG_ASSERT (A_0 != NULL, GrB_NULL_POINTER) ;
     336             : 
     337             :     //--------------------------------------------------------------------------
     338             :     // handle small problems with a single thread
     339             :     //--------------------------------------------------------------------------
     340             : 
     341          10 :     int nthreads = LG_nthreads_outer * LG_nthreads_inner ; // # threads to use
     342          10 :     if (nthreads <= 1 || n <= LG_BASECASE)
     343             :     {
     344             :         // sequential quicksort
     345           4 :         LG_qsort_1a (A_0, n) ;
     346           4 :         return (GrB_SUCCESS) ;
     347             :     }
     348             : 
     349             :     //--------------------------------------------------------------------------
     350             :     // determine # of tasks
     351             :     //--------------------------------------------------------------------------
     352             : 
     353             :     // determine the number of levels to create, which must always be an
     354             :     // even number.  The # of levels is chosen to ensure that the # of leaves
     355             :     // of the task tree is between 4*nthreads and 16*nthreads.
     356             : 
     357             :     //  2 to 4 threads:     4 levels, 16 qsort leaves
     358             :     //  5 to 16 threads:    6 levels, 64 qsort leaves
     359             :     // 17 to 64 threads:    8 levels, 256 qsort leaves
     360             :     // 65 to 256 threads:   10 levels, 1024 qsort leaves
     361             :     // 256 to 1024 threads: 12 levels, 4096 qsort leaves
     362             :     // ...
     363             : 
     364           6 :     int k = (int) (2 + 2 * ceil (log2 ((double) nthreads) / 2)) ;
     365           6 :     int ntasks = 1 << k ;
     366             : 
     367             :     //--------------------------------------------------------------------------
     368             :     // allocate workspace
     369             :     //--------------------------------------------------------------------------
     370             : 
     371           6 :     LG_TRY (LAGraph_Malloc ((void **) &W, n + 6*ntasks + 1, sizeof (int64_t), msg)) ;
     372             : 
     373           4 :     int64_t *T = W ;
     374           4 :     int64_t *LG_RESTRICT W_0    = T ; T += n ;
     375           4 :     int64_t *LG_RESTRICT L_task = T ; T += ntasks ;
     376           4 :     int64_t *LG_RESTRICT L_len  = T ; T += ntasks ;
     377           4 :     int64_t *LG_RESTRICT R_task = T ; T += ntasks ;
     378           4 :     int64_t *LG_RESTRICT R_len  = T ; T += ntasks ;
     379           4 :     int64_t *LG_RESTRICT S_task = T ; T += ntasks ;
     380           4 :     int64_t *LG_RESTRICT Slice  = T ; T += (ntasks+1) ;
     381             : 
     382             :     //--------------------------------------------------------------------------
     383             :     // partition and sort the leaves
     384             :     //--------------------------------------------------------------------------
     385             : 
     386           4 :     LG_eslice (Slice, n, ntasks) ;
     387             :     int tid ;
     388             :     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
     389          68 :     for (tid = 0 ; tid < ntasks ; tid++)
     390             :     {
     391          64 :         int64_t leaf = Slice [tid] ;
     392          64 :         int64_t leafsize = Slice [tid+1] - leaf ;
     393          64 :         LG_qsort_1a (A_0 + leaf, leafsize) ;
     394             :     }
     395             : 
     396             :     //--------------------------------------------------------------------------
     397             :     // merge each level
     398             :     //--------------------------------------------------------------------------
     399             : 
     400           4 :     int nt = 1 ;
     401          12 :     for ( ; k >= 2 ; k -= 2)
     402             :     {
     403             : 
     404             :         //----------------------------------------------------------------------
     405             :         // merge level k into level k-1, from A into W
     406             :         //----------------------------------------------------------------------
     407             : 
     408             :         // FUTURE: skip k and k-1 for each group of 4 sublists of A if they are
     409             :         // already sorted with respect to each other.
     410             : 
     411             :         // this could be done in parallel if ntasks was large
     412          48 :         for (tid = 0 ; tid < ntasks ; tid += 2*nt)
     413             :         {
     414             :             // create 2*nt tasks to merge two A sublists into one W sublist
     415          40 :             LG_msort_1b_create_merge_tasks (
     416          40 :                 L_task, L_len, R_task, R_len, S_task, tid, 2*nt, Slice [tid],
     417          40 :                 A_0, Slice [tid],    Slice [tid+nt],
     418          40 :                 A_0, Slice [tid+nt], Slice [tid+2*nt]) ;
     419             :         }
     420             : 
     421             :         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
     422         136 :         for (tid = 0 ; tid < ntasks ; tid++)
     423             :         {
     424             :             // merge A [pL...pL+nL-1] and A [pR...pR+nR-1] into W [pS..]
     425         128 :             int64_t pL = L_task [tid], nL = L_len [tid] ;
     426         128 :             int64_t pR = R_task [tid], nR = R_len [tid] ;
     427         128 :             int64_t pS = S_task [tid] ;
     428             : 
     429         128 :             LG_msort_1b_merge (
     430         128 :                 W_0 + pS,
     431         128 :                 A_0 + pL, nL,
     432         128 :                 A_0 + pR, nR) ;
     433             :         }
     434           8 :         nt = 2*nt ;
     435             : 
     436             :         //----------------------------------------------------------------------
     437             :         // merge level k-1 into level k-2, from W into A
     438             :         //----------------------------------------------------------------------
     439             : 
     440             :         // this could be done in parallel if ntasks was large
     441          28 :         for (tid = 0 ; tid < ntasks ; tid += 2*nt)
     442             :         {
     443             :             // create 2*nt tasks to merge two W sublists into one A sublist
     444          20 :             LG_msort_1b_create_merge_tasks (
     445          20 :                 L_task, L_len, R_task, R_len, S_task, tid, 2*nt, Slice [tid],
     446          20 :                 W_0, Slice [tid],    Slice [tid+nt],
     447          20 :                 W_0, Slice [tid+nt], Slice [tid+2*nt]) ;
     448             :         }
     449             : 
     450             :         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
     451         136 :         for (tid = 0 ; tid < ntasks ; tid++)
     452             :         {
     453             :             // merge A [pL...pL+nL-1] and A [pR...pR+nR-1] into W [pS..]
     454         128 :             int64_t pL = L_task [tid], nL = L_len [tid] ;
     455         128 :             int64_t pR = R_task [tid], nR = R_len [tid] ;
     456         128 :             int64_t pS = S_task [tid] ;
     457         128 :             LG_msort_1b_merge (
     458         128 :                 A_0 + pS,
     459         128 :                 W_0 + pL, nL,
     460         128 :                 W_0 + pR, nR) ;
     461             :         }
     462           8 :         nt = 2*nt ;
     463             :     }
     464             : 
     465             :     //--------------------------------------------------------------------------
     466             :     // free workspace and return result
     467             :     //--------------------------------------------------------------------------
     468             : 
     469           4 :     LG_FREE_ALL ;
     470           4 :     return (GrB_SUCCESS) ;
     471             : }

Generated by: LCOV version 1.14