LCOV - code coverage report
Current view: top level - src/algorithm - LG_CC_FastSV6.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 162 162 100.0 %
Date: 2025-06-03 22:02:40 Functions: 2 2 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LG_CC_FastSV6: connected components
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Yongzhe Zhang, modified by Timothy A. Davis, Texas A&M
      15             : // University
      16             : 
      17             : //------------------------------------------------------------------------------
      18             : 
      19             : // This is an Advanced algorithm (G->is_symmetric_structure must be known),
      20             : // but it is not user-callable (see LAGr_ConnectedComponents instead).
      21             : 
      22             : // Code is based on the algorithm described in the following paper:
      23             : // Zhang, Azad, Hu. FastSV: A Distributed-Memory Connected Component
      24             : // Algorithm with Fast Convergence (SIAM PP20)
      25             : 
      26             : // A subsequent update to the algorithm is here (which might not be reflected
      27             : // in this code):
      28             : // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding
      29             : // connected components using linear algebra. J. Parallel Distributed Comput.
      30             : // 144: 14-27 (2020).
      31             : 
      32             : // Modified by Tim Davis, Texas A&M University: revised Reduce_assign to use
      33             : // purely GrB* and GxB* methods and the matrix C.  Added warmup phase.  Changed
      34             : // to use GxB pack/unpack instead of GxB import/export.  Converted to use the
      35             : // LAGraph_Graph object.  Exploiting iso status for the temporary matrices
      36             : // C and T.
      37             : 
      38             : // The input graph G must be undirected, or directed and with an adjacency
      39             : // matrix that has a symmetric structure.  Self-edges (diagonal entries) are
      40             : // OK, and are ignored.  The values and type of A are ignored; just its
      41             : // structure is accessed.
      42             : 
      43             : // NOTE: This function must not be called by multiple user threads at the same
      44             : // time on the same graph G, since it unpacks G->A and then packs it back when
      45             : // done.  G->A is unchanged when the function returns, but during execution
      46             : // G->A is empty.  This will be fixed once the todos are finished below, and
      47             : // G->A will then become a truly read-only object (assuming GrB_wait (G->A)
      48             : // has been done first).
      49             : 
      50             : // #define TIMINGS
      51             : 
      52             : #define __STDC_WANT_LIB_EXT1__ 1
      53             : #include <string.h>
      54             : 
      55             : #define LG_FREE_ALL ;
      56             : #include "LG_internal.h"
      57             : 
      58             : #if LAGRAPH_SUITESPARSE
      59             : 
      60             : //==============================================================================
      61             : // fastsv: find the components of a graph
      62             : //==============================================================================
      63             : 
      64          26 : static inline GrB_Info fastsv
      65             : (
      66             :     GrB_Matrix A,           // adjacency matrix, G->A or a subset of G->A
      67             :     GrB_Vector parent,      // parent vector
      68             :     GrB_Vector mngp,        // min neighbor grandparent
      69             :     GrB_Vector *gp,         // grandparent
      70             :     GrB_Vector *gp_new,     // new grandparent (swapped with gp)
      71             :     GrB_Vector t,           // workspace
      72             :     GrB_BinaryOp eq,        // GrB_EQ_(integer type)
      73             :     GrB_BinaryOp min,       // GrB_MIN_(integer type)
      74             :     GrB_Semiring min_2nd,   // GrB_MIN_SECOND_(integer type)
      75             :     GrB_Matrix C,           // C(i,j) present if i = Px (j)
      76             :     GrB_Index **Cp,         // 0:n, size n+1
      77             :     GrB_Index **Px,         // Px: non-opaque copy of parent vector, size n
      78             :     void **Cx,              // size 1, contents not accessed
      79             :     char *msg
      80             : )
      81             : {
      82             :     GrB_Index n ;
      83          26 :     GRB_TRY (GrB_Vector_size (&n, parent)) ;
      84          26 :     GrB_Index Cp_size = (n+1) * sizeof (GrB_Index) ;
      85          26 :     GrB_Index Ci_size = n * sizeof (GrB_Index) ;
      86          26 :     GrB_Index Cx_size = sizeof (bool) ;
      87          26 :     bool iso = true, jumbled = false, done = false ;
      88             : 
      89             : //  #ifdef TIMINGS
      90             : //  int pass = 0 ;
      91             : //  #endif
      92             : 
      93             :     while (true)
      94          48 :     {
      95             : //      #ifdef TIMINGS
      96             : //      printf ("\n-------------------------------------------fastsv: %d\n",
      97             : //          ++pass) ;
      98             : //      #endif
      99             : 
     100             :         //----------------------------------------------------------------------
     101             :         // hooking & shortcutting
     102             :         //----------------------------------------------------------------------
     103             : 
     104             :         // mngp = min (mngp, A*gp) using the MIN_SECOND semiring
     105          74 :         GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
     106             : 
     107             :         //----------------------------------------------------------------------
     108             :         // parent = min (parent, C*mngp) where C(i,j) is present if i=Px(j)
     109             :         //----------------------------------------------------------------------
     110             : 
     111             :         // Reduce_assign: The Px array of size n is the non-opaque copy of the
     112             :         // parent vector, where i = Px [j] if the parent of node j is node i.
     113             :         // It can thus have duplicates.  The vectors parent and mngp are full
     114             :         // (all entries present).  This function computes the following, which
     115             :         // is done explicitly in the Reduce_assign function in LG_CC_Boruvka:
     116             :         //
     117             :         //      for (j = 0 ; j < n ; j++)
     118             :         //      {
     119             :         //          uint64_t i = Px [j] ;
     120             :         //          parent [i] = min (parent [i], mngp [j]) ;
     121             :         //      }
     122             :         //
     123             :         // If C(i,j) is present where i == Px [j], then this can be written as:
     124             :         //
     125             :         //      parent = min (parent, C*mngp)
     126             :         //
     127             :         // when using the min_2nd semiring.  This can be done efficiently
     128             :         // because C can be constructed in O(1) time and O(1) additional space
     129             :         // (not counting the prior Cp, Px, and Cx arrays), when using the
     130             :         // SuiteSparse pack/unpack move constructors.  The min_2nd semiring
     131             :         // ignores the values of C and operates only on the structure, so its
     132             :         // values are not relevant.  Cx is thus chosen as a GrB_BOOL array of
     133             :         // size 1 where Cx [0] = false, so the all entries present in C are
     134             :         // equal to false.
     135             : 
     136             :         // pack Cp, Px, and Cx into a matrix C with C(i,j) present if Px(j) == i
     137          74 :         GRB_TRY (GxB_Matrix_pack_CSC (C, Cp, /* Px is Ci: */ Px, Cx,
     138             :             Cp_size, Ci_size, Cx_size, iso, jumbled, NULL)) ;
     139             : 
     140             :         // parent = min (parent, C*mngp) using the MIN_SECOND semiring
     141          74 :         GRB_TRY (GrB_mxv (parent, NULL, min, min_2nd, C, mngp, NULL)) ;
     142             : 
     143             :         // unpack the contents of C, to make Px available to this method again.
     144          74 :         GRB_TRY (GxB_Matrix_unpack_CSC (C, Cp, Px, Cx,
     145             :             &Cp_size, &Ci_size, &Cx_size, &iso, &jumbled, NULL)) ;
     146             : 
     147             :         //----------------------------------------------------------------------
     148             :         // parent = min (parent, mngp, gp)
     149             :         //----------------------------------------------------------------------
     150             : 
     151          74 :         GRB_TRY (GrB_eWiseAdd (parent, NULL, min, min, mngp, *gp, NULL)) ;
     152             : 
     153             :         //----------------------------------------------------------------------
     154             :         // calculate grandparent: gp_new = parent (parent), and extract Px
     155             :         //----------------------------------------------------------------------
     156             : 
     157             :         // if parent is uint32, GraphBLAS typecasts to uint64 for Px.
     158          74 :         GRB_TRY (GrB_Vector_extractTuples (NULL, *Px, &n, parent)) ;
     159          74 :         GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, *Px, n, NULL)) ;
     160             : 
     161             :         //----------------------------------------------------------------------
     162             :         // terminate if gp and gp_new are the same
     163             :         //----------------------------------------------------------------------
     164             : 
     165          74 :         GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
     166          74 :         GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
     167          74 :         if (done) break ;
     168             : 
     169             :         // swap gp and gp_new
     170          48 :         GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
     171             :     }
     172          26 :     return (GrB_SUCCESS) ;
     173             : }
     174             : 
     175             : //==============================================================================
     176             : // LG_CC_FastSV6
     177             : //==============================================================================
     178             : 
     179             : // The output of LG_CC_FastSV* is a vector component, where component(i)=r if
     180             : // node i is in the connected compononent whose representative is node r.  If r
     181             : // is a representative, then component(r)=r.  The number of connected
     182             : // components in the graph G is the number of representatives.
     183             : 
     184             : #undef  LG_FREE_WORK
     185             : #define LG_FREE_WORK                            \
     186             : {                                               \
     187             :     LAGraph_Free ((void **) &Tp, NULL) ;        \
     188             :     LAGraph_Free ((void **) &Tj, NULL) ;        \
     189             :     LAGraph_Free ((void **) &Tx, NULL) ;        \
     190             :     LAGraph_Free ((void **) &Cp, NULL) ;        \
     191             :     LAGraph_Free ((void **) &Px, NULL) ;        \
     192             :     LAGraph_Free ((void **) &Cx, NULL) ;        \
     193             :     LAGraph_Free ((void **) &ht_key, NULL) ;    \
     194             :     LAGraph_Free ((void **) &ht_count, NULL) ;  \
     195             :     LAGraph_Free ((void **) &count, NULL) ;     \
     196             :     LAGraph_Free ((void **) &range, NULL) ;     \
     197             :     GrB_free (&C) ;                             \
     198             :     GrB_free (&T) ;                             \
     199             :     GrB_free (&t) ;                             \
     200             :     GrB_free (&y) ;                             \
     201             :     GrB_free (&gp) ;                            \
     202             :     GrB_free (&mngp) ;                          \
     203             :     GrB_free (&gp_new) ;                        \
     204             : }
     205             : 
     206             : #undef  LG_FREE_ALL
     207             : #define LG_FREE_ALL                             \
     208             : {                                               \
     209             :     LG_FREE_WORK ;                              \
     210             :     GrB_free (&parent) ;                        \
     211             : }
     212             : 
     213             : #endif
     214             : 
     215             : #ifdef TIMINGS
     216             : static void print_timings (double timings [16])
     217             : {
     218             :     double total = timings [0] + timings [1] + timings [2] ;
     219             :     printf ("SV6 %12.6f (%4.1f%%) init\n", timings [0], 100. * timings [0] / total) ;
     220             :     printf ("SV6 %12.6f (%4.1f%%) total sampling:\n", timings [1], 100. * timings [1] / total) ;
     221             :     printf ("SV6        %12.6f (%4.1f%%) setup T\n", timings [3], 100. * timings [3] / total) ;
     222             :     printf ("SV6        %12.6f (%4.1f%%) create T\n", timings [4], 100. * timings [4] / total) ;
     223             :     printf ("SV6        %12.6f (%4.1f%%) fastsv sample\n", timings [5], 100 * timings [5] / total) ;
     224             :     printf ("SV6        %12.6f (%4.1f%%) hash\n", timings [6], 100. * timings [6] / total) ;
     225             :     printf ("SV6        %12.6f (%4.1f%%) prune\n", timings [7], 100. * timings [7] / total) ;
     226             :     printf ("SV6 %12.6f (%4.1f%%) total final\n", timings [2], 100. * timings [2] / total) ;
     227             : }
     228             : #endif
     229             : 
     230          26 : int LG_CC_FastSV6           // SuiteSparse:GraphBLAS method, with GxB extensions
     231             : (
     232             :     // output:
     233             :     GrB_Vector *component,  // component(i)=r if node is in the component r
     234             :     // input:
     235             :     LAGraph_Graph G,        // input graph (modified then restored)
     236             :     char *msg
     237             : )
     238             : {
     239             : #if LAGRAPH_SUITESPARSE
     240             : 
     241             :     //--------------------------------------------------------------------------
     242             :     // check inputs
     243             :     //--------------------------------------------------------------------------
     244             : 
     245          26 :     LG_CLEAR_MSG ;
     246             : 
     247             :     #ifdef TIMINGS
     248             :     double timings [16] ;
     249             :     for (int kk = 0 ; kk < 16 ; kk++) timings [kk] = 0 ;
     250             :     double tic = LAGraph_WallClockTime ( ) ;
     251             :     LG_SET_BURBLE (false) ;
     252             :     #endif
     253             : 
     254          26 :     int64_t *range = NULL ;
     255          26 :     GrB_Index n, nvals, Cp_size = 0, *ht_key = NULL, *Px = NULL, *Cp = NULL,
     256          26 :         *count = NULL, *Tp = NULL, *Tj = NULL ;
     257          26 :     GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
     258          26 :         y = NULL ;
     259          26 :     GrB_Matrix T = NULL, C = NULL ;
     260          26 :     void *Tx = NULL, *Cx = NULL ;
     261          26 :     int *ht_count = NULL ;
     262             : 
     263          26 :     LG_TRY (LAGraph_CheckGraph (G, msg)) ;
     264          25 :     LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
     265          25 :     (*component) = NULL ;
     266             : 
     267          25 :     LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
     268             :        (G->kind == LAGraph_ADJACENCY_DIRECTED &&
     269             :         G->is_symmetric_structure == LAGraph_TRUE)),
     270             :         LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
     271             :         "G->A must be known to be symmetric") ;
     272             : 
     273             :     //--------------------------------------------------------------------------
     274             :     // initializations
     275             :     //--------------------------------------------------------------------------
     276             : 
     277          24 :     GrB_Matrix A = G->A ;
     278          24 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     279          24 :     GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
     280             : 
     281             :     // determine the integer type, operators, and semirings to use
     282             :     GrB_Type Uint, Int ;
     283             :     GrB_IndexUnaryOp ramp ;
     284             :     GrB_Semiring min_2nd, min_2ndi ;
     285             :     GrB_BinaryOp min, eq, imin ;
     286             :     #ifdef COVERAGE
     287             :     // Just for test coverage, use 64-bit ints for n > 100.  Do not use this
     288             :     // rule in production!
     289             :     #define NBIG 100
     290             :     #else
     291             :     // For production use: 64-bit integers if n > 2^31
     292             :     #define NBIG INT32_MAX
     293             :     #endif
     294          24 :     if (n > NBIG)
     295             :     {
     296             :         // use 64-bit integers throughout
     297           8 :         Uint = GrB_UINT64 ;
     298           8 :         Int  = GrB_INT64  ;
     299           8 :         ramp = GrB_ROWINDEX_INT64 ;
     300           8 :         min  = GrB_MIN_UINT64 ;
     301           8 :         imin = GrB_MIN_INT64 ;
     302           8 :         eq   = GrB_EQ_UINT64 ;
     303           8 :         min_2nd  = GrB_MIN_SECOND_SEMIRING_UINT64 ;
     304           8 :         min_2ndi = GxB_MIN_SECONDI_INT64 ;
     305             :     }
     306             :     else
     307             :     {
     308             :         // use 32-bit integers, except for Px and for constructing the matrix C
     309          16 :         Uint = GrB_UINT32 ;
     310          16 :         Int  = GrB_INT32  ;
     311          16 :         ramp = GrB_ROWINDEX_INT32 ;
     312          16 :         min  = GrB_MIN_UINT32 ;
     313          16 :         imin = GrB_MIN_INT32 ;
     314          16 :         eq   = GrB_EQ_UINT32 ;
     315          16 :         min_2nd  = GrB_MIN_SECOND_SEMIRING_UINT32 ;
     316          16 :         min_2ndi = GxB_MIN_SECONDI_INT32 ;
     317             :     }
     318             : 
     319             :     // FASTSV_SAMPLES: number of samples to take from each row A(i,:).
     320             :     // Sampling is used if the average degree is > 8 and if n > 1024.
     321             :     #define FASTSV_SAMPLES 4
     322          24 :     bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
     323             : 
     324             : // [ todo: nthreads will not be needed once GxB_select with a GxB_RankUnaryOp
     325             : // and a new GxB_extract are added to SuiteSparse:GraphBLAS.
     326             :     // determine # of threads to use
     327             :     int nthreads, nthreads_outer, nthreads_inner ;
     328          24 :     LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
     329          24 :     nthreads = nthreads_outer * nthreads_inner ;
     330          24 :     nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
     331          24 :     nthreads = LAGRAPH_MAX (nthreads, 1) ;
     332             : // ]
     333             : 
     334          24 :     LG_TRY (LAGraph_Calloc ((void **) &Cx, 1, sizeof (bool), msg)) ;
     335          24 :     LG_TRY (LAGraph_Malloc ((void **) &Px, n, sizeof (GrB_Index), msg)) ;
     336             : 
     337             :     // create Cp = 0:n (always 64-bit) and the empty C matrix
     338          24 :     GRB_TRY (GrB_Matrix_new (&C, GrB_BOOL, n, n)) ;
     339          24 :     GRB_TRY (GrB_Vector_new (&t, GrB_INT64, n+1)) ;
     340          24 :     GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n+1, NULL)) ;
     341          24 :     GRB_TRY (GrB_apply (t, NULL, NULL, GrB_ROWINDEX_INT64, t, 0, NULL)) ;
     342          24 :     GRB_TRY (GxB_Vector_unpack_Full (t, (void **) &Cp, &Cp_size, NULL, NULL)) ;
     343          24 :     GRB_TRY (GrB_free (&t)) ;
     344             : 
     345             :     //--------------------------------------------------------------------------
     346             :     // warmup: parent = min (0:n-1, A*1) using the MIN_SECONDI semiring
     347             :     //--------------------------------------------------------------------------
     348             : 
     349             :     // y (i) = min (i, j) for all entries A(i,j).  This warmup phase takes only
     350             :     // O(n) time, because of how the MIN_SECONDI semiring is implemented in
     351             :     // SuiteSparse:GraphBLAS.  A is held by row, and the first entry in A(i,:)
     352             :     // is the minimum index j, so only the first entry in A(i,:) needs to be
     353             :     // considered for each row i.
     354             : 
     355          24 :     GRB_TRY (GrB_Vector_new (&t, Int, n)) ;
     356          24 :     GRB_TRY (GrB_Vector_new (&y, Int, n)) ;
     357          24 :     GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
     358          24 :     GRB_TRY (GrB_assign (y, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
     359          24 :     GRB_TRY (GrB_apply (y, NULL, NULL, ramp, y, 0, NULL)) ;
     360          24 :     GRB_TRY (GrB_mxv (y, NULL, imin, min_2ndi, A, t, NULL)) ;
     361          24 :     GRB_TRY (GrB_free (&t)) ;
     362             : 
     363             :     // The typecast from Int to Uint is required because the ROWINDEX operator
     364             :     // and MIN_SECONDI do not work in the UINT* domains, as built-in operators.
     365             :     // parent = (Uint) y
     366          24 :     GRB_TRY (GrB_Vector_new (&parent, Uint, n)) ;
     367          24 :     GRB_TRY (GrB_assign (parent, NULL, NULL, y, GrB_ALL, n, NULL)) ;
     368          24 :     GRB_TRY (GrB_free (&y)) ;
     369             : 
     370             :     // copy parent into gp, mngp, and Px.  Px is a non-opaque 64-bit copy of the
     371             :     // parent GrB_Vector.  The Px array is always of type GrB_Index since it
     372             :     // must be used as the input array for extractTuples and as Ci for pack_CSR.
     373             :     // If parent is uint32, GraphBLAS typecasts it to the uint64 Px array.
     374          24 :     GRB_TRY (GrB_Vector_extractTuples (NULL, Px, &n, parent)) ;
     375          24 :     GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
     376          24 :     GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
     377          24 :     GRB_TRY (GrB_Vector_new (&gp_new, Uint, n)) ;
     378          24 :     GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
     379             : 
     380             :     #ifdef TIMINGS
     381             :     double toc = LAGraph_WallClockTime ( ) ;
     382             :     timings [0] = toc - tic ;  // init time
     383             :     tic = toc ;
     384             :     #endif
     385             : 
     386             :     //--------------------------------------------------------------------------
     387             :     // sample phase
     388             :     //--------------------------------------------------------------------------
     389             : 
     390          24 :     if (sampling)
     391             :     {
     392             : 
     393             : // [ todo: GxB_select, using a new operator: GxB_RankUnaryOp, will do all this,
     394             : // with GxB_Matrix_select_RankOp_Scalar with operator GxB_LEASTRANK and a
     395             : // GrB_Scalar input equal to FASTSV_SAMPLES.  Built-in operators will be,
     396             : // (where y is INT64):
     397             : //
     398             : //      GxB_LEASTRANK (aij, i, j, k, d, y): select if aij has rank k <= y
     399             : //      GxB_NLEASTRANK: select if aij has rank k > y
     400             : //      GxB_GREATESTRANK (...) select if aij has rank k >= (d-y) where
     401             : //          d = # of entries in A(i,:).
     402             : //      GxB_NGREATESTRANK (...): select if aij has rank k < (d-y)
     403             : // and perhaps other operators such as:
     404             : //      GxB_LEASTRELRANK (...): select aij if rank k <= y*d where y is double
     405             : //      GxB_GREATESTRELRANK (...): select aij rank k > y*d where y is double
     406             : //
     407             : // By default, the rank of aij is its relative position as the kth entry in its
     408             : // row (from "left" to "right").  If a new GxB setting in the descriptor is
     409             : // set, then k is the relative position of aij as the kth entry in its column.
     410             : // The default would be that the rank is the position of aij in its row A(i,:).
     411             : 
     412             : // Other:
     413             : //      give me 3 random items from the row (y = 3)
     414             : //      give me the 4 biggest *values* in each row (y = 4)
     415             : 
     416             : // mxv:
     417             : //      C = A*diag(D)
     418             : 
     419             :         //----------------------------------------------------------------------
     420             :         // unpack A in CSR format
     421             :         //----------------------------------------------------------------------
     422             : 
     423             :         #ifdef TIMINGS
     424             :         double tic2 = LAGraph_WallClockTime ( ) ;
     425             :         #endif
     426             : 
     427             :         void *Ax ;
     428             :         GrB_Index *Ap, *Aj, Ap_size, Aj_size, Ax_size ;
     429             :         bool A_jumbled, A_iso ;
     430           4 :         GRB_TRY (GxB_Matrix_unpack_CSR (A, &Ap, &Aj, &Ax,
     431             :             &Ap_size, &Aj_size, &Ax_size, &A_iso, &A_jumbled, NULL)) ;
     432             : 
     433             :         //----------------------------------------------------------------------
     434             :         // allocate workspace, including space to construct T
     435             :         //----------------------------------------------------------------------
     436             : 
     437           4 :         GrB_Index Tp_size = (n+1) * sizeof (GrB_Index) ;
     438           4 :         GrB_Index Tj_size = nvals * sizeof (GrB_Index) ;
     439           4 :         GrB_Index Tx_size = sizeof (bool) ;
     440           4 :         LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, sizeof (GrB_Index), msg)) ;
     441           4 :         LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, sizeof (GrB_Index),
     442             :             msg)) ;
     443           4 :         LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
     444           4 :         LG_TRY (LAGraph_Malloc ((void **) &range, nthreads + 1,
     445             :             sizeof (int64_t), msg)) ;
     446           4 :         LG_TRY (LAGraph_Calloc ((void **) &count, nthreads + 1,
     447             :             sizeof (GrB_Index), msg)) ;
     448             : 
     449             :         //----------------------------------------------------------------------
     450             :         // define parallel tasks to construct T
     451             :         //----------------------------------------------------------------------
     452             : 
     453             :         // thread tid works on rows range[tid]:range[tid+1]-1 of A and T
     454             :         int tid;
     455          12 :         for (tid = 0 ; tid <= nthreads ; tid++)
     456             :         {
     457           8 :             range [tid] = (n * tid + nthreads - 1) / nthreads ;
     458             :         }
     459             : 
     460             :         //----------------------------------------------------------------------
     461             :         // determine the number entries to be constructed in T for each thread
     462             :         //----------------------------------------------------------------------
     463             : 
     464             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     465           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     466             :         {
     467        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     468             :             {
     469        9752 :                 int64_t deg = Ap [i + 1] - Ap [i] ;
     470        9752 :                 count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
     471             :             }
     472             :         }
     473             : 
     474             :         //----------------------------------------------------------------------
     475             :         // count = cumsum (count)
     476             :         //----------------------------------------------------------------------
     477             : 
     478           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     479             :         {
     480           4 :             count [tid + 1] += count [tid] ;
     481             :         }
     482             : 
     483             :         #ifdef TIMINGS
     484             :         double toc2 = LAGraph_WallClockTime ( ) ;
     485             :         timings [3] = toc2 - tic2 ;  // setup T
     486             :         tic2 = toc2 ;
     487             :         #endif
     488             : 
     489             :         //----------------------------------------------------------------------
     490             :         // construct T
     491             :         //----------------------------------------------------------------------
     492             : 
     493             :         // T (i,:) consists of the first FASTSV_SAMPLES of A (i,:).
     494             : 
     495             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     496           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     497             :         {
     498           4 :             GrB_Index p = count [tid] ;
     499           4 :             Tp [range [tid]] = p ;
     500        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     501             :             {
     502             :                 // construct T (i,:) from the first entries in A (i,:)
     503        9752 :                 for (int64_t j = 0 ;
     504       40390 :                     j < FASTSV_SAMPLES && Ap [i] + j < Ap [i + 1] ; j++)
     505             :                 {
     506       30638 :                     Tj [p++] = Aj [Ap [i] + j] ;
     507             :                 }
     508        9752 :                 Tp [i + 1] = p ;
     509             :             }
     510             :         }
     511             : 
     512             :         //----------------------------------------------------------------------
     513             :         // import the result into the GrB_Matrix T
     514             :         //----------------------------------------------------------------------
     515             : 
     516           4 :         GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
     517           4 :         GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
     518             :             Tx_size, /* T is iso: */ true, A_jumbled, NULL)) ;
     519             : 
     520             : // ] todo: the above will all be done as a single call to GxB_select.
     521             : 
     522             :         #ifdef TIMINGS
     523             :         toc2 = LAGraph_WallClockTime ( ) ;
     524             :         timings [4] = toc2 - tic2 ;  // create T
     525             :         tic2 = toc2 ;
     526             :         #endif
     527             : 
     528             :         //----------------------------------------------------------------------
     529             :         // find the connected components of T
     530             :         //----------------------------------------------------------------------
     531             : 
     532           4 :         LG_TRY (fastsv (T, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
     533             :             C, &Cp, &Px, &Cx, msg)) ;
     534             : 
     535             :         #ifdef TIMINGS
     536             :         toc2 = LAGraph_WallClockTime ( ) ;
     537             :         timings [5] = toc2 - tic2 ;  // fastsv, in sampling
     538             :         tic2 = toc2 ;
     539             :         #endif
     540             : 
     541             :         //----------------------------------------------------------------------
     542             :         // use sampling to estimate the largest connected component in T
     543             :         //----------------------------------------------------------------------
     544             : 
     545             :         // The sampling below computes an estimate of the mode of the parent
     546             :         // vector, the contents of which are currently in the non-opaque Px
     547             :         // array.
     548             : 
     549             :         // hash table size must be a power of 2
     550             :         #define HASH_SIZE 1024
     551             :         // number of samples to insert into the hash table
     552             :         #define HASH_SAMPLES 864
     553             :         #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
     554             :         #define NEXT(x) ((x + 23) & (HASH_SIZE-1))
     555             : 
     556             :         // allocate and initialize the hash table
     557           4 :         LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
     558             :             sizeof (GrB_Index), msg)) ;
     559           4 :         LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
     560             :             sizeof (int), msg)) ;
     561        4100 :         for (int k = 0 ; k < HASH_SIZE ; k++)
     562             :         {
     563        4096 :             ht_key [k] = UINT64_MAX ;
     564             :         }
     565             : 
     566             :         // hash the samples and find the most frequent entry
     567           4 :         uint64_t seed = n ;         // random number seed
     568           4 :         int64_t key = -1 ;          // most frequent entry
     569           4 :         int max_count = 0 ;         // frequency of most frequent entry
     570        3460 :         for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
     571             :         {
     572             :             // select an entry from Px at random
     573        3456 :             GrB_Index x = Px [LG_Random64 (&seed) % n] ;
     574             :             // find x in the hash table
     575        3456 :             GrB_Index h = HASH (x) ;
     576        3598 :             while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
     577             :             // add x to the hash table
     578        3456 :             ht_key [h] = x ;
     579        3456 :             ht_count [h]++ ;
     580             :             // keep track of the most frequent value
     581        3456 :             if (ht_count [h] > max_count)
     582             :             {
     583        1870 :                 key = ht_key [h] ;
     584        1870 :                 max_count = ht_count [h] ;
     585             :             }
     586             :         }
     587             : 
     588             :         #ifdef TIMINGS
     589             :         toc2 = LAGraph_WallClockTime ( ) ;
     590             :         timings [6] = toc2 - tic2 ;  // hash
     591             :         tic2 = toc2 ;
     592             :         #endif
     593             : 
     594             :         //----------------------------------------------------------------------
     595             :         // compact the largest connected component in A
     596             :         //----------------------------------------------------------------------
     597             : 
     598             :         // Construct a new matrix T from the input matrix A (the matrix A is
     599             :         // not changed). The key node is the representative of the (estimated)
     600             :         // largest component.  T is constructed as a copy of A, except:
     601             :         // (1) all edges A(i,:) for nodes i in the key component deleted, and
     602             :         // (2) for nodes i not in the key component, A(i,j) is deleted if
     603             :         //     j is in the key component.
     604             :         // (3) If A(i,:) has any deletions from (2), T(i,key) is added to T.
     605             : 
     606             : // [ todo: replace this with GxB_extract with GrB_Vector index arrays.
     607             : // See https://github.com/GraphBLAS/graphblas-api-c/issues/67 .
     608             : // This method will not insert the new entries T(i,key) for rows i that have
     609             : // had entries deleted.  That can be done with GrB_assign, with an n-by-1 mask
     610             : // M computed from the before-and-after row degrees of A and T:
     611             : // M = (parent != key) && (out_degree(T) < out_degree(A))
     612             : // J [0] = key.
     613             : // GxB_Matrix_subassign_BOOL (T, M, NULL, true, GrB_ALL, n, J, 1, NULL)
     614             : // or with
     615             : // GrB_Col_assign (T, M, NULL, t, GrB_ALL, j, NULL) with an all-true
     616             : // vector t.
     617             : 
     618             :         // unpack T to reuse the space (all content is overwritten below)
     619             :         bool T_jumbled, T_iso ;
     620           4 :         GRB_TRY (GxB_Matrix_unpack_CSR (T, &Tp, &Tj, &Tx, &Tp_size, &Tj_size,
     621             :             &Tx_size, &T_iso, &T_jumbled, NULL)) ;
     622             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     623           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     624             :         {
     625           4 :             GrB_Index p = Ap [range [tid]] ;
     626             :             // thread tid scans A (range [tid]:range [tid+1]-1,:),
     627             :             // and constructs T(i,:) for all rows in this range.
     628        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     629             :             {
     630        9752 :                 int64_t pi = Px [i] ;   // pi = parent (i)
     631        9752 :                 Tp [i] = p ;            // start the construction of T(i,:)
     632             :                 // T(i,:) is empty if pi == key
     633        9752 :                 if (pi != key)
     634             :                 {
     635             :                     // scan A(i,:)
     636       47770 :                     for (GrB_Index pS = Ap [i] ; pS < Ap [i+1] ; pS++)
     637             :                     {
     638             :                         // get A(i,j)
     639       42470 :                         int64_t j = Aj [pS] ;
     640       42470 :                         if (Px [j] != key)
     641             :                         {
     642             :                             // add the entry T(i,j) to T, but skip it if
     643             :                             // Px [j] is equal to key
     644       42460 :                             Tj [p++] = j ;
     645             :                         }
     646             :                     }
     647             :                     // Add the entry T(i,key) if there is room for it in T(i,:);
     648             :                     // if and only if node i is adjacent to a node j in the
     649             :                     // largest component.  The only way there can be space if
     650             :                     // at least one T(i,j) appears with Px [j] equal to the key
     651             :                     // (that is, node j is in the largest connected component,
     652             :                     // key == Px [j].  One of these j's can then be replaced
     653             :                     // with the key.  If node i is not adjacent to any node in
     654             :                     // the largest component, then there is no space in T(i,:)
     655             :                     // and no new edge to the largest component is added.
     656        5300 :                     if (p - Tp [i] < Ap [i+1] - Ap [i])
     657             :                     {
     658           2 :                         Tj [p++] = key ;
     659             :                     }
     660             :                 }
     661             :             }
     662             :             // count the number of entries inserted into T by this thread
     663           4 :             count [tid] = p - Tp [range [tid]] ;
     664             :         }
     665             : 
     666             :         // Compact empty space out of Tj not filled in from the above phase.
     667           4 :         nvals = 0 ;
     668           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     669             :         {
     670           4 :             memmove (Tj + nvals,
     671           4 :                 Tj + Tp [range [tid]], sizeof (GrB_Index) * count [tid]) ;
     672           4 :             nvals += count [tid] ;
     673           4 :             count [tid] = nvals - count [tid] ;
     674             :         }
     675             : 
     676             :         // Compact empty space out of Tp
     677             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     678           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     679             :         {
     680           4 :             GrB_Index p = Tp [range [tid]] ;
     681        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     682             :             {
     683        9752 :                 Tp [i] -= p - count [tid] ;
     684             :             }
     685             :         }
     686             : 
     687             :         // finalize T
     688           4 :         Tp [n] = nvals ;
     689             : 
     690             :         // pack T for the final phase
     691           4 :         GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
     692             :             Tx_size, T_iso, /* T is now jumbled */ true, NULL)) ;
     693             : 
     694             :         // pack A (unchanged since last unpack); this is the original G->A.
     695           4 :         GRB_TRY (GxB_Matrix_pack_CSR (A, &Ap, &Aj, &Ax, Ap_size, Aj_size,
     696             :             Ax_size, A_iso, A_jumbled, NULL)) ;
     697             : 
     698             : // ].  The unpack/pack of A into Ap, Aj, Ax will not be needed, and G->A
     699             : // will become truly a read-only matrix.
     700             : 
     701             :         // final phase uses the pruned matrix T
     702           4 :         A = T ;
     703             : 
     704             :         #ifdef TIMINGS
     705             :         toc2 = LAGraph_WallClockTime ( ) ;
     706             :         timings [7] = toc2 - tic2 ;  // prune
     707             :         tic2 = toc2 ;
     708             :         #endif
     709             :     }
     710             : 
     711             :     #ifdef TIMINGS
     712             :     toc = LAGraph_WallClockTime ( ) ;
     713             :     timings [1] = toc - tic ;  // total sampling time
     714             :     tic = toc ;
     715             :     #endif
     716             : 
     717             :     //--------------------------------------------------------------------------
     718             :     // check for quick return
     719             :     //--------------------------------------------------------------------------
     720             : 
     721             :     // The sample phase may have already found that G->A has a single component,
     722             :     // in which case the matrix A is now empty.
     723             : 
     724          24 :     if (nvals == 0)
     725             :     {
     726           2 :         (*component) = parent ;
     727           2 :         LG_FREE_WORK ;
     728             :         #ifdef TIMINGS
     729             :         print_timings (timings) ;
     730             :         LG_SET_BURBLE (false) ;
     731             :         #endif
     732           2 :         return (GrB_SUCCESS) ;
     733             :     }
     734             : 
     735             :     //--------------------------------------------------------------------------
     736             :     // final phase
     737             :     //--------------------------------------------------------------------------
     738             : 
     739          22 :     LG_TRY (fastsv (A, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
     740             :         C, &Cp, &Px, &Cx, msg)) ;
     741             : 
     742             :     //--------------------------------------------------------------------------
     743             :     // free workspace and return result
     744             :     //--------------------------------------------------------------------------
     745             : 
     746          22 :     (*component) = parent ;
     747          22 :     LG_FREE_WORK ;
     748             :     #ifdef TIMINGS
     749             :     toc = LAGraph_WallClockTime ( ) ;
     750             :     timings [2] = toc - tic ;  // final phase
     751             :     print_timings (timings) ;
     752             :     LG_SET_BURBLE (false) ;
     753             :     #endif
     754          22 :     return (GrB_SUCCESS) ;
     755             : #else
     756             :     return (GrB_NOT_IMPLEMENTED) ;
     757             : #endif
     758             : }

Generated by: LCOV version 1.14