LCOV - code coverage report
Current view: top level - experimental/algorithm - LG_CC_FastSV7_FA.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 230 230 100.0 %
Date: 2025-06-03 22:02:40 Functions: 2 2 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LG_CC_FastSV7: connected components
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Yongzhe Zhang, modified by Timothy A. Davis, Texas A&M
      15             : // University
      16             : 
      17             : //------------------------------------------------------------------------------
      18             : 
      19             : // This is an Advanced algorithm (G->is_symmetric_structure must be known),
      20             : // but it is not user-callable (see LAGr_ConnectedComponents instead).
      21             : 
      22             : // Code is based on the algorithm described in the following paper:
      23             : // Zhang, Azad, Hu. FastSV: A Distributed-Memory Connected Component
      24             : // Algorithm with Fast Convergence (SIAM PP20)
      25             : 
      26             : // A subsequent update to the algorithm is here (which might not be reflected
      27             : // in this code):
      28             : // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding
      29             : // connected components using linear algebra. J. Parallel Distributed Comput.
      30             : // 144: 14-27 (2020).
      31             : 
      32             : // Modified by Tim Davis, Texas A&M University: revised Reduce_assign to use
      33             : // purely GrB* and GxB* methods and the matrix Parent.  Added warmup phase.
      34             : // Changed to use GxB load/unload.  Converted to use the LAGraph_Graph object.
      35             : // Exploiting iso status for the temporary matrices Parent and T.
      36             : 
      37             : // Modified by Gabriel Gomez, Texas A&M University: moved Parent matrix trick 
      38             : // out to LAGraph_FastAssign.
      39             : 
      40             : // The input graph G must be undirected, or directed and with an adjacency
      41             : // matrix that has a symmetric structure.  Self-edges (diagonal entries) are
      42             : // OK, and are ignored.  The values and type of A are ignored; just its
      43             : // structure is accessed.
      44             : 
      45             : // NOTE: This function must not be called by multiple user threads at the same
      46             : // time on the same graph G, since it unloads G->A and loads it back when
      47             : // done.  G->A is unchanged when the function returns, but during execution
      48             : // G->A is empty.  This will be fixed once the todos are finished below, and
      49             : // G->A will then become a truly read-only object (assuming GrB_wait (G->A)
      50             : // has been done first).
      51             : 
      52             : // #define TIMINGS
      53             : 
      54             : #define __STDC_WANT_LIB_EXT1__ 1
      55             : #include <string.h>
      56             : 
      57             : #define LG_FREE_ALL ;
      58             : #include "LG_internal.h"
      59             : #include "LAGraphX.h"
      60             : 
      61             : static double timings [16] ;
      62             : 
      63             : #if LG_SUITESPARSE_GRAPHBLAS_V10
      64             : 
      65             : //==============================================================================
      66             : // fastsv: find the components of a graph
      67             : //==============================================================================
      68             : 
      69          69 : static inline GrB_Info fastsv
      70             : (
      71             :     GrB_Matrix A,           // adjacency matrix, G->A or a subset of G->A
      72             :     GrB_Vector parent2,     // workspace
      73             :     GrB_Vector mngp,        // min neighbor grandparent
      74             :     GrB_Vector *gp,         // grandparent
      75             :     GrB_Vector *gp_new,     // new grandparent (swapped with gp)
      76             :     GrB_Vector t,           // workspace
      77             :     GrB_BinaryOp eq,        // GrB_EQ_(integer type)
      78             :     GrB_BinaryOp min,       // GrB_MIN_(integer type)
      79             :     GrB_Semiring min_2nd,   // GrB_MIN_SECOND_(integer type)
      80             :     GrB_Vector parent,      // parent
      81             :     GrB_Vector ramp,        // [0:n] used to speed up FastAssign
      82             :     char *msg
      83             : )
      84             : {
      85          69 :     bool done = false ;
      86             : //  #ifdef TIMINGS
      87             : //  int pass = 0 ;
      88             : //  #endif
      89             : 
      90             :     while (true)
      91          69 :     {
      92             : //      #ifdef TIMINGS
      93             : //      printf ("\n-------------------------------------------fastsv: %d\n",
      94             : //          ++pass) ;
      95             : //      #endif
      96             : 
      97             :         //----------------------------------------------------------------------
      98             :         // hooking & shortcutting
      99             :         //----------------------------------------------------------------------
     100             : 
     101             :         // mngp = min (mngp, A*gp) using the MIN_SECOND semiring
     102         138 :         GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
     103             : 
     104             :         //----------------------------------------------------------------------
     105             :         // parent2 = min (mngp, gp)
     106             :         //----------------------------------------------------------------------
     107             : 
     108             :         // The parent vector should not be allised into FastAssign, so the 
     109             :         // accumulation is done in a workspace vector, parent2.
     110             : 
     111         138 :         GRB_TRY (GrB_eWiseAdd (parent2, NULL, NULL, min, mngp, *gp, NULL)) ;
     112             : 
     113             :         // LAGraph_FastAssign: This function computes the following, which
     114             :         // is done explicitly in the Reduce_assign function in LG_CC_Boruvka:
     115             :         //
     116             :         //      for (j = 0 ; j < n ; j++)
     117             :         //      {
     118             :         //          uint64_t i = parent [j] ;
     119             :         //          parent2 [i] = min (parent2 [i], mngp [j]) ;
     120             :         //      }
     121             :         //
     122             :         // LAGraph_FastAssign does this by building a matrix. 
     123             :         // (See LAGraph_FastAssign.c) 
     124             :         // Giving it a full ramp vector speeds up the function
     125             : 
     126         137 :         LG_TRY (LAGraph_FastAssign_Semiring(
     127             :             parent2, NULL, min, parent, mngp, ramp, min_2nd, NULL, msg));
     128             : 
     129             :         //----------------------------------------------------------------------
     130             :         // parent = min (parent, parent2)
     131             :         //----------------------------------------------------------------------
     132             : 
     133         101 :         GRB_TRY (GrB_assign (parent, NULL, min, parent2, GrB_ALL, 0, NULL)) ;
     134             : 
     135             :         //----------------------------------------------------------------------
     136             :         // calculate grandparent: gp_new = parent (parent)
     137             :         //----------------------------------------------------------------------
     138             : 
     139         101 :         GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, parent, NULL)) ;
     140             : 
     141             :         //----------------------------------------------------------------------
     142             :         // terminate if gp and gp_new are the same
     143             :         //----------------------------------------------------------------------
     144          97 :         GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
     145          96 :         GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
     146          96 :         if (done) break ;
     147             : 
     148             :         // swap gp and gp_new
     149          69 :         GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
     150             :     }
     151          27 :     return (GrB_SUCCESS) ;
     152             : }
     153             : 
     154             : //==============================================================================
     155             : // LG_CC_FastSV7
     156             : //==============================================================================
     157             : 
     158             : // The output of LG_CC_FastSV* is a vector component, where component(i)=r if
     159             : // node i is in the connected compononent whose representative is node r.  If r
     160             : // is a representative, then component(r)=r.  The number of connected
     161             : // components in the graph G is the number of representatives.
     162             : 
     163             : #undef  LG_FREE_WORK
     164             : #define LG_FREE_WORK                            \
     165             : {                                               \
     166             :     LAGraph_Free ((void **) &Tp, NULL) ;        \
     167             :     LAGraph_Free ((void **) &Tj, NULL) ;        \
     168             :     LAGraph_Free ((void **) &Tx, NULL) ;        \
     169             :     LAGraph_Free ((void **) &ht_key, NULL) ;    \
     170             :     LAGraph_Free ((void **) &ht_count, NULL) ;  \
     171             :     LAGraph_Free ((void **) &count, NULL) ;     \
     172             :     LAGraph_Free ((void **) &range, NULL) ;     \
     173             :     LAGraph_Free ((void **) &Px, NULL) ;        \
     174             :     GrB_free (&T) ;                             \
     175             :     GrB_free (&t) ;                             \
     176             :     GrB_free (&gp) ;                            \
     177             :     GrB_free (&mngp) ;                          \
     178             :     GrB_free (&gp_new) ;                        \
     179             :     GrB_free (&parent2) ;                       \
     180             :     GrB_free (&ramp_v) ;                        \
     181             :     GrB_free (&A_Container) ;                   \
     182             :     GrB_free (&T_Container) ;                   \
     183             : }
     184             : 
     185             : #undef  LG_FREE_ALL
     186             : #define LG_FREE_ALL                             \
     187             : {                                               \
     188             :     GrB_free (&parent) ;                        \
     189             :     LG_FREE_WORK ;                              \
     190             : }
     191             : 
     192             : #endif
     193             : 
     194             : // get/set macros for 32/64 bit arrays:
     195             : #define AP(k) (Ap32 ? Ap32 [k] : Ap64 [k])
     196             : #define AJ(k) (Aj32 ? Aj32 [k] : Aj64 [k])
     197             : #define PARENT(i) (Px32 ? Px32 [i] : Px64 [i])
     198             : #define TP(k) (Tp32 ? Tp32 [k] : Tp64 [k])
     199             : #define TJ(k) (Tj32 ? Tj32 [k] : Tj64 [k])
     200             : #define SET_TP(k,p) { if (Tp32) { Tp32 [k] = p ; } else { Tp64 [k] = p ; }}
     201             : #define SET_TJ(k,i) { if (Tj32) { Tj32 [k] = i ; } else { Tj64 [k] = i ; }}
     202             : 
     203             : #ifdef TIMINGS
     204             : static void print_timings (double timings [16])
     205             : {
     206             :     double total = timings [0] + timings [1] + timings [2] ;
     207             :     printf ("SV7 %12.6f (%4.1f%%) init\n", timings [0], 100. * timings [0] / total) ;
     208             :     printf ("SV7 %12.6f (%4.1f%%) total sampling:\n", timings [1], 100. * timings [1] / total) ;
     209             :     printf ("SV7        %12.6f (%4.1f%%) setup T\n", timings [3], 100. * timings [3] / total) ;
     210             :     printf ("SV7        %12.6f (%4.1f%%) create T\n", timings [4], 100. * timings [4] / total) ;
     211             :     printf ("SV7        %12.6f (%4.1f%%) fastsv sample\n", timings [5], 100 * timings [5] / total) ;
     212             :     printf ("SV7        %12.6f (%4.1f%%) hash\n", timings [6], 100. * timings [6] / total) ;
     213             :     printf ("SV7        %12.6f (%4.1f%%) prune\n", timings [7], 100. * timings [7] / total) ;
     214             :     printf ("SV7 %12.6f (%4.1f%%) total final\n", timings [2], 100. * timings [2] / total) ;
     215             : }
     216             : #endif
     217             : 
     218         103 : int LG_CC_FastSV7_FA         // SuiteSparse:GraphBLAS method, with GraphBLAS v10
     219             : (
     220             :     // output:
     221             :     GrB_Vector *component,  // component(i)=r if node is in the component r
     222             :     // input:
     223             :     LAGraph_Graph G,        // input graph (modified then restored)
     224             :     char *msg
     225             : )
     226             : {
     227             : 
     228             : #if LG_SUITESPARSE_GRAPHBLAS_V10
     229             : 
     230             :     //--------------------------------------------------------------------------
     231             :     // check inputs
     232             :     //--------------------------------------------------------------------------
     233             : 
     234         103 :     LG_CLEAR_MSG ;
     235             : 
     236             :     #ifdef TIMINGS
     237             :     double timings [16] ;
     238             :     for (int kk = 0 ; kk < 16 ; kk++) timings [kk] = 0 ;
     239             :     double tic = LAGraph_WallClockTime ( ) ;
     240             :     LG_SET_BURBLE (false) ;
     241             :     #endif
     242             : 
     243         103 :     int64_t *range = NULL ;
     244         103 :     void *Px = NULL ;
     245         103 :     uint64_t Px_size = 0 ;
     246         103 :     GrB_Index n, nvals, *ht_key = NULL, *count = NULL ;
     247         103 :     void *Tp = NULL, *Tj = NULL ;
     248         103 :     GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
     249         103 :         parent2 = NULL, ramp_v = NULL ;
     250         103 :     GrB_Matrix T = NULL ;
     251         103 :     void *Tx = NULL ;
     252         103 :     int *ht_count = NULL ;
     253         103 :     GxB_Container A_Container = NULL, T_Container = NULL ;
     254             : 
     255         103 :     LG_TRY (LAGraph_CheckGraph (G, msg)) ;
     256         102 :     LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
     257         102 :     (*component) = NULL ;
     258             : 
     259         102 :     LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
     260             :        (G->kind == LAGraph_ADJACENCY_DIRECTED &&
     261             :         G->is_symmetric_structure == LAGraph_TRUE)),
     262             :         LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
     263             :         "G->A must be known to be symmetric") ;
     264             : 
     265             :     //--------------------------------------------------------------------------
     266             :     // initializations
     267             :     //--------------------------------------------------------------------------
     268             : 
     269         101 :     GrB_Matrix A = G->A ;
     270         101 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     271         101 :     GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
     272             : 
     273             :     // determine the integer type, operators, and semirings to use
     274             :     GrB_Type Uint, Int ;
     275             :     GrB_IndexUnaryOp ramp ;
     276             :     GrB_Semiring min_2nd, min_2ndi ;
     277             :     GrB_BinaryOp min, eq, imin ;
     278             :     #ifdef COVERAGE
     279             :     // Just for test coverage, use 64-bit ints for n > 100.  Do not use this
     280             :     // rule in production!
     281             :     #define NBIG 100
     282             :     #else
     283             :     // For production use: 64-bit integers if n > 2^31
     284             :     #define NBIG INT32_MAX
     285             :     #endif
     286         101 :     if (n > NBIG)
     287             :     {
     288             :         // use 64-bit integers
     289           8 :         Uint = GrB_UINT64 ;
     290           8 :         Int  = GrB_INT64  ;
     291           8 :         ramp = GrB_ROWINDEX_INT64 ;
     292           8 :         min  = GrB_MIN_INT64 ;
     293           8 :         imin = GrB_MIN_INT64 ;
     294           8 :         eq   = GrB_EQ_INT64 ;
     295           8 :         min_2nd  = GrB_MIN_SECOND_SEMIRING_INT64 ;
     296           8 :         min_2ndi = GxB_MIN_SECONDI_INT64 ;
     297             :     }
     298             :     else
     299             :     {
     300             :         // use 32-bit integers
     301          93 :         Uint = GrB_UINT32 ;
     302          93 :         Int  = GrB_INT32  ;
     303          93 :         ramp = GrB_ROWINDEX_INT32 ;
     304          93 :         min  = GrB_MIN_INT32 ;
     305          93 :         imin = GrB_MIN_INT32 ;
     306          93 :         eq   = GrB_EQ_INT32 ;
     307          93 :         min_2nd  = GrB_MIN_SECOND_SEMIRING_INT32 ;
     308          93 :         min_2ndi = GxB_MIN_SECONDI_INT32 ;
     309             :     }
     310             : 
     311             :     // FASTSV_SAMPLES: number of samples to take from each row A(i,:).
     312             :     // Sampling is used if the average degree is > 8 and if n > 1024.
     313             :     #define FASTSV_SAMPLES 4
     314         101 :     bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
     315             : 
     316             :     //--------------------------------------------------------------------------
     317             :     // make ramp needed for FastAssign speedup
     318             :     //--------------------------------------------------------------------------
     319         101 :     GRB_TRY (GrB_Vector_new (&(ramp_v), Uint, n+1)) ;
     320          99 :     GRB_TRY (GrB_assign (ramp_v, NULL, NULL, 0, GrB_ALL, n+1,
     321             :         NULL)) ;
     322          98 :     GRB_TRY (GrB_apply (ramp_v, NULL, NULL, ramp, ramp_v, 0, NULL)) ;
     323             : // [ todo: nthreads will not be needed once GxB_select with a GxB_RankUnaryOp
     324             : // and a new GxB_extract are added to SuiteSparse:GraphBLAS.
     325             :     // determine # of threads to use
     326             :     int nthreads, nthreads_outer, nthreads_inner ;
     327          96 :     LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
     328          96 :     nthreads = nthreads_outer * nthreads_inner ;
     329          96 :     nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
     330          96 :     nthreads = LAGRAPH_MAX (nthreads, 1) ;
     331             : // ]
     332             : 
     333          96 :     GRB_TRY (GxB_Container_new (&A_Container)) ;
     334          90 :     GRB_TRY (GxB_Container_new (&T_Container)) ;
     335             : 
     336             :     //--------------------------------------------------------------------------
     337             :     // warmup: parent = min (0:n-1, A*1) using the MIN_SECONDI semiring
     338             :     //--------------------------------------------------------------------------
     339             : 
     340             :     // parent (i) = min (i, j) for all entries A(i,j).  This warmup phase takes only
     341             :     // O(n) time, because of how the MIN_SECONDI semiring is implemented in
     342             :     // SuiteSparse:GraphBLAS.  A is held by row, and the first entry in A(i,:)
     343             :     // is the minimum index j, so only the first entry in A(i,:) needs to be
     344             :     // considered for each row i.
     345             : 
     346          84 :     GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
     347          82 :     GRB_TRY (GrB_Vector_new (&parent, Int, n)) ;
     348          80 :     GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
     349          79 :     GRB_TRY (GrB_assign (parent, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
     350          78 :     GRB_TRY (GrB_apply (parent, NULL, NULL, ramp, parent, 0, NULL)) ;
     351          77 :     GRB_TRY (GrB_mxv (parent, NULL, imin, min_2ndi, A, t, NULL)) ;
     352          77 :     GRB_TRY (GrB_free (&t)) ;
     353             : 
     354             :     // copy parent into gp and mngp.
     355          77 :     GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
     356          75 :     GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
     357             : 
     358             :     // allocate workspace vectors
     359          73 :     GRB_TRY (GrB_Vector_new (&gp_new, Int, n)) ;
     360          71 :     GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
     361          69 :     GRB_TRY (GrB_Vector_new (&parent2, Int, n)) ;
     362             : 
     363             :     #ifdef TIMINGS
     364             :     double toc = LAGraph_WallClockTime ( ) ;
     365             :     timings [0] = toc - tic ;  // init time
     366             :     tic = toc ;
     367             :     #endif
     368             : 
     369             :     //--------------------------------------------------------------------------
     370             :     // sample phase
     371             :     //--------------------------------------------------------------------------
     372             : 
     373          67 :     if (sampling)
     374             :     {
     375             : 
     376             : // [ todo: GxB_select, using a new operator: GxB_RankUnaryOp, will do all this,
     377             : // with GxB_Matrix_select_RankOp_Scalar with operator GxB_LEASTRANK and a
     378             : // GrB_Scalar input equal to FASTSV_SAMPLES.  Built-in operators will be,
     379             : // (where y is INT64):
     380             : //
     381             : //      GxB_LEASTRANK (aij, i, j, k, d, y): select if aij has rank k <= y
     382             : //      GxB_NLEASTRANK: select if aij has rank k > y
     383             : //      GxB_GREATESTRANK (...) select if aij has rank k >= (d-y) where
     384             : //          d = # of entries in A(i,:).
     385             : //      GxB_NGREATESTRANK (...): select if aij has rank k < (d-y)
     386             : // and perhaps other operators such as:
     387             : //      GxB_LEASTRELRANK (...): select aij if rank k <= y*d where y is double
     388             : //      GxB_GREATESTRELRANK (...): select aij rank k > y*d where y is double
     389             : //
     390             : // By default, the rank of aij is its relative position as the kth entry in its
     391             : // row (from "left" to "right").  If a new GxB setting in the descriptor is
     392             : // set, then k is the relative position of aij as the kth entry in its column.
     393             : // The default would be that the rank is the position of aij in its row A(i,:).
     394             : 
     395             : // Other:
     396             : //      give me 3 random items from the row (y = 3)
     397             : //      give me the 4 biggest *values* in each row (y = 4)
     398             : 
     399             :         //----------------------------------------------------------------------
     400             :         // unload A in CSR format
     401             :         //----------------------------------------------------------------------
     402             : 
     403             :         #ifdef TIMINGS
     404             :         double tic2 = LAGraph_WallClockTime ( ) ;
     405             :         #endif
     406             : 
     407           4 :         void *Ap = NULL, *Aj = NULL ;
     408             :         uint64_t Ap_size, Aj_size, Ap_len, Aj_len ;
     409             :         bool A_jumbled, A_iso ;
     410             :         int Ap_handling, Aj_handling ;
     411             : 
     412             :         // unload A in sparse CSR format into the A_Container
     413           4 :         GRB_TRY (GrB_set (A, GxB_SPARSE, GxB_SPARSITY_CONTROL)) ;
     414           4 :         GRB_TRY (GrB_set (A, GrB_ROWMAJOR, GrB_STORAGE_ORIENTATION_HINT)) ;
     415           4 :         GRB_TRY (GxB_unload_Matrix_into_Container (A, A_Container, NULL)) ;
     416           4 :         A_jumbled = A_Container->jumbled ;
     417           4 :         A_iso = A_Container->iso ;
     418             : 
     419             :         // unload A_Container->p,i into the C arrays, Ap and Aj
     420             :         GrB_Type Ap_type, Aj_type ;
     421           4 :         GRB_TRY (GxB_Vector_unload (A_Container->p, &Ap, &Ap_type, &Ap_len,
     422             :             &Ap_size, &Ap_handling, NULL)) ;
     423           4 :         GRB_TRY (GxB_Vector_unload (A_Container->i, &Aj, &Aj_type, &Aj_len,
     424             :             &Aj_size, &Aj_handling, NULL)) ;
     425             : 
     426           4 :         bool Ap_is_32 = (Ap_type == GrB_UINT32 || Ap_type == GrB_INT32) ;
     427           4 :         bool Aj_is_32 = (Aj_type == GrB_UINT32 || Aj_type == GrB_INT32) ;
     428           4 :         const uint32_t *Ap32 = Ap_is_32 ? Ap : NULL ;
     429           4 :         const uint64_t *Ap64 = Ap_is_32 ? NULL : Ap ;
     430           4 :         const uint32_t *Aj32 = Aj_is_32 ? Aj : NULL ;
     431           4 :         const uint64_t *Aj64 = Aj_is_32 ? NULL : Aj ;
     432             : 
     433             :         //----------------------------------------------------------------------
     434             :         // allocate workspace, including space to construct T
     435             :         //----------------------------------------------------------------------
     436             : 
     437           4 :         bool Tp_is_32 = (nvals < UINT32_MAX) ;
     438           4 :         bool Tj_is_32 = (n < INT32_MAX) ;
     439           4 :         GrB_Type Tp_type = Tp_is_32 ? GrB_UINT32 : GrB_UINT64 ;
     440           4 :         GrB_Type Tj_type = Tj_is_32 ? GrB_UINT32 : GrB_UINT64 ;
     441           4 :         size_t tpsize = Tp_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
     442           4 :         size_t tjsize = Tj_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
     443             : 
     444           4 :         GrB_Index Tp_size = (n+1) * tpsize ;
     445           4 :         GrB_Index Tj_size = nvals * tjsize ;
     446           4 :         GrB_Index Tx_size = sizeof (bool) ;
     447           4 :         LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, tpsize, msg)) ;
     448           4 :         LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, tjsize, msg)) ;
     449           4 :         LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
     450           4 :         LG_TRY (LAGraph_Malloc ((void **) &range, nthreads+1, sizeof (int64_t),
     451             :             msg)) ;
     452           4 :         LG_TRY (LAGraph_Calloc ((void **) &count, nthreads+1, sizeof (uint64_t),
     453             :             msg)) ;
     454             : 
     455           4 :         uint32_t *Tp32 = Tp_is_32 ? Tp : NULL ;
     456           4 :         uint64_t *Tp64 = Tp_is_32 ? NULL : Tp ;
     457           4 :         uint32_t *Tj32 = Tj_is_32 ? Tj : NULL ;
     458           4 :         uint64_t *Tj64 = Tj_is_32 ? NULL : Tj ;
     459             : 
     460             :         //----------------------------------------------------------------------
     461             :         // define parallel tasks to construct T
     462             :         //----------------------------------------------------------------------
     463             : 
     464             :         // thread tid works on rows range[tid]:range[tid+1]-1 of A and T
     465             :         int tid;
     466          12 :         for (tid = 0 ; tid <= nthreads ; tid++)
     467             :         {
     468           8 :             range [tid] = (n * tid + nthreads - 1) / nthreads ;
     469             :         }
     470             : 
     471             :         //----------------------------------------------------------------------
     472             :         // determine the number entries to be constructed in T for each thread
     473             :         //----------------------------------------------------------------------
     474             : 
     475             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     476           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     477             :         {
     478        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     479             :             {
     480        9752 :                 int64_t deg = AP (i + 1) - AP (i) ;
     481        9752 :                 count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
     482             :             }
     483             :         }
     484             : 
     485             :         //----------------------------------------------------------------------
     486             :         // count = cumsum (count)
     487             :         //----------------------------------------------------------------------
     488             : 
     489           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     490             :         {
     491           4 :             count [tid + 1] += count [tid] ;
     492             :         }
     493             : 
     494             :         #ifdef TIMINGS
     495             :         double toc2 = LAGraph_WallClockTime ( ) ;
     496             :         timings [3] = toc2 - tic2 ;  // setup T
     497             :         tic2 = toc2 ;
     498             :         #endif
     499             : 
     500             :         //----------------------------------------------------------------------
     501             :         // construct T
     502             :         //----------------------------------------------------------------------
     503             : 
     504             :         // T (i,:) consists of the first FASTSV_SAMPLES of A (i,:).
     505             : 
     506             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     507           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     508             :         {
     509           4 :             GrB_Index p = count [tid] ;
     510           4 :             int64_t ktid = range [tid] ;
     511           4 :             SET_TP (ktid, p) ;      // Tp [ktid] = p ;
     512        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     513             :             {
     514             :                 // construct T (i,:) from the first entries in A (i,:)
     515        9752 :                 for (int64_t j = 0 ;
     516       40390 :                     j < FASTSV_SAMPLES && AP (i) + j < AP (i + 1) ; j++)
     517             :                 {
     518       30638 :                     uint64_t pi = AP (i) + j ;
     519       30638 :                     uint64_t j = AJ (pi) ;
     520       30638 :                     SET_TJ (p, j) ;         // Tj [p] = j ;
     521       30638 :                     p++ ;
     522             :                 }
     523        9752 :                 SET_TP (i+1, p) ;           // Tp [i + 1] = p ;
     524             :             }
     525             :         }
     526             : 
     527             :         //----------------------------------------------------------------------
     528             :         // import the result into the GrB_Matrix T
     529             :         //----------------------------------------------------------------------
     530             : 
     531           4 :         GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
     532             : 
     533           4 :         uint64_t T_nvals = TP (n) ;
     534           4 :         uint64_t Tp_len = n+1 ;
     535           4 :         uint64_t Tj_len = T_nvals ;
     536             : 
     537           4 :         T_Container->nrows = n ;
     538           4 :         T_Container->ncols = n ;
     539           4 :         T_Container->nrows_nonempty = -1 ;
     540           4 :         T_Container->ncols_nonempty = -1 ;
     541           4 :         T_Container->nvals = T_nvals ;
     542           4 :         T_Container->format = GxB_SPARSE ;
     543           4 :         T_Container->orientation = GrB_ROWMAJOR ;
     544           4 :         T_Container->iso = true ;
     545           4 :         T_Container->jumbled = A_jumbled ;
     546             : 
     547             :         // load Tp, Tj, and Tx into the T_Container
     548           4 :         GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
     549             :             Tp_size, GrB_DEFAULT, NULL)) ;
     550           4 :         GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
     551             :             Tj_size, GrB_DEFAULT, NULL)) ;
     552           4 :         GRB_TRY (GxB_Vector_load (T_Container->x, &Tx, GrB_BOOL, 1,
     553             :             Tx_size, GrB_DEFAULT, NULL)) ;
     554             : 
     555             :         // load T from the T_Container
     556           4 :         GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
     557             : 
     558             : // ] todo: the above will all be done as a single call to GxB_select.
     559             : 
     560             :         #ifdef TIMINGS
     561             :         toc2 = LAGraph_WallClockTime ( ) ;
     562             :         timings [4] = toc2 - tic2 ;  // create T
     563             :         tic2 = toc2 ;
     564             :         #endif
     565             : 
     566             :         //----------------------------------------------------------------------
     567             :         // find the connected components of T
     568             :         //----------------------------------------------------------------------
     569             : 
     570           4 :         LG_TRY (fastsv (T, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
     571             :             parent, ramp_v, msg)) ;
     572             : 
     573             :         #ifdef TIMINGS
     574             :         toc2 = LAGraph_WallClockTime ( ) ;
     575             :         timings [5] = toc2 - tic2 ;  // fastsv, in sampling
     576             :         tic2 = toc2 ;
     577             :         #endif
     578             : 
     579             :         //----------------------------------------------------------------------
     580             :         // unload the parent i vector into the Px array
     581             :         //----------------------------------------------------------------------
     582             : 
     583           4 :         int handling = 0 ;
     584           4 :         GrB_Type type = NULL ;
     585           4 :         GRB_TRY (GxB_Vector_unload (parent, &Px, &type, &n, &Px_size,
     586             :             &handling, NULL)) ;
     587           4 :         bool Px_is_32 = (type == GrB_UINT32 || type == GrB_INT32) ;
     588           4 :         uint32_t *Px32 = Px_is_32 ? Px : NULL ;
     589           4 :         uint64_t *Px64 = Px_is_32 ? NULL : Px ;
     590             : 
     591             :         // At this point, the Px array holds the content of parent vector.
     592             : 
     593             :         //----------------------------------------------------------------------
     594             :         // use sampling to estimate the largest connected component in T
     595             :         //----------------------------------------------------------------------
     596             : 
     597             :         // The sampling below computes an estimate of the mode of the parent
     598             :         // vector, the contents of which are currently in the non-opaque Px
     599             :         // array.
     600             : 
     601             :         // hash table size must be a power of 2
     602             :         #define HASH_SIZE 1024
     603             :         // number of samples to insert into the hash table
     604             :         #define HASH_SAMPLES 864
     605             :         #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
     606             :         #define NEXT(x) ((x + 23) & (HASH_SIZE-1))
     607             : 
     608             :         // allocate and initialize the hash table
     609           4 :         LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
     610             :             sizeof (GrB_Index), msg)) ;
     611           4 :         LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
     612             :             sizeof (int), msg)) ;
     613        4100 :         for (int k = 0 ; k < HASH_SIZE ; k++)
     614             :         {
     615        4096 :             ht_key [k] = UINT64_MAX ;
     616             :         }
     617             : 
     618             :         // hash the samples and find the most frequent entry
     619           4 :         uint64_t seed = n ;         // random number seed
     620           4 :         int64_t key = -1 ;          // most frequent entry
     621           4 :         int max_count = 0 ;         // frequency of most frequent entry
     622        3460 :         for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
     623             :         {
     624             :             // select an entry ii from PARENT at random
     625        3456 :             uint64_t i = LG_Random64 (&seed) % n ;
     626        3456 :             GrB_Index x = PARENT (i) ;
     627             :             // find x in the hash table
     628        3456 :             GrB_Index h = HASH (x) ;
     629        3598 :             while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
     630             :             // add x to the hash table
     631        3456 :             ht_key [h] = x ;
     632        3456 :             ht_count [h]++ ;
     633             :             // keep track of the most frequent value
     634        3456 :             if (ht_count [h] > max_count)
     635             :             {
     636        1870 :                 key = ht_key [h] ;
     637        1870 :                 max_count = ht_count [h] ;
     638             :             }
     639             :         }
     640             : 
     641             :         #ifdef TIMINGS
     642             :         toc2 = LAGraph_WallClockTime ( ) ;
     643             :         timings [6] = toc2 - tic2 ;  // hash
     644             :         tic2 = toc2 ;
     645             :         #endif
     646             : 
     647             :         //----------------------------------------------------------------------
     648             :         // compact the largest connected component in A
     649             :         //----------------------------------------------------------------------
     650             : 
     651             :         // Construct a new matrix T from the input matrix A (the matrix A is
     652             :         // not changed). The key node is the representative of the (estimated)
     653             :         // largest component.  T is constructed as a copy of A, except:
     654             :         // (1) all edges A(i,:) for nodes i in the key component deleted, and
     655             :         // (2) for nodes i not in the key component, A(i,j) is deleted if
     656             :         //     j is in the key component.
     657             :         // (3) If A(i,:) has any deletions from (2), T(i,key) is added to T.
     658             : 
     659             : // [ todo: replace this with GxB_extract with GrB_Vector index arrays.
     660             : // See https://github.com/GraphBLAS/graphblas-api-c/issues/67 .
     661             : // This method will not insert the new entries T(i,key) for rows i that have
     662             : // had entries deleted.  That can be done with GrB_assign, with an n-by-1 mask
     663             : // M computed from the before-and-after row degrees of A and T:
     664             : // M = (parent != key) && (out_degree(T) < out_degree(A))
     665             : // J [0] = key.
     666             : // GxB_Matrix_subassign_BOOL (T, M, NULL, true, GrB_ALL, n, J, 1, NULL)
     667             : // or with
     668             : // GrB_Col_assign (T, M, NULL, t, GrB_ALL, j, NULL) with an all-true
     669             : // vector t.
     670             : 
     671             :         // unload T from the T_Container; its contents are revised below
     672           4 :         GRB_TRY (GxB_unload_Matrix_into_Container (T, T_Container, NULL)) ;
     673             : 
     674             :         // unload Tp and Tj from the T_Container
     675             :         int ignore ;
     676           4 :         GRB_TRY (GxB_Vector_unload (T_Container->p, &Tp, &Tp_type, &Tp_len,
     677             :             &Tp_size, &ignore, NULL)) ;
     678           4 :         GRB_TRY (GxB_Vector_unload (T_Container->i, &Tj, &Tj_type, &Tj_len,
     679             :             &Tj_size, &ignore, NULL)) ;
     680             : 
     681             :         // these are likely to be unchanged since the last load of T
     682           4 :         Tp_is_32 = (Tp_type == GrB_UINT32 || Tp_type == GrB_INT32) ;
     683           4 :         Tj_is_32 = (Tj_type == GrB_UINT32 || Tj_type == GrB_INT32) ;
     684           4 :         Tp32 = Tp_is_32 ? Tp : NULL ;
     685           4 :         Tp64 = Tp_is_32 ? NULL : Tp ;
     686           4 :         Tj32 = Tj_is_32 ? Tj : NULL ;
     687           4 :         Tj64 = Tj_is_32 ? NULL : Tj ;
     688             : 
     689             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     690           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     691             :         {
     692           4 :             uint64_t ktid = range [tid] ;
     693           4 :             GrB_Index p = AP (ktid) ;
     694             :             // thread tid scans A (range [tid]:range [tid+1]-1,:),
     695             :             // and constructs T(i,:) for all rows in this range.
     696        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     697             :             {
     698        9752 :                 int64_t pi = PARENT (i) ;
     699        9752 :                 int64_t pstart = p ;
     700        9752 :                 SET_TP (i, p) ; // Tp [i] = p ; start the construction of T(i,:)
     701             :                 // T(i,:) is empty if pi == key
     702        9752 :                 if (pi != key)
     703             :                 {
     704             :                     // scan A(i,:)
     705       47770 :                     for (GrB_Index pS = AP (i) ; pS < AP (i+1) ; pS++)
     706             :                     {
     707             :                         // get A(i,j)
     708       42470 :                         int64_t j = AJ (pS) ;
     709       42470 :                         if (PARENT (j) != key)
     710             :                         {
     711             :                             // add the entry T(i,j) to T, but skip it if
     712             :                             // PARENT (j) is equal to key
     713       42460 :                             SET_TJ (p, j)       // Tj [p] = j ;
     714       42460 :                             p++ ;
     715             :                         }
     716             :                     }
     717             :                     // Add the entry T(i,key) if there is room for it in T(i,:);
     718             :                     // if and only if node i is adjacent to a node j in the
     719             :                     // largest component.  The only way there can be space if
     720             :                     // at least one T(i,j) appears with PARENT (j) equal to the
     721             :                     // key (that is, node j is in the largest connected
     722             :                     // component, key == PARENT (j).  One of these j's can then
     723             :                     // be replaced with the key.  If node i is not adjacent to
     724             :                     // any node in the largest component, then there is no
     725             :                     // space in T(i,:) and no new edge to the largest component
     726             :                     // is added.
     727        5300 :                     if (p - pstart < AP (i+1) - AP (i))
     728             :                     {
     729           2 :                         SET_TJ (p, key) ;       // Tj [p] = key ;
     730           2 :                         p++ ;
     731             :                     }
     732             :                 }
     733             :             }
     734             :             // count the number of entries inserted into T by this thread
     735           4 :             count [tid] = p - TP (ktid) ;
     736             :         }
     737             : 
     738             :         // Compact empty space out of Tj not filled in from the above phase.
     739           4 :         nvals = 0 ;
     740           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     741             :         {
     742           4 :             int64_t ktid = range [tid]  ;
     743           4 :             memmove (Tj32 ? ((void *) (Tj32 + nvals)) : ((void *) (Tj64 + nvals)),
     744           4 :                      Tj32 ? ((void *) (Tj32 + TP (ktid))) : ((void *) (Tj64 + TP (ktid))),
     745           4 :                      tjsize * count [tid]) ;
     746             : 
     747             : #if 0
     748             :             if (Tj32)
     749             :             {
     750             :                 memmove (Tj32 + nvals, Tj32 + TP (ktid),
     751             :                     sizeof (uint32_t) * count [tid]) ;
     752             :             }
     753             :             else
     754             :             {
     755             :                 memmove (Tj64 + nvals, Tj64 + TP (ktid),
     756             :                     sizeof (uint64_t) * count [tid]) ;
     757             :             }
     758             : #endif
     759             : 
     760           4 :             nvals += count [tid] ;
     761           4 :             count [tid] = nvals - count [tid] ;
     762             :         }
     763             : 
     764             :         // Compact empty space out of Tp
     765             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     766           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     767             :         {
     768           4 :             int64_t ktid = range [tid] ;
     769           4 :             GrB_Index p = TP (ktid) ;
     770        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     771             :             {
     772        9752 :                 int64_t tp = TP (i) ;
     773        9752 :                 tp -= p - count [tid] ;
     774        9752 :                 SET_TP (i, tp) ;            // Tp [i] = tp ;
     775             :             }
     776             :         }
     777             : 
     778             :         // finalize T
     779           4 :         SET_TP (n, nvals) ;     // Tp [n] = nvals ;
     780           4 :         Tj_len = nvals ;
     781             : 
     782             :         // load T_Container->p,i from the C arrays, Tp and Tj, for final phase
     783           4 :         GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
     784             :             Tp_size, GrB_DEFAULT, NULL)) ;
     785           4 :         GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
     786             :             Tj_size, GrB_DEFAULT, NULL)) ;
     787             : 
     788           4 :         T_Container->nrows_nonempty = -1 ;
     789           4 :         T_Container->ncols_nonempty = -1 ;
     790           4 :         T_Container->jumbled = true ;
     791           4 :         T_Container->nvals = nvals ;
     792             : 
     793             :         // load T in sparse CSR format from the T_Container
     794           4 :         GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
     795             : 
     796             :         // load A_Container->p,i from the C arrays, Ap and Aj
     797             :         // This is the original G->A, and it is unchanged.
     798           4 :         GRB_TRY (GxB_Vector_load (A_Container->p, &Ap, Ap_type, Ap_len,
     799             :             Ap_size, Ap_handling, NULL)) ;
     800           4 :         GRB_TRY (GxB_Vector_load (A_Container->i, &Aj, Aj_type, Aj_len,
     801             :             Aj_size, Aj_handling, NULL)) ;
     802             : 
     803             :         // load A in sparse CSR format from the A_Container
     804           4 :         GRB_TRY (GxB_load_Matrix_from_Container (A, A_Container, NULL)) ;
     805             : 
     806             :         //----------------------------------------------------------------------
     807             :         // load the Px array back into the parent vector
     808             :         //----------------------------------------------------------------------
     809             : 
     810           4 :         GRB_TRY (GxB_Vector_load (parent, &Px, type, n, Px_size,
     811             :             GrB_DEFAULT, NULL)) ;
     812             : 
     813             : // ].  The unload/load of A into Ap, Aj, Ax will not be needed, and G->A
     814             : // will become truly a read-only matrix.
     815             : 
     816             :         // final phase uses the pruned matrix T
     817           4 :         A = T ;
     818             : 
     819             :         #ifdef TIMINGS
     820             :         toc2 = LAGraph_WallClockTime ( ) ;
     821             :         timings [7] = toc2 - tic2 ;  // prune
     822             :         tic2 = toc2 ;
     823             :         #endif
     824             :     }
     825             : 
     826             :     #ifdef TIMINGS
     827             :     toc = LAGraph_WallClockTime ( ) ;
     828             :     timings [1] = toc - tic ;  // total sampling time
     829             :     tic = toc ;
     830             :     #endif
     831             : 
     832             :     //--------------------------------------------------------------------------
     833             :     // check for quick return
     834             :     //--------------------------------------------------------------------------
     835             : 
     836             :     // The sample phase may have already found that G->A has a single component,
     837             :     // in which case the matrix A is now empty.
     838             : 
     839          67 :     if (nvals == 0)
     840             :     {
     841           2 :         (*component) = parent ;
     842           2 :         LG_FREE_WORK ;
     843             :         #ifdef TIMINGS
     844             :         print_timings (timings) ;
     845             :         LG_SET_BURBLE (false) ;
     846             :         #endif
     847           2 :         return (GrB_SUCCESS) ;
     848             :     }
     849             : 
     850             :     //--------------------------------------------------------------------------
     851             :     // final phase
     852             :     //--------------------------------------------------------------------------
     853             : 
     854          65 :     LG_TRY (fastsv (A, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
     855             :         parent, ramp_v, msg)) ;
     856             : 
     857             :     //--------------------------------------------------------------------------
     858             :     // free workspace and return result
     859             :     //--------------------------------------------------------------------------
     860             : 
     861          23 :     (*component) = parent ;
     862          23 :     parent = NULL ;
     863          23 :     LG_FREE_WORK ;
     864             :     #ifdef TIMINGS
     865             :     toc = LAGraph_WallClockTime ( ) ;
     866             :     timings [2] = toc - tic ;  // final phase
     867             :     print_timings (timings) ;
     868             :     LG_SET_BURBLE (false) ;
     869             :     #endif
     870          23 :     return (GrB_SUCCESS) ;
     871             : #else
     872             :     LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
     873             : #endif
     874             : }

Generated by: LCOV version 1.14