LCOV - code coverage report
Current view: top level - src/algorithm - LG_CC_FastSV6.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: cc56ed4. Current time (UTC): 2024-08-30T17:14:30Z Lines: 162 162 100.0 %
Date: 2024-08-30 17:16:41 Functions: 2 2 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LG_CC_FastSV6: connected components
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Yongzhe Zhang, modified by Timothy A. Davis, Texas A&M
      15             : // University
      16             : 
      17             : //------------------------------------------------------------------------------
      18             : 
      19             : // This is an Advanced algorithm (G->is_symmetric_structure must be known),
      20             : // but it is not user-callable (see LAGr_ConnectedComponents instead).
      21             : 
      22             : // Code is based on the algorithm described in the following paper:
      23             : // Zhang, Azad, Hu. FastSV: A Distributed-Memory Connected Component
      24             : // Algorithm with Fast Convergence (SIAM PP20)
      25             : 
      26             : // A subsequent update to the algorithm is here (which might not be reflected
      27             : // in this code):
      28             : // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding
      29             : // connected components using linear algebra. J. Parallel Distributed Comput.
      30             : // 144: 14-27 (2020).
      31             : 
      32             : // Modified by Tim Davis, Texas A&M University: revised Reduce_assign to use
      33             : // purely GrB* and GxB* methods and the matrix C.  Added warmup phase.  Changed
      34             : // to use GxB pack/unpack instead of GxB import/export.  Converted to use the
      35             : // LAGraph_Graph object.  Exploiting iso status for the temporary matrices
      36             : // C and T.
      37             : 
      38             : // The input graph G must be undirected, or directed and with an adjacency
      39             : // matrix that has a symmetric structure.  Self-edges (diagonal entries) are
      40             : // OK, and are ignored.  The values and type of A are ignored; just its
      41             : // structure is accessed.
      42             : 
      43             : // NOTE: This function must not be called by multiple user threads at the same
      44             : // time on the same graph G, since it unpacks G->A and then packs it back when
      45             : // done.  G->A is unchanged when the function returns, but during execution
      46             : // G->A is empty.  This will be fixed once the todos are finished below, and
      47             : // G->A will then become a truly read-only object (assuming GrB_wait (G->A)
      48             : // has been done first).
      49             : 
      50             : #define __STDC_WANT_LIB_EXT1__ 1
      51             : #include <string.h>
      52             : 
      53             : #define LG_FREE_ALL ;
      54             : #include "LG_internal.h"
      55             : 
      56             : #if LAGRAPH_SUITESPARSE
      57             : 
      58             : //==============================================================================
      59             : // fastsv: find the components of a graph
      60             : //==============================================================================
      61             : 
      62          36 : static inline GrB_Info fastsv
      63             : (
      64             :     GrB_Matrix A,           // adjacency matrix, G->A or a subset of G->A
      65             :     GrB_Vector parent,      // parent vector
      66             :     GrB_Vector mngp,        // min neighbor grandparent
      67             :     GrB_Vector *gp,         // grandparent
      68             :     GrB_Vector *gp_new,     // new grandparent (swapped with gp)
      69             :     GrB_Vector t,           // workspace
      70             :     GrB_BinaryOp eq,        // GrB_EQ_(integer type)
      71             :     GrB_BinaryOp min,       // GrB_MIN_(integer type)
      72             :     GrB_Semiring min_2nd,   // GrB_MIN_SECOND_(integer type)
      73             :     GrB_Matrix C,           // C(i,j) present if i = Px (j)
      74             :     GrB_Index **Cp,         // 0:n, size n+1
      75             :     GrB_Index **Px,         // Px: non-opaque copy of parent vector, size n
      76             :     void **Cx,              // size 1, contents not accessed
      77             :     char *msg
      78             : )
      79             : {
      80             :     GrB_Index n ;
      81          36 :     GRB_TRY (GrB_Vector_size (&n, parent)) ;
      82          36 :     GrB_Index Cp_size = (n+1) * sizeof (GrB_Index) ;
      83          36 :     GrB_Index Ci_size = n * sizeof (GrB_Index) ;
      84          36 :     GrB_Index Cx_size = sizeof (bool) ;
      85          36 :     bool iso = true, jumbled = false, done = false ;
      86             : 
      87             :     while (true)
      88          53 :     {
      89             : 
      90             :         //----------------------------------------------------------------------
      91             :         // hooking & shortcutting
      92             :         //----------------------------------------------------------------------
      93             : 
      94             :         // mngp = min (mngp, A*gp) using the MIN_SECOND semiring
      95          89 :         GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
      96             : 
      97             :         //----------------------------------------------------------------------
      98             :         // parent = min (parent, C*mngp) where C(i,j) is present if i=Px(j)
      99             :         //----------------------------------------------------------------------
     100             : 
     101             :         // Reduce_assign: The Px array of size n is the non-opaque copy of the
     102             :         // parent vector, where i = Px [j] if the parent of node j is node i.
     103             :         // It can thus have duplicates.  The vectors parent and mngp are full
     104             :         // (all entries present).  This function computes the following, which
     105             :         // is done explicitly in the Reduce_assign function in LG_CC_Boruvka:
     106             :         //
     107             :         //      for (j = 0 ; j < n ; j++)
     108             :         //      {
     109             :         //          uint64_t i = Px [j] ;
     110             :         //          parent [i] = min (parent [i], mngp [j]) ;
     111             :         //      }
     112             :         //
     113             :         // If C(i,j) is present where i == Px [j], then this can be written as:
     114             :         //
     115             :         //      parent = min (parent, C*mngp)
     116             :         //
     117             :         // when using the min_2nd semiring.  This can be done efficiently
     118             :         // because C can be constructed in O(1) time and O(1) additional space
     119             :         // (not counting the prior Cp, Px, and Cx arrays), when using the
     120             :         // SuiteSparse pack/unpack move constructors.  The min_2nd semiring
     121             :         // ignores the values of C and operates only on the structure, so its
     122             :         // values are not relevant.  Cx is thus chosen as a GrB_BOOL array of
     123             :         // size 1 where Cx [0] = false, so the all entries present in C are
     124             :         // equal to false.
     125             : 
     126             :         // pack Cp, Px, and Cx into a matrix C with C(i,j) present if Px(j) == i
     127          89 :         GRB_TRY (GxB_Matrix_pack_CSC (C, Cp, /* Px is Ci: */ Px, Cx,
     128             :             Cp_size, Ci_size, Cx_size, iso, jumbled, NULL)) ;
     129             : 
     130             :         // parent = min (parent, C*mngp) using the MIN_SECOND semiring
     131          89 :         GRB_TRY (GrB_mxv (parent, NULL, min, min_2nd, C, mngp, NULL)) ;
     132             : 
     133             :         // unpack the contents of C, to make Px available to this method again.
     134          89 :         GRB_TRY (GxB_Matrix_unpack_CSC (C, Cp, Px, Cx,
     135             :             &Cp_size, &Ci_size, &Cx_size, &iso, &jumbled, NULL)) ;
     136             : 
     137             :         //----------------------------------------------------------------------
     138             :         // parent = min (parent, mngp, gp)
     139             :         //----------------------------------------------------------------------
     140             : 
     141          85 :         GRB_TRY (GrB_eWiseAdd (parent, NULL, min, min, mngp, *gp, NULL)) ;
     142             : 
     143             :         //----------------------------------------------------------------------
     144             :         // calculate grandparent: gp_new = parent (parent), and extract Px
     145             :         //----------------------------------------------------------------------
     146             : 
     147             :         // if parent is uint32, GraphBLAS typecasts to uint64 for Px.
     148          85 :         GRB_TRY (GrB_Vector_extractTuples (NULL, *Px, &n, parent)) ;
     149          85 :         GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, *Px, n, NULL)) ;
     150             : 
     151             :         //----------------------------------------------------------------------
     152             :         // terminate if gp and gp_new are the same
     153             :         //----------------------------------------------------------------------
     154             : 
     155          81 :         GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
     156          80 :         GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
     157          80 :         if (done) break ;
     158             : 
     159             :         // swap gp and gp_new
     160          53 :         GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
     161             :     }
     162          27 :     return (GrB_SUCCESS) ;
     163             : }
     164             : 
     165             : //==============================================================================
     166             : // LG_CC_FastSV6
     167             : //==============================================================================
     168             : 
     169             : // The output of LG_CC_FastSV* is a vector component, where component(i)=r if
     170             : // node i is in the connected compononent whose representative is node r.  If r
     171             : // is a representative, then component(r)=r.  The number of connected
     172             : // components in the graph G is the number of representatives.
     173             : 
     174             : #undef  LG_FREE_WORK
     175             : #define LG_FREE_WORK                            \
     176             : {                                               \
     177             :     LAGraph_Free ((void **) &Tp, NULL) ;        \
     178             :     LAGraph_Free ((void **) &Tj, NULL) ;        \
     179             :     LAGraph_Free ((void **) &Tx, NULL) ;        \
     180             :     LAGraph_Free ((void **) &Cp, NULL) ;        \
     181             :     LAGraph_Free ((void **) &Px, NULL) ;        \
     182             :     LAGraph_Free ((void **) &Cx, NULL) ;        \
     183             :     LAGraph_Free ((void **) &ht_key, NULL) ;    \
     184             :     LAGraph_Free ((void **) &ht_count, NULL) ;  \
     185             :     LAGraph_Free ((void **) &count, NULL) ;     \
     186             :     LAGraph_Free ((void **) &range, NULL) ;     \
     187             :     GrB_free (&C) ;                             \
     188             :     GrB_free (&T) ;                             \
     189             :     GrB_free (&t) ;                             \
     190             :     GrB_free (&y) ;                             \
     191             :     GrB_free (&gp) ;                            \
     192             :     GrB_free (&mngp) ;                          \
     193             :     GrB_free (&gp_new) ;                        \
     194             : }
     195             : 
     196             : #undef  LG_FREE_ALL
     197             : #define LG_FREE_ALL                             \
     198             : {                                               \
     199             :     LG_FREE_WORK ;                              \
     200             :     GrB_free (&parent) ;                        \
     201             : }
     202             : 
     203             : #endif
     204             : 
     205          64 : int LG_CC_FastSV6           // SuiteSparse:GraphBLAS method, with GxB extensions
     206             : (
     207             :     // output:
     208             :     GrB_Vector *component,  // component(i)=r if node is in the component r
     209             :     // input:
     210             :     LAGraph_Graph G,        // input graph (modified then restored)
     211             :     char *msg
     212             : )
     213             : {
     214             : 
     215             : #if !LAGRAPH_SUITESPARSE
     216             :     LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
     217             : #else
     218             : 
     219             :     //--------------------------------------------------------------------------
     220             :     // check inputs
     221             :     //--------------------------------------------------------------------------
     222             : 
     223          64 :     LG_CLEAR_MSG ;
     224             : 
     225          64 :     int64_t *range = NULL ;
     226          64 :     GrB_Index n, nvals, Cp_size = 0, *ht_key = NULL, *Px = NULL, *Cp = NULL,
     227          64 :         *count = NULL, *Tp = NULL, *Tj = NULL ;
     228          64 :     GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
     229          64 :         y = NULL ;
     230          64 :     GrB_Matrix T = NULL, C = NULL ;
     231          64 :     void *Tx = NULL, *Cx = NULL ;
     232          64 :     int *ht_count = NULL ;
     233             : 
     234          64 :     LG_TRY (LAGraph_CheckGraph (G, msg)) ;
     235          63 :     LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
     236          63 :     (*component) = NULL ;
     237             : 
     238          63 :     LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
     239             :        (G->kind == LAGraph_ADJACENCY_DIRECTED &&
     240             :         G->is_symmetric_structure == LAGraph_TRUE)),
     241             :         LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
     242             :         "G->A must be known to be symmetric") ;
     243             : 
     244             :     //--------------------------------------------------------------------------
     245             :     // initializations
     246             :     //--------------------------------------------------------------------------
     247             : 
     248          62 :     GrB_Matrix A = G->A ;
     249          62 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     250          62 :     GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
     251             : 
     252             :     // determine the integer type, operators, and semirings to use
     253             :     GrB_Type Uint, Int ;
     254             :     GrB_IndexUnaryOp ramp ;
     255             :     GrB_Semiring min_2nd, min_2ndi ;
     256             :     GrB_BinaryOp min, eq, imin ;
     257             :     #ifdef COVERAGE
     258             :     // Just for test coverage, use 64-bit ints for n > 100.  Do not use this
     259             :     // rule in production!
     260             :     #define NBIG 100
     261             :     #else
     262             :     // For production use: 64-bit integers if n > 2^31
     263             :     #define NBIG INT32_MAX
     264             :     #endif
     265          62 :     if (n > NBIG)
     266             :     {
     267             :         // use 64-bit integers throughout
     268           8 :         Uint = GrB_UINT64 ;
     269           8 :         Int  = GrB_INT64  ;
     270           8 :         ramp = GrB_ROWINDEX_INT64 ;
     271           8 :         min  = GrB_MIN_UINT64 ;
     272           8 :         imin = GrB_MIN_INT64 ;
     273           8 :         eq   = GrB_EQ_UINT64 ;
     274           8 :         min_2nd  = GrB_MIN_SECOND_SEMIRING_UINT64 ;
     275           8 :         min_2ndi = GxB_MIN_SECONDI_INT64 ;
     276             :     }
     277             :     else
     278             :     {
     279             :         // use 32-bit integers, except for Px and for constructing the matrix C
     280          54 :         Uint = GrB_UINT32 ;
     281          54 :         Int  = GrB_INT32  ;
     282          54 :         ramp = GrB_ROWINDEX_INT32 ;
     283          54 :         min  = GrB_MIN_UINT32 ;
     284          54 :         imin = GrB_MIN_INT32 ;
     285          54 :         eq   = GrB_EQ_UINT32 ;
     286          54 :         min_2nd  = GrB_MIN_SECOND_SEMIRING_UINT32 ;
     287          54 :         min_2ndi = GxB_MIN_SECONDI_INT32 ;
     288             :     }
     289             : 
     290             :     // FASTSV_SAMPLES: number of samples to take from each row A(i,:).
     291             :     // Sampling is used if the average degree is > 8 and if n > 1024.
     292             :     #define FASTSV_SAMPLES 4
     293          62 :     bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
     294             : 
     295             : // [ todo: nthreads will not be needed once GxB_select with a GxB_RankUnaryOp
     296             : // and a new GxB_extract are added to SuiteSparse:GraphBLAS.
     297             :     // determine # of threads to use
     298             :     int nthreads, nthreads_outer, nthreads_inner ;
     299          62 :     LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
     300          62 :     nthreads = nthreads_outer * nthreads_inner ;
     301          62 :     nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
     302          62 :     nthreads = LAGRAPH_MAX (nthreads, 1) ;
     303             : // ]
     304             : 
     305          62 :     LG_TRY (LAGraph_Calloc ((void **) &Cx, 1, sizeof (bool), msg)) ;
     306          61 :     LG_TRY (LAGraph_Malloc ((void **) &Px, n, sizeof (GrB_Index), msg)) ;
     307             : 
     308             :     // create Cp = 0:n (always 64-bit) and the empty C matrix
     309          60 :     GRB_TRY (GrB_Matrix_new (&C, GrB_BOOL, n, n)) ;
     310          57 :     GRB_TRY (GrB_Vector_new (&t, GrB_INT64, n+1)) ;
     311          55 :     GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n+1, NULL)) ;
     312          54 :     GRB_TRY (GrB_apply (t, NULL, NULL, GrB_ROWINDEX_INT64, t, 0, NULL)) ;
     313          53 :     GRB_TRY (GxB_Vector_unpack_Full (t, (void **) &Cp, &Cp_size, NULL, NULL)) ;
     314          52 :     GRB_TRY (GrB_free (&t)) ;
     315             : 
     316             :     //--------------------------------------------------------------------------
     317             :     // warmup: parent = min (0:n-1, A*1) using the MIN_SECONDI semiring
     318             :     //--------------------------------------------------------------------------
     319             : 
     320             :     // y (i) = min (i, j) for all entries A(i,j).  This warmup phase takes only
     321             :     // O(n) time, because of how the MIN_SECONDI semiring is implemented in
     322             :     // SuiteSparse:GraphBLAS.  A is held by row, and the first entry in A(i,:)
     323             :     // is the minimum index j, so only the first entry in A(i,:) needs to be
     324             :     // considered for each row i.
     325             : 
     326          52 :     GRB_TRY (GrB_Vector_new (&t, Int, n)) ;
     327          50 :     GRB_TRY (GrB_Vector_new (&y, Int, n)) ;
     328          48 :     GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
     329          47 :     GRB_TRY (GrB_assign (y, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
     330          46 :     GRB_TRY (GrB_apply (y, NULL, NULL, ramp, y, 0, NULL)) ;
     331          45 :     GRB_TRY (GrB_mxv (y, NULL, imin, min_2ndi, A, t, NULL)) ;
     332          45 :     GRB_TRY (GrB_free (&t)) ;
     333             : 
     334             :     // The typecast from Int to Uint is required because the ROWINDEX operator
     335             :     // and MIN_SECONDI do not work in the UINT* domains, as built-in operators.
     336             :     // parent = (Uint) y
     337          45 :     GRB_TRY (GrB_Vector_new (&parent, Uint, n)) ;
     338          43 :     GRB_TRY (GrB_assign (parent, NULL, NULL, y, GrB_ALL, n, NULL)) ;
     339          42 :     GRB_TRY (GrB_free (&y)) ;
     340             : 
     341             :     // copy parent into gp, mngp, and Px.  Px is a non-opaque 64-bit copy of the
     342             :     // parent GrB_Vector.  The Px array is always of type GrB_Index since it
     343             :     // must be used as the input array for extractTuples and as Ci for pack_CSR.
     344             :     // If parent is uint32, GraphBLAS typecasts it to the uint64 Px array.
     345          42 :     GRB_TRY (GrB_Vector_extractTuples (NULL, Px, &n, parent)) ;
     346          42 :     GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
     347          40 :     GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
     348          38 :     GRB_TRY (GrB_Vector_new (&gp_new, Uint, n)) ;
     349          36 :     GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
     350             : 
     351             :     //--------------------------------------------------------------------------
     352             :     // sample phase
     353             :     //--------------------------------------------------------------------------
     354             : 
     355          34 :     if (sampling)
     356             :     {
     357             : 
     358             : // [ todo: GxB_select, using a new operator: GxB_RankUnaryOp, will do all this,
     359             : // with GxB_Matrix_select_RankOp_Scalar with operator GxB_LEASTRANK and a
     360             : // GrB_Scalar input equal to FASTSV_SAMPLES.  Built-in operators will be,
     361             : // (where y is INT64):
     362             : //
     363             : //      GxB_LEASTRANK (aij, i, j, k, d, y): select if aij has rank k <= y
     364             : //      GxB_NLEASTRANK: select if aij has rank k > y
     365             : //      GxB_GREATESTRANK (...) select if aij has rank k >= (d-y) where
     366             : //          d = # of entries in A(i,:).
     367             : //      GxB_NGREATESTRANK (...): select if aij has rank k < (d-y)
     368             : // and perhaps other operators such as:
     369             : //      GxB_LEASTRELRANK (...): select aij if rank k <= y*d where y is double
     370             : //      GxB_GREATESTRELRANK (...): select aij rank k > y*d where y is double
     371             : //
     372             : // By default, the rank of aij is its relative position as the kth entry in its
     373             : // row (from "left" to "right").  If a new GxB setting in the descriptor is
     374             : // set, then k is the relative position of aij as the kth entry in its column.
     375             : // The default would be that the rank is the position of aij in its row A(i,:).
     376             : 
     377             : // Other:
     378             : //      give me 3 random items from the row (y = 3)
     379             : //      give me the 4 biggest *values* in each row (y = 4)
     380             : 
     381             : // mxv:
     382             : //      C = A*diag(D)
     383             : 
     384             :         //----------------------------------------------------------------------
     385             :         // unpack A in CSR format
     386             :         //----------------------------------------------------------------------
     387             : 
     388             :         void *Ax ;
     389             :         GrB_Index *Ap, *Aj, Ap_size, Aj_size, Ax_size ;
     390             :         bool A_jumbled, A_iso ;
     391           4 :         GRB_TRY (GxB_Matrix_unpack_CSR (A, &Ap, &Aj, &Ax,
     392             :             &Ap_size, &Aj_size, &Ax_size, &A_iso, &A_jumbled, NULL)) ;
     393             : 
     394             :         //----------------------------------------------------------------------
     395             :         // allocate workspace, including space to construct T
     396             :         //----------------------------------------------------------------------
     397             : 
     398           4 :         GrB_Index Tp_size = (n+1) * sizeof (GrB_Index) ;
     399           4 :         GrB_Index Tj_size = nvals * sizeof (GrB_Index) ;
     400           4 :         GrB_Index Tx_size = sizeof (bool) ;
     401           4 :         LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, sizeof (GrB_Index), msg)) ;
     402           4 :         LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, sizeof (GrB_Index),
     403             :             msg)) ;
     404           4 :         LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
     405           4 :         LG_TRY (LAGraph_Malloc ((void **) &range, nthreads + 1,
     406             :             sizeof (int64_t), msg)) ;
     407           4 :         LG_TRY (LAGraph_Calloc ((void **) &count, nthreads + 1,
     408             :             sizeof (GrB_Index), msg)) ;
     409             : 
     410             :         //----------------------------------------------------------------------
     411             :         // define parallel tasks to construct T
     412             :         //----------------------------------------------------------------------
     413             : 
     414             :         // thread tid works on rows range[tid]:range[tid+1]-1 of A and T
     415             :         int tid;
     416          12 :         for (tid = 0 ; tid <= nthreads ; tid++)
     417             :         {
     418           8 :             range [tid] = (n * tid + nthreads - 1) / nthreads ;
     419             :         }
     420             : 
     421             :         //----------------------------------------------------------------------
     422             :         // determine the number entries to be constructed in T for each thread
     423             :         //----------------------------------------------------------------------
     424             : 
     425             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     426           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     427             :         {
     428        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     429             :             {
     430        9752 :                 int64_t deg = Ap [i + 1] - Ap [i] ;
     431        9752 :                 count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
     432             :             }
     433             :         }
     434             : 
     435             :         //----------------------------------------------------------------------
     436             :         // count = cumsum (count)
     437             :         //----------------------------------------------------------------------
     438             : 
     439           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     440             :         {
     441           4 :             count [tid + 1] += count [tid] ;
     442             :         }
     443             : 
     444             :         //----------------------------------------------------------------------
     445             :         // construct T
     446             :         //----------------------------------------------------------------------
     447             : 
     448             :         // T (i,:) consists of the first FASTSV_SAMPLES of A (i,:).
     449             : 
     450             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     451           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     452             :         {
     453           4 :             GrB_Index p = count [tid] ;
     454           4 :             Tp [range [tid]] = p ;
     455        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     456             :             {
     457             :                 // construct T (i,:) from the first entries in A (i,:)
     458        9752 :                 for (int64_t j = 0 ;
     459       40390 :                     j < FASTSV_SAMPLES && Ap [i] + j < Ap [i + 1] ; j++)
     460             :                 {
     461       30638 :                     Tj [p++] = Aj [Ap [i] + j] ;
     462             :                 }
     463        9752 :                 Tp [i + 1] = p ;
     464             :             }
     465             :         }
     466             : 
     467             :         //----------------------------------------------------------------------
     468             :         // import the result into the GrB_Matrix T
     469             :         //----------------------------------------------------------------------
     470             : 
     471           4 :         GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
     472           4 :         GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
     473             :             Tx_size, /* T is iso: */ true, A_jumbled, NULL)) ;
     474             : 
     475             : // ] todo: the above will all be done as a single call to GxB_select.
     476             : 
     477             :         //----------------------------------------------------------------------
     478             :         // find the connected components of T
     479             :         //----------------------------------------------------------------------
     480             : 
     481           4 :         LG_TRY (fastsv (T, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
     482             :             C, &Cp, &Px, &Cx, msg)) ;
     483             : 
     484             :         //----------------------------------------------------------------------
     485             :         // use sampling to estimate the largest connected component in T
     486             :         //----------------------------------------------------------------------
     487             : 
     488             :         // The sampling below computes an estimate of the mode of the parent
     489             :         // vector, the contents of which are currently in the non-opaque Px
     490             :         // array.
     491             : 
     492             :         // hash table size must be a power of 2
     493             :         #define HASH_SIZE 1024
     494             :         // number of samples to insert into the hash table
     495             :         #define HASH_SAMPLES 864
     496             :         #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
     497             :         #define NEXT(x) ((x + 23) & (HASH_SIZE-1))
     498             : 
     499             :         // allocate and initialize the hash table
     500           4 :         LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
     501             :             sizeof (GrB_Index), msg)) ;
     502           4 :         LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
     503             :             sizeof (int), msg)) ;
     504        4100 :         for (int k = 0 ; k < HASH_SIZE ; k++)
     505             :         {
     506        4096 :             ht_key [k] = UINT64_MAX ;
     507             :         }
     508             : 
     509             :         // hash the samples and find the most frequent entry
     510           4 :         uint64_t seed = n ;         // random number seed
     511           4 :         int64_t key = -1 ;          // most frequent entry
     512           4 :         int max_count = 0 ;         // frequency of most frequent entry
     513        3460 :         for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
     514             :         {
     515             :             // select an entry from Px at random
     516        3456 :             GrB_Index x = Px [LG_Random60 (&seed) % n] ;
     517             :             // find x in the hash table
     518        3456 :             GrB_Index h = HASH (x) ;
     519        3650 :             while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
     520             :             // add x to the hash table
     521        3456 :             ht_key [h] = x ;
     522        3456 :             ht_count [h]++ ;
     523             :             // keep track of the most frequent value
     524        3456 :             if (ht_count [h] > max_count)
     525             :             {
     526        1864 :                 key = ht_key [h] ;
     527        1864 :                 max_count = ht_count [h] ;
     528             :             }
     529             :         }
     530             : 
     531             :         //----------------------------------------------------------------------
     532             :         // compact the largest connected component in A
     533             :         //----------------------------------------------------------------------
     534             : 
     535             :         // Construct a new matrix T from the input matrix A (the matrix A is
     536             :         // not changed). The key node is the representative of the (estimated)
     537             :         // largest component.  T is constructed as a copy of A, except:
     538             :         // (1) all edges A(i,:) for nodes i in the key component deleted, and
     539             :         // (2) for nodes i not in the key component, A(i,j) is deleted if
     540             :         //     j is in the key component.
     541             :         // (3) If A(i,:) has any deletions from (2), T(i,key) is added to T.
     542             : 
     543             : // [ todo: replace this with GxB_extract with GrB_Vector index arrays.
     544             : // See https://github.com/GraphBLAS/graphblas-api-c/issues/67 .
     545             : // This method will not insert the new entries T(i,key) for rows i that have
     546             : // had entries deleted.  That can be done with GrB_assign, with an n-by-1 mask
     547             : // M computed from the before-and-after row degrees of A and T:
     548             : // M = (parent != key) && (out_degree(T) < out_degree(A))
     549             : // J [0] = key.
     550             : // GxB_Matrix_subassign_BOOL (T, M, NULL, true, GrB_ALL, n, J, 1, NULL)
     551             : // or with
     552             : // GrB_Col_assign (T, M, NULL, t, GrB_ALL, j, NULL) with an all-true
     553             : // vector t.
     554             : 
     555             :         // unpack T to reuse the space (all content is overwritten below)
     556             :         bool T_jumbled, T_iso ;
     557           4 :         GRB_TRY (GxB_Matrix_unpack_CSR (T, &Tp, &Tj, &Tx, &Tp_size, &Tj_size,
     558             :             &Tx_size, &T_iso, &T_jumbled, NULL)) ;
     559             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     560           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     561             :         {
     562           4 :             GrB_Index p = Ap [range [tid]] ;
     563             :             // thread tid scans A (range [tid]:range [tid+1]-1,:),
     564             :             // and constructs T(i,:) for all rows in this range.
     565        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     566             :             {
     567        9752 :                 int64_t pi = Px [i] ;   // pi = parent (i)
     568        9752 :                 Tp [i] = p ;            // start the construction of T(i,:)
     569             :                 // T(i,:) is empty if pi == key
     570        9752 :                 if (pi != key)
     571             :                 {
     572             :                     // scan A(i,:)
     573       46550 :                     for (GrB_Index pS = Ap [i] ; pS < Ap [i+1] ; pS++)
     574             :                     {
     575             :                         // get A(i,j)
     576       41274 :                         int64_t j = Aj [pS] ;
     577       41274 :                         if (Px [j] != key)
     578             :                         {
     579             :                             // add the entry T(i,j) to T, but skip it if
     580             :                             // Px [j] is equal to key
     581       41264 :                             Tj [p++] = j ;
     582             :                         }
     583             :                     }
     584             :                     // Add the entry T(i,key) if there is room for it in T(i,:);
     585             :                     // if and only if node i is adjacent to a node j in the
     586             :                     // largest component.  The only way there can be space if
     587             :                     // at least one T(i,j) appears with Px [j] equal to the key
     588             :                     // (that is, node j is in the largest connected component,
     589             :                     // key == Px [j].  One of these j's can then be replaced
     590             :                     // with the key.  If node i is not adjacent to any node in
     591             :                     // the largest component, then there is no space in T(i,:)
     592             :                     // and no new edge to the largest component is added.
     593        5276 :                     if (p - Tp [i] < Ap [i+1] - Ap [i])
     594             :                     {
     595           2 :                         Tj [p++] = key ;
     596             :                     }
     597             :                 }
     598             :             }
     599             :             // count the number of entries inserted into T by this thread
     600           4 :             count [tid] = p - Tp [range [tid]] ;
     601             :         }
     602             : 
     603             :         // Compact empty space out of Tj not filled in from the above phase.
     604           4 :         nvals = 0 ;
     605           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     606             :         {
     607           4 :             memmove (Tj + nvals,
     608           4 :                 Tj + Tp [range [tid]], sizeof (GrB_Index) * count [tid]) ;
     609           4 :             nvals += count [tid] ;
     610           4 :             count [tid] = nvals - count [tid] ;
     611             :         }
     612             : 
     613             :         // Compact empty space out of Tp
     614             :         #pragma omp parallel for num_threads(nthreads) schedule(static)
     615           8 :         for (tid = 0 ; tid < nthreads ; tid++)
     616             :         {
     617           4 :             GrB_Index p = Tp [range [tid]] ;
     618        9756 :             for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
     619             :             {
     620        9752 :                 Tp [i] -= p - count [tid] ;
     621             :             }
     622             :         }
     623             : 
     624             :         // finalize T
     625           4 :         Tp [n] = nvals ;
     626             : 
     627             :         // pack T for the final phase
     628           4 :         GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
     629             :             Tx_size, T_iso, /* T is now jumbled */ true, NULL)) ;
     630             : 
     631             :         // pack A (unchanged since last unpack); this is the original G->A.
     632           4 :         GRB_TRY (GxB_Matrix_pack_CSR (A, &Ap, &Aj, &Ax, Ap_size, Aj_size,
     633             :             Ax_size, A_iso, A_jumbled, NULL)) ;
     634             : 
     635             : // ].  The unpack/pack of A into Ap, Aj, Ax will not be needed, and G->A
     636             : // will become truly a read-only matrix.
     637             : 
     638             :         // final phase uses the pruned matrix T
     639           4 :         A = T ;
     640             :     }
     641             : 
     642             :     //--------------------------------------------------------------------------
     643             :     // check for quick return
     644             :     //--------------------------------------------------------------------------
     645             : 
     646             :     // The sample phase may have already found that G->A has a single component,
     647             :     // in which case the matrix A is now empty.
     648             : 
     649          34 :     if (nvals == 0)
     650             :     {
     651           2 :         (*component) = parent ;
     652           2 :         LG_FREE_WORK ;
     653           2 :         return (GrB_SUCCESS) ;
     654             :     }
     655             : 
     656             :     //--------------------------------------------------------------------------
     657             :     // final phase
     658             :     //--------------------------------------------------------------------------
     659             : 
     660          32 :     LG_TRY (fastsv (A, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
     661             :         C, &Cp, &Px, &Cx, msg)) ;
     662             : 
     663             :     //--------------------------------------------------------------------------
     664             :     // free workspace and return result
     665             :     //--------------------------------------------------------------------------
     666             : 
     667          23 :     (*component) = parent ;
     668          23 :     LG_FREE_WORK ;
     669          23 :     return (GrB_SUCCESS) ;
     670             : #endif
     671             : }

Generated by: LCOV version 1.14