Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LG_CC_FastSV7: connected components
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Yongzhe Zhang, modified by Timothy A. Davis, Texas A&M
15 : // University
16 :
17 : //------------------------------------------------------------------------------
18 :
19 : // This is an Advanced algorithm (G->is_symmetric_structure must be known),
20 : // but it is not user-callable (see LAGr_ConnectedComponents instead).
21 :
22 : // Code is based on the algorithm described in the following paper:
23 : // Zhang, Azad, Hu. FastSV: A Distributed-Memory Connected Component
24 : // Algorithm with Fast Convergence (SIAM PP20)
25 :
26 : // A subsequent update to the algorithm is here (which might not be reflected
27 : // in this code):
28 : // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding
29 : // connected components using linear algebra. J. Parallel Distributed Comput.
30 : // 144: 14-27 (2020).
31 :
32 : // Modified by Tim Davis, Texas A&M University: revised Reduce_assign to use
33 : // purely GrB* and GxB* methods and the matrix Parent. Added warmup phase.
34 : // Changed to use GxB load/unload. Converted to use the LAGraph_Graph object.
35 : // Exploiting iso status for the temporary matrices Parent and T.
36 :
37 : // The input graph G must be undirected, or directed and with an adjacency
38 : // matrix that has a symmetric structure. Self-edges (diagonal entries) are
39 : // OK, and are ignored. The values and type of A are ignored; just its
40 : // structure is accessed.
41 :
42 : // NOTE: This function must not be called by multiple user threads at the same
43 : // time on the same graph G, since it unloads G->A and loads it back when
44 : // done. G->A is unchanged when the function returns, but during execution
45 : // G->A is empty. This will be fixed once the todos are finished below, and
46 : // G->A will then become a truly read-only object (assuming GrB_wait (G->A)
47 : // has been done first).
48 :
49 : // #define TIMINGS
50 :
51 : #define __STDC_WANT_LIB_EXT1__ 1
52 : #include <string.h>
53 :
54 : #define LG_FREE_ALL ;
55 : #include "LG_internal.h"
56 :
57 : static double timings [16] ;
58 :
59 : #if LG_SUITESPARSE_GRAPHBLAS_V10
60 :
61 : //==============================================================================
62 : // fastsv: find the components of a graph
63 : //==============================================================================
64 :
65 59 : static inline GrB_Info fastsv
66 : (
67 : GrB_Matrix A, // adjacency matrix, G->A or a subset of G->A
68 : GrB_Vector parent2, // workspace
69 : GrB_Vector mngp, // min neighbor grandparent
70 : GrB_Vector *gp, // grandparent
71 : GrB_Vector *gp_new, // new grandparent (swapped with gp)
72 : GrB_Vector t, // workspace
73 : GrB_BinaryOp eq, // GrB_EQ_(integer type)
74 : GrB_BinaryOp min, // GrB_MIN_(integer type)
75 : GrB_Semiring min_2nd, // GrB_MIN_SECOND_(integer type)
76 : GrB_Matrix Parent, // Parent(i,j) present if i = parent (j)
77 : GxB_Container Parent_Container, // holds the Parent matrix
78 : char *msg
79 : )
80 : {
81 :
82 59 : GrB_Index n = Parent_Container->nrows ;
83 59 : GrB_Vector parent = Parent_Container->i ;
84 59 : bool done = false ;
85 :
86 : // #ifdef TIMINGS
87 : // int pass = 0 ;
88 : // #endif
89 :
90 : while (true)
91 99 : {
92 : // #ifdef TIMINGS
93 : // printf ("\n-------------------------------------------fastsv: %d\n",
94 : // ++pass) ;
95 : // #endif
96 :
97 : //----------------------------------------------------------------------
98 : // hooking & shortcutting
99 : //----------------------------------------------------------------------
100 :
101 : // mngp = min (mngp, A*gp) using the MIN_SECOND semiring
102 158 : GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
103 :
104 : //----------------------------------------------------------------------
105 : // parent2 = min (mngp, gp)
106 : //----------------------------------------------------------------------
107 :
108 : // The parent vector is Parent_Container->i, and thus no longer exists
109 : // when the Parent matrix exists. So the accumulation is done in a
110 : // workspace vector, parent2.
111 :
112 158 : GRB_TRY (GrB_eWiseAdd (parent2, NULL, NULL, min, mngp, *gp, NULL)) ;
113 :
114 : //----------------------------------------------------------------------
115 : // parent2 = min (parent2, Parent*mngp) using the MIN_SECOND semiring
116 : //----------------------------------------------------------------------
117 :
118 : // Reduce_assign: This function computes the following, which
119 : // is done explicitly in the Reduce_assign function in LG_CC_Boruvka:
120 : //
121 : // for (j = 0 ; j < n ; j++)
122 : // {
123 : // uint64_t i = parent [j] ;
124 : // parent2 [i] = min (parent2 [i], mngp [j]) ;
125 : // }
126 : //
127 : // If Parent(i,j) is present where i == parent (j), then this can be
128 : // written as:
129 : //
130 : // parent2 = min (parent2, Parent*mngp)
131 : //
132 : // when using the min_2nd semiring. This can be done efficiently
133 : // because Parent can be constructed in O(1) time and O(1) additional
134 : // space when using the SuiteSparse load/unload move constructors. The
135 : // min_2nd semiring ignores the values of Parent and operates only on
136 : // the structure, so its values are not relevant. Parent_Container->x
137 : // is thus chosen as a GrB_BOOL array of size 1 where x [0] = false, so
138 : // all entries present in Parent are equal to false.
139 :
140 : // load the parent vector into its matrix form, Parent
141 157 : GRB_TRY (GxB_load_Matrix_from_Container (Parent, Parent_Container,
142 : NULL)) ;
143 :
144 : // parent2 = min (parent2, Parent*mngp) using the min_2nd semiring
145 157 : GRB_TRY (GrB_mxv (parent2, NULL, min, min_2nd, Parent, mngp, NULL)) ;
146 :
147 : // unload the container to regain the parent vector
148 157 : GRB_TRY (GxB_unload_Matrix_into_Container (Parent, Parent_Container,
149 : NULL)) ;
150 :
151 : //----------------------------------------------------------------------
152 : // parent = min (parent, parent2)
153 : //----------------------------------------------------------------------
154 :
155 157 : GRB_TRY (GrB_assign (parent, NULL, min, parent2, GrB_ALL, n, NULL)) ;
156 :
157 : //----------------------------------------------------------------------
158 : // calculate grandparent: gp_new = parent (parent)
159 : //----------------------------------------------------------------------
160 :
161 157 : GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, parent, NULL)) ;
162 :
163 : //----------------------------------------------------------------------
164 : // terminate if gp and gp_new are the same
165 : //----------------------------------------------------------------------
166 :
167 153 : GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
168 152 : GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
169 152 : if (done) break ;
170 :
171 : // swap gp and gp_new
172 99 : GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
173 : }
174 53 : return (GrB_SUCCESS) ;
175 : }
176 :
177 : //==============================================================================
178 : // LG_CC_FastSV7
179 : //==============================================================================
180 :
181 : // The output of LG_CC_FastSV* is a vector component, where component(i)=r if
182 : // node i is in the connected compononent whose representative is node r. If r
183 : // is a representative, then component(r)=r. The number of connected
184 : // components in the graph G is the number of representatives.
185 :
186 : #undef LG_FREE_WORK
187 : #define LG_FREE_WORK \
188 : { \
189 : LAGraph_Free ((void **) &Tp, NULL) ; \
190 : LAGraph_Free ((void **) &Tj, NULL) ; \
191 : LAGraph_Free ((void **) &Tx, NULL) ; \
192 : LAGraph_Free ((void **) &ht_key, NULL) ; \
193 : LAGraph_Free ((void **) &ht_count, NULL) ; \
194 : LAGraph_Free ((void **) &count, NULL) ; \
195 : LAGraph_Free ((void **) &range, NULL) ; \
196 : LAGraph_Free ((void **) &Px, NULL) ; \
197 : GrB_free (&T) ; \
198 : GrB_free (&t) ; \
199 : GrB_free (&gp) ; \
200 : GrB_free (&mngp) ; \
201 : GrB_free (&gp_new) ; \
202 : GrB_free (&parent2) ; \
203 : GrB_free (&Parent) ; \
204 : GrB_free (&Parent_Container) ; \
205 : GrB_free (&A_Container) ; \
206 : GrB_free (&T_Container) ; \
207 : }
208 :
209 : #undef LG_FREE_ALL
210 : #define LG_FREE_ALL \
211 : { \
212 : LG_FREE_WORK ; \
213 : }
214 :
215 : #endif
216 :
217 : // get/set macros for 32/64 bit arrays:
218 : #define AP(k) (Ap32 ? Ap32 [k] : Ap64 [k])
219 : #define AJ(k) (Aj32 ? Aj32 [k] : Aj64 [k])
220 : #define PARENT(i) (Px32 ? Px32 [i] : Px64 [i])
221 : #define TP(k) (Tp32 ? Tp32 [k] : Tp64 [k])
222 : #define TJ(k) (Tj32 ? Tj32 [k] : Tj64 [k])
223 : #define SET_TP(k,p) { if (Tp32) { Tp32 [k] = p ; } else { Tp64 [k] = p ; }}
224 : #define SET_TJ(k,i) { if (Tj32) { Tj32 [k] = i ; } else { Tj64 [k] = i ; }}
225 :
226 : #ifdef TIMINGS
227 : static void print_timings (double timings [16])
228 : {
229 : double total = timings [0] + timings [1] + timings [2] ;
230 : printf ("SV7 %12.6f (%4.1f%%) init\n", timings [0], 100. * timings [0] / total) ;
231 : printf ("SV7 %12.6f (%4.1f%%) total sampling:\n", timings [1], 100. * timings [1] / total) ;
232 : printf ("SV7 %12.6f (%4.1f%%) setup T\n", timings [3], 100. * timings [3] / total) ;
233 : printf ("SV7 %12.6f (%4.1f%%) create T\n", timings [4], 100. * timings [4] / total) ;
234 : printf ("SV7 %12.6f (%4.1f%%) fastsv sample\n", timings [5], 100 * timings [5] / total) ;
235 : printf ("SV7 %12.6f (%4.1f%%) hash\n", timings [6], 100. * timings [6] / total) ;
236 : printf ("SV7 %12.6f (%4.1f%%) prune\n", timings [7], 100. * timings [7] / total) ;
237 : printf ("SV7 %12.6f (%4.1f%%) total final\n", timings [2], 100. * timings [2] / total) ;
238 : }
239 : #endif
240 :
241 107 : int LG_CC_FastSV7 // SuiteSparse:GraphBLAS method, with GraphBLAS v10
242 : (
243 : // output:
244 : GrB_Vector *component, // component(i)=r if node is in the component r
245 : // input:
246 : LAGraph_Graph G, // input graph (modified then restored)
247 : char *msg
248 : )
249 : {
250 :
251 : #if LG_SUITESPARSE_GRAPHBLAS_V10
252 :
253 : //--------------------------------------------------------------------------
254 : // check inputs
255 : //--------------------------------------------------------------------------
256 :
257 107 : LG_CLEAR_MSG ;
258 :
259 : #ifdef TIMINGS
260 : double timings [16] ;
261 : for (int kk = 0 ; kk < 16 ; kk++) timings [kk] = 0 ;
262 : double tic = LAGraph_WallClockTime ( ) ;
263 : LG_SET_BURBLE (false) ;
264 : #endif
265 :
266 107 : int64_t *range = NULL ;
267 107 : void *Px = NULL ;
268 107 : uint64_t Px_size = 0 ;
269 107 : GrB_Index n, nvals, *ht_key = NULL, *count = NULL ;
270 107 : void *Tp = NULL, *Tj = NULL ;
271 107 : GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
272 107 : parent2 = NULL ;
273 107 : GrB_Matrix T = NULL, Parent = NULL ;
274 107 : void *Tx = NULL ;
275 107 : int *ht_count = NULL ;
276 107 : GxB_Container Parent_Container = NULL,
277 107 : A_Container = NULL, T_Container = NULL ;
278 :
279 107 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
280 107 : LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
281 107 : (*component) = NULL ;
282 :
283 107 : LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
284 : (G->kind == LAGraph_ADJACENCY_DIRECTED &&
285 : G->is_symmetric_structure == LAGraph_TRUE)),
286 : LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
287 : "G->A must be known to be symmetric") ;
288 :
289 : //--------------------------------------------------------------------------
290 : // initializations
291 : //--------------------------------------------------------------------------
292 :
293 107 : GrB_Matrix A = G->A ;
294 107 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
295 107 : GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
296 :
297 : // determine the integer type, operators, and semirings to use
298 : GrB_Type Uint, Int ;
299 : GrB_IndexUnaryOp ramp ;
300 : GrB_Semiring min_2nd, min_2ndi ;
301 : GrB_BinaryOp min, eq, imin ;
302 : #ifdef COVERAGE
303 : // Just for test coverage, use 64-bit ints for n > 100. Do not use this
304 : // rule in production!
305 : #define NBIG 100
306 : #else
307 : // For production use: 64-bit integers if n > 2^31
308 : #define NBIG INT32_MAX
309 : #endif
310 107 : if (n > NBIG)
311 : {
312 : // use 64-bit integers
313 16 : Uint = GrB_UINT64 ;
314 16 : Int = GrB_INT64 ;
315 16 : ramp = GrB_ROWINDEX_INT64 ;
316 16 : min = GrB_MIN_INT64 ;
317 16 : imin = GrB_MIN_INT64 ;
318 16 : eq = GrB_EQ_INT64 ;
319 16 : min_2nd = GrB_MIN_SECOND_SEMIRING_INT64 ;
320 16 : min_2ndi = GxB_MIN_SECONDI_INT64 ;
321 : }
322 : else
323 : {
324 : // use 32-bit integers
325 91 : Uint = GrB_UINT32 ;
326 91 : Int = GrB_INT32 ;
327 91 : ramp = GrB_ROWINDEX_INT32 ;
328 91 : min = GrB_MIN_INT32 ;
329 91 : imin = GrB_MIN_INT32 ;
330 91 : eq = GrB_EQ_INT32 ;
331 91 : min_2nd = GrB_MIN_SECOND_SEMIRING_INT32 ;
332 91 : min_2ndi = GxB_MIN_SECONDI_INT32 ;
333 : }
334 :
335 : // FASTSV_SAMPLES: number of samples to take from each row A(i,:).
336 : // Sampling is used if the average degree is > 8 and if n > 1024.
337 : #define FASTSV_SAMPLES 4
338 107 : bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
339 :
340 : // [ todo: nthreads will not be needed once GxB_select with a GxB_RankUnaryOp
341 : // and a new GxB_extract are added to SuiteSparse:GraphBLAS.
342 : // determine # of threads to use
343 : int nthreads, nthreads_outer, nthreads_inner ;
344 107 : LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
345 107 : nthreads = nthreads_outer * nthreads_inner ;
346 107 : nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
347 107 : nthreads = LAGRAPH_MAX (nthreads, 1) ;
348 : // ]
349 :
350 : //--------------------------------------------------------------------------
351 : // create the Parent Matrix and Containers
352 : //--------------------------------------------------------------------------
353 :
354 : // The Parent matrix is held in the Parent_Container nearly all the time,
355 : // except for a single call to GrB_mxv in the fastsv method. It is n-by-n
356 : // in sparse CSC form, with exactly one entry per column. That entry is
357 : // Parent(i,j) if i = parent(j). The Parent matrix is iso-valued since no
358 : // operation needs its values. Only its structure is used.
359 :
360 107 : GRB_TRY (GrB_Matrix_new (&Parent, GrB_BOOL, n, n)) ;
361 :
362 : // While the Parent_Container holds the data, instead of the Parent matrix,
363 : // the parent vector is an alias to Parent_Container->i. Thus, at any
364 : // given moment, either the parent vector exists, or the Parent matrix
365 : // exists.
366 :
367 104 : GRB_TRY (GxB_Container_new (&Parent_Container)) ;
368 :
369 : // Parent_Container->p = 0:n
370 98 : GRB_TRY (GrB_Vector_free (&(Parent_Container->p))) ;
371 98 : GRB_TRY (GrB_Vector_new (&(Parent_Container->p), Uint, n+1)) ;
372 96 : GRB_TRY (GrB_assign (Parent_Container->p, NULL, NULL, 0, GrB_ALL, n+1,
373 : NULL)) ;
374 95 : GRB_TRY (GrB_apply (Parent_Container->p, NULL, NULL, ramp,
375 : Parent_Container->p, 0, NULL)) ;
376 :
377 : // Parent_Container->x [0] = false, of length 1
378 93 : GRB_TRY (GrB_Vector_free (&(Parent_Container->x))) ;
379 93 : GRB_TRY (GrB_Vector_new (&(Parent_Container->x), GrB_BOOL, 1)) ;
380 91 : GRB_TRY (GrB_assign (Parent_Container->x, NULL, NULL, 0, GrB_ALL, 1,
381 : NULL)) ;
382 :
383 84 : Parent_Container->nrows = n ;
384 84 : Parent_Container->ncols = n ;
385 84 : Parent_Container->nvals = n ;
386 84 : Parent_Container->nrows_nonempty = -1 ;
387 84 : Parent_Container->ncols_nonempty = n ;
388 84 : Parent_Container->iso = true ;
389 84 : Parent_Container->jumbled = false ;
390 84 : Parent_Container->format = GxB_SPARSE ;
391 84 : Parent_Container->orientation = GrB_COLMAJOR ;
392 84 : Parent_Container->Y = NULL ;
393 :
394 : // the GrB_Vector parent is identical to Parent_Container->i
395 84 : GRB_TRY (GrB_Vector_free (&(Parent_Container->i))) ;
396 84 : GRB_TRY (GrB_Vector_new (&(Parent_Container->i), Int, n)) ;
397 82 : parent = Parent_Container->i ;
398 :
399 82 : GRB_TRY (GxB_Container_new (&A_Container)) ;
400 76 : GRB_TRY (GxB_Container_new (&T_Container)) ;
401 :
402 : //--------------------------------------------------------------------------
403 : // warmup: parent = min (0:n-1, A*1) using the MIN_SECONDI semiring
404 : //--------------------------------------------------------------------------
405 :
406 : // parent (i) = min (i, j) for all entries A(i,j). This warmup phase takes only
407 : // O(n) time, because of how the MIN_SECONDI semiring is implemented in
408 : // SuiteSparse:GraphBLAS. A is held by row, and the first entry in A(i,:)
409 : // is the minimum index j, so only the first entry in A(i,:) needs to be
410 : // considered for each row i.
411 :
412 70 : GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
413 68 : GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
414 67 : GRB_TRY (GrB_assign (parent, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
415 66 : GRB_TRY (GrB_apply (parent, NULL, NULL, ramp, parent, 0, NULL)) ;
416 65 : GRB_TRY (GrB_mxv (parent, NULL, imin, min_2ndi, A, t, NULL)) ;
417 65 : GRB_TRY (GrB_free (&t)) ;
418 :
419 : // copy parent into gp and mngp.
420 65 : GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
421 63 : GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
422 :
423 : // allocate workspace vectors
424 61 : GRB_TRY (GrB_Vector_new (&gp_new, Int, n)) ;
425 59 : GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
426 57 : GRB_TRY (GrB_Vector_new (&parent2, Int, n)) ;
427 :
428 : #ifdef TIMINGS
429 : double toc = LAGraph_WallClockTime ( ) ;
430 : timings [0] = toc - tic ; // init time
431 : tic = toc ;
432 : #endif
433 :
434 : //--------------------------------------------------------------------------
435 : // sample phase
436 : //--------------------------------------------------------------------------
437 :
438 55 : if (sampling)
439 : {
440 :
441 : // [ todo: GxB_select, using a new operator: GxB_RankUnaryOp, will do all this,
442 : // with GxB_Matrix_select_RankOp_Scalar with operator GxB_LEASTRANK and a
443 : // GrB_Scalar input equal to FASTSV_SAMPLES. Built-in operators will be,
444 : // (where y is INT64):
445 : //
446 : // GxB_LEASTRANK (aij, i, j, k, d, y): select if aij has rank k <= y
447 : // GxB_NLEASTRANK: select if aij has rank k > y
448 : // GxB_GREATESTRANK (...) select if aij has rank k >= (d-y) where
449 : // d = # of entries in A(i,:).
450 : // GxB_NGREATESTRANK (...): select if aij has rank k < (d-y)
451 : // and perhaps other operators such as:
452 : // GxB_LEASTRELRANK (...): select aij if rank k <= y*d where y is double
453 : // GxB_GREATESTRELRANK (...): select aij rank k > y*d where y is double
454 : //
455 : // By default, the rank of aij is its relative position as the kth entry in its
456 : // row (from "left" to "right"). If a new GxB setting in the descriptor is
457 : // set, then k is the relative position of aij as the kth entry in its column.
458 : // The default would be that the rank is the position of aij in its row A(i,:).
459 :
460 : // Other:
461 : // give me 3 random items from the row (y = 3)
462 : // give me the 4 biggest *values* in each row (y = 4)
463 :
464 : //----------------------------------------------------------------------
465 : // unload A in CSR format
466 : //----------------------------------------------------------------------
467 :
468 : #ifdef TIMINGS
469 : double tic2 = LAGraph_WallClockTime ( ) ;
470 : #endif
471 :
472 8 : void *Ap = NULL, *Aj = NULL ;
473 : uint64_t Ap_size, Aj_size, Ap_len, Aj_len ;
474 : bool A_jumbled, A_iso ;
475 : int Ap_handling, Aj_handling ;
476 :
477 : // unload A in sparse CSR format into the A_Container
478 : // GRB_TRY (GrB_set (A, GxB_SPARSE, GxB_SPARSITY_CONTROL)) ;
479 8 : GRB_TRY (LG_SET_FORMAT_HINT (A, LG_SPARSE)) ;
480 8 : GRB_TRY (GrB_set (A, GrB_ROWMAJOR, GrB_STORAGE_ORIENTATION_HINT)) ;
481 8 : GRB_TRY (GxB_unload_Matrix_into_Container (A, A_Container, NULL)) ;
482 8 : A_jumbled = A_Container->jumbled ;
483 8 : A_iso = A_Container->iso ;
484 :
485 : // unload A_Container->p,i into the C arrays, Ap and Aj
486 : GrB_Type Ap_type, Aj_type ;
487 8 : GRB_TRY (GxB_Vector_unload (A_Container->p, &Ap, &Ap_type, &Ap_len,
488 : &Ap_size, &Ap_handling, NULL)) ;
489 8 : GRB_TRY (GxB_Vector_unload (A_Container->i, &Aj, &Aj_type, &Aj_len,
490 : &Aj_size, &Aj_handling, NULL)) ;
491 :
492 8 : bool Ap_is_32 = (Ap_type == GrB_UINT32 || Ap_type == GrB_INT32) ;
493 8 : bool Aj_is_32 = (Aj_type == GrB_UINT32 || Aj_type == GrB_INT32) ;
494 8 : const uint32_t *Ap32 = Ap_is_32 ? Ap : NULL ;
495 8 : const uint64_t *Ap64 = Ap_is_32 ? NULL : Ap ;
496 8 : const uint32_t *Aj32 = Aj_is_32 ? Aj : NULL ;
497 8 : const uint64_t *Aj64 = Aj_is_32 ? NULL : Aj ;
498 :
499 : //----------------------------------------------------------------------
500 : // allocate workspace, including space to construct T
501 : //----------------------------------------------------------------------
502 :
503 8 : bool Tp_is_32 = (nvals < UINT32_MAX) ;
504 8 : bool Tj_is_32 = (n < INT32_MAX) ;
505 8 : GrB_Type Tp_type = Tp_is_32 ? GrB_UINT32 : GrB_UINT64 ;
506 8 : GrB_Type Tj_type = Tj_is_32 ? GrB_UINT32 : GrB_UINT64 ;
507 8 : size_t tpsize = Tp_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
508 8 : size_t tjsize = Tj_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
509 :
510 8 : GrB_Index Tp_size = (n+1) * tpsize ;
511 8 : GrB_Index Tj_size = nvals * tjsize ;
512 8 : GrB_Index Tx_size = sizeof (bool) ;
513 8 : LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, tpsize, msg)) ;
514 8 : LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, tjsize, msg)) ;
515 8 : LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
516 8 : LG_TRY (LAGraph_Malloc ((void **) &range, nthreads+1, sizeof (int64_t),
517 : msg)) ;
518 8 : LG_TRY (LAGraph_Calloc ((void **) &count, nthreads+1, sizeof (uint64_t),
519 : msg)) ;
520 :
521 8 : uint32_t *Tp32 = Tp_is_32 ? Tp : NULL ;
522 8 : uint64_t *Tp64 = Tp_is_32 ? NULL : Tp ;
523 8 : uint32_t *Tj32 = Tj_is_32 ? Tj : NULL ;
524 8 : uint64_t *Tj64 = Tj_is_32 ? NULL : Tj ;
525 :
526 : //----------------------------------------------------------------------
527 : // define parallel tasks to construct T
528 : //----------------------------------------------------------------------
529 :
530 : // thread tid works on rows range[tid]:range[tid+1]-1 of A and T
531 : int tid;
532 24 : for (tid = 0 ; tid <= nthreads ; tid++)
533 : {
534 16 : range [tid] = (n * tid + nthreads - 1) / nthreads ;
535 : }
536 :
537 : //----------------------------------------------------------------------
538 : // determine the number entries to be constructed in T for each thread
539 : //----------------------------------------------------------------------
540 :
541 : #pragma omp parallel for num_threads(nthreads) schedule(static)
542 16 : for (tid = 0 ; tid < nthreads ; tid++)
543 : {
544 19512 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
545 : {
546 19504 : int64_t deg = AP (i + 1) - AP (i) ;
547 19504 : count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
548 : }
549 : }
550 :
551 : //----------------------------------------------------------------------
552 : // count = cumsum (count)
553 : //----------------------------------------------------------------------
554 :
555 16 : for (tid = 0 ; tid < nthreads ; tid++)
556 : {
557 8 : count [tid + 1] += count [tid] ;
558 : }
559 :
560 : #ifdef TIMINGS
561 : double toc2 = LAGraph_WallClockTime ( ) ;
562 : timings [3] = toc2 - tic2 ; // setup T
563 : tic2 = toc2 ;
564 : #endif
565 :
566 : //----------------------------------------------------------------------
567 : // construct T
568 : //----------------------------------------------------------------------
569 :
570 : // T (i,:) consists of the first FASTSV_SAMPLES of A (i,:).
571 :
572 : #pragma omp parallel for num_threads(nthreads) schedule(static)
573 16 : for (tid = 0 ; tid < nthreads ; tid++)
574 : {
575 8 : GrB_Index p = count [tid] ;
576 8 : int64_t ktid = range [tid] ;
577 8 : SET_TP (ktid, p) ; // Tp [ktid] = p ;
578 19512 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
579 : {
580 : // construct T (i,:) from the first entries in A (i,:)
581 19504 : for (int64_t j = 0 ;
582 80780 : j < FASTSV_SAMPLES && AP (i) + j < AP (i + 1) ; j++)
583 : {
584 61276 : uint64_t pi = AP (i) + j ;
585 61276 : uint64_t j = AJ (pi) ;
586 61276 : SET_TJ (p, j) ; // Tj [p] = j ;
587 61276 : p++ ;
588 : }
589 19504 : SET_TP (i+1, p) ; // Tp [i + 1] = p ;
590 : }
591 : }
592 :
593 : //----------------------------------------------------------------------
594 : // import the result into the GrB_Matrix T
595 : //----------------------------------------------------------------------
596 :
597 8 : GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
598 :
599 8 : uint64_t T_nvals = TP (n) ;
600 8 : uint64_t Tp_len = n+1 ;
601 8 : uint64_t Tj_len = T_nvals ;
602 :
603 8 : T_Container->nrows = n ;
604 8 : T_Container->ncols = n ;
605 8 : T_Container->nrows_nonempty = -1 ;
606 8 : T_Container->ncols_nonempty = -1 ;
607 8 : T_Container->nvals = T_nvals ;
608 8 : T_Container->format = GxB_SPARSE ;
609 8 : T_Container->orientation = GrB_ROWMAJOR ;
610 8 : T_Container->iso = true ;
611 8 : T_Container->jumbled = A_jumbled ;
612 :
613 : // load Tp, Tj, and Tx into the T_Container
614 8 : GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
615 : Tp_size, GrB_DEFAULT, NULL)) ;
616 8 : GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
617 : Tj_size, GrB_DEFAULT, NULL)) ;
618 8 : GRB_TRY (GxB_Vector_load (T_Container->x, &Tx, GrB_BOOL, 1,
619 : Tx_size, GrB_DEFAULT, NULL)) ;
620 :
621 : // load T from the T_Container
622 8 : GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
623 :
624 : // ] todo: the above will all be done as a single call to GxB_select.
625 :
626 : #ifdef TIMINGS
627 : toc2 = LAGraph_WallClockTime ( ) ;
628 : timings [4] = toc2 - tic2 ; // create T
629 : tic2 = toc2 ;
630 : #endif
631 :
632 : //----------------------------------------------------------------------
633 : // find the connected components of T
634 : //----------------------------------------------------------------------
635 :
636 8 : LG_TRY (fastsv (T, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
637 : Parent, Parent_Container, msg)) ;
638 :
639 : #ifdef TIMINGS
640 : toc2 = LAGraph_WallClockTime ( ) ;
641 : timings [5] = toc2 - tic2 ; // fastsv, in sampling
642 : tic2 = toc2 ;
643 : #endif
644 :
645 : //----------------------------------------------------------------------
646 : // unload the parent == Parent_Container->i vector into the Px array
647 : //----------------------------------------------------------------------
648 :
649 8 : int handling = 0 ;
650 8 : GrB_Type type = NULL ;
651 8 : GRB_TRY (GxB_Vector_unload (parent, &Px, &type, &n, &Px_size,
652 : &handling, NULL)) ;
653 8 : bool Px_is_32 = (type == GrB_UINT32 || type == GrB_INT32) ;
654 8 : uint32_t *Px32 = Px_is_32 ? Px : NULL ;
655 8 : uint64_t *Px64 = Px_is_32 ? NULL : Px ;
656 :
657 : // At this point, both the parent vector and Parent matrix are empty,
658 : // and the Px array holds the content of parent vector.
659 :
660 : //----------------------------------------------------------------------
661 : // use sampling to estimate the largest connected component in T
662 : //----------------------------------------------------------------------
663 :
664 : // The sampling below computes an estimate of the mode of the parent
665 : // vector, the contents of which are currently in the non-opaque Px
666 : // array.
667 :
668 : // hash table size must be a power of 2
669 : #define HASH_SIZE 1024
670 : // number of samples to insert into the hash table
671 : #define HASH_SAMPLES 864
672 : #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
673 : #define NEXT(x) ((x + 23) & (HASH_SIZE-1))
674 :
675 : // allocate and initialize the hash table
676 8 : LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
677 : sizeof (GrB_Index), msg)) ;
678 8 : LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
679 : sizeof (int), msg)) ;
680 8200 : for (int k = 0 ; k < HASH_SIZE ; k++)
681 : {
682 8192 : ht_key [k] = UINT64_MAX ;
683 : }
684 :
685 : // hash the samples and find the most frequent entry
686 8 : uint64_t seed = n ; // random number seed
687 8 : int64_t key = -1 ; // most frequent entry
688 8 : int max_count = 0 ; // frequency of most frequent entry
689 6920 : for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
690 : {
691 : // select an entry ii from PARENT at random
692 6912 : uint64_t i = LG_Random64 (&seed) % n ;
693 6912 : GrB_Index x = PARENT (i) ;
694 : // find x in the hash table
695 6912 : GrB_Index h = HASH (x) ;
696 7196 : while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
697 : // add x to the hash table
698 6912 : ht_key [h] = x ;
699 6912 : ht_count [h]++ ;
700 : // keep track of the most frequent value
701 6912 : if (ht_count [h] > max_count)
702 : {
703 3740 : key = ht_key [h] ;
704 3740 : max_count = ht_count [h] ;
705 : }
706 : }
707 :
708 : #ifdef TIMINGS
709 : toc2 = LAGraph_WallClockTime ( ) ;
710 : timings [6] = toc2 - tic2 ; // hash
711 : tic2 = toc2 ;
712 : #endif
713 :
714 : //----------------------------------------------------------------------
715 : // compact the largest connected component in A
716 : //----------------------------------------------------------------------
717 :
718 : // Construct a new matrix T from the input matrix A (the matrix A is
719 : // not changed). The key node is the representative of the (estimated)
720 : // largest component. T is constructed as a copy of A, except:
721 : // (1) all edges A(i,:) for nodes i in the key component deleted, and
722 : // (2) for nodes i not in the key component, A(i,j) is deleted if
723 : // j is in the key component.
724 : // (3) If A(i,:) has any deletions from (2), T(i,key) is added to T.
725 :
726 : // [ todo: replace this with GxB_extract with GrB_Vector index arrays.
727 : // See https://github.com/GraphBLAS/graphblas-api-c/issues/67 .
728 : // This method will not insert the new entries T(i,key) for rows i that have
729 : // had entries deleted. That can be done with GrB_assign, with an n-by-1 mask
730 : // M computed from the before-and-after row degrees of A and T:
731 : // M = (parent != key) && (out_degree(T) < out_degree(A))
732 : // J [0] = key.
733 : // GxB_Matrix_subassign_BOOL (T, M, NULL, true, GrB_ALL, n, J, 1, NULL)
734 : // or with
735 : // GrB_Col_assign (T, M, NULL, t, GrB_ALL, j, NULL) with an all-true
736 : // vector t.
737 :
738 : // unload T from the T_Container; its contents are revised below
739 8 : GRB_TRY (GxB_unload_Matrix_into_Container (T, T_Container, NULL)) ;
740 :
741 : // unload Tp and Tj from the T_Container
742 : int ignore ;
743 8 : GRB_TRY (GxB_Vector_unload (T_Container->p, &Tp, &Tp_type, &Tp_len,
744 : &Tp_size, &ignore, NULL)) ;
745 8 : GRB_TRY (GxB_Vector_unload (T_Container->i, &Tj, &Tj_type, &Tj_len,
746 : &Tj_size, &ignore, NULL)) ;
747 :
748 : // these are likely to be unchanged since the last load of T
749 8 : Tp_is_32 = (Tp_type == GrB_UINT32 || Tp_type == GrB_INT32) ;
750 8 : Tj_is_32 = (Tj_type == GrB_UINT32 || Tj_type == GrB_INT32) ;
751 8 : Tp32 = Tp_is_32 ? Tp : NULL ;
752 8 : Tp64 = Tp_is_32 ? NULL : Tp ;
753 8 : Tj32 = Tj_is_32 ? Tj : NULL ;
754 8 : Tj64 = Tj_is_32 ? NULL : Tj ;
755 :
756 : #pragma omp parallel for num_threads(nthreads) schedule(static)
757 16 : for (tid = 0 ; tid < nthreads ; tid++)
758 : {
759 8 : uint64_t ktid = range [tid] ;
760 8 : GrB_Index p = AP (ktid) ;
761 : // thread tid scans A (range [tid]:range [tid+1]-1,:),
762 : // and constructs T(i,:) for all rows in this range.
763 19512 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
764 : {
765 19504 : int64_t pi = PARENT (i) ;
766 19504 : int64_t pstart = p ;
767 19504 : SET_TP (i, p) ; // Tp [i] = p ; start the construction of T(i,:)
768 : // T(i,:) is empty if pi == key
769 19504 : if (pi != key)
770 : {
771 : // scan A(i,:)
772 95540 : for (GrB_Index pS = AP (i) ; pS < AP (i+1) ; pS++)
773 : {
774 : // get A(i,j)
775 84940 : int64_t j = AJ (pS) ;
776 84940 : if (PARENT (j) != key)
777 : {
778 : // add the entry T(i,j) to T, but skip it if
779 : // PARENT (j) is equal to key
780 84920 : SET_TJ (p, j) // Tj [p] = j ;
781 84920 : p++ ;
782 : }
783 : }
784 : // Add the entry T(i,key) if there is room for it in T(i,:);
785 : // if and only if node i is adjacent to a node j in the
786 : // largest component. The only way there can be space if
787 : // at least one T(i,j) appears with PARENT (j) equal to the
788 : // key (that is, node j is in the largest connected
789 : // component, key == PARENT (j). One of these j's can then
790 : // be replaced with the key. If node i is not adjacent to
791 : // any node in the largest component, then there is no
792 : // space in T(i,:) and no new edge to the largest component
793 : // is added.
794 10600 : if (p - pstart < AP (i+1) - AP (i))
795 : {
796 4 : SET_TJ (p, key) ; // Tj [p] = key ;
797 4 : p++ ;
798 : }
799 : }
800 : }
801 : // count the number of entries inserted into T by this thread
802 8 : count [tid] = p - TP (ktid) ;
803 : }
804 :
805 : // Compact empty space out of Tj not filled in from the above phase.
806 8 : nvals = 0 ;
807 16 : for (tid = 0 ; tid < nthreads ; tid++)
808 : {
809 8 : int64_t ktid = range [tid] ;
810 8 : memmove (Tj32 ? ((void *) (Tj32 + nvals)) : ((void *) (Tj64 + nvals)),
811 8 : Tj32 ? ((void *) (Tj32 + TP (ktid))) : ((void *) (Tj64 + TP (ktid))),
812 8 : tjsize * count [tid]) ;
813 :
814 : #if 0
815 : if (Tj32)
816 : {
817 : memmove (Tj32 + nvals, Tj32 + TP (ktid),
818 : sizeof (uint32_t) * count [tid]) ;
819 : }
820 : else
821 : {
822 : memmove (Tj64 + nvals, Tj64 + TP (ktid),
823 : sizeof (uint64_t) * count [tid]) ;
824 : }
825 : #endif
826 :
827 8 : nvals += count [tid] ;
828 8 : count [tid] = nvals - count [tid] ;
829 : }
830 :
831 : // Compact empty space out of Tp
832 : #pragma omp parallel for num_threads(nthreads) schedule(static)
833 16 : for (tid = 0 ; tid < nthreads ; tid++)
834 : {
835 8 : int64_t ktid = range [tid] ;
836 8 : GrB_Index p = TP (ktid) ;
837 19512 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
838 : {
839 19504 : int64_t tp = TP (i) ;
840 19504 : tp -= p - count [tid] ;
841 19504 : SET_TP (i, tp) ; // Tp [i] = tp ;
842 : }
843 : }
844 :
845 : // finalize T
846 8 : SET_TP (n, nvals) ; // Tp [n] = nvals ;
847 8 : Tj_len = nvals ;
848 :
849 : // load T_Container->p,i from the C arrays, Tp and Tj, for final phase
850 8 : GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
851 : Tp_size, GrB_DEFAULT, NULL)) ;
852 8 : GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
853 : Tj_size, GrB_DEFAULT, NULL)) ;
854 :
855 8 : T_Container->nrows_nonempty = -1 ;
856 8 : T_Container->ncols_nonempty = -1 ;
857 8 : T_Container->jumbled = true ;
858 8 : T_Container->nvals = nvals ;
859 :
860 : // load T in sparse CSR format from the T_Container
861 8 : GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
862 :
863 : // load A_Container->p,i from the C arrays, Ap and Aj
864 : // This is the original G->A, and it is unchanged.
865 8 : GRB_TRY (GxB_Vector_load (A_Container->p, &Ap, Ap_type, Ap_len,
866 : Ap_size, Ap_handling, NULL)) ;
867 8 : GRB_TRY (GxB_Vector_load (A_Container->i, &Aj, Aj_type, Aj_len,
868 : Aj_size, Aj_handling, NULL)) ;
869 :
870 : // load A in sparse CSR format from the A_Container
871 8 : GRB_TRY (GxB_load_Matrix_from_Container (A, A_Container, NULL)) ;
872 :
873 : //----------------------------------------------------------------------
874 : // load the Px array back into the parent == Parent_Container->i vector
875 : //----------------------------------------------------------------------
876 :
877 8 : GRB_TRY (GxB_Vector_load (parent, &Px, type, n, Px_size,
878 : GrB_DEFAULT, NULL)) ;
879 :
880 : // ]. The unload/load of A into Ap, Aj, Ax will not be needed, and G->A
881 : // will become truly a read-only matrix.
882 :
883 : // final phase uses the pruned matrix T
884 8 : A = T ;
885 :
886 : #ifdef TIMINGS
887 : toc2 = LAGraph_WallClockTime ( ) ;
888 : timings [7] = toc2 - tic2 ; // prune
889 : tic2 = toc2 ;
890 : #endif
891 : }
892 :
893 : #ifdef TIMINGS
894 : toc = LAGraph_WallClockTime ( ) ;
895 : timings [1] = toc - tic ; // total sampling time
896 : tic = toc ;
897 : #endif
898 :
899 : //--------------------------------------------------------------------------
900 : // check for quick return
901 : //--------------------------------------------------------------------------
902 :
903 : // The sample phase may have already found that G->A has a single component,
904 : // in which case the matrix A is now empty.
905 :
906 55 : if (nvals == 0)
907 : {
908 4 : (*component) = parent ;
909 4 : Parent_Container->i = NULL ; // do not free the parent vector
910 4 : LG_FREE_WORK ;
911 : #ifdef TIMINGS
912 : print_timings (timings) ;
913 : LG_SET_BURBLE (false) ;
914 : #endif
915 4 : return (GrB_SUCCESS) ;
916 : }
917 :
918 : //--------------------------------------------------------------------------
919 : // final phase
920 : //--------------------------------------------------------------------------
921 :
922 51 : LG_TRY (fastsv (A, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
923 : Parent, Parent_Container, msg)) ;
924 :
925 : //--------------------------------------------------------------------------
926 : // free workspace and return result
927 : //--------------------------------------------------------------------------
928 :
929 45 : (*component) = parent ;
930 45 : Parent_Container->i = NULL ; // do not free the parent vector
931 45 : LG_FREE_WORK ;
932 : #ifdef TIMINGS
933 : toc = LAGraph_WallClockTime ( ) ;
934 : timings [2] = toc - tic ; // final phase
935 : print_timings (timings) ;
936 : LG_SET_BURBLE (false) ;
937 : #endif
938 45 : return (GrB_SUCCESS) ;
939 : #else
940 : LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
941 : #endif
942 : }
|