Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LG_CC_FastSV7: connected components
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Yongzhe Zhang, modified by Timothy A. Davis, Texas A&M
15 : // University
16 :
17 : //------------------------------------------------------------------------------
18 :
19 : // This is an Advanced algorithm (G->is_symmetric_structure must be known),
20 : // but it is not user-callable (see LAGr_ConnectedComponents instead).
21 :
22 : // Code is based on the algorithm described in the following paper:
23 : // Zhang, Azad, Hu. FastSV: A Distributed-Memory Connected Component
24 : // Algorithm with Fast Convergence (SIAM PP20)
25 :
26 : // A subsequent update to the algorithm is here (which might not be reflected
27 : // in this code):
28 : // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding
29 : // connected components using linear algebra. J. Parallel Distributed Comput.
30 : // 144: 14-27 (2020).
31 :
32 : // Modified by Tim Davis, Texas A&M University: revised Reduce_assign to use
33 : // purely GrB* and GxB* methods and the matrix Parent. Added warmup phase.
34 : // Changed to use GxB load/unload. Converted to use the LAGraph_Graph object.
35 : // Exploiting iso status for the temporary matrices Parent and T.
36 :
37 : // Modified by Gabriel Gomez, Texas A&M University: moved Parent matrix trick
38 : // out to LAGraph_FastAssign.
39 :
40 : // The input graph G must be undirected, or directed and with an adjacency
41 : // matrix that has a symmetric structure. Self-edges (diagonal entries) are
42 : // OK, and are ignored. The values and type of A are ignored; just its
43 : // structure is accessed.
44 :
45 : // NOTE: This function must not be called by multiple user threads at the same
46 : // time on the same graph G, since it unloads G->A and loads it back when
47 : // done. G->A is unchanged when the function returns, but during execution
48 : // G->A is empty. This will be fixed once the todos are finished below, and
49 : // G->A will then become a truly read-only object (assuming GrB_wait (G->A)
50 : // has been done first).
51 :
52 : // #define TIMINGS
53 :
54 : #define __STDC_WANT_LIB_EXT1__ 1
55 : #include <string.h>
56 :
57 : #define LG_FREE_ALL ;
58 : #include "LG_internal.h"
59 : #include "LAGraphX.h"
60 :
61 : static double timings [16] ;
62 :
63 : #if LG_SUITESPARSE_GRAPHBLAS_V10
64 :
65 : //==============================================================================
66 : // fastsv: find the components of a graph
67 : //==============================================================================
68 :
69 69 : static inline GrB_Info fastsv
70 : (
71 : GrB_Matrix A, // adjacency matrix, G->A or a subset of G->A
72 : GrB_Vector parent2, // workspace
73 : GrB_Vector mngp, // min neighbor grandparent
74 : GrB_Vector *gp, // grandparent
75 : GrB_Vector *gp_new, // new grandparent (swapped with gp)
76 : GrB_Vector t, // workspace
77 : GrB_BinaryOp eq, // GrB_EQ_(integer type)
78 : GrB_BinaryOp min, // GrB_MIN_(integer type)
79 : GrB_Semiring min_2nd, // GrB_MIN_SECOND_(integer type)
80 : GrB_Vector parent, // parent
81 : GrB_Vector ramp, // [0:n] used to speed up FastAssign
82 : char *msg
83 : )
84 : {
85 69 : bool done = false ;
86 : // #ifdef TIMINGS
87 : // int pass = 0 ;
88 : // #endif
89 :
90 : while (true)
91 69 : {
92 : // #ifdef TIMINGS
93 : // printf ("\n-------------------------------------------fastsv: %d\n",
94 : // ++pass) ;
95 : // #endif
96 :
97 : //----------------------------------------------------------------------
98 : // hooking & shortcutting
99 : //----------------------------------------------------------------------
100 :
101 : // mngp = min (mngp, A*gp) using the MIN_SECOND semiring
102 138 : GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
103 :
104 : //----------------------------------------------------------------------
105 : // parent2 = min (mngp, gp)
106 : //----------------------------------------------------------------------
107 :
108 : // The parent vector should not be allised into FastAssign, so the
109 : // accumulation is done in a workspace vector, parent2.
110 :
111 138 : GRB_TRY (GrB_eWiseAdd (parent2, NULL, NULL, min, mngp, *gp, NULL)) ;
112 :
113 : // LAGraph_FastAssign: This function computes the following, which
114 : // is done explicitly in the Reduce_assign function in LG_CC_Boruvka:
115 : //
116 : // for (j = 0 ; j < n ; j++)
117 : // {
118 : // uint64_t i = parent [j] ;
119 : // parent2 [i] = min (parent2 [i], mngp [j]) ;
120 : // }
121 : //
122 : // LAGraph_FastAssign does this by building a matrix.
123 : // (See LAGraph_FastAssign.c)
124 : // Giving it a full ramp vector speeds up the function
125 :
126 137 : LG_TRY (LAGraph_FastAssign_Semiring(
127 : parent2, NULL, min, parent, mngp, ramp, min_2nd, NULL, msg));
128 :
129 : //----------------------------------------------------------------------
130 : // parent = min (parent, parent2)
131 : //----------------------------------------------------------------------
132 :
133 101 : GRB_TRY (GrB_assign (parent, NULL, min, parent2, GrB_ALL, 0, NULL)) ;
134 :
135 : //----------------------------------------------------------------------
136 : // calculate grandparent: gp_new = parent (parent)
137 : //----------------------------------------------------------------------
138 :
139 101 : GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, parent, NULL)) ;
140 :
141 : //----------------------------------------------------------------------
142 : // terminate if gp and gp_new are the same
143 : //----------------------------------------------------------------------
144 97 : GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
145 96 : GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
146 96 : if (done) break ;
147 :
148 : // swap gp and gp_new
149 69 : GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
150 : }
151 27 : return (GrB_SUCCESS) ;
152 : }
153 :
154 : //==============================================================================
155 : // LG_CC_FastSV7
156 : //==============================================================================
157 :
158 : // The output of LG_CC_FastSV* is a vector component, where component(i)=r if
159 : // node i is in the connected compononent whose representative is node r. If r
160 : // is a representative, then component(r)=r. The number of connected
161 : // components in the graph G is the number of representatives.
162 :
163 : #undef LG_FREE_WORK
164 : #define LG_FREE_WORK \
165 : { \
166 : LAGraph_Free ((void **) &Tp, NULL) ; \
167 : LAGraph_Free ((void **) &Tj, NULL) ; \
168 : LAGraph_Free ((void **) &Tx, NULL) ; \
169 : LAGraph_Free ((void **) &ht_key, NULL) ; \
170 : LAGraph_Free ((void **) &ht_count, NULL) ; \
171 : LAGraph_Free ((void **) &count, NULL) ; \
172 : LAGraph_Free ((void **) &range, NULL) ; \
173 : LAGraph_Free ((void **) &Px, NULL) ; \
174 : GrB_free (&T) ; \
175 : GrB_free (&t) ; \
176 : GrB_free (&gp) ; \
177 : GrB_free (&mngp) ; \
178 : GrB_free (&gp_new) ; \
179 : GrB_free (&parent2) ; \
180 : GrB_free (&ramp_v) ; \
181 : GrB_free (&A_Container) ; \
182 : GrB_free (&T_Container) ; \
183 : }
184 :
185 : #undef LG_FREE_ALL
186 : #define LG_FREE_ALL \
187 : { \
188 : GrB_free (&parent) ; \
189 : LG_FREE_WORK ; \
190 : }
191 :
192 : #endif
193 :
194 : // get/set macros for 32/64 bit arrays:
195 : #define AP(k) (Ap32 ? Ap32 [k] : Ap64 [k])
196 : #define AJ(k) (Aj32 ? Aj32 [k] : Aj64 [k])
197 : #define PARENT(i) (Px32 ? Px32 [i] : Px64 [i])
198 : #define TP(k) (Tp32 ? Tp32 [k] : Tp64 [k])
199 : #define TJ(k) (Tj32 ? Tj32 [k] : Tj64 [k])
200 : #define SET_TP(k,p) { if (Tp32) { Tp32 [k] = p ; } else { Tp64 [k] = p ; }}
201 : #define SET_TJ(k,i) { if (Tj32) { Tj32 [k] = i ; } else { Tj64 [k] = i ; }}
202 :
203 : #ifdef TIMINGS
204 : static void print_timings (double timings [16])
205 : {
206 : double total = timings [0] + timings [1] + timings [2] ;
207 : printf ("SV7 %12.6f (%4.1f%%) init\n", timings [0], 100. * timings [0] / total) ;
208 : printf ("SV7 %12.6f (%4.1f%%) total sampling:\n", timings [1], 100. * timings [1] / total) ;
209 : printf ("SV7 %12.6f (%4.1f%%) setup T\n", timings [3], 100. * timings [3] / total) ;
210 : printf ("SV7 %12.6f (%4.1f%%) create T\n", timings [4], 100. * timings [4] / total) ;
211 : printf ("SV7 %12.6f (%4.1f%%) fastsv sample\n", timings [5], 100 * timings [5] / total) ;
212 : printf ("SV7 %12.6f (%4.1f%%) hash\n", timings [6], 100. * timings [6] / total) ;
213 : printf ("SV7 %12.6f (%4.1f%%) prune\n", timings [7], 100. * timings [7] / total) ;
214 : printf ("SV7 %12.6f (%4.1f%%) total final\n", timings [2], 100. * timings [2] / total) ;
215 : }
216 : #endif
217 :
218 103 : int LG_CC_FastSV7_FA // SuiteSparse:GraphBLAS method, with GraphBLAS v10
219 : (
220 : // output:
221 : GrB_Vector *component, // component(i)=r if node is in the component r
222 : // input:
223 : LAGraph_Graph G, // input graph (modified then restored)
224 : char *msg
225 : )
226 : {
227 :
228 : #if LG_SUITESPARSE_GRAPHBLAS_V10
229 :
230 : //--------------------------------------------------------------------------
231 : // check inputs
232 : //--------------------------------------------------------------------------
233 :
234 103 : LG_CLEAR_MSG ;
235 :
236 : #ifdef TIMINGS
237 : double timings [16] ;
238 : for (int kk = 0 ; kk < 16 ; kk++) timings [kk] = 0 ;
239 : double tic = LAGraph_WallClockTime ( ) ;
240 : LG_SET_BURBLE (false) ;
241 : #endif
242 :
243 103 : int64_t *range = NULL ;
244 103 : void *Px = NULL ;
245 103 : uint64_t Px_size = 0 ;
246 103 : GrB_Index n, nvals, *ht_key = NULL, *count = NULL ;
247 103 : void *Tp = NULL, *Tj = NULL ;
248 103 : GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
249 103 : parent2 = NULL, ramp_v = NULL ;
250 103 : GrB_Matrix T = NULL ;
251 103 : void *Tx = NULL ;
252 103 : int *ht_count = NULL ;
253 103 : GxB_Container A_Container = NULL, T_Container = NULL ;
254 :
255 103 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
256 102 : LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
257 102 : (*component) = NULL ;
258 :
259 102 : LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
260 : (G->kind == LAGraph_ADJACENCY_DIRECTED &&
261 : G->is_symmetric_structure == LAGraph_TRUE)),
262 : LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
263 : "G->A must be known to be symmetric") ;
264 :
265 : //--------------------------------------------------------------------------
266 : // initializations
267 : //--------------------------------------------------------------------------
268 :
269 101 : GrB_Matrix A = G->A ;
270 101 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
271 101 : GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
272 :
273 : // determine the integer type, operators, and semirings to use
274 : GrB_Type Uint, Int ;
275 : GrB_IndexUnaryOp ramp ;
276 : GrB_Semiring min_2nd, min_2ndi ;
277 : GrB_BinaryOp min, eq, imin ;
278 : #ifdef COVERAGE
279 : // Just for test coverage, use 64-bit ints for n > 100. Do not use this
280 : // rule in production!
281 : #define NBIG 100
282 : #else
283 : // For production use: 64-bit integers if n > 2^31
284 : #define NBIG INT32_MAX
285 : #endif
286 101 : if (n > NBIG)
287 : {
288 : // use 64-bit integers
289 8 : Uint = GrB_UINT64 ;
290 8 : Int = GrB_INT64 ;
291 8 : ramp = GrB_ROWINDEX_INT64 ;
292 8 : min = GrB_MIN_INT64 ;
293 8 : imin = GrB_MIN_INT64 ;
294 8 : eq = GrB_EQ_INT64 ;
295 8 : min_2nd = GrB_MIN_SECOND_SEMIRING_INT64 ;
296 8 : min_2ndi = GxB_MIN_SECONDI_INT64 ;
297 : }
298 : else
299 : {
300 : // use 32-bit integers
301 93 : Uint = GrB_UINT32 ;
302 93 : Int = GrB_INT32 ;
303 93 : ramp = GrB_ROWINDEX_INT32 ;
304 93 : min = GrB_MIN_INT32 ;
305 93 : imin = GrB_MIN_INT32 ;
306 93 : eq = GrB_EQ_INT32 ;
307 93 : min_2nd = GrB_MIN_SECOND_SEMIRING_INT32 ;
308 93 : min_2ndi = GxB_MIN_SECONDI_INT32 ;
309 : }
310 :
311 : // FASTSV_SAMPLES: number of samples to take from each row A(i,:).
312 : // Sampling is used if the average degree is > 8 and if n > 1024.
313 : #define FASTSV_SAMPLES 4
314 101 : bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
315 :
316 : //--------------------------------------------------------------------------
317 : // make ramp needed for FastAssign speedup
318 : //--------------------------------------------------------------------------
319 101 : GRB_TRY (GrB_Vector_new (&(ramp_v), Uint, n+1)) ;
320 99 : GRB_TRY (GrB_assign (ramp_v, NULL, NULL, 0, GrB_ALL, n+1,
321 : NULL)) ;
322 98 : GRB_TRY (GrB_apply (ramp_v, NULL, NULL, ramp, ramp_v, 0, NULL)) ;
323 : // [ todo: nthreads will not be needed once GxB_select with a GxB_RankUnaryOp
324 : // and a new GxB_extract are added to SuiteSparse:GraphBLAS.
325 : // determine # of threads to use
326 : int nthreads, nthreads_outer, nthreads_inner ;
327 96 : LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
328 96 : nthreads = nthreads_outer * nthreads_inner ;
329 96 : nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
330 96 : nthreads = LAGRAPH_MAX (nthreads, 1) ;
331 : // ]
332 :
333 96 : GRB_TRY (GxB_Container_new (&A_Container)) ;
334 90 : GRB_TRY (GxB_Container_new (&T_Container)) ;
335 :
336 : //--------------------------------------------------------------------------
337 : // warmup: parent = min (0:n-1, A*1) using the MIN_SECONDI semiring
338 : //--------------------------------------------------------------------------
339 :
340 : // parent (i) = min (i, j) for all entries A(i,j). This warmup phase takes only
341 : // O(n) time, because of how the MIN_SECONDI semiring is implemented in
342 : // SuiteSparse:GraphBLAS. A is held by row, and the first entry in A(i,:)
343 : // is the minimum index j, so only the first entry in A(i,:) needs to be
344 : // considered for each row i.
345 :
346 84 : GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
347 82 : GRB_TRY (GrB_Vector_new (&parent, Int, n)) ;
348 80 : GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
349 79 : GRB_TRY (GrB_assign (parent, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
350 78 : GRB_TRY (GrB_apply (parent, NULL, NULL, ramp, parent, 0, NULL)) ;
351 77 : GRB_TRY (GrB_mxv (parent, NULL, imin, min_2ndi, A, t, NULL)) ;
352 77 : GRB_TRY (GrB_free (&t)) ;
353 :
354 : // copy parent into gp and mngp.
355 77 : GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
356 75 : GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
357 :
358 : // allocate workspace vectors
359 73 : GRB_TRY (GrB_Vector_new (&gp_new, Int, n)) ;
360 71 : GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
361 69 : GRB_TRY (GrB_Vector_new (&parent2, Int, n)) ;
362 :
363 : #ifdef TIMINGS
364 : double toc = LAGraph_WallClockTime ( ) ;
365 : timings [0] = toc - tic ; // init time
366 : tic = toc ;
367 : #endif
368 :
369 : //--------------------------------------------------------------------------
370 : // sample phase
371 : //--------------------------------------------------------------------------
372 :
373 67 : if (sampling)
374 : {
375 :
376 : // [ todo: GxB_select, using a new operator: GxB_RankUnaryOp, will do all this,
377 : // with GxB_Matrix_select_RankOp_Scalar with operator GxB_LEASTRANK and a
378 : // GrB_Scalar input equal to FASTSV_SAMPLES. Built-in operators will be,
379 : // (where y is INT64):
380 : //
381 : // GxB_LEASTRANK (aij, i, j, k, d, y): select if aij has rank k <= y
382 : // GxB_NLEASTRANK: select if aij has rank k > y
383 : // GxB_GREATESTRANK (...) select if aij has rank k >= (d-y) where
384 : // d = # of entries in A(i,:).
385 : // GxB_NGREATESTRANK (...): select if aij has rank k < (d-y)
386 : // and perhaps other operators such as:
387 : // GxB_LEASTRELRANK (...): select aij if rank k <= y*d where y is double
388 : // GxB_GREATESTRELRANK (...): select aij rank k > y*d where y is double
389 : //
390 : // By default, the rank of aij is its relative position as the kth entry in its
391 : // row (from "left" to "right"). If a new GxB setting in the descriptor is
392 : // set, then k is the relative position of aij as the kth entry in its column.
393 : // The default would be that the rank is the position of aij in its row A(i,:).
394 :
395 : // Other:
396 : // give me 3 random items from the row (y = 3)
397 : // give me the 4 biggest *values* in each row (y = 4)
398 :
399 : //----------------------------------------------------------------------
400 : // unload A in CSR format
401 : //----------------------------------------------------------------------
402 :
403 : #ifdef TIMINGS
404 : double tic2 = LAGraph_WallClockTime ( ) ;
405 : #endif
406 :
407 4 : void *Ap = NULL, *Aj = NULL ;
408 : uint64_t Ap_size, Aj_size, Ap_len, Aj_len ;
409 : bool A_jumbled, A_iso ;
410 : int Ap_handling, Aj_handling ;
411 :
412 : // unload A in sparse CSR format into the A_Container
413 4 : GRB_TRY (GrB_set (A, GxB_SPARSE, GxB_SPARSITY_CONTROL)) ;
414 4 : GRB_TRY (GrB_set (A, GrB_ROWMAJOR, GrB_STORAGE_ORIENTATION_HINT)) ;
415 4 : GRB_TRY (GxB_unload_Matrix_into_Container (A, A_Container, NULL)) ;
416 4 : A_jumbled = A_Container->jumbled ;
417 4 : A_iso = A_Container->iso ;
418 :
419 : // unload A_Container->p,i into the C arrays, Ap and Aj
420 : GrB_Type Ap_type, Aj_type ;
421 4 : GRB_TRY (GxB_Vector_unload (A_Container->p, &Ap, &Ap_type, &Ap_len,
422 : &Ap_size, &Ap_handling, NULL)) ;
423 4 : GRB_TRY (GxB_Vector_unload (A_Container->i, &Aj, &Aj_type, &Aj_len,
424 : &Aj_size, &Aj_handling, NULL)) ;
425 :
426 4 : bool Ap_is_32 = (Ap_type == GrB_UINT32 || Ap_type == GrB_INT32) ;
427 4 : bool Aj_is_32 = (Aj_type == GrB_UINT32 || Aj_type == GrB_INT32) ;
428 4 : const uint32_t *Ap32 = Ap_is_32 ? Ap : NULL ;
429 4 : const uint64_t *Ap64 = Ap_is_32 ? NULL : Ap ;
430 4 : const uint32_t *Aj32 = Aj_is_32 ? Aj : NULL ;
431 4 : const uint64_t *Aj64 = Aj_is_32 ? NULL : Aj ;
432 :
433 : //----------------------------------------------------------------------
434 : // allocate workspace, including space to construct T
435 : //----------------------------------------------------------------------
436 :
437 4 : bool Tp_is_32 = (nvals < UINT32_MAX) ;
438 4 : bool Tj_is_32 = (n < INT32_MAX) ;
439 4 : GrB_Type Tp_type = Tp_is_32 ? GrB_UINT32 : GrB_UINT64 ;
440 4 : GrB_Type Tj_type = Tj_is_32 ? GrB_UINT32 : GrB_UINT64 ;
441 4 : size_t tpsize = Tp_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
442 4 : size_t tjsize = Tj_is_32 ? sizeof (uint32_t) : sizeof (uint64_t) ;
443 :
444 4 : GrB_Index Tp_size = (n+1) * tpsize ;
445 4 : GrB_Index Tj_size = nvals * tjsize ;
446 4 : GrB_Index Tx_size = sizeof (bool) ;
447 4 : LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, tpsize, msg)) ;
448 4 : LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, tjsize, msg)) ;
449 4 : LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
450 4 : LG_TRY (LAGraph_Malloc ((void **) &range, nthreads+1, sizeof (int64_t),
451 : msg)) ;
452 4 : LG_TRY (LAGraph_Calloc ((void **) &count, nthreads+1, sizeof (uint64_t),
453 : msg)) ;
454 :
455 4 : uint32_t *Tp32 = Tp_is_32 ? Tp : NULL ;
456 4 : uint64_t *Tp64 = Tp_is_32 ? NULL : Tp ;
457 4 : uint32_t *Tj32 = Tj_is_32 ? Tj : NULL ;
458 4 : uint64_t *Tj64 = Tj_is_32 ? NULL : Tj ;
459 :
460 : //----------------------------------------------------------------------
461 : // define parallel tasks to construct T
462 : //----------------------------------------------------------------------
463 :
464 : // thread tid works on rows range[tid]:range[tid+1]-1 of A and T
465 : int tid;
466 12 : for (tid = 0 ; tid <= nthreads ; tid++)
467 : {
468 8 : range [tid] = (n * tid + nthreads - 1) / nthreads ;
469 : }
470 :
471 : //----------------------------------------------------------------------
472 : // determine the number entries to be constructed in T for each thread
473 : //----------------------------------------------------------------------
474 :
475 : #pragma omp parallel for num_threads(nthreads) schedule(static)
476 8 : for (tid = 0 ; tid < nthreads ; tid++)
477 : {
478 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
479 : {
480 9752 : int64_t deg = AP (i + 1) - AP (i) ;
481 9752 : count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
482 : }
483 : }
484 :
485 : //----------------------------------------------------------------------
486 : // count = cumsum (count)
487 : //----------------------------------------------------------------------
488 :
489 8 : for (tid = 0 ; tid < nthreads ; tid++)
490 : {
491 4 : count [tid + 1] += count [tid] ;
492 : }
493 :
494 : #ifdef TIMINGS
495 : double toc2 = LAGraph_WallClockTime ( ) ;
496 : timings [3] = toc2 - tic2 ; // setup T
497 : tic2 = toc2 ;
498 : #endif
499 :
500 : //----------------------------------------------------------------------
501 : // construct T
502 : //----------------------------------------------------------------------
503 :
504 : // T (i,:) consists of the first FASTSV_SAMPLES of A (i,:).
505 :
506 : #pragma omp parallel for num_threads(nthreads) schedule(static)
507 8 : for (tid = 0 ; tid < nthreads ; tid++)
508 : {
509 4 : GrB_Index p = count [tid] ;
510 4 : int64_t ktid = range [tid] ;
511 4 : SET_TP (ktid, p) ; // Tp [ktid] = p ;
512 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
513 : {
514 : // construct T (i,:) from the first entries in A (i,:)
515 9752 : for (int64_t j = 0 ;
516 40390 : j < FASTSV_SAMPLES && AP (i) + j < AP (i + 1) ; j++)
517 : {
518 30638 : uint64_t pi = AP (i) + j ;
519 30638 : uint64_t j = AJ (pi) ;
520 30638 : SET_TJ (p, j) ; // Tj [p] = j ;
521 30638 : p++ ;
522 : }
523 9752 : SET_TP (i+1, p) ; // Tp [i + 1] = p ;
524 : }
525 : }
526 :
527 : //----------------------------------------------------------------------
528 : // import the result into the GrB_Matrix T
529 : //----------------------------------------------------------------------
530 :
531 4 : GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
532 :
533 4 : uint64_t T_nvals = TP (n) ;
534 4 : uint64_t Tp_len = n+1 ;
535 4 : uint64_t Tj_len = T_nvals ;
536 :
537 4 : T_Container->nrows = n ;
538 4 : T_Container->ncols = n ;
539 4 : T_Container->nrows_nonempty = -1 ;
540 4 : T_Container->ncols_nonempty = -1 ;
541 4 : T_Container->nvals = T_nvals ;
542 4 : T_Container->format = GxB_SPARSE ;
543 4 : T_Container->orientation = GrB_ROWMAJOR ;
544 4 : T_Container->iso = true ;
545 4 : T_Container->jumbled = A_jumbled ;
546 :
547 : // load Tp, Tj, and Tx into the T_Container
548 4 : GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
549 : Tp_size, GrB_DEFAULT, NULL)) ;
550 4 : GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
551 : Tj_size, GrB_DEFAULT, NULL)) ;
552 4 : GRB_TRY (GxB_Vector_load (T_Container->x, &Tx, GrB_BOOL, 1,
553 : Tx_size, GrB_DEFAULT, NULL)) ;
554 :
555 : // load T from the T_Container
556 4 : GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
557 :
558 : // ] todo: the above will all be done as a single call to GxB_select.
559 :
560 : #ifdef TIMINGS
561 : toc2 = LAGraph_WallClockTime ( ) ;
562 : timings [4] = toc2 - tic2 ; // create T
563 : tic2 = toc2 ;
564 : #endif
565 :
566 : //----------------------------------------------------------------------
567 : // find the connected components of T
568 : //----------------------------------------------------------------------
569 :
570 4 : LG_TRY (fastsv (T, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
571 : parent, ramp_v, msg)) ;
572 :
573 : #ifdef TIMINGS
574 : toc2 = LAGraph_WallClockTime ( ) ;
575 : timings [5] = toc2 - tic2 ; // fastsv, in sampling
576 : tic2 = toc2 ;
577 : #endif
578 :
579 : //----------------------------------------------------------------------
580 : // unload the parent i vector into the Px array
581 : //----------------------------------------------------------------------
582 :
583 4 : int handling = 0 ;
584 4 : GrB_Type type = NULL ;
585 4 : GRB_TRY (GxB_Vector_unload (parent, &Px, &type, &n, &Px_size,
586 : &handling, NULL)) ;
587 4 : bool Px_is_32 = (type == GrB_UINT32 || type == GrB_INT32) ;
588 4 : uint32_t *Px32 = Px_is_32 ? Px : NULL ;
589 4 : uint64_t *Px64 = Px_is_32 ? NULL : Px ;
590 :
591 : // At this point, the Px array holds the content of parent vector.
592 :
593 : //----------------------------------------------------------------------
594 : // use sampling to estimate the largest connected component in T
595 : //----------------------------------------------------------------------
596 :
597 : // The sampling below computes an estimate of the mode of the parent
598 : // vector, the contents of which are currently in the non-opaque Px
599 : // array.
600 :
601 : // hash table size must be a power of 2
602 : #define HASH_SIZE 1024
603 : // number of samples to insert into the hash table
604 : #define HASH_SAMPLES 864
605 : #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
606 : #define NEXT(x) ((x + 23) & (HASH_SIZE-1))
607 :
608 : // allocate and initialize the hash table
609 4 : LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
610 : sizeof (GrB_Index), msg)) ;
611 4 : LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
612 : sizeof (int), msg)) ;
613 4100 : for (int k = 0 ; k < HASH_SIZE ; k++)
614 : {
615 4096 : ht_key [k] = UINT64_MAX ;
616 : }
617 :
618 : // hash the samples and find the most frequent entry
619 4 : uint64_t seed = n ; // random number seed
620 4 : int64_t key = -1 ; // most frequent entry
621 4 : int max_count = 0 ; // frequency of most frequent entry
622 3460 : for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
623 : {
624 : // select an entry ii from PARENT at random
625 3456 : uint64_t i = LG_Random64 (&seed) % n ;
626 3456 : GrB_Index x = PARENT (i) ;
627 : // find x in the hash table
628 3456 : GrB_Index h = HASH (x) ;
629 3598 : while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
630 : // add x to the hash table
631 3456 : ht_key [h] = x ;
632 3456 : ht_count [h]++ ;
633 : // keep track of the most frequent value
634 3456 : if (ht_count [h] > max_count)
635 : {
636 1870 : key = ht_key [h] ;
637 1870 : max_count = ht_count [h] ;
638 : }
639 : }
640 :
641 : #ifdef TIMINGS
642 : toc2 = LAGraph_WallClockTime ( ) ;
643 : timings [6] = toc2 - tic2 ; // hash
644 : tic2 = toc2 ;
645 : #endif
646 :
647 : //----------------------------------------------------------------------
648 : // compact the largest connected component in A
649 : //----------------------------------------------------------------------
650 :
651 : // Construct a new matrix T from the input matrix A (the matrix A is
652 : // not changed). The key node is the representative of the (estimated)
653 : // largest component. T is constructed as a copy of A, except:
654 : // (1) all edges A(i,:) for nodes i in the key component deleted, and
655 : // (2) for nodes i not in the key component, A(i,j) is deleted if
656 : // j is in the key component.
657 : // (3) If A(i,:) has any deletions from (2), T(i,key) is added to T.
658 :
659 : // [ todo: replace this with GxB_extract with GrB_Vector index arrays.
660 : // See https://github.com/GraphBLAS/graphblas-api-c/issues/67 .
661 : // This method will not insert the new entries T(i,key) for rows i that have
662 : // had entries deleted. That can be done with GrB_assign, with an n-by-1 mask
663 : // M computed from the before-and-after row degrees of A and T:
664 : // M = (parent != key) && (out_degree(T) < out_degree(A))
665 : // J [0] = key.
666 : // GxB_Matrix_subassign_BOOL (T, M, NULL, true, GrB_ALL, n, J, 1, NULL)
667 : // or with
668 : // GrB_Col_assign (T, M, NULL, t, GrB_ALL, j, NULL) with an all-true
669 : // vector t.
670 :
671 : // unload T from the T_Container; its contents are revised below
672 4 : GRB_TRY (GxB_unload_Matrix_into_Container (T, T_Container, NULL)) ;
673 :
674 : // unload Tp and Tj from the T_Container
675 : int ignore ;
676 4 : GRB_TRY (GxB_Vector_unload (T_Container->p, &Tp, &Tp_type, &Tp_len,
677 : &Tp_size, &ignore, NULL)) ;
678 4 : GRB_TRY (GxB_Vector_unload (T_Container->i, &Tj, &Tj_type, &Tj_len,
679 : &Tj_size, &ignore, NULL)) ;
680 :
681 : // these are likely to be unchanged since the last load of T
682 4 : Tp_is_32 = (Tp_type == GrB_UINT32 || Tp_type == GrB_INT32) ;
683 4 : Tj_is_32 = (Tj_type == GrB_UINT32 || Tj_type == GrB_INT32) ;
684 4 : Tp32 = Tp_is_32 ? Tp : NULL ;
685 4 : Tp64 = Tp_is_32 ? NULL : Tp ;
686 4 : Tj32 = Tj_is_32 ? Tj : NULL ;
687 4 : Tj64 = Tj_is_32 ? NULL : Tj ;
688 :
689 : #pragma omp parallel for num_threads(nthreads) schedule(static)
690 8 : for (tid = 0 ; tid < nthreads ; tid++)
691 : {
692 4 : uint64_t ktid = range [tid] ;
693 4 : GrB_Index p = AP (ktid) ;
694 : // thread tid scans A (range [tid]:range [tid+1]-1,:),
695 : // and constructs T(i,:) for all rows in this range.
696 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
697 : {
698 9752 : int64_t pi = PARENT (i) ;
699 9752 : int64_t pstart = p ;
700 9752 : SET_TP (i, p) ; // Tp [i] = p ; start the construction of T(i,:)
701 : // T(i,:) is empty if pi == key
702 9752 : if (pi != key)
703 : {
704 : // scan A(i,:)
705 47770 : for (GrB_Index pS = AP (i) ; pS < AP (i+1) ; pS++)
706 : {
707 : // get A(i,j)
708 42470 : int64_t j = AJ (pS) ;
709 42470 : if (PARENT (j) != key)
710 : {
711 : // add the entry T(i,j) to T, but skip it if
712 : // PARENT (j) is equal to key
713 42460 : SET_TJ (p, j) // Tj [p] = j ;
714 42460 : p++ ;
715 : }
716 : }
717 : // Add the entry T(i,key) if there is room for it in T(i,:);
718 : // if and only if node i is adjacent to a node j in the
719 : // largest component. The only way there can be space if
720 : // at least one T(i,j) appears with PARENT (j) equal to the
721 : // key (that is, node j is in the largest connected
722 : // component, key == PARENT (j). One of these j's can then
723 : // be replaced with the key. If node i is not adjacent to
724 : // any node in the largest component, then there is no
725 : // space in T(i,:) and no new edge to the largest component
726 : // is added.
727 5300 : if (p - pstart < AP (i+1) - AP (i))
728 : {
729 2 : SET_TJ (p, key) ; // Tj [p] = key ;
730 2 : p++ ;
731 : }
732 : }
733 : }
734 : // count the number of entries inserted into T by this thread
735 4 : count [tid] = p - TP (ktid) ;
736 : }
737 :
738 : // Compact empty space out of Tj not filled in from the above phase.
739 4 : nvals = 0 ;
740 8 : for (tid = 0 ; tid < nthreads ; tid++)
741 : {
742 4 : int64_t ktid = range [tid] ;
743 4 : memmove (Tj32 ? ((void *) (Tj32 + nvals)) : ((void *) (Tj64 + nvals)),
744 4 : Tj32 ? ((void *) (Tj32 + TP (ktid))) : ((void *) (Tj64 + TP (ktid))),
745 4 : tjsize * count [tid]) ;
746 :
747 : #if 0
748 : if (Tj32)
749 : {
750 : memmove (Tj32 + nvals, Tj32 + TP (ktid),
751 : sizeof (uint32_t) * count [tid]) ;
752 : }
753 : else
754 : {
755 : memmove (Tj64 + nvals, Tj64 + TP (ktid),
756 : sizeof (uint64_t) * count [tid]) ;
757 : }
758 : #endif
759 :
760 4 : nvals += count [tid] ;
761 4 : count [tid] = nvals - count [tid] ;
762 : }
763 :
764 : // Compact empty space out of Tp
765 : #pragma omp parallel for num_threads(nthreads) schedule(static)
766 8 : for (tid = 0 ; tid < nthreads ; tid++)
767 : {
768 4 : int64_t ktid = range [tid] ;
769 4 : GrB_Index p = TP (ktid) ;
770 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
771 : {
772 9752 : int64_t tp = TP (i) ;
773 9752 : tp -= p - count [tid] ;
774 9752 : SET_TP (i, tp) ; // Tp [i] = tp ;
775 : }
776 : }
777 :
778 : // finalize T
779 4 : SET_TP (n, nvals) ; // Tp [n] = nvals ;
780 4 : Tj_len = nvals ;
781 :
782 : // load T_Container->p,i from the C arrays, Tp and Tj, for final phase
783 4 : GRB_TRY (GxB_Vector_load (T_Container->p, &Tp, Tp_type, Tp_len,
784 : Tp_size, GrB_DEFAULT, NULL)) ;
785 4 : GRB_TRY (GxB_Vector_load (T_Container->i, &Tj, Tj_type, Tj_len,
786 : Tj_size, GrB_DEFAULT, NULL)) ;
787 :
788 4 : T_Container->nrows_nonempty = -1 ;
789 4 : T_Container->ncols_nonempty = -1 ;
790 4 : T_Container->jumbled = true ;
791 4 : T_Container->nvals = nvals ;
792 :
793 : // load T in sparse CSR format from the T_Container
794 4 : GRB_TRY (GxB_load_Matrix_from_Container (T, T_Container, NULL)) ;
795 :
796 : // load A_Container->p,i from the C arrays, Ap and Aj
797 : // This is the original G->A, and it is unchanged.
798 4 : GRB_TRY (GxB_Vector_load (A_Container->p, &Ap, Ap_type, Ap_len,
799 : Ap_size, Ap_handling, NULL)) ;
800 4 : GRB_TRY (GxB_Vector_load (A_Container->i, &Aj, Aj_type, Aj_len,
801 : Aj_size, Aj_handling, NULL)) ;
802 :
803 : // load A in sparse CSR format from the A_Container
804 4 : GRB_TRY (GxB_load_Matrix_from_Container (A, A_Container, NULL)) ;
805 :
806 : //----------------------------------------------------------------------
807 : // load the Px array back into the parent vector
808 : //----------------------------------------------------------------------
809 :
810 4 : GRB_TRY (GxB_Vector_load (parent, &Px, type, n, Px_size,
811 : GrB_DEFAULT, NULL)) ;
812 :
813 : // ]. The unload/load of A into Ap, Aj, Ax will not be needed, and G->A
814 : // will become truly a read-only matrix.
815 :
816 : // final phase uses the pruned matrix T
817 4 : A = T ;
818 :
819 : #ifdef TIMINGS
820 : toc2 = LAGraph_WallClockTime ( ) ;
821 : timings [7] = toc2 - tic2 ; // prune
822 : tic2 = toc2 ;
823 : #endif
824 : }
825 :
826 : #ifdef TIMINGS
827 : toc = LAGraph_WallClockTime ( ) ;
828 : timings [1] = toc - tic ; // total sampling time
829 : tic = toc ;
830 : #endif
831 :
832 : //--------------------------------------------------------------------------
833 : // check for quick return
834 : //--------------------------------------------------------------------------
835 :
836 : // The sample phase may have already found that G->A has a single component,
837 : // in which case the matrix A is now empty.
838 :
839 67 : if (nvals == 0)
840 : {
841 2 : (*component) = parent ;
842 2 : LG_FREE_WORK ;
843 : #ifdef TIMINGS
844 : print_timings (timings) ;
845 : LG_SET_BURBLE (false) ;
846 : #endif
847 2 : return (GrB_SUCCESS) ;
848 : }
849 :
850 : //--------------------------------------------------------------------------
851 : // final phase
852 : //--------------------------------------------------------------------------
853 :
854 65 : LG_TRY (fastsv (A, parent2, mngp, &gp, &gp_new, t, eq, min, min_2nd,
855 : parent, ramp_v, msg)) ;
856 :
857 : //--------------------------------------------------------------------------
858 : // free workspace and return result
859 : //--------------------------------------------------------------------------
860 :
861 23 : (*component) = parent ;
862 23 : parent = NULL ;
863 23 : LG_FREE_WORK ;
864 : #ifdef TIMINGS
865 : toc = LAGraph_WallClockTime ( ) ;
866 : timings [2] = toc - tic ; // final phase
867 : print_timings (timings) ;
868 : LG_SET_BURBLE (false) ;
869 : #endif
870 23 : return (GrB_SUCCESS) ;
871 : #else
872 : LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
873 : #endif
874 : }
|