Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LG_CC_FastSV6: connected components
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Yongzhe Zhang, modified by Timothy A. Davis, Texas A&M
15 : // University
16 :
17 : //------------------------------------------------------------------------------
18 :
19 : // This is an Advanced algorithm (G->is_symmetric_structure must be known),
20 : // but it is not user-callable (see LAGr_ConnectedComponents instead).
21 :
22 : // Code is based on the algorithm described in the following paper:
23 : // Zhang, Azad, Hu. FastSV: A Distributed-Memory Connected Component
24 : // Algorithm with Fast Convergence (SIAM PP20)
25 :
26 : // A subsequent update to the algorithm is here (which might not be reflected
27 : // in this code):
28 : // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding
29 : // connected components using linear algebra. J. Parallel Distributed Comput.
30 : // 144: 14-27 (2020).
31 :
32 : // Modified by Tim Davis, Texas A&M University: revised Reduce_assign to use
33 : // purely GrB* and GxB* methods and the matrix C. Added warmup phase. Changed
34 : // to use GxB pack/unpack instead of GxB import/export. Converted to use the
35 : // LAGraph_Graph object. Exploiting iso status for the temporary matrices
36 : // C and T.
37 :
38 : // The input graph G must be undirected, or directed and with an adjacency
39 : // matrix that has a symmetric structure. Self-edges (diagonal entries) are
40 : // OK, and are ignored. The values and type of A are ignored; just its
41 : // structure is accessed.
42 :
43 : // NOTE: This function must not be called by multiple user threads at the same
44 : // time on the same graph G, since it unpacks G->A and then packs it back when
45 : // done. G->A is unchanged when the function returns, but during execution
46 : // G->A is empty. This will be fixed once the todos are finished below, and
47 : // G->A will then become a truly read-only object (assuming GrB_wait (G->A)
48 : // has been done first).
49 :
50 : #define __STDC_WANT_LIB_EXT1__ 1
51 : #include <string.h>
52 :
53 : #define LG_FREE_ALL ;
54 : #include "LG_internal.h"
55 :
56 : #if LAGRAPH_SUITESPARSE
57 :
58 : //==============================================================================
59 : // fastsv: find the components of a graph
60 : //==============================================================================
61 :
62 36 : static inline GrB_Info fastsv
63 : (
64 : GrB_Matrix A, // adjacency matrix, G->A or a subset of G->A
65 : GrB_Vector parent, // parent vector
66 : GrB_Vector mngp, // min neighbor grandparent
67 : GrB_Vector *gp, // grandparent
68 : GrB_Vector *gp_new, // new grandparent (swapped with gp)
69 : GrB_Vector t, // workspace
70 : GrB_BinaryOp eq, // GrB_EQ_(integer type)
71 : GrB_BinaryOp min, // GrB_MIN_(integer type)
72 : GrB_Semiring min_2nd, // GrB_MIN_SECOND_(integer type)
73 : GrB_Matrix C, // C(i,j) present if i = Px (j)
74 : GrB_Index **Cp, // 0:n, size n+1
75 : GrB_Index **Px, // Px: non-opaque copy of parent vector, size n
76 : void **Cx, // size 1, contents not accessed
77 : char *msg
78 : )
79 : {
80 : GrB_Index n ;
81 36 : GRB_TRY (GrB_Vector_size (&n, parent)) ;
82 36 : GrB_Index Cp_size = (n+1) * sizeof (GrB_Index) ;
83 36 : GrB_Index Ci_size = n * sizeof (GrB_Index) ;
84 36 : GrB_Index Cx_size = sizeof (bool) ;
85 36 : bool iso = true, jumbled = false, done = false ;
86 :
87 : while (true)
88 53 : {
89 :
90 : //----------------------------------------------------------------------
91 : // hooking & shortcutting
92 : //----------------------------------------------------------------------
93 :
94 : // mngp = min (mngp, A*gp) using the MIN_SECOND semiring
95 89 : GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
96 :
97 : //----------------------------------------------------------------------
98 : // parent = min (parent, C*mngp) where C(i,j) is present if i=Px(j)
99 : //----------------------------------------------------------------------
100 :
101 : // Reduce_assign: The Px array of size n is the non-opaque copy of the
102 : // parent vector, where i = Px [j] if the parent of node j is node i.
103 : // It can thus have duplicates. The vectors parent and mngp are full
104 : // (all entries present). This function computes the following, which
105 : // is done explicitly in the Reduce_assign function in LG_CC_Boruvka:
106 : //
107 : // for (j = 0 ; j < n ; j++)
108 : // {
109 : // uint64_t i = Px [j] ;
110 : // parent [i] = min (parent [i], mngp [j]) ;
111 : // }
112 : //
113 : // If C(i,j) is present where i == Px [j], then this can be written as:
114 : //
115 : // parent = min (parent, C*mngp)
116 : //
117 : // when using the min_2nd semiring. This can be done efficiently
118 : // because C can be constructed in O(1) time and O(1) additional space
119 : // (not counting the prior Cp, Px, and Cx arrays), when using the
120 : // SuiteSparse pack/unpack move constructors. The min_2nd semiring
121 : // ignores the values of C and operates only on the structure, so its
122 : // values are not relevant. Cx is thus chosen as a GrB_BOOL array of
123 : // size 1 where Cx [0] = false, so the all entries present in C are
124 : // equal to false.
125 :
126 : // pack Cp, Px, and Cx into a matrix C with C(i,j) present if Px(j) == i
127 89 : GRB_TRY (GxB_Matrix_pack_CSC (C, Cp, /* Px is Ci: */ Px, Cx,
128 : Cp_size, Ci_size, Cx_size, iso, jumbled, NULL)) ;
129 :
130 : // parent = min (parent, C*mngp) using the MIN_SECOND semiring
131 89 : GRB_TRY (GrB_mxv (parent, NULL, min, min_2nd, C, mngp, NULL)) ;
132 :
133 : // unpack the contents of C, to make Px available to this method again.
134 89 : GRB_TRY (GxB_Matrix_unpack_CSC (C, Cp, Px, Cx,
135 : &Cp_size, &Ci_size, &Cx_size, &iso, &jumbled, NULL)) ;
136 :
137 : //----------------------------------------------------------------------
138 : // parent = min (parent, mngp, gp)
139 : //----------------------------------------------------------------------
140 :
141 85 : GRB_TRY (GrB_eWiseAdd (parent, NULL, min, min, mngp, *gp, NULL)) ;
142 :
143 : //----------------------------------------------------------------------
144 : // calculate grandparent: gp_new = parent (parent), and extract Px
145 : //----------------------------------------------------------------------
146 :
147 : // if parent is uint32, GraphBLAS typecasts to uint64 for Px.
148 85 : GRB_TRY (GrB_Vector_extractTuples (NULL, *Px, &n, parent)) ;
149 85 : GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, *Px, n, NULL)) ;
150 :
151 : //----------------------------------------------------------------------
152 : // terminate if gp and gp_new are the same
153 : //----------------------------------------------------------------------
154 :
155 81 : GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
156 80 : GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
157 80 : if (done) break ;
158 :
159 : // swap gp and gp_new
160 53 : GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
161 : }
162 27 : return (GrB_SUCCESS) ;
163 : }
164 :
165 : //==============================================================================
166 : // LG_CC_FastSV6
167 : //==============================================================================
168 :
169 : // The output of LG_CC_FastSV* is a vector component, where component(i)=r if
170 : // node i is in the connected compononent whose representative is node r. If r
171 : // is a representative, then component(r)=r. The number of connected
172 : // components in the graph G is the number of representatives.
173 :
174 : #undef LG_FREE_WORK
175 : #define LG_FREE_WORK \
176 : { \
177 : LAGraph_Free ((void **) &Tp, NULL) ; \
178 : LAGraph_Free ((void **) &Tj, NULL) ; \
179 : LAGraph_Free ((void **) &Tx, NULL) ; \
180 : LAGraph_Free ((void **) &Cp, NULL) ; \
181 : LAGraph_Free ((void **) &Px, NULL) ; \
182 : LAGraph_Free ((void **) &Cx, NULL) ; \
183 : LAGraph_Free ((void **) &ht_key, NULL) ; \
184 : LAGraph_Free ((void **) &ht_count, NULL) ; \
185 : LAGraph_Free ((void **) &count, NULL) ; \
186 : LAGraph_Free ((void **) &range, NULL) ; \
187 : GrB_free (&C) ; \
188 : GrB_free (&T) ; \
189 : GrB_free (&t) ; \
190 : GrB_free (&y) ; \
191 : GrB_free (&gp) ; \
192 : GrB_free (&mngp) ; \
193 : GrB_free (&gp_new) ; \
194 : }
195 :
196 : #undef LG_FREE_ALL
197 : #define LG_FREE_ALL \
198 : { \
199 : LG_FREE_WORK ; \
200 : GrB_free (&parent) ; \
201 : }
202 :
203 : #endif
204 :
205 64 : int LG_CC_FastSV6 // SuiteSparse:GraphBLAS method, with GxB extensions
206 : (
207 : // output:
208 : GrB_Vector *component, // component(i)=r if node is in the component r
209 : // input:
210 : LAGraph_Graph G, // input graph (modified then restored)
211 : char *msg
212 : )
213 : {
214 :
215 : #if !LAGRAPH_SUITESPARSE
216 : LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
217 : #else
218 :
219 : //--------------------------------------------------------------------------
220 : // check inputs
221 : //--------------------------------------------------------------------------
222 :
223 64 : LG_CLEAR_MSG ;
224 :
225 64 : int64_t *range = NULL ;
226 64 : GrB_Index n, nvals, Cp_size = 0, *ht_key = NULL, *Px = NULL, *Cp = NULL,
227 64 : *count = NULL, *Tp = NULL, *Tj = NULL ;
228 64 : GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
229 64 : y = NULL ;
230 64 : GrB_Matrix T = NULL, C = NULL ;
231 64 : void *Tx = NULL, *Cx = NULL ;
232 64 : int *ht_count = NULL ;
233 :
234 64 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
235 63 : LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
236 63 : (*component) = NULL ;
237 :
238 63 : LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
239 : (G->kind == LAGraph_ADJACENCY_DIRECTED &&
240 : G->is_symmetric_structure == LAGraph_TRUE)),
241 : LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
242 : "G->A must be known to be symmetric") ;
243 :
244 : //--------------------------------------------------------------------------
245 : // initializations
246 : //--------------------------------------------------------------------------
247 :
248 62 : GrB_Matrix A = G->A ;
249 62 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
250 62 : GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
251 :
252 : // determine the integer type, operators, and semirings to use
253 : GrB_Type Uint, Int ;
254 : GrB_IndexUnaryOp ramp ;
255 : GrB_Semiring min_2nd, min_2ndi ;
256 : GrB_BinaryOp min, eq, imin ;
257 : #ifdef COVERAGE
258 : // Just for test coverage, use 64-bit ints for n > 100. Do not use this
259 : // rule in production!
260 : #define NBIG 100
261 : #else
262 : // For production use: 64-bit integers if n > 2^31
263 : #define NBIG INT32_MAX
264 : #endif
265 62 : if (n > NBIG)
266 : {
267 : // use 64-bit integers throughout
268 8 : Uint = GrB_UINT64 ;
269 8 : Int = GrB_INT64 ;
270 8 : ramp = GrB_ROWINDEX_INT64 ;
271 8 : min = GrB_MIN_UINT64 ;
272 8 : imin = GrB_MIN_INT64 ;
273 8 : eq = GrB_EQ_UINT64 ;
274 8 : min_2nd = GrB_MIN_SECOND_SEMIRING_UINT64 ;
275 8 : min_2ndi = GxB_MIN_SECONDI_INT64 ;
276 : }
277 : else
278 : {
279 : // use 32-bit integers, except for Px and for constructing the matrix C
280 54 : Uint = GrB_UINT32 ;
281 54 : Int = GrB_INT32 ;
282 54 : ramp = GrB_ROWINDEX_INT32 ;
283 54 : min = GrB_MIN_UINT32 ;
284 54 : imin = GrB_MIN_INT32 ;
285 54 : eq = GrB_EQ_UINT32 ;
286 54 : min_2nd = GrB_MIN_SECOND_SEMIRING_UINT32 ;
287 54 : min_2ndi = GxB_MIN_SECONDI_INT32 ;
288 : }
289 :
290 : // FASTSV_SAMPLES: number of samples to take from each row A(i,:).
291 : // Sampling is used if the average degree is > 8 and if n > 1024.
292 : #define FASTSV_SAMPLES 4
293 62 : bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
294 :
295 : // [ todo: nthreads will not be needed once GxB_select with a GxB_RankUnaryOp
296 : // and a new GxB_extract are added to SuiteSparse:GraphBLAS.
297 : // determine # of threads to use
298 : int nthreads, nthreads_outer, nthreads_inner ;
299 62 : LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
300 62 : nthreads = nthreads_outer * nthreads_inner ;
301 62 : nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
302 62 : nthreads = LAGRAPH_MAX (nthreads, 1) ;
303 : // ]
304 :
305 62 : LG_TRY (LAGraph_Calloc ((void **) &Cx, 1, sizeof (bool), msg)) ;
306 61 : LG_TRY (LAGraph_Malloc ((void **) &Px, n, sizeof (GrB_Index), msg)) ;
307 :
308 : // create Cp = 0:n (always 64-bit) and the empty C matrix
309 60 : GRB_TRY (GrB_Matrix_new (&C, GrB_BOOL, n, n)) ;
310 57 : GRB_TRY (GrB_Vector_new (&t, GrB_INT64, n+1)) ;
311 55 : GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n+1, NULL)) ;
312 54 : GRB_TRY (GrB_apply (t, NULL, NULL, GrB_ROWINDEX_INT64, t, 0, NULL)) ;
313 53 : GRB_TRY (GxB_Vector_unpack_Full (t, (void **) &Cp, &Cp_size, NULL, NULL)) ;
314 52 : GRB_TRY (GrB_free (&t)) ;
315 :
316 : //--------------------------------------------------------------------------
317 : // warmup: parent = min (0:n-1, A*1) using the MIN_SECONDI semiring
318 : //--------------------------------------------------------------------------
319 :
320 : // y (i) = min (i, j) for all entries A(i,j). This warmup phase takes only
321 : // O(n) time, because of how the MIN_SECONDI semiring is implemented in
322 : // SuiteSparse:GraphBLAS. A is held by row, and the first entry in A(i,:)
323 : // is the minimum index j, so only the first entry in A(i,:) needs to be
324 : // considered for each row i.
325 :
326 52 : GRB_TRY (GrB_Vector_new (&t, Int, n)) ;
327 50 : GRB_TRY (GrB_Vector_new (&y, Int, n)) ;
328 48 : GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
329 47 : GRB_TRY (GrB_assign (y, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
330 46 : GRB_TRY (GrB_apply (y, NULL, NULL, ramp, y, 0, NULL)) ;
331 45 : GRB_TRY (GrB_mxv (y, NULL, imin, min_2ndi, A, t, NULL)) ;
332 45 : GRB_TRY (GrB_free (&t)) ;
333 :
334 : // The typecast from Int to Uint is required because the ROWINDEX operator
335 : // and MIN_SECONDI do not work in the UINT* domains, as built-in operators.
336 : // parent = (Uint) y
337 45 : GRB_TRY (GrB_Vector_new (&parent, Uint, n)) ;
338 43 : GRB_TRY (GrB_assign (parent, NULL, NULL, y, GrB_ALL, n, NULL)) ;
339 42 : GRB_TRY (GrB_free (&y)) ;
340 :
341 : // copy parent into gp, mngp, and Px. Px is a non-opaque 64-bit copy of the
342 : // parent GrB_Vector. The Px array is always of type GrB_Index since it
343 : // must be used as the input array for extractTuples and as Ci for pack_CSR.
344 : // If parent is uint32, GraphBLAS typecasts it to the uint64 Px array.
345 42 : GRB_TRY (GrB_Vector_extractTuples (NULL, Px, &n, parent)) ;
346 42 : GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
347 40 : GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
348 38 : GRB_TRY (GrB_Vector_new (&gp_new, Uint, n)) ;
349 36 : GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
350 :
351 : //--------------------------------------------------------------------------
352 : // sample phase
353 : //--------------------------------------------------------------------------
354 :
355 34 : if (sampling)
356 : {
357 :
358 : // [ todo: GxB_select, using a new operator: GxB_RankUnaryOp, will do all this,
359 : // with GxB_Matrix_select_RankOp_Scalar with operator GxB_LEASTRANK and a
360 : // GrB_Scalar input equal to FASTSV_SAMPLES. Built-in operators will be,
361 : // (where y is INT64):
362 : //
363 : // GxB_LEASTRANK (aij, i, j, k, d, y): select if aij has rank k <= y
364 : // GxB_NLEASTRANK: select if aij has rank k > y
365 : // GxB_GREATESTRANK (...) select if aij has rank k >= (d-y) where
366 : // d = # of entries in A(i,:).
367 : // GxB_NGREATESTRANK (...): select if aij has rank k < (d-y)
368 : // and perhaps other operators such as:
369 : // GxB_LEASTRELRANK (...): select aij if rank k <= y*d where y is double
370 : // GxB_GREATESTRELRANK (...): select aij rank k > y*d where y is double
371 : //
372 : // By default, the rank of aij is its relative position as the kth entry in its
373 : // row (from "left" to "right"). If a new GxB setting in the descriptor is
374 : // set, then k is the relative position of aij as the kth entry in its column.
375 : // The default would be that the rank is the position of aij in its row A(i,:).
376 :
377 : // Other:
378 : // give me 3 random items from the row (y = 3)
379 : // give me the 4 biggest *values* in each row (y = 4)
380 :
381 : // mxv:
382 : // C = A*diag(D)
383 :
384 : //----------------------------------------------------------------------
385 : // unpack A in CSR format
386 : //----------------------------------------------------------------------
387 :
388 : void *Ax ;
389 : GrB_Index *Ap, *Aj, Ap_size, Aj_size, Ax_size ;
390 : bool A_jumbled, A_iso ;
391 4 : GRB_TRY (GxB_Matrix_unpack_CSR (A, &Ap, &Aj, &Ax,
392 : &Ap_size, &Aj_size, &Ax_size, &A_iso, &A_jumbled, NULL)) ;
393 :
394 : //----------------------------------------------------------------------
395 : // allocate workspace, including space to construct T
396 : //----------------------------------------------------------------------
397 :
398 4 : GrB_Index Tp_size = (n+1) * sizeof (GrB_Index) ;
399 4 : GrB_Index Tj_size = nvals * sizeof (GrB_Index) ;
400 4 : GrB_Index Tx_size = sizeof (bool) ;
401 4 : LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, sizeof (GrB_Index), msg)) ;
402 4 : LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, sizeof (GrB_Index),
403 : msg)) ;
404 4 : LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
405 4 : LG_TRY (LAGraph_Malloc ((void **) &range, nthreads + 1,
406 : sizeof (int64_t), msg)) ;
407 4 : LG_TRY (LAGraph_Calloc ((void **) &count, nthreads + 1,
408 : sizeof (GrB_Index), msg)) ;
409 :
410 : //----------------------------------------------------------------------
411 : // define parallel tasks to construct T
412 : //----------------------------------------------------------------------
413 :
414 : // thread tid works on rows range[tid]:range[tid+1]-1 of A and T
415 : int tid;
416 12 : for (tid = 0 ; tid <= nthreads ; tid++)
417 : {
418 8 : range [tid] = (n * tid + nthreads - 1) / nthreads ;
419 : }
420 :
421 : //----------------------------------------------------------------------
422 : // determine the number entries to be constructed in T for each thread
423 : //----------------------------------------------------------------------
424 :
425 : #pragma omp parallel for num_threads(nthreads) schedule(static)
426 8 : for (tid = 0 ; tid < nthreads ; tid++)
427 : {
428 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
429 : {
430 9752 : int64_t deg = Ap [i + 1] - Ap [i] ;
431 9752 : count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
432 : }
433 : }
434 :
435 : //----------------------------------------------------------------------
436 : // count = cumsum (count)
437 : //----------------------------------------------------------------------
438 :
439 8 : for (tid = 0 ; tid < nthreads ; tid++)
440 : {
441 4 : count [tid + 1] += count [tid] ;
442 : }
443 :
444 : //----------------------------------------------------------------------
445 : // construct T
446 : //----------------------------------------------------------------------
447 :
448 : // T (i,:) consists of the first FASTSV_SAMPLES of A (i,:).
449 :
450 : #pragma omp parallel for num_threads(nthreads) schedule(static)
451 8 : for (tid = 0 ; tid < nthreads ; tid++)
452 : {
453 4 : GrB_Index p = count [tid] ;
454 4 : Tp [range [tid]] = p ;
455 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
456 : {
457 : // construct T (i,:) from the first entries in A (i,:)
458 9752 : for (int64_t j = 0 ;
459 40390 : j < FASTSV_SAMPLES && Ap [i] + j < Ap [i + 1] ; j++)
460 : {
461 30638 : Tj [p++] = Aj [Ap [i] + j] ;
462 : }
463 9752 : Tp [i + 1] = p ;
464 : }
465 : }
466 :
467 : //----------------------------------------------------------------------
468 : // import the result into the GrB_Matrix T
469 : //----------------------------------------------------------------------
470 :
471 4 : GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
472 4 : GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
473 : Tx_size, /* T is iso: */ true, A_jumbled, NULL)) ;
474 :
475 : // ] todo: the above will all be done as a single call to GxB_select.
476 :
477 : //----------------------------------------------------------------------
478 : // find the connected components of T
479 : //----------------------------------------------------------------------
480 :
481 4 : LG_TRY (fastsv (T, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
482 : C, &Cp, &Px, &Cx, msg)) ;
483 :
484 : //----------------------------------------------------------------------
485 : // use sampling to estimate the largest connected component in T
486 : //----------------------------------------------------------------------
487 :
488 : // The sampling below computes an estimate of the mode of the parent
489 : // vector, the contents of which are currently in the non-opaque Px
490 : // array.
491 :
492 : // hash table size must be a power of 2
493 : #define HASH_SIZE 1024
494 : // number of samples to insert into the hash table
495 : #define HASH_SAMPLES 864
496 : #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
497 : #define NEXT(x) ((x + 23) & (HASH_SIZE-1))
498 :
499 : // allocate and initialize the hash table
500 4 : LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
501 : sizeof (GrB_Index), msg)) ;
502 4 : LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
503 : sizeof (int), msg)) ;
504 4100 : for (int k = 0 ; k < HASH_SIZE ; k++)
505 : {
506 4096 : ht_key [k] = UINT64_MAX ;
507 : }
508 :
509 : // hash the samples and find the most frequent entry
510 4 : uint64_t seed = n ; // random number seed
511 4 : int64_t key = -1 ; // most frequent entry
512 4 : int max_count = 0 ; // frequency of most frequent entry
513 3460 : for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
514 : {
515 : // select an entry from Px at random
516 3456 : GrB_Index x = Px [LG_Random60 (&seed) % n] ;
517 : // find x in the hash table
518 3456 : GrB_Index h = HASH (x) ;
519 3650 : while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
520 : // add x to the hash table
521 3456 : ht_key [h] = x ;
522 3456 : ht_count [h]++ ;
523 : // keep track of the most frequent value
524 3456 : if (ht_count [h] > max_count)
525 : {
526 1864 : key = ht_key [h] ;
527 1864 : max_count = ht_count [h] ;
528 : }
529 : }
530 :
531 : //----------------------------------------------------------------------
532 : // compact the largest connected component in A
533 : //----------------------------------------------------------------------
534 :
535 : // Construct a new matrix T from the input matrix A (the matrix A is
536 : // not changed). The key node is the representative of the (estimated)
537 : // largest component. T is constructed as a copy of A, except:
538 : // (1) all edges A(i,:) for nodes i in the key component deleted, and
539 : // (2) for nodes i not in the key component, A(i,j) is deleted if
540 : // j is in the key component.
541 : // (3) If A(i,:) has any deletions from (2), T(i,key) is added to T.
542 :
543 : // [ todo: replace this with GxB_extract with GrB_Vector index arrays.
544 : // See https://github.com/GraphBLAS/graphblas-api-c/issues/67 .
545 : // This method will not insert the new entries T(i,key) for rows i that have
546 : // had entries deleted. That can be done with GrB_assign, with an n-by-1 mask
547 : // M computed from the before-and-after row degrees of A and T:
548 : // M = (parent != key) && (out_degree(T) < out_degree(A))
549 : // J [0] = key.
550 : // GxB_Matrix_subassign_BOOL (T, M, NULL, true, GrB_ALL, n, J, 1, NULL)
551 : // or with
552 : // GrB_Col_assign (T, M, NULL, t, GrB_ALL, j, NULL) with an all-true
553 : // vector t.
554 :
555 : // unpack T to reuse the space (all content is overwritten below)
556 : bool T_jumbled, T_iso ;
557 4 : GRB_TRY (GxB_Matrix_unpack_CSR (T, &Tp, &Tj, &Tx, &Tp_size, &Tj_size,
558 : &Tx_size, &T_iso, &T_jumbled, NULL)) ;
559 : #pragma omp parallel for num_threads(nthreads) schedule(static)
560 8 : for (tid = 0 ; tid < nthreads ; tid++)
561 : {
562 4 : GrB_Index p = Ap [range [tid]] ;
563 : // thread tid scans A (range [tid]:range [tid+1]-1,:),
564 : // and constructs T(i,:) for all rows in this range.
565 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
566 : {
567 9752 : int64_t pi = Px [i] ; // pi = parent (i)
568 9752 : Tp [i] = p ; // start the construction of T(i,:)
569 : // T(i,:) is empty if pi == key
570 9752 : if (pi != key)
571 : {
572 : // scan A(i,:)
573 46550 : for (GrB_Index pS = Ap [i] ; pS < Ap [i+1] ; pS++)
574 : {
575 : // get A(i,j)
576 41274 : int64_t j = Aj [pS] ;
577 41274 : if (Px [j] != key)
578 : {
579 : // add the entry T(i,j) to T, but skip it if
580 : // Px [j] is equal to key
581 41264 : Tj [p++] = j ;
582 : }
583 : }
584 : // Add the entry T(i,key) if there is room for it in T(i,:);
585 : // if and only if node i is adjacent to a node j in the
586 : // largest component. The only way there can be space if
587 : // at least one T(i,j) appears with Px [j] equal to the key
588 : // (that is, node j is in the largest connected component,
589 : // key == Px [j]. One of these j's can then be replaced
590 : // with the key. If node i is not adjacent to any node in
591 : // the largest component, then there is no space in T(i,:)
592 : // and no new edge to the largest component is added.
593 5276 : if (p - Tp [i] < Ap [i+1] - Ap [i])
594 : {
595 2 : Tj [p++] = key ;
596 : }
597 : }
598 : }
599 : // count the number of entries inserted into T by this thread
600 4 : count [tid] = p - Tp [range [tid]] ;
601 : }
602 :
603 : // Compact empty space out of Tj not filled in from the above phase.
604 4 : nvals = 0 ;
605 8 : for (tid = 0 ; tid < nthreads ; tid++)
606 : {
607 4 : memmove (Tj + nvals,
608 4 : Tj + Tp [range [tid]], sizeof (GrB_Index) * count [tid]) ;
609 4 : nvals += count [tid] ;
610 4 : count [tid] = nvals - count [tid] ;
611 : }
612 :
613 : // Compact empty space out of Tp
614 : #pragma omp parallel for num_threads(nthreads) schedule(static)
615 8 : for (tid = 0 ; tid < nthreads ; tid++)
616 : {
617 4 : GrB_Index p = Tp [range [tid]] ;
618 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
619 : {
620 9752 : Tp [i] -= p - count [tid] ;
621 : }
622 : }
623 :
624 : // finalize T
625 4 : Tp [n] = nvals ;
626 :
627 : // pack T for the final phase
628 4 : GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
629 : Tx_size, T_iso, /* T is now jumbled */ true, NULL)) ;
630 :
631 : // pack A (unchanged since last unpack); this is the original G->A.
632 4 : GRB_TRY (GxB_Matrix_pack_CSR (A, &Ap, &Aj, &Ax, Ap_size, Aj_size,
633 : Ax_size, A_iso, A_jumbled, NULL)) ;
634 :
635 : // ]. The unpack/pack of A into Ap, Aj, Ax will not be needed, and G->A
636 : // will become truly a read-only matrix.
637 :
638 : // final phase uses the pruned matrix T
639 4 : A = T ;
640 : }
641 :
642 : //--------------------------------------------------------------------------
643 : // check for quick return
644 : //--------------------------------------------------------------------------
645 :
646 : // The sample phase may have already found that G->A has a single component,
647 : // in which case the matrix A is now empty.
648 :
649 34 : if (nvals == 0)
650 : {
651 2 : (*component) = parent ;
652 2 : LG_FREE_WORK ;
653 2 : return (GrB_SUCCESS) ;
654 : }
655 :
656 : //--------------------------------------------------------------------------
657 : // final phase
658 : //--------------------------------------------------------------------------
659 :
660 32 : LG_TRY (fastsv (A, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
661 : C, &Cp, &Px, &Cx, msg)) ;
662 :
663 : //--------------------------------------------------------------------------
664 : // free workspace and return result
665 : //--------------------------------------------------------------------------
666 :
667 23 : (*component) = parent ;
668 23 : LG_FREE_WORK ;
669 23 : return (GrB_SUCCESS) ;
670 : #endif
671 : }
|