Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LG_CC_FastSV6: connected components
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Yongzhe Zhang, modified by Timothy A. Davis, Texas A&M
15 : // University
16 :
17 : //------------------------------------------------------------------------------
18 :
19 : // This is an Advanced algorithm (G->is_symmetric_structure must be known),
20 : // but it is not user-callable (see LAGr_ConnectedComponents instead).
21 :
22 : // Code is based on the algorithm described in the following paper:
23 : // Zhang, Azad, Hu. FastSV: A Distributed-Memory Connected Component
24 : // Algorithm with Fast Convergence (SIAM PP20)
25 :
26 : // A subsequent update to the algorithm is here (which might not be reflected
27 : // in this code):
28 : // Yongzhe Zhang, Ariful Azad, Aydin Buluc: Parallel algorithms for finding
29 : // connected components using linear algebra. J. Parallel Distributed Comput.
30 : // 144: 14-27 (2020).
31 :
32 : // Modified by Tim Davis, Texas A&M University: revised Reduce_assign to use
33 : // purely GrB* and GxB* methods and the matrix C. Added warmup phase. Changed
34 : // to use GxB pack/unpack instead of GxB import/export. Converted to use the
35 : // LAGraph_Graph object. Exploiting iso status for the temporary matrices
36 : // C and T.
37 :
38 : // The input graph G must be undirected, or directed and with an adjacency
39 : // matrix that has a symmetric structure. Self-edges (diagonal entries) are
40 : // OK, and are ignored. The values and type of A are ignored; just its
41 : // structure is accessed.
42 :
43 : // NOTE: This function must not be called by multiple user threads at the same
44 : // time on the same graph G, since it unpacks G->A and then packs it back when
45 : // done. G->A is unchanged when the function returns, but during execution
46 : // G->A is empty. This will be fixed once the todos are finished below, and
47 : // G->A will then become a truly read-only object (assuming GrB_wait (G->A)
48 : // has been done first).
49 :
50 : // #define TIMINGS
51 :
52 : #define __STDC_WANT_LIB_EXT1__ 1
53 : #include <string.h>
54 :
55 : #define LG_FREE_ALL ;
56 : #include "LG_internal.h"
57 :
58 : #if LAGRAPH_SUITESPARSE
59 :
60 : //==============================================================================
61 : // fastsv: find the components of a graph
62 : //==============================================================================
63 :
64 26 : static inline GrB_Info fastsv
65 : (
66 : GrB_Matrix A, // adjacency matrix, G->A or a subset of G->A
67 : GrB_Vector parent, // parent vector
68 : GrB_Vector mngp, // min neighbor grandparent
69 : GrB_Vector *gp, // grandparent
70 : GrB_Vector *gp_new, // new grandparent (swapped with gp)
71 : GrB_Vector t, // workspace
72 : GrB_BinaryOp eq, // GrB_EQ_(integer type)
73 : GrB_BinaryOp min, // GrB_MIN_(integer type)
74 : GrB_Semiring min_2nd, // GrB_MIN_SECOND_(integer type)
75 : GrB_Matrix C, // C(i,j) present if i = Px (j)
76 : GrB_Index **Cp, // 0:n, size n+1
77 : GrB_Index **Px, // Px: non-opaque copy of parent vector, size n
78 : void **Cx, // size 1, contents not accessed
79 : char *msg
80 : )
81 : {
82 : GrB_Index n ;
83 26 : GRB_TRY (GrB_Vector_size (&n, parent)) ;
84 26 : GrB_Index Cp_size = (n+1) * sizeof (GrB_Index) ;
85 26 : GrB_Index Ci_size = n * sizeof (GrB_Index) ;
86 26 : GrB_Index Cx_size = sizeof (bool) ;
87 26 : bool iso = true, jumbled = false, done = false ;
88 :
89 : // #ifdef TIMINGS
90 : // int pass = 0 ;
91 : // #endif
92 :
93 : while (true)
94 48 : {
95 : // #ifdef TIMINGS
96 : // printf ("\n-------------------------------------------fastsv: %d\n",
97 : // ++pass) ;
98 : // #endif
99 :
100 : //----------------------------------------------------------------------
101 : // hooking & shortcutting
102 : //----------------------------------------------------------------------
103 :
104 : // mngp = min (mngp, A*gp) using the MIN_SECOND semiring
105 74 : GRB_TRY (GrB_mxv (mngp, NULL, min, min_2nd, A, *gp, NULL)) ;
106 :
107 : //----------------------------------------------------------------------
108 : // parent = min (parent, C*mngp) where C(i,j) is present if i=Px(j)
109 : //----------------------------------------------------------------------
110 :
111 : // Reduce_assign: The Px array of size n is the non-opaque copy of the
112 : // parent vector, where i = Px [j] if the parent of node j is node i.
113 : // It can thus have duplicates. The vectors parent and mngp are full
114 : // (all entries present). This function computes the following, which
115 : // is done explicitly in the Reduce_assign function in LG_CC_Boruvka:
116 : //
117 : // for (j = 0 ; j < n ; j++)
118 : // {
119 : // uint64_t i = Px [j] ;
120 : // parent [i] = min (parent [i], mngp [j]) ;
121 : // }
122 : //
123 : // If C(i,j) is present where i == Px [j], then this can be written as:
124 : //
125 : // parent = min (parent, C*mngp)
126 : //
127 : // when using the min_2nd semiring. This can be done efficiently
128 : // because C can be constructed in O(1) time and O(1) additional space
129 : // (not counting the prior Cp, Px, and Cx arrays), when using the
130 : // SuiteSparse pack/unpack move constructors. The min_2nd semiring
131 : // ignores the values of C and operates only on the structure, so its
132 : // values are not relevant. Cx is thus chosen as a GrB_BOOL array of
133 : // size 1 where Cx [0] = false, so the all entries present in C are
134 : // equal to false.
135 :
136 : // pack Cp, Px, and Cx into a matrix C with C(i,j) present if Px(j) == i
137 74 : GRB_TRY (GxB_Matrix_pack_CSC (C, Cp, /* Px is Ci: */ Px, Cx,
138 : Cp_size, Ci_size, Cx_size, iso, jumbled, NULL)) ;
139 :
140 : // parent = min (parent, C*mngp) using the MIN_SECOND semiring
141 74 : GRB_TRY (GrB_mxv (parent, NULL, min, min_2nd, C, mngp, NULL)) ;
142 :
143 : // unpack the contents of C, to make Px available to this method again.
144 74 : GRB_TRY (GxB_Matrix_unpack_CSC (C, Cp, Px, Cx,
145 : &Cp_size, &Ci_size, &Cx_size, &iso, &jumbled, NULL)) ;
146 :
147 : //----------------------------------------------------------------------
148 : // parent = min (parent, mngp, gp)
149 : //----------------------------------------------------------------------
150 :
151 74 : GRB_TRY (GrB_eWiseAdd (parent, NULL, min, min, mngp, *gp, NULL)) ;
152 :
153 : //----------------------------------------------------------------------
154 : // calculate grandparent: gp_new = parent (parent), and extract Px
155 : //----------------------------------------------------------------------
156 :
157 : // if parent is uint32, GraphBLAS typecasts to uint64 for Px.
158 74 : GRB_TRY (GrB_Vector_extractTuples (NULL, *Px, &n, parent)) ;
159 74 : GRB_TRY (GrB_extract (*gp_new, NULL, NULL, parent, *Px, n, NULL)) ;
160 :
161 : //----------------------------------------------------------------------
162 : // terminate if gp and gp_new are the same
163 : //----------------------------------------------------------------------
164 :
165 74 : GRB_TRY (GrB_eWiseMult (t, NULL, NULL, eq, *gp_new, *gp, NULL)) ;
166 74 : GRB_TRY (GrB_reduce (&done, NULL, GrB_LAND_MONOID_BOOL, t, NULL)) ;
167 74 : if (done) break ;
168 :
169 : // swap gp and gp_new
170 48 : GrB_Vector s = (*gp) ; (*gp) = (*gp_new) ; (*gp_new) = s ;
171 : }
172 26 : return (GrB_SUCCESS) ;
173 : }
174 :
175 : //==============================================================================
176 : // LG_CC_FastSV6
177 : //==============================================================================
178 :
179 : // The output of LG_CC_FastSV* is a vector component, where component(i)=r if
180 : // node i is in the connected compononent whose representative is node r. If r
181 : // is a representative, then component(r)=r. The number of connected
182 : // components in the graph G is the number of representatives.
183 :
184 : #undef LG_FREE_WORK
185 : #define LG_FREE_WORK \
186 : { \
187 : LAGraph_Free ((void **) &Tp, NULL) ; \
188 : LAGraph_Free ((void **) &Tj, NULL) ; \
189 : LAGraph_Free ((void **) &Tx, NULL) ; \
190 : LAGraph_Free ((void **) &Cp, NULL) ; \
191 : LAGraph_Free ((void **) &Px, NULL) ; \
192 : LAGraph_Free ((void **) &Cx, NULL) ; \
193 : LAGraph_Free ((void **) &ht_key, NULL) ; \
194 : LAGraph_Free ((void **) &ht_count, NULL) ; \
195 : LAGraph_Free ((void **) &count, NULL) ; \
196 : LAGraph_Free ((void **) &range, NULL) ; \
197 : GrB_free (&C) ; \
198 : GrB_free (&T) ; \
199 : GrB_free (&t) ; \
200 : GrB_free (&y) ; \
201 : GrB_free (&gp) ; \
202 : GrB_free (&mngp) ; \
203 : GrB_free (&gp_new) ; \
204 : }
205 :
206 : #undef LG_FREE_ALL
207 : #define LG_FREE_ALL \
208 : { \
209 : LG_FREE_WORK ; \
210 : GrB_free (&parent) ; \
211 : }
212 :
213 : #endif
214 :
215 : #ifdef TIMINGS
216 : static void print_timings (double timings [16])
217 : {
218 : double total = timings [0] + timings [1] + timings [2] ;
219 : printf ("SV6 %12.6f (%4.1f%%) init\n", timings [0], 100. * timings [0] / total) ;
220 : printf ("SV6 %12.6f (%4.1f%%) total sampling:\n", timings [1], 100. * timings [1] / total) ;
221 : printf ("SV6 %12.6f (%4.1f%%) setup T\n", timings [3], 100. * timings [3] / total) ;
222 : printf ("SV6 %12.6f (%4.1f%%) create T\n", timings [4], 100. * timings [4] / total) ;
223 : printf ("SV6 %12.6f (%4.1f%%) fastsv sample\n", timings [5], 100 * timings [5] / total) ;
224 : printf ("SV6 %12.6f (%4.1f%%) hash\n", timings [6], 100. * timings [6] / total) ;
225 : printf ("SV6 %12.6f (%4.1f%%) prune\n", timings [7], 100. * timings [7] / total) ;
226 : printf ("SV6 %12.6f (%4.1f%%) total final\n", timings [2], 100. * timings [2] / total) ;
227 : }
228 : #endif
229 :
230 26 : int LG_CC_FastSV6 // SuiteSparse:GraphBLAS method, with GxB extensions
231 : (
232 : // output:
233 : GrB_Vector *component, // component(i)=r if node is in the component r
234 : // input:
235 : LAGraph_Graph G, // input graph (modified then restored)
236 : char *msg
237 : )
238 : {
239 : #if LAGRAPH_SUITESPARSE
240 :
241 : //--------------------------------------------------------------------------
242 : // check inputs
243 : //--------------------------------------------------------------------------
244 :
245 26 : LG_CLEAR_MSG ;
246 :
247 : #ifdef TIMINGS
248 : double timings [16] ;
249 : for (int kk = 0 ; kk < 16 ; kk++) timings [kk] = 0 ;
250 : double tic = LAGraph_WallClockTime ( ) ;
251 : LG_SET_BURBLE (false) ;
252 : #endif
253 :
254 26 : int64_t *range = NULL ;
255 26 : GrB_Index n, nvals, Cp_size = 0, *ht_key = NULL, *Px = NULL, *Cp = NULL,
256 26 : *count = NULL, *Tp = NULL, *Tj = NULL ;
257 26 : GrB_Vector parent = NULL, gp_new = NULL, mngp = NULL, gp = NULL, t = NULL,
258 26 : y = NULL ;
259 26 : GrB_Matrix T = NULL, C = NULL ;
260 26 : void *Tx = NULL, *Cx = NULL ;
261 26 : int *ht_count = NULL ;
262 :
263 26 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
264 25 : LG_ASSERT (component != NULL, GrB_NULL_POINTER) ;
265 25 : (*component) = NULL ;
266 :
267 25 : LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
268 : (G->kind == LAGraph_ADJACENCY_DIRECTED &&
269 : G->is_symmetric_structure == LAGraph_TRUE)),
270 : LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
271 : "G->A must be known to be symmetric") ;
272 :
273 : //--------------------------------------------------------------------------
274 : // initializations
275 : //--------------------------------------------------------------------------
276 :
277 24 : GrB_Matrix A = G->A ;
278 24 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
279 24 : GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
280 :
281 : // determine the integer type, operators, and semirings to use
282 : GrB_Type Uint, Int ;
283 : GrB_IndexUnaryOp ramp ;
284 : GrB_Semiring min_2nd, min_2ndi ;
285 : GrB_BinaryOp min, eq, imin ;
286 : #ifdef COVERAGE
287 : // Just for test coverage, use 64-bit ints for n > 100. Do not use this
288 : // rule in production!
289 : #define NBIG 100
290 : #else
291 : // For production use: 64-bit integers if n > 2^31
292 : #define NBIG INT32_MAX
293 : #endif
294 24 : if (n > NBIG)
295 : {
296 : // use 64-bit integers throughout
297 8 : Uint = GrB_UINT64 ;
298 8 : Int = GrB_INT64 ;
299 8 : ramp = GrB_ROWINDEX_INT64 ;
300 8 : min = GrB_MIN_UINT64 ;
301 8 : imin = GrB_MIN_INT64 ;
302 8 : eq = GrB_EQ_UINT64 ;
303 8 : min_2nd = GrB_MIN_SECOND_SEMIRING_UINT64 ;
304 8 : min_2ndi = GxB_MIN_SECONDI_INT64 ;
305 : }
306 : else
307 : {
308 : // use 32-bit integers, except for Px and for constructing the matrix C
309 16 : Uint = GrB_UINT32 ;
310 16 : Int = GrB_INT32 ;
311 16 : ramp = GrB_ROWINDEX_INT32 ;
312 16 : min = GrB_MIN_UINT32 ;
313 16 : imin = GrB_MIN_INT32 ;
314 16 : eq = GrB_EQ_UINT32 ;
315 16 : min_2nd = GrB_MIN_SECOND_SEMIRING_UINT32 ;
316 16 : min_2ndi = GxB_MIN_SECONDI_INT32 ;
317 : }
318 :
319 : // FASTSV_SAMPLES: number of samples to take from each row A(i,:).
320 : // Sampling is used if the average degree is > 8 and if n > 1024.
321 : #define FASTSV_SAMPLES 4
322 24 : bool sampling = (nvals > n * FASTSV_SAMPLES * 2 && n > 1024) ;
323 :
324 : // [ todo: nthreads will not be needed once GxB_select with a GxB_RankUnaryOp
325 : // and a new GxB_extract are added to SuiteSparse:GraphBLAS.
326 : // determine # of threads to use
327 : int nthreads, nthreads_outer, nthreads_inner ;
328 24 : LG_TRY (LAGraph_GetNumThreads (&nthreads_outer, &nthreads_inner, msg)) ;
329 24 : nthreads = nthreads_outer * nthreads_inner ;
330 24 : nthreads = LAGRAPH_MIN (nthreads, n / 16) ;
331 24 : nthreads = LAGRAPH_MAX (nthreads, 1) ;
332 : // ]
333 :
334 24 : LG_TRY (LAGraph_Calloc ((void **) &Cx, 1, sizeof (bool), msg)) ;
335 24 : LG_TRY (LAGraph_Malloc ((void **) &Px, n, sizeof (GrB_Index), msg)) ;
336 :
337 : // create Cp = 0:n (always 64-bit) and the empty C matrix
338 24 : GRB_TRY (GrB_Matrix_new (&C, GrB_BOOL, n, n)) ;
339 24 : GRB_TRY (GrB_Vector_new (&t, GrB_INT64, n+1)) ;
340 24 : GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n+1, NULL)) ;
341 24 : GRB_TRY (GrB_apply (t, NULL, NULL, GrB_ROWINDEX_INT64, t, 0, NULL)) ;
342 24 : GRB_TRY (GxB_Vector_unpack_Full (t, (void **) &Cp, &Cp_size, NULL, NULL)) ;
343 24 : GRB_TRY (GrB_free (&t)) ;
344 :
345 : //--------------------------------------------------------------------------
346 : // warmup: parent = min (0:n-1, A*1) using the MIN_SECONDI semiring
347 : //--------------------------------------------------------------------------
348 :
349 : // y (i) = min (i, j) for all entries A(i,j). This warmup phase takes only
350 : // O(n) time, because of how the MIN_SECONDI semiring is implemented in
351 : // SuiteSparse:GraphBLAS. A is held by row, and the first entry in A(i,:)
352 : // is the minimum index j, so only the first entry in A(i,:) needs to be
353 : // considered for each row i.
354 :
355 24 : GRB_TRY (GrB_Vector_new (&t, Int, n)) ;
356 24 : GRB_TRY (GrB_Vector_new (&y, Int, n)) ;
357 24 : GRB_TRY (GrB_assign (t, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
358 24 : GRB_TRY (GrB_assign (y, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
359 24 : GRB_TRY (GrB_apply (y, NULL, NULL, ramp, y, 0, NULL)) ;
360 24 : GRB_TRY (GrB_mxv (y, NULL, imin, min_2ndi, A, t, NULL)) ;
361 24 : GRB_TRY (GrB_free (&t)) ;
362 :
363 : // The typecast from Int to Uint is required because the ROWINDEX operator
364 : // and MIN_SECONDI do not work in the UINT* domains, as built-in operators.
365 : // parent = (Uint) y
366 24 : GRB_TRY (GrB_Vector_new (&parent, Uint, n)) ;
367 24 : GRB_TRY (GrB_assign (parent, NULL, NULL, y, GrB_ALL, n, NULL)) ;
368 24 : GRB_TRY (GrB_free (&y)) ;
369 :
370 : // copy parent into gp, mngp, and Px. Px is a non-opaque 64-bit copy of the
371 : // parent GrB_Vector. The Px array is always of type GrB_Index since it
372 : // must be used as the input array for extractTuples and as Ci for pack_CSR.
373 : // If parent is uint32, GraphBLAS typecasts it to the uint64 Px array.
374 24 : GRB_TRY (GrB_Vector_extractTuples (NULL, Px, &n, parent)) ;
375 24 : GRB_TRY (GrB_Vector_dup (&gp, parent)) ;
376 24 : GRB_TRY (GrB_Vector_dup (&mngp, parent)) ;
377 24 : GRB_TRY (GrB_Vector_new (&gp_new, Uint, n)) ;
378 24 : GRB_TRY (GrB_Vector_new (&t, GrB_BOOL, n)) ;
379 :
380 : #ifdef TIMINGS
381 : double toc = LAGraph_WallClockTime ( ) ;
382 : timings [0] = toc - tic ; // init time
383 : tic = toc ;
384 : #endif
385 :
386 : //--------------------------------------------------------------------------
387 : // sample phase
388 : //--------------------------------------------------------------------------
389 :
390 24 : if (sampling)
391 : {
392 :
393 : // [ todo: GxB_select, using a new operator: GxB_RankUnaryOp, will do all this,
394 : // with GxB_Matrix_select_RankOp_Scalar with operator GxB_LEASTRANK and a
395 : // GrB_Scalar input equal to FASTSV_SAMPLES. Built-in operators will be,
396 : // (where y is INT64):
397 : //
398 : // GxB_LEASTRANK (aij, i, j, k, d, y): select if aij has rank k <= y
399 : // GxB_NLEASTRANK: select if aij has rank k > y
400 : // GxB_GREATESTRANK (...) select if aij has rank k >= (d-y) where
401 : // d = # of entries in A(i,:).
402 : // GxB_NGREATESTRANK (...): select if aij has rank k < (d-y)
403 : // and perhaps other operators such as:
404 : // GxB_LEASTRELRANK (...): select aij if rank k <= y*d where y is double
405 : // GxB_GREATESTRELRANK (...): select aij rank k > y*d where y is double
406 : //
407 : // By default, the rank of aij is its relative position as the kth entry in its
408 : // row (from "left" to "right"). If a new GxB setting in the descriptor is
409 : // set, then k is the relative position of aij as the kth entry in its column.
410 : // The default would be that the rank is the position of aij in its row A(i,:).
411 :
412 : // Other:
413 : // give me 3 random items from the row (y = 3)
414 : // give me the 4 biggest *values* in each row (y = 4)
415 :
416 : // mxv:
417 : // C = A*diag(D)
418 :
419 : //----------------------------------------------------------------------
420 : // unpack A in CSR format
421 : //----------------------------------------------------------------------
422 :
423 : #ifdef TIMINGS
424 : double tic2 = LAGraph_WallClockTime ( ) ;
425 : #endif
426 :
427 : void *Ax ;
428 : GrB_Index *Ap, *Aj, Ap_size, Aj_size, Ax_size ;
429 : bool A_jumbled, A_iso ;
430 4 : GRB_TRY (GxB_Matrix_unpack_CSR (A, &Ap, &Aj, &Ax,
431 : &Ap_size, &Aj_size, &Ax_size, &A_iso, &A_jumbled, NULL)) ;
432 :
433 : //----------------------------------------------------------------------
434 : // allocate workspace, including space to construct T
435 : //----------------------------------------------------------------------
436 :
437 4 : GrB_Index Tp_size = (n+1) * sizeof (GrB_Index) ;
438 4 : GrB_Index Tj_size = nvals * sizeof (GrB_Index) ;
439 4 : GrB_Index Tx_size = sizeof (bool) ;
440 4 : LG_TRY (LAGraph_Malloc ((void **) &Tp, n+1, sizeof (GrB_Index), msg)) ;
441 4 : LG_TRY (LAGraph_Malloc ((void **) &Tj, nvals, sizeof (GrB_Index),
442 : msg)) ;
443 4 : LG_TRY (LAGraph_Calloc ((void **) &Tx, 1, sizeof (bool), msg)) ;
444 4 : LG_TRY (LAGraph_Malloc ((void **) &range, nthreads + 1,
445 : sizeof (int64_t), msg)) ;
446 4 : LG_TRY (LAGraph_Calloc ((void **) &count, nthreads + 1,
447 : sizeof (GrB_Index), msg)) ;
448 :
449 : //----------------------------------------------------------------------
450 : // define parallel tasks to construct T
451 : //----------------------------------------------------------------------
452 :
453 : // thread tid works on rows range[tid]:range[tid+1]-1 of A and T
454 : int tid;
455 12 : for (tid = 0 ; tid <= nthreads ; tid++)
456 : {
457 8 : range [tid] = (n * tid + nthreads - 1) / nthreads ;
458 : }
459 :
460 : //----------------------------------------------------------------------
461 : // determine the number entries to be constructed in T for each thread
462 : //----------------------------------------------------------------------
463 :
464 : #pragma omp parallel for num_threads(nthreads) schedule(static)
465 8 : for (tid = 0 ; tid < nthreads ; tid++)
466 : {
467 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
468 : {
469 9752 : int64_t deg = Ap [i + 1] - Ap [i] ;
470 9752 : count [tid + 1] += LAGRAPH_MIN (FASTSV_SAMPLES, deg) ;
471 : }
472 : }
473 :
474 : //----------------------------------------------------------------------
475 : // count = cumsum (count)
476 : //----------------------------------------------------------------------
477 :
478 8 : for (tid = 0 ; tid < nthreads ; tid++)
479 : {
480 4 : count [tid + 1] += count [tid] ;
481 : }
482 :
483 : #ifdef TIMINGS
484 : double toc2 = LAGraph_WallClockTime ( ) ;
485 : timings [3] = toc2 - tic2 ; // setup T
486 : tic2 = toc2 ;
487 : #endif
488 :
489 : //----------------------------------------------------------------------
490 : // construct T
491 : //----------------------------------------------------------------------
492 :
493 : // T (i,:) consists of the first FASTSV_SAMPLES of A (i,:).
494 :
495 : #pragma omp parallel for num_threads(nthreads) schedule(static)
496 8 : for (tid = 0 ; tid < nthreads ; tid++)
497 : {
498 4 : GrB_Index p = count [tid] ;
499 4 : Tp [range [tid]] = p ;
500 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
501 : {
502 : // construct T (i,:) from the first entries in A (i,:)
503 9752 : for (int64_t j = 0 ;
504 40390 : j < FASTSV_SAMPLES && Ap [i] + j < Ap [i + 1] ; j++)
505 : {
506 30638 : Tj [p++] = Aj [Ap [i] + j] ;
507 : }
508 9752 : Tp [i + 1] = p ;
509 : }
510 : }
511 :
512 : //----------------------------------------------------------------------
513 : // import the result into the GrB_Matrix T
514 : //----------------------------------------------------------------------
515 :
516 4 : GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
517 4 : GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
518 : Tx_size, /* T is iso: */ true, A_jumbled, NULL)) ;
519 :
520 : // ] todo: the above will all be done as a single call to GxB_select.
521 :
522 : #ifdef TIMINGS
523 : toc2 = LAGraph_WallClockTime ( ) ;
524 : timings [4] = toc2 - tic2 ; // create T
525 : tic2 = toc2 ;
526 : #endif
527 :
528 : //----------------------------------------------------------------------
529 : // find the connected components of T
530 : //----------------------------------------------------------------------
531 :
532 4 : LG_TRY (fastsv (T, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
533 : C, &Cp, &Px, &Cx, msg)) ;
534 :
535 : #ifdef TIMINGS
536 : toc2 = LAGraph_WallClockTime ( ) ;
537 : timings [5] = toc2 - tic2 ; // fastsv, in sampling
538 : tic2 = toc2 ;
539 : #endif
540 :
541 : //----------------------------------------------------------------------
542 : // use sampling to estimate the largest connected component in T
543 : //----------------------------------------------------------------------
544 :
545 : // The sampling below computes an estimate of the mode of the parent
546 : // vector, the contents of which are currently in the non-opaque Px
547 : // array.
548 :
549 : // hash table size must be a power of 2
550 : #define HASH_SIZE 1024
551 : // number of samples to insert into the hash table
552 : #define HASH_SAMPLES 864
553 : #define HASH(x) (((x << 4) + x) & (HASH_SIZE-1))
554 : #define NEXT(x) ((x + 23) & (HASH_SIZE-1))
555 :
556 : // allocate and initialize the hash table
557 4 : LG_TRY (LAGraph_Malloc ((void **) &ht_key, HASH_SIZE,
558 : sizeof (GrB_Index), msg)) ;
559 4 : LG_TRY (LAGraph_Calloc ((void **) &ht_count, HASH_SIZE,
560 : sizeof (int), msg)) ;
561 4100 : for (int k = 0 ; k < HASH_SIZE ; k++)
562 : {
563 4096 : ht_key [k] = UINT64_MAX ;
564 : }
565 :
566 : // hash the samples and find the most frequent entry
567 4 : uint64_t seed = n ; // random number seed
568 4 : int64_t key = -1 ; // most frequent entry
569 4 : int max_count = 0 ; // frequency of most frequent entry
570 3460 : for (int64_t k = 0 ; k < HASH_SAMPLES ; k++)
571 : {
572 : // select an entry from Px at random
573 3456 : GrB_Index x = Px [LG_Random64 (&seed) % n] ;
574 : // find x in the hash table
575 3456 : GrB_Index h = HASH (x) ;
576 3598 : while (ht_key [h] != UINT64_MAX && ht_key [h] != x) h = NEXT (h) ;
577 : // add x to the hash table
578 3456 : ht_key [h] = x ;
579 3456 : ht_count [h]++ ;
580 : // keep track of the most frequent value
581 3456 : if (ht_count [h] > max_count)
582 : {
583 1870 : key = ht_key [h] ;
584 1870 : max_count = ht_count [h] ;
585 : }
586 : }
587 :
588 : #ifdef TIMINGS
589 : toc2 = LAGraph_WallClockTime ( ) ;
590 : timings [6] = toc2 - tic2 ; // hash
591 : tic2 = toc2 ;
592 : #endif
593 :
594 : //----------------------------------------------------------------------
595 : // compact the largest connected component in A
596 : //----------------------------------------------------------------------
597 :
598 : // Construct a new matrix T from the input matrix A (the matrix A is
599 : // not changed). The key node is the representative of the (estimated)
600 : // largest component. T is constructed as a copy of A, except:
601 : // (1) all edges A(i,:) for nodes i in the key component deleted, and
602 : // (2) for nodes i not in the key component, A(i,j) is deleted if
603 : // j is in the key component.
604 : // (3) If A(i,:) has any deletions from (2), T(i,key) is added to T.
605 :
606 : // [ todo: replace this with GxB_extract with GrB_Vector index arrays.
607 : // See https://github.com/GraphBLAS/graphblas-api-c/issues/67 .
608 : // This method will not insert the new entries T(i,key) for rows i that have
609 : // had entries deleted. That can be done with GrB_assign, with an n-by-1 mask
610 : // M computed from the before-and-after row degrees of A and T:
611 : // M = (parent != key) && (out_degree(T) < out_degree(A))
612 : // J [0] = key.
613 : // GxB_Matrix_subassign_BOOL (T, M, NULL, true, GrB_ALL, n, J, 1, NULL)
614 : // or with
615 : // GrB_Col_assign (T, M, NULL, t, GrB_ALL, j, NULL) with an all-true
616 : // vector t.
617 :
618 : // unpack T to reuse the space (all content is overwritten below)
619 : bool T_jumbled, T_iso ;
620 4 : GRB_TRY (GxB_Matrix_unpack_CSR (T, &Tp, &Tj, &Tx, &Tp_size, &Tj_size,
621 : &Tx_size, &T_iso, &T_jumbled, NULL)) ;
622 : #pragma omp parallel for num_threads(nthreads) schedule(static)
623 8 : for (tid = 0 ; tid < nthreads ; tid++)
624 : {
625 4 : GrB_Index p = Ap [range [tid]] ;
626 : // thread tid scans A (range [tid]:range [tid+1]-1,:),
627 : // and constructs T(i,:) for all rows in this range.
628 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
629 : {
630 9752 : int64_t pi = Px [i] ; // pi = parent (i)
631 9752 : Tp [i] = p ; // start the construction of T(i,:)
632 : // T(i,:) is empty if pi == key
633 9752 : if (pi != key)
634 : {
635 : // scan A(i,:)
636 47770 : for (GrB_Index pS = Ap [i] ; pS < Ap [i+1] ; pS++)
637 : {
638 : // get A(i,j)
639 42470 : int64_t j = Aj [pS] ;
640 42470 : if (Px [j] != key)
641 : {
642 : // add the entry T(i,j) to T, but skip it if
643 : // Px [j] is equal to key
644 42460 : Tj [p++] = j ;
645 : }
646 : }
647 : // Add the entry T(i,key) if there is room for it in T(i,:);
648 : // if and only if node i is adjacent to a node j in the
649 : // largest component. The only way there can be space if
650 : // at least one T(i,j) appears with Px [j] equal to the key
651 : // (that is, node j is in the largest connected component,
652 : // key == Px [j]. One of these j's can then be replaced
653 : // with the key. If node i is not adjacent to any node in
654 : // the largest component, then there is no space in T(i,:)
655 : // and no new edge to the largest component is added.
656 5300 : if (p - Tp [i] < Ap [i+1] - Ap [i])
657 : {
658 2 : Tj [p++] = key ;
659 : }
660 : }
661 : }
662 : // count the number of entries inserted into T by this thread
663 4 : count [tid] = p - Tp [range [tid]] ;
664 : }
665 :
666 : // Compact empty space out of Tj not filled in from the above phase.
667 4 : nvals = 0 ;
668 8 : for (tid = 0 ; tid < nthreads ; tid++)
669 : {
670 4 : memmove (Tj + nvals,
671 4 : Tj + Tp [range [tid]], sizeof (GrB_Index) * count [tid]) ;
672 4 : nvals += count [tid] ;
673 4 : count [tid] = nvals - count [tid] ;
674 : }
675 :
676 : // Compact empty space out of Tp
677 : #pragma omp parallel for num_threads(nthreads) schedule(static)
678 8 : for (tid = 0 ; tid < nthreads ; tid++)
679 : {
680 4 : GrB_Index p = Tp [range [tid]] ;
681 9756 : for (int64_t i = range [tid] ; i < range [tid+1] ; i++)
682 : {
683 9752 : Tp [i] -= p - count [tid] ;
684 : }
685 : }
686 :
687 : // finalize T
688 4 : Tp [n] = nvals ;
689 :
690 : // pack T for the final phase
691 4 : GRB_TRY (GxB_Matrix_pack_CSR (T, &Tp, &Tj, &Tx, Tp_size, Tj_size,
692 : Tx_size, T_iso, /* T is now jumbled */ true, NULL)) ;
693 :
694 : // pack A (unchanged since last unpack); this is the original G->A.
695 4 : GRB_TRY (GxB_Matrix_pack_CSR (A, &Ap, &Aj, &Ax, Ap_size, Aj_size,
696 : Ax_size, A_iso, A_jumbled, NULL)) ;
697 :
698 : // ]. The unpack/pack of A into Ap, Aj, Ax will not be needed, and G->A
699 : // will become truly a read-only matrix.
700 :
701 : // final phase uses the pruned matrix T
702 4 : A = T ;
703 :
704 : #ifdef TIMINGS
705 : toc2 = LAGraph_WallClockTime ( ) ;
706 : timings [7] = toc2 - tic2 ; // prune
707 : tic2 = toc2 ;
708 : #endif
709 : }
710 :
711 : #ifdef TIMINGS
712 : toc = LAGraph_WallClockTime ( ) ;
713 : timings [1] = toc - tic ; // total sampling time
714 : tic = toc ;
715 : #endif
716 :
717 : //--------------------------------------------------------------------------
718 : // check for quick return
719 : //--------------------------------------------------------------------------
720 :
721 : // The sample phase may have already found that G->A has a single component,
722 : // in which case the matrix A is now empty.
723 :
724 24 : if (nvals == 0)
725 : {
726 2 : (*component) = parent ;
727 2 : LG_FREE_WORK ;
728 : #ifdef TIMINGS
729 : print_timings (timings) ;
730 : LG_SET_BURBLE (false) ;
731 : #endif
732 2 : return (GrB_SUCCESS) ;
733 : }
734 :
735 : //--------------------------------------------------------------------------
736 : // final phase
737 : //--------------------------------------------------------------------------
738 :
739 22 : LG_TRY (fastsv (A, parent, mngp, &gp, &gp_new, t, eq, min, min_2nd,
740 : C, &Cp, &Px, &Cx, msg)) ;
741 :
742 : //--------------------------------------------------------------------------
743 : // free workspace and return result
744 : //--------------------------------------------------------------------------
745 :
746 22 : (*component) = parent ;
747 22 : LG_FREE_WORK ;
748 : #ifdef TIMINGS
749 : toc = LAGraph_WallClockTime ( ) ;
750 : timings [2] = toc - tic ; // final phase
751 : print_timings (timings) ;
752 : LG_SET_BURBLE (false) ;
753 : #endif
754 22 : return (GrB_SUCCESS) ;
755 : #else
756 : return (GrB_NOT_IMPLEMENTED) ;
757 : #endif
758 : }
|