Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGr_Betweenness: vertex betweenness-centrality
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Scott Kolodziej and Tim Davis, Texas A&M University;
15 : // Adapted and revised from GraphBLAS C API Spec, Appendix B.4.
16 :
17 : //------------------------------------------------------------------------------
18 :
19 : // LAGr_Betweenness: Batch algorithm for computing
20 : // betweeness centrality, using push-pull optimization.
21 :
22 : // This is an Advanced algorithm (G->AT is required).
23 :
24 : // This method computes an approximation of the betweenness algorithm.
25 : // ____
26 : // \ sigma(s,t | i)
27 : // Betweenness centrality = \ ----------------
28 : // of node i / sigma(s,t)
29 : // /___
30 : // s != i != t
31 : //
32 : // Where sigma(s,t) is the total number of shortest paths from node s to
33 : // node t, and sigma(s,t | i) is the total number of shortest paths from
34 : // node s to node t that pass through node i.
35 : //
36 : // Note that the true betweenness centrality requires computing shortest paths
37 : // from all nodes s to all nodes t (or all-pairs shortest paths), which can be
38 : // expensive to compute. By using a reasonably sized subset of source nodes, an
39 : // approximation can be made.
40 : //
41 : // This method performs simultaneous breadth-first searches of the entire graph
42 : // starting at a given set of source nodes. This pass discovers all shortest
43 : // paths from the source nodes to all other nodes in the graph. After the BFS
44 : // is complete, the number of shortest paths that pass through a given node is
45 : // tallied by reversing the traversal. From this, the (approximate) betweenness
46 : // centrality is computed.
47 :
48 : // G->A represents the graph, and G->AT must be present. G->A must be square,
49 : // and can be unsymmetric. Self-edges are OK. The values of G->A and G->AT
50 : // are ignored; just the structure of two matrices are used.
51 :
52 : // Each phase uses push-pull direction optimization.
53 :
54 : //------------------------------------------------------------------------------
55 :
56 : #define LG_FREE_WORK \
57 : { \
58 : GrB_free (&frontier) ; \
59 : GrB_free (&paths) ; \
60 : GrB_free (&bc_update) ; \
61 : GrB_free (&W) ; \
62 : if (S != NULL) \
63 : { \
64 : for (int64_t i = 0 ; i < n ; i++) \
65 : { \
66 : if (S [i] == NULL) break ; \
67 : GrB_free (&(S [i])) ; \
68 : } \
69 : LAGraph_Free ((void **) &S, NULL) ; \
70 : } \
71 : }
72 :
73 : #define LG_FREE_ALL \
74 : { \
75 : LG_FREE_WORK ; \
76 : GrB_free (centrality) ; \
77 : }
78 :
79 : #include "LG_internal.h"
80 :
81 : //------------------------------------------------------------------------------
82 : // LAGr_Betweenness: vertex betweenness-centrality
83 : //------------------------------------------------------------------------------
84 :
85 143 : int LAGr_Betweenness
86 : (
87 : // output:
88 : GrB_Vector *centrality, // centrality(i): betweeness centrality of i
89 : // input:
90 : LAGraph_Graph G, // input graph
91 : const GrB_Index *sources, // source vertices to compute shortest paths
92 : int32_t ns, // number of source vertices
93 : char *msg
94 : )
95 : {
96 :
97 : //--------------------------------------------------------------------------
98 : // check inputs
99 : //--------------------------------------------------------------------------
100 :
101 143 : LG_CLEAR_MSG ;
102 :
103 : // Array of BFS search matrices.
104 : // S [i] is a sparse matrix that stores the depth at which each vertex is
105 : // first seen thus far in each BFS at the current depth i. Each column
106 : // corresponds to a BFS traversal starting from a source node.
107 143 : GrB_Matrix *S = NULL ;
108 :
109 : // Frontier matrix, a sparse matrix.
110 : // Stores # of shortest paths to vertices at current BFS depth
111 143 : GrB_Matrix frontier = NULL ;
112 :
113 : // Paths matrix holds the number of shortest paths for each node and
114 : // starting node discovered so far. A dense matrix that is updated with
115 : // sparse updates, and also used as a mask.
116 143 : GrB_Matrix paths = NULL ;
117 :
118 : // Update matrix for betweenness centrality, values for each node for
119 : // each starting node. A dense matrix.
120 143 : GrB_Matrix bc_update = NULL ;
121 :
122 : // Temporary workspace matrix (sparse).
123 143 : GrB_Matrix W = NULL ;
124 :
125 143 : GrB_Index n = 0 ; // # nodes in the graph
126 :
127 143 : LG_ASSERT (centrality != NULL && sources != NULL, GrB_NULL_POINTER) ;
128 143 : (*centrality) = NULL ;
129 143 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
130 :
131 143 : GrB_Matrix A = G->A ;
132 : GrB_Matrix AT ;
133 143 : if (G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
134 1 : G->is_symmetric_structure == LAGraph_TRUE)
135 : {
136 : // A and A' have the same structure
137 142 : AT = A ;
138 : }
139 : else
140 : {
141 : // A and A' differ
142 1 : AT = G->AT ;
143 1 : LG_ASSERT_MSG (AT != NULL, LAGRAPH_NOT_CACHED, "G->AT is required") ;
144 : }
145 :
146 : // =========================================================================
147 : // === initializations =====================================================
148 : // =========================================================================
149 :
150 : // Initialize paths and frontier with source notes
151 143 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
152 143 : GRB_TRY (GrB_Matrix_new (&paths, GrB_FP64, ns, n)) ;
153 140 : GRB_TRY (GrB_Matrix_new (&frontier, GrB_FP64, ns, n)) ;
154 : #if LAGRAPH_SUITESPARSE
155 137 : GRB_TRY (GxB_set (paths, GxB_SPARSITY_CONTROL, GxB_BITMAP + GxB_FULL)) ;
156 : #endif
157 655 : for (GrB_Index i = 0 ; i < ns ; i++)
158 : {
159 : // paths (i,s(i)) = 1
160 : // frontier (i,s(i)) = 1
161 525 : double one = 1 ;
162 525 : GrB_Index src = sources [i] ;
163 525 : LG_ASSERT_MSG (src < n, GrB_INVALID_INDEX, "invalid source node") ;
164 525 : GRB_TRY (GrB_Matrix_setElement (paths, one, i, src)) ;
165 524 : GRB_TRY (GrB_Matrix_setElement (frontier, one, i, src)) ;
166 : }
167 :
168 : // Initial frontier: frontier<!paths>= frontier*A
169 130 : GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
170 : frontier, A, GrB_DESC_RSC)) ;
171 :
172 : // Allocate memory for the array of S matrices
173 117 : LG_TRY (LAGraph_Malloc ((void **) &S, n+1, sizeof (GrB_Matrix), msg)) ;
174 116 : S [0] = NULL ;
175 :
176 : // =========================================================================
177 : // === Breadth-first search stage ==========================================
178 : // =========================================================================
179 :
180 116 : bool last_was_pull = false ;
181 116 : GrB_Index frontier_size, last_frontier_size = 0 ;
182 116 : GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
183 :
184 : int64_t depth ;
185 567 : for (depth = 0 ; frontier_size > 0 && depth < n ; depth++)
186 : {
187 :
188 : //----------------------------------------------------------------------
189 : // S [depth] = structure of frontier
190 : //----------------------------------------------------------------------
191 :
192 496 : S [depth+1] = NULL ;
193 556 : LG_TRY (LAGraph_Matrix_Structure (&(S [depth]), frontier, msg)) ;
194 :
195 : //----------------------------------------------------------------------
196 : // Accumulate path counts: paths += frontier
197 : //----------------------------------------------------------------------
198 :
199 467 : GRB_TRY (GrB_assign (paths, NULL, GrB_PLUS_FP64, frontier, GrB_ALL, ns,
200 : GrB_ALL, n, NULL)) ;
201 :
202 : //----------------------------------------------------------------------
203 : // Update frontier: frontier<!paths> = frontier*A
204 : //----------------------------------------------------------------------
205 :
206 : // pull if frontier is more than 10% dense,
207 : // or > 6% dense and last step was pull
208 465 : double frontier_density = ((double) frontier_size) / (double) (ns*n) ;
209 465 : bool do_pull = frontier_density > (last_was_pull ? 0.06 : 0.10 ) ;
210 :
211 465 : if (do_pull)
212 : {
213 : // frontier<!paths> = frontier*AT'
214 : #if LAGRAPH_SUITESPARSE
215 387 : GRB_TRY (GxB_set (frontier, GxB_SPARSITY_CONTROL, GxB_BITMAP)) ;
216 : #endif
217 407 : GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
218 : frontier, AT, GrB_DESC_RSCT1)) ;
219 : }
220 : else // push
221 : {
222 : // frontier<!paths> = frontier*A
223 : #if LAGRAPH_SUITESPARSE
224 93 : GRB_TRY (GxB_set (frontier, GxB_SPARSITY_CONTROL, GxB_SPARSE)) ;
225 : #endif
226 90 : GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
227 : frontier, A, GrB_DESC_RSC)) ;
228 : }
229 :
230 : //----------------------------------------------------------------------
231 : // Get size of current frontier: frontier_size = nvals(frontier)
232 : //----------------------------------------------------------------------
233 :
234 451 : last_frontier_size = frontier_size ;
235 451 : last_was_pull = do_pull ;
236 451 : GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
237 : }
238 :
239 71 : GRB_TRY (GrB_free (&frontier)) ;
240 :
241 : // =========================================================================
242 : // === Betweenness centrality computation phase ============================
243 : // =========================================================================
244 :
245 : // bc_update = ones (ns, n) ; a full matrix (and stays full)
246 86 : GRB_TRY (GrB_Matrix_new (&bc_update, GrB_FP64, ns, n)) ;
247 73 : GRB_TRY (GrB_assign (bc_update, NULL, NULL, 1, GrB_ALL, ns, GrB_ALL, n,
248 : NULL)) ;
249 : // W: empty ns-by-n array, as workspace
250 82 : GRB_TRY (GrB_Matrix_new (&W, GrB_FP64, ns, n)) ;
251 :
252 : // Backtrack through the BFS and compute centrality updates for each vertex
253 175 : for (int64_t i = depth-1 ; i > 0 ; i--)
254 : {
255 :
256 : //----------------------------------------------------------------------
257 : // W<S[i]> = bc_update ./ paths
258 : //----------------------------------------------------------------------
259 :
260 : // Add contributions by successors and mask with that level's frontier
261 301 : GRB_TRY (GrB_eWiseMult (W, S [i], NULL, GrB_DIV_FP64, bc_update, paths,
262 : GrB_DESC_RS)) ;
263 :
264 : //----------------------------------------------------------------------
265 : // W<S[i−1]> = W * A'
266 : //----------------------------------------------------------------------
267 :
268 : // pull if W is more than 10% dense and nnz(W)/nnz(S[i-1]) > 1
269 : // or if W is more than 1% dense and nnz(W)/nnz(S[i-1]) > 10
270 : GrB_Index wsize, ssize ;
271 146 : GrB_Matrix_nvals (&wsize, W) ;
272 146 : GrB_Matrix_nvals (&ssize, S [i-1]) ;
273 146 : double w_density = ((double) wsize) / ((double) (ns*n)) ;
274 146 : double w_to_s_ratio = ((double) wsize) / ((double) ssize) ;
275 218 : bool do_pull = (w_density > 0.1 && w_to_s_ratio > 1.) ||
276 72 : (w_density > 0.01 && w_to_s_ratio > 10.) ;
277 :
278 146 : if (do_pull)
279 : {
280 : // W<S[i−1]> = W * A'
281 : #if LAGRAPH_SUITESPARSE
282 26 : GRB_TRY (GxB_set (W, GxB_SPARSITY_CONTROL, GxB_BITMAP)) ;
283 : #endif
284 24 : GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, A,
285 : GrB_DESC_RST1)) ;
286 : }
287 : else // push
288 : {
289 : // W<S[i−1]> = W * AT
290 : #if LAGRAPH_SUITESPARSE
291 130 : GRB_TRY (GxB_set (W, GxB_SPARSITY_CONTROL, GxB_SPARSE)) ;
292 : #endif
293 225 : GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, AT,
294 : GrB_DESC_RS)) ;
295 : }
296 :
297 : //----------------------------------------------------------------------
298 : // bc_update += W .* paths
299 : //----------------------------------------------------------------------
300 :
301 183 : GRB_TRY (GrB_eWiseMult (bc_update, NULL, GrB_PLUS_FP64, GrB_TIMES_FP64,
302 : W, paths, NULL)) ;
303 : }
304 :
305 : // =========================================================================
306 : // === finalize the centrality =============================================
307 : // =========================================================================
308 :
309 : // Initialize the centrality array with -ns to avoid counting
310 : // zero length paths
311 19 : GRB_TRY (GrB_Vector_new (centrality, GrB_FP64, n)) ;
312 12 : GRB_TRY (GrB_assign (*centrality, NULL, NULL, -ns, GrB_ALL, n, NULL)) ;
313 :
314 : // centrality (i) += sum (bc_update (:,i)) for all nodes i
315 21 : GRB_TRY (GrB_reduce (*centrality, NULL, GrB_PLUS_FP64, GrB_PLUS_MONOID_FP64,
316 : bc_update, GrB_DESC_T0)) ;
317 :
318 18 : LG_FREE_WORK ;
319 3 : return (GrB_SUCCESS) ;
320 : }
|