Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGr_Betweenness: vertex betweenness-centrality
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Scott Kolodziej and Tim Davis, Texas A&M University;
15 : // Adapted and revised from GraphBLAS C API Spec, Appendix B.4.
16 :
17 : //------------------------------------------------------------------------------
18 :
19 : // LAGr_Betweenness: Batch algorithm for computing
20 : // betweeness centrality, using push-pull optimization.
21 :
22 : // This is an Advanced algorithm (G->AT is required).
23 :
24 : // This method computes an approximation of the betweenness algorithm.
25 : // ____
26 : // \ sigma(s,t | i)
27 : // Betweenness centrality = \ ----------------
28 : // of node i / sigma(s,t)
29 : // /___
30 : // s != i != t
31 : //
32 : // Where sigma(s,t) is the total number of shortest paths from node s to
33 : // node t, and sigma(s,t | i) is the total number of shortest paths from
34 : // node s to node t that pass through node i.
35 : //
36 : // Note that the true betweenness centrality requires computing shortest paths
37 : // from all nodes s to all nodes t (or all-pairs shortest paths), which can be
38 : // expensive to compute. By using a reasonably sized subset of source nodes, an
39 : // approximation can be made.
40 : //
41 : // This method performs simultaneous breadth-first searches of the entire graph
42 : // starting at a given set of source nodes. This pass discovers all shortest
43 : // paths from the source nodes to all other nodes in the graph. After the BFS
44 : // is complete, the number of shortest paths that pass through a given node is
45 : // tallied by reversing the traversal. From this, the (approximate) betweenness
46 : // centrality is computed.
47 :
48 : // G->A represents the graph, and G->AT must be present. G->A must be square,
49 : // and can be unsymmetric. Self-edges are OK. The values of G->A and G->AT
50 : // are ignored; just the structure of two matrices are used.
51 :
52 : // Each phase uses push-pull direction optimization.
53 :
54 : //------------------------------------------------------------------------------
55 :
56 : #define LG_FREE_WORK \
57 : { \
58 : GrB_free (&frontier) ; \
59 : GrB_free (&paths) ; \
60 : GrB_free (&bc_update) ; \
61 : GrB_free (&W) ; \
62 : if (S != NULL) \
63 : { \
64 : for (int64_t i = 0 ; i < n ; i++) \
65 : { \
66 : if (S [i] == NULL) break ; \
67 : GrB_free (&(S [i])) ; \
68 : } \
69 : LAGraph_Free ((void **) &S, NULL) ; \
70 : } \
71 : }
72 :
73 : #define LG_FREE_ALL \
74 : { \
75 : LG_FREE_WORK ; \
76 : GrB_free (centrality) ; \
77 : }
78 :
79 : #include "LG_internal.h"
80 :
81 : //------------------------------------------------------------------------------
82 : // LAGr_Betweenness: vertex betweenness-centrality
83 : //------------------------------------------------------------------------------
84 :
85 146 : int LAGr_Betweenness
86 : (
87 : // output:
88 : GrB_Vector *centrality, // centrality(i): betweeness centrality of i
89 : // input:
90 : LAGraph_Graph G, // input graph
91 : const GrB_Index *sources, // source vertices to compute shortest paths
92 : int32_t ns, // number of source vertices
93 : char *msg
94 : )
95 : {
96 :
97 : //--------------------------------------------------------------------------
98 : // check inputs
99 : //--------------------------------------------------------------------------
100 :
101 146 : LG_CLEAR_MSG ;
102 :
103 : // Array of BFS search matrices.
104 : // S [i] is a sparse matrix that stores the depth at which each vertex is
105 : // first seen thus far in each BFS at the current depth i. Each column
106 : // corresponds to a BFS traversal starting from a source node.
107 146 : GrB_Matrix *S = NULL ;
108 :
109 : // Frontier matrix, a sparse matrix.
110 : // Stores # of shortest paths to vertices at current BFS depth
111 146 : GrB_Matrix frontier = NULL ;
112 :
113 : // Paths matrix holds the number of shortest paths for each node and
114 : // starting node discovered so far. A dense matrix that is updated with
115 : // sparse updates, and also used as a mask.
116 146 : GrB_Matrix paths = NULL ;
117 :
118 : // Update matrix for betweenness centrality, values for each node for
119 : // each starting node. A dense matrix.
120 146 : GrB_Matrix bc_update = NULL ;
121 :
122 : // Temporary workspace matrix (sparse).
123 146 : GrB_Matrix W = NULL ;
124 :
125 146 : GrB_Index n = 0 ; // # nodes in the graph
126 :
127 146 : LG_ASSERT (centrality != NULL && sources != NULL, GrB_NULL_POINTER) ;
128 146 : (*centrality) = NULL ;
129 146 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
130 :
131 146 : GrB_Matrix A = G->A ;
132 : GrB_Matrix AT ;
133 146 : if (G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
134 1 : G->is_symmetric_structure == LAGraph_TRUE)
135 : {
136 : // A and A' have the same structure
137 145 : AT = A ;
138 : }
139 : else
140 : {
141 : // A and A' differ
142 1 : AT = G->AT ;
143 1 : LG_ASSERT_MSG (AT != NULL, LAGRAPH_NOT_CACHED, "G->AT is required") ;
144 : }
145 :
146 : // =========================================================================
147 : // === initializations =====================================================
148 : // =========================================================================
149 :
150 : // Initialize paths and frontier with source notes
151 146 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
152 146 : GRB_TRY (GrB_Matrix_new (&paths, GrB_FP64, ns, n)) ;
153 143 : GRB_TRY (GrB_Matrix_new (&frontier, GrB_FP64, ns, n)) ;
154 140 : GRB_TRY (LG_SET_FORMAT_HINT (paths, LG_BITMAP + LG_FULL)) ;
155 :
156 670 : for (GrB_Index i = 0 ; i < ns ; i++)
157 : {
158 : // paths (i,s(i)) = 1
159 : // frontier (i,s(i)) = 1
160 537 : double one = 1 ;
161 537 : GrB_Index src = sources [i] ;
162 537 : LG_ASSERT_MSG (src < n, GrB_INVALID_INDEX, "invalid source node") ;
163 537 : GRB_TRY (GrB_Matrix_setElement (paths, one, i, src)) ;
164 536 : GRB_TRY (GrB_Matrix_setElement (frontier, one, i, src)) ;
165 : }
166 :
167 : // Initial frontier: frontier<!paths>= frontier*A
168 133 : GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
169 : frontier, A, GrB_DESC_RSC)) ;
170 :
171 : // Allocate memory for the array of S matrices
172 120 : LG_TRY (LAGraph_Malloc ((void **) &S, n+1, sizeof (GrB_Matrix), msg)) ;
173 119 : S [0] = NULL ;
174 :
175 : // =========================================================================
176 : // === Breadth-first search stage ==========================================
177 : // =========================================================================
178 :
179 119 : bool last_was_pull = false ;
180 119 : GrB_Index frontier_size, last_frontier_size = 0 ;
181 119 : GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
182 :
183 : int64_t depth ;
184 585 : for (depth = 0 ; frontier_size > 0 && depth < n ; depth++)
185 : {
186 :
187 : //----------------------------------------------------------------------
188 : // S [depth] = structure of frontier
189 : //----------------------------------------------------------------------
190 :
191 511 : S [depth+1] = NULL ;
192 571 : LG_TRY (LAGraph_Matrix_Structure (&(S [depth]), frontier, msg)) ;
193 :
194 : //----------------------------------------------------------------------
195 : // Accumulate path counts: paths += frontier
196 : //----------------------------------------------------------------------
197 :
198 482 : GRB_TRY (GrB_assign (paths, NULL, GrB_PLUS_FP64, frontier, GrB_ALL, ns,
199 : GrB_ALL, n, NULL)) ;
200 :
201 : //----------------------------------------------------------------------
202 : // Update frontier: frontier<!paths> = frontier*A
203 : //----------------------------------------------------------------------
204 :
205 : // pull if frontier is more than 10% dense,
206 : // or > 6% dense and last step was pull
207 480 : double frontier_density = ((double) frontier_size) / (double) (ns*n) ;
208 480 : bool do_pull = frontier_density > (last_was_pull ? 0.06 : 0.10 ) ;
209 :
210 480 : if (do_pull)
211 : {
212 : // frontier<!paths> = frontier*AT'
213 399 : GRB_TRY (LG_SET_FORMAT_HINT (frontier, LG_BITMAP)) ;
214 419 : GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
215 : frontier, AT, GrB_DESC_RSCT1)) ;
216 : }
217 : else // push
218 : {
219 : // frontier<!paths> = frontier*A
220 96 : GRB_TRY (LG_SET_FORMAT_HINT (frontier, LG_SPARSE)) ;
221 93 : GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
222 : frontier, A, GrB_DESC_RSC)) ;
223 : }
224 :
225 : //----------------------------------------------------------------------
226 : // Get size of current frontier: frontier_size = nvals(frontier)
227 : //----------------------------------------------------------------------
228 :
229 466 : last_frontier_size = frontier_size ;
230 466 : last_was_pull = do_pull ;
231 466 : GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
232 : }
233 :
234 74 : GRB_TRY (GrB_free (&frontier)) ;
235 :
236 : // =========================================================================
237 : // === Betweenness centrality computation phase ============================
238 : // =========================================================================
239 :
240 : // bc_update = ones (ns, n) ; a full matrix (and stays full)
241 89 : GRB_TRY (GrB_Matrix_new (&bc_update, GrB_FP64, ns, n)) ;
242 76 : GRB_TRY (GrB_assign (bc_update, NULL, NULL, 1, GrB_ALL, ns, GrB_ALL, n,
243 : NULL)) ;
244 : // W: empty ns-by-n array, as workspace
245 85 : GRB_TRY (GrB_Matrix_new (&W, GrB_FP64, ns, n)) ;
246 :
247 : // Backtrack through the BFS and compute centrality updates for each vertex
248 181 : for (int64_t i = depth-1 ; i > 0 ; i--)
249 : {
250 :
251 : //----------------------------------------------------------------------
252 : // W<S[i]> = bc_update ./ paths
253 : //----------------------------------------------------------------------
254 :
255 : // Add contributions by successors and mask with that level's frontier
256 310 : GRB_TRY (GrB_eWiseMult (W, S [i], NULL, GrB_DIV_FP64, bc_update, paths,
257 : GrB_DESC_RS)) ;
258 :
259 : //----------------------------------------------------------------------
260 : // W<S[i−1]> = W * A'
261 : //----------------------------------------------------------------------
262 :
263 : // pull if W is more than 10% dense and nnz(W)/nnz(S[i-1]) > 1
264 : // or if W is more than 1% dense and nnz(W)/nnz(S[i-1]) > 10
265 : GrB_Index wsize, ssize ;
266 152 : GrB_Matrix_nvals (&wsize, W) ;
267 152 : GrB_Matrix_nvals (&ssize, S [i-1]) ;
268 152 : double w_density = ((double) wsize) / ((double) (ns*n)) ;
269 152 : double w_to_s_ratio = ((double) wsize) / ((double) ssize) ;
270 227 : bool do_pull = (w_density > 0.1 && w_to_s_ratio > 1.) ||
271 75 : (w_density > 0.01 && w_to_s_ratio > 10.) ;
272 :
273 152 : if (do_pull)
274 : {
275 : // W<S[i−1]> = W * A'
276 26 : GRB_TRY (LG_SET_FORMAT_HINT (W, LG_BITMAP)) ;
277 24 : GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, A,
278 : GrB_DESC_RST1)) ;
279 : }
280 : else // push
281 : {
282 : // W<S[i−1]> = W * AT
283 136 : GRB_TRY (LG_SET_FORMAT_HINT (W, LG_SPARSE)) ;
284 246 : GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, AT,
285 : GrB_DESC_RS)) ;
286 : }
287 :
288 : //----------------------------------------------------------------------
289 : // bc_update += W .* paths
290 : //----------------------------------------------------------------------
291 :
292 186 : GRB_TRY (GrB_eWiseMult (bc_update, NULL, GrB_PLUS_FP64, GrB_TIMES_FP64,
293 : W, paths, NULL)) ;
294 : }
295 :
296 : // =========================================================================
297 : // === finalize the centrality =============================================
298 : // =========================================================================
299 :
300 : // Initialize the centrality array with -ns to avoid counting
301 : // zero length paths
302 19 : GRB_TRY (GrB_Vector_new (centrality, GrB_FP64, n)) ;
303 12 : GRB_TRY (GrB_assign (*centrality, NULL, NULL, -ns, GrB_ALL, n, NULL)) ;
304 :
305 : // centrality (i) += sum (bc_update (:,i)) for all nodes i
306 21 : GRB_TRY (GrB_reduce (*centrality, NULL, GrB_PLUS_FP64, GrB_PLUS_MONOID_FP64,
307 : bc_update, GrB_DESC_T0)) ;
308 :
309 18 : LG_FREE_WORK ;
310 3 : return (GrB_SUCCESS) ;
311 : }
|