LCOV - code coverage report
Current view: top level - src/algorithm - LAGr_Betweenness.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 70 70 100.0 %
Date: 2025-06-03 22:02:40 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGr_Betweenness: vertex betweenness-centrality
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Scott Kolodziej and Tim Davis, Texas A&M University;
      15             : // Adapted and revised from GraphBLAS C API Spec, Appendix B.4.
      16             : 
      17             : //------------------------------------------------------------------------------
      18             : 
      19             : // LAGr_Betweenness: Batch algorithm for computing
      20             : // betweeness centrality, using push-pull optimization.
      21             : 
      22             : // This is an Advanced algorithm (G->AT is required).
      23             : 
      24             : // This method computes an approximation of the betweenness algorithm.
      25             : //                               ____
      26             : //                               \      sigma(s,t | i)
      27             : //    Betweenness centrality =    \    ----------------
      28             : //           of node i            /       sigma(s,t)
      29             : //                               /___
      30             : //                            s != i != t
      31             : //
      32             : // Where sigma(s,t) is the total number of shortest paths from node s to
      33             : // node t, and sigma(s,t | i) is the total number of shortest paths from
      34             : // node s to node t that pass through node i.
      35             : //
      36             : // Note that the true betweenness centrality requires computing shortest paths
      37             : // from all nodes s to all nodes t (or all-pairs shortest paths), which can be
      38             : // expensive to compute. By using a reasonably sized subset of source nodes, an
      39             : // approximation can be made.
      40             : //
      41             : // This method performs simultaneous breadth-first searches of the entire graph
      42             : // starting at a given set of source nodes. This pass discovers all shortest
      43             : // paths from the source nodes to all other nodes in the graph.  After the BFS
      44             : // is complete, the number of shortest paths that pass through a given node is
      45             : // tallied by reversing the traversal. From this, the (approximate) betweenness
      46             : // centrality is computed.
      47             : 
      48             : // G->A represents the graph, and G->AT must be present.  G->A must be square,
      49             : // and can be unsymmetric.  Self-edges are OK.  The values of G->A and G->AT
      50             : // are ignored; just the structure of two matrices are used.
      51             : 
      52             : // Each phase uses push-pull direction optimization.
      53             : 
      54             : //------------------------------------------------------------------------------
      55             : 
      56             : #define LG_FREE_WORK                            \
      57             : {                                               \
      58             :     GrB_free (&frontier) ;                      \
      59             :     GrB_free (&paths) ;                         \
      60             :     GrB_free (&bc_update) ;                     \
      61             :     GrB_free (&W) ;                             \
      62             :     if (S != NULL)                              \
      63             :     {                                           \
      64             :         for (int64_t i = 0 ; i < n ; i++)       \
      65             :         {                                       \
      66             :             if (S [i] == NULL) break ;          \
      67             :             GrB_free (&(S [i])) ;               \
      68             :         }                                       \
      69             :         LAGraph_Free ((void **) &S, NULL) ;     \
      70             :     }                                           \
      71             : }
      72             : 
      73             : #define LG_FREE_ALL                 \
      74             : {                                   \
      75             :     LG_FREE_WORK ;                  \
      76             :     GrB_free (centrality) ;         \
      77             : }
      78             : 
      79             : #include "LG_internal.h"
      80             : 
      81             : //------------------------------------------------------------------------------
      82             : // LAGr_Betweenness: vertex betweenness-centrality
      83             : //------------------------------------------------------------------------------
      84             : 
      85         146 : int LAGr_Betweenness
      86             : (
      87             :     // output:
      88             :     GrB_Vector *centrality,     // centrality(i): betweeness centrality of i
      89             :     // input:
      90             :     LAGraph_Graph G,            // input graph
      91             :     const GrB_Index *sources,   // source vertices to compute shortest paths
      92             :     int32_t ns,                 // number of source vertices
      93             :     char *msg
      94             : )
      95             : {
      96             : 
      97             :     //--------------------------------------------------------------------------
      98             :     // check inputs
      99             :     //--------------------------------------------------------------------------
     100             : 
     101         146 :     LG_CLEAR_MSG ;
     102             : 
     103             :     // Array of BFS search matrices.
     104             :     // S [i] is a sparse matrix that stores the depth at which each vertex is
     105             :     // first seen thus far in each BFS at the current depth i. Each column
     106             :     // corresponds to a BFS traversal starting from a source node.
     107         146 :     GrB_Matrix *S = NULL ;
     108             : 
     109             :     // Frontier matrix, a sparse matrix.
     110             :     // Stores # of shortest paths to vertices at current BFS depth
     111         146 :     GrB_Matrix frontier = NULL ;
     112             : 
     113             :     // Paths matrix holds the number of shortest paths for each node and
     114             :     // starting node discovered so far.  A dense matrix that is updated with
     115             :     // sparse updates, and also used as a mask.
     116         146 :     GrB_Matrix paths = NULL ;
     117             : 
     118             :     // Update matrix for betweenness centrality, values for each node for
     119             :     // each starting node.  A dense matrix.
     120         146 :     GrB_Matrix bc_update = NULL ;
     121             : 
     122             :     // Temporary workspace matrix (sparse).
     123         146 :     GrB_Matrix W = NULL ;
     124             : 
     125         146 :     GrB_Index n = 0 ;                   // # nodes in the graph
     126             : 
     127         146 :     LG_ASSERT (centrality != NULL && sources != NULL, GrB_NULL_POINTER) ;
     128         146 :     (*centrality) = NULL ;
     129         146 :     LG_TRY (LAGraph_CheckGraph (G, msg)) ;
     130             : 
     131         146 :     GrB_Matrix A = G->A ;
     132             :     GrB_Matrix AT ;
     133         146 :     if (G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
     134           1 :         G->is_symmetric_structure == LAGraph_TRUE)
     135             :     {
     136             :         // A and A' have the same structure
     137         145 :         AT = A ;
     138             :     }
     139             :     else
     140             :     {
     141             :         // A and A' differ
     142           1 :         AT = G->AT ;
     143           1 :         LG_ASSERT_MSG (AT != NULL, LAGRAPH_NOT_CACHED, "G->AT is required") ;
     144             :     }
     145             : 
     146             :     // =========================================================================
     147             :     // === initializations =====================================================
     148             :     // =========================================================================
     149             : 
     150             :     // Initialize paths and frontier with source notes
     151         146 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     152         146 :     GRB_TRY (GrB_Matrix_new (&paths,    GrB_FP64, ns, n)) ;
     153         143 :     GRB_TRY (GrB_Matrix_new (&frontier, GrB_FP64, ns, n)) ;
     154         140 :     GRB_TRY (LG_SET_FORMAT_HINT (paths, LG_BITMAP + LG_FULL)) ;
     155             : 
     156         670 :     for (GrB_Index i = 0 ; i < ns ; i++)
     157             :     {
     158             :         // paths (i,s(i)) = 1
     159             :         // frontier (i,s(i)) = 1
     160         537 :         double one = 1 ;
     161         537 :         GrB_Index src = sources [i] ;
     162         537 :         LG_ASSERT_MSG (src < n, GrB_INVALID_INDEX, "invalid source node") ;
     163         537 :         GRB_TRY (GrB_Matrix_setElement (paths,    one, i, src)) ;
     164         536 :         GRB_TRY (GrB_Matrix_setElement (frontier, one, i, src)) ;
     165             :     }
     166             : 
     167             :     // Initial frontier: frontier<!paths>= frontier*A
     168         133 :     GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
     169             :         frontier, A, GrB_DESC_RSC)) ;
     170             : 
     171             :     // Allocate memory for the array of S matrices
     172         120 :     LG_TRY (LAGraph_Malloc ((void **) &S, n+1, sizeof (GrB_Matrix), msg)) ;
     173         119 :     S [0] = NULL ;
     174             : 
     175             :     // =========================================================================
     176             :     // === Breadth-first search stage ==========================================
     177             :     // =========================================================================
     178             : 
     179         119 :     bool last_was_pull = false ;
     180         119 :     GrB_Index frontier_size, last_frontier_size = 0 ;
     181         119 :     GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
     182             : 
     183             :     int64_t depth ;
     184         585 :     for (depth = 0 ; frontier_size > 0 && depth < n ; depth++)
     185             :     {
     186             : 
     187             :         //----------------------------------------------------------------------
     188             :         // S [depth] = structure of frontier
     189             :         //----------------------------------------------------------------------
     190             : 
     191         511 :         S [depth+1] = NULL ;
     192         571 :         LG_TRY (LAGraph_Matrix_Structure (&(S [depth]), frontier, msg)) ;
     193             : 
     194             :         //----------------------------------------------------------------------
     195             :         // Accumulate path counts: paths += frontier
     196             :         //----------------------------------------------------------------------
     197             : 
     198         482 :         GRB_TRY (GrB_assign (paths, NULL, GrB_PLUS_FP64, frontier, GrB_ALL, ns,
     199             :             GrB_ALL, n, NULL)) ;
     200             : 
     201             :         //----------------------------------------------------------------------
     202             :         // Update frontier: frontier<!paths> = frontier*A
     203             :         //----------------------------------------------------------------------
     204             : 
     205             :         // pull if frontier is more than 10% dense,
     206             :         // or > 6% dense and last step was pull
     207         480 :         double frontier_density = ((double) frontier_size) / (double) (ns*n) ;
     208         480 :         bool do_pull = frontier_density > (last_was_pull ? 0.06 : 0.10 ) ;
     209             : 
     210         480 :         if (do_pull)
     211             :         {
     212             :             // frontier<!paths> = frontier*AT'
     213         399 :             GRB_TRY (LG_SET_FORMAT_HINT (frontier, LG_BITMAP)) ;
     214         419 :             GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
     215             :                 frontier, AT, GrB_DESC_RSCT1)) ;
     216             :         }
     217             :         else // push
     218             :         {
     219             :             // frontier<!paths> = frontier*A
     220          96 :             GRB_TRY (LG_SET_FORMAT_HINT (frontier, LG_SPARSE)) ;
     221          93 :             GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
     222             :                 frontier, A, GrB_DESC_RSC)) ;
     223             :         }
     224             : 
     225             :         //----------------------------------------------------------------------
     226             :         // Get size of current frontier: frontier_size = nvals(frontier)
     227             :         //----------------------------------------------------------------------
     228             : 
     229         466 :         last_frontier_size = frontier_size ;
     230         466 :         last_was_pull = do_pull ;
     231         466 :         GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
     232             :     }
     233             : 
     234          74 :     GRB_TRY (GrB_free (&frontier)) ;
     235             : 
     236             :     // =========================================================================
     237             :     // === Betweenness centrality computation phase ============================
     238             :     // =========================================================================
     239             : 
     240             :     // bc_update = ones (ns, n) ; a full matrix (and stays full)
     241          89 :     GRB_TRY (GrB_Matrix_new (&bc_update, GrB_FP64, ns, n)) ;
     242          76 :     GRB_TRY (GrB_assign (bc_update, NULL, NULL, 1, GrB_ALL, ns, GrB_ALL, n,
     243             :         NULL)) ;
     244             :     // W: empty ns-by-n array, as workspace
     245          85 :     GRB_TRY (GrB_Matrix_new (&W, GrB_FP64, ns, n)) ;
     246             : 
     247             :     // Backtrack through the BFS and compute centrality updates for each vertex
     248         181 :     for (int64_t i = depth-1 ; i > 0 ; i--)
     249             :     {
     250             : 
     251             :         //----------------------------------------------------------------------
     252             :         // W<S[i]> = bc_update ./ paths
     253             :         //----------------------------------------------------------------------
     254             : 
     255             :         // Add contributions by successors and mask with that level's frontier
     256         310 :         GRB_TRY (GrB_eWiseMult (W, S [i], NULL, GrB_DIV_FP64, bc_update, paths,
     257             :             GrB_DESC_RS)) ;
     258             : 
     259             :         //----------------------------------------------------------------------
     260             :         // W<S[i−1]> = W * A'
     261             :         //----------------------------------------------------------------------
     262             : 
     263             :         // pull if W is more than 10% dense and nnz(W)/nnz(S[i-1]) > 1
     264             :         // or if W is more than 1% dense and nnz(W)/nnz(S[i-1]) > 10
     265             :         GrB_Index wsize, ssize ;
     266         152 :         GrB_Matrix_nvals (&wsize, W) ;
     267         152 :         GrB_Matrix_nvals (&ssize, S [i-1]) ;
     268         152 :         double w_density    = ((double) wsize) / ((double) (ns*n)) ;
     269         152 :         double w_to_s_ratio = ((double) wsize) / ((double) ssize) ;
     270         227 :         bool do_pull = (w_density > 0.1  && w_to_s_ratio > 1.) ||
     271          75 :                        (w_density > 0.01 && w_to_s_ratio > 10.) ;
     272             : 
     273         152 :         if (do_pull)
     274             :         {
     275             :             // W<S[i−1]> = W * A'
     276          26 :             GRB_TRY (LG_SET_FORMAT_HINT (W, LG_BITMAP)) ;
     277          24 :             GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, A,
     278             :                 GrB_DESC_RST1)) ;
     279             :         }
     280             :         else // push
     281             :         {
     282             :             // W<S[i−1]> = W * AT
     283         136 :             GRB_TRY (LG_SET_FORMAT_HINT (W, LG_SPARSE)) ;
     284         246 :             GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, AT,
     285             :                 GrB_DESC_RS)) ;
     286             :         }
     287             : 
     288             :         //----------------------------------------------------------------------
     289             :         // bc_update += W .* paths
     290             :         //----------------------------------------------------------------------
     291             : 
     292         186 :         GRB_TRY (GrB_eWiseMult (bc_update, NULL, GrB_PLUS_FP64, GrB_TIMES_FP64,
     293             :             W, paths, NULL)) ;
     294             :     }
     295             : 
     296             :     // =========================================================================
     297             :     // === finalize the centrality =============================================
     298             :     // =========================================================================
     299             : 
     300             :     // Initialize the centrality array with -ns to avoid counting
     301             :     // zero length paths
     302          19 :     GRB_TRY (GrB_Vector_new (centrality, GrB_FP64, n)) ;
     303          12 :     GRB_TRY (GrB_assign (*centrality, NULL, NULL, -ns, GrB_ALL, n, NULL)) ;
     304             : 
     305             :     // centrality (i) += sum (bc_update (:,i)) for all nodes i
     306          21 :     GRB_TRY (GrB_reduce (*centrality, NULL, GrB_PLUS_FP64, GrB_PLUS_MONOID_FP64,
     307             :         bc_update, GrB_DESC_T0)) ;
     308             : 
     309          18 :     LG_FREE_WORK ;
     310           3 :     return (GrB_SUCCESS) ;
     311             : }

Generated by: LCOV version 1.14