LCOV - code coverage report
Current view: top level - src/algorithm - LAGr_Betweenness.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 3b461aa. Current time (UTC): 2024-01-25T16:04:32Z Lines: 70 70 100.0 %
Date: 2024-01-25 16:05:28 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGr_Betweenness: vertex betweenness-centrality
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Scott Kolodziej and Tim Davis, Texas A&M University;
      15             : // Adapted and revised from GraphBLAS C API Spec, Appendix B.4.
      16             : 
      17             : //------------------------------------------------------------------------------
      18             : 
      19             : // LAGr_Betweenness: Batch algorithm for computing
      20             : // betweeness centrality, using push-pull optimization.
      21             : 
      22             : // This is an Advanced algorithm (G->AT is required).
      23             : 
      24             : // This method computes an approximation of the betweenness algorithm.
      25             : //                               ____
      26             : //                               \      sigma(s,t | i)
      27             : //    Betweenness centrality =    \    ----------------
      28             : //           of node i            /       sigma(s,t)
      29             : //                               /___
      30             : //                            s != i != t
      31             : //
      32             : // Where sigma(s,t) is the total number of shortest paths from node s to
      33             : // node t, and sigma(s,t | i) is the total number of shortest paths from
      34             : // node s to node t that pass through node i.
      35             : //
      36             : // Note that the true betweenness centrality requires computing shortest paths
      37             : // from all nodes s to all nodes t (or all-pairs shortest paths), which can be
      38             : // expensive to compute. By using a reasonably sized subset of source nodes, an
      39             : // approximation can be made.
      40             : //
      41             : // This method performs simultaneous breadth-first searches of the entire graph
      42             : // starting at a given set of source nodes. This pass discovers all shortest
      43             : // paths from the source nodes to all other nodes in the graph.  After the BFS
      44             : // is complete, the number of shortest paths that pass through a given node is
      45             : // tallied by reversing the traversal. From this, the (approximate) betweenness
      46             : // centrality is computed.
      47             : 
      48             : // G->A represents the graph, and G->AT must be present.  G->A must be square,
      49             : // and can be unsymmetric.  Self-edges are OK.  The values of G->A and G->AT
      50             : // are ignored; just the structure of two matrices are used.
      51             : 
      52             : // Each phase uses push-pull direction optimization.
      53             : 
      54             : //------------------------------------------------------------------------------
      55             : 
      56             : #define LG_FREE_WORK                            \
      57             : {                                               \
      58             :     GrB_free (&frontier) ;                      \
      59             :     GrB_free (&paths) ;                         \
      60             :     GrB_free (&bc_update) ;                     \
      61             :     GrB_free (&W) ;                             \
      62             :     if (S != NULL)                              \
      63             :     {                                           \
      64             :         for (int64_t i = 0 ; i < n ; i++)       \
      65             :         {                                       \
      66             :             if (S [i] == NULL) break ;          \
      67             :             GrB_free (&(S [i])) ;               \
      68             :         }                                       \
      69             :         LAGraph_Free ((void **) &S, NULL) ;     \
      70             :     }                                           \
      71             : }
      72             : 
      73             : #define LG_FREE_ALL                 \
      74             : {                                   \
      75             :     LG_FREE_WORK ;                  \
      76             :     GrB_free (centrality) ;         \
      77             : }
      78             : 
      79             : #include "LG_internal.h"
      80             : 
      81             : //------------------------------------------------------------------------------
      82             : // LAGr_Betweenness: vertex betweenness-centrality
      83             : //------------------------------------------------------------------------------
      84             : 
      85         143 : int LAGr_Betweenness
      86             : (
      87             :     // output:
      88             :     GrB_Vector *centrality,     // centrality(i): betweeness centrality of i
      89             :     // input:
      90             :     LAGraph_Graph G,            // input graph
      91             :     const GrB_Index *sources,   // source vertices to compute shortest paths
      92             :     int32_t ns,                 // number of source vertices
      93             :     char *msg
      94             : )
      95             : {
      96             : 
      97             :     //--------------------------------------------------------------------------
      98             :     // check inputs
      99             :     //--------------------------------------------------------------------------
     100             : 
     101         143 :     LG_CLEAR_MSG ;
     102             : 
     103             :     // Array of BFS search matrices.
     104             :     // S [i] is a sparse matrix that stores the depth at which each vertex is
     105             :     // first seen thus far in each BFS at the current depth i. Each column
     106             :     // corresponds to a BFS traversal starting from a source node.
     107         143 :     GrB_Matrix *S = NULL ;
     108             : 
     109             :     // Frontier matrix, a sparse matrix.
     110             :     // Stores # of shortest paths to vertices at current BFS depth
     111         143 :     GrB_Matrix frontier = NULL ;
     112             : 
     113             :     // Paths matrix holds the number of shortest paths for each node and
     114             :     // starting node discovered so far.  A dense matrix that is updated with
     115             :     // sparse updates, and also used as a mask.
     116         143 :     GrB_Matrix paths = NULL ;
     117             : 
     118             :     // Update matrix for betweenness centrality, values for each node for
     119             :     // each starting node.  A dense matrix.
     120         143 :     GrB_Matrix bc_update = NULL ;
     121             : 
     122             :     // Temporary workspace matrix (sparse).
     123         143 :     GrB_Matrix W = NULL ;
     124             : 
     125         143 :     GrB_Index n = 0 ;                   // # nodes in the graph
     126             : 
     127         143 :     LG_ASSERT (centrality != NULL && sources != NULL, GrB_NULL_POINTER) ;
     128         143 :     (*centrality) = NULL ;
     129         143 :     LG_TRY (LAGraph_CheckGraph (G, msg)) ;
     130             : 
     131         143 :     GrB_Matrix A = G->A ;
     132             :     GrB_Matrix AT ;
     133         143 :     if (G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
     134           1 :         G->is_symmetric_structure == LAGraph_TRUE)
     135             :     {
     136             :         // A and A' have the same structure
     137         142 :         AT = A ;
     138             :     }
     139             :     else
     140             :     {
     141             :         // A and A' differ
     142           1 :         AT = G->AT ;
     143           1 :         LG_ASSERT_MSG (AT != NULL, LAGRAPH_NOT_CACHED, "G->AT is required") ;
     144             :     }
     145             : 
     146             :     // =========================================================================
     147             :     // === initializations =====================================================
     148             :     // =========================================================================
     149             : 
     150             :     // Initialize paths and frontier with source notes
     151         143 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     152         143 :     GRB_TRY (GrB_Matrix_new (&paths,    GrB_FP64, ns, n)) ;
     153         140 :     GRB_TRY (GrB_Matrix_new (&frontier, GrB_FP64, ns, n)) ;
     154             :     #if LAGRAPH_SUITESPARSE
     155         137 :     GRB_TRY (GxB_set (paths, GxB_SPARSITY_CONTROL, GxB_BITMAP + GxB_FULL)) ;
     156             :     #endif
     157         655 :     for (GrB_Index i = 0 ; i < ns ; i++)
     158             :     {
     159             :         // paths (i,s(i)) = 1
     160             :         // frontier (i,s(i)) = 1
     161         525 :         double one = 1 ;
     162         525 :         GrB_Index src = sources [i] ;
     163         525 :         LG_ASSERT_MSG (src < n, GrB_INVALID_INDEX, "invalid source node") ;
     164         525 :         GRB_TRY (GrB_Matrix_setElement (paths,    one, i, src)) ;
     165         524 :         GRB_TRY (GrB_Matrix_setElement (frontier, one, i, src)) ;
     166             :     }
     167             : 
     168             :     // Initial frontier: frontier<!paths>= frontier*A
     169         130 :     GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
     170             :         frontier, A, GrB_DESC_RSC)) ;
     171             : 
     172             :     // Allocate memory for the array of S matrices
     173         117 :     LG_TRY (LAGraph_Malloc ((void **) &S, n+1, sizeof (GrB_Matrix), msg)) ;
     174         116 :     S [0] = NULL ;
     175             : 
     176             :     // =========================================================================
     177             :     // === Breadth-first search stage ==========================================
     178             :     // =========================================================================
     179             : 
     180         116 :     bool last_was_pull = false ;
     181         116 :     GrB_Index frontier_size, last_frontier_size = 0 ;
     182         116 :     GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
     183             : 
     184             :     int64_t depth ;
     185         567 :     for (depth = 0 ; frontier_size > 0 && depth < n ; depth++)
     186             :     {
     187             : 
     188             :         //----------------------------------------------------------------------
     189             :         // S [depth] = structure of frontier
     190             :         //----------------------------------------------------------------------
     191             : 
     192         496 :         S [depth+1] = NULL ;
     193         556 :         LG_TRY (LAGraph_Matrix_Structure (&(S [depth]), frontier, msg)) ;
     194             : 
     195             :         //----------------------------------------------------------------------
     196             :         // Accumulate path counts: paths += frontier
     197             :         //----------------------------------------------------------------------
     198             : 
     199         467 :         GRB_TRY (GrB_assign (paths, NULL, GrB_PLUS_FP64, frontier, GrB_ALL, ns,
     200             :             GrB_ALL, n, NULL)) ;
     201             : 
     202             :         //----------------------------------------------------------------------
     203             :         // Update frontier: frontier<!paths> = frontier*A
     204             :         //----------------------------------------------------------------------
     205             : 
     206             :         // pull if frontier is more than 10% dense,
     207             :         // or > 6% dense and last step was pull
     208         465 :         double frontier_density = ((double) frontier_size) / (double) (ns*n) ;
     209         465 :         bool do_pull = frontier_density > (last_was_pull ? 0.06 : 0.10 ) ;
     210             : 
     211         465 :         if (do_pull)
     212             :         {
     213             :             // frontier<!paths> = frontier*AT'
     214             :             #if LAGRAPH_SUITESPARSE
     215         387 :             GRB_TRY (GxB_set (frontier, GxB_SPARSITY_CONTROL, GxB_BITMAP)) ;
     216             :             #endif
     217         407 :             GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
     218             :                 frontier, AT, GrB_DESC_RSCT1)) ;
     219             :         }
     220             :         else // push
     221             :         {
     222             :             // frontier<!paths> = frontier*A
     223             :             #if LAGRAPH_SUITESPARSE
     224          93 :             GRB_TRY (GxB_set (frontier, GxB_SPARSITY_CONTROL, GxB_SPARSE)) ;
     225             :             #endif
     226          90 :             GRB_TRY (GrB_mxm (frontier, paths, NULL, LAGraph_plus_first_fp64,
     227             :                 frontier, A, GrB_DESC_RSC)) ;
     228             :         }
     229             : 
     230             :         //----------------------------------------------------------------------
     231             :         // Get size of current frontier: frontier_size = nvals(frontier)
     232             :         //----------------------------------------------------------------------
     233             : 
     234         451 :         last_frontier_size = frontier_size ;
     235         451 :         last_was_pull = do_pull ;
     236         451 :         GRB_TRY (GrB_Matrix_nvals (&frontier_size, frontier)) ;
     237             :     }
     238             : 
     239          71 :     GRB_TRY (GrB_free (&frontier)) ;
     240             : 
     241             :     // =========================================================================
     242             :     // === Betweenness centrality computation phase ============================
     243             :     // =========================================================================
     244             : 
     245             :     // bc_update = ones (ns, n) ; a full matrix (and stays full)
     246          86 :     GRB_TRY (GrB_Matrix_new (&bc_update, GrB_FP64, ns, n)) ;
     247          73 :     GRB_TRY (GrB_assign (bc_update, NULL, NULL, 1, GrB_ALL, ns, GrB_ALL, n,
     248             :         NULL)) ;
     249             :     // W: empty ns-by-n array, as workspace
     250          82 :     GRB_TRY (GrB_Matrix_new (&W, GrB_FP64, ns, n)) ;
     251             : 
     252             :     // Backtrack through the BFS and compute centrality updates for each vertex
     253         175 :     for (int64_t i = depth-1 ; i > 0 ; i--)
     254             :     {
     255             : 
     256             :         //----------------------------------------------------------------------
     257             :         // W<S[i]> = bc_update ./ paths
     258             :         //----------------------------------------------------------------------
     259             : 
     260             :         // Add contributions by successors and mask with that level's frontier
     261         301 :         GRB_TRY (GrB_eWiseMult (W, S [i], NULL, GrB_DIV_FP64, bc_update, paths,
     262             :             GrB_DESC_RS)) ;
     263             : 
     264             :         //----------------------------------------------------------------------
     265             :         // W<S[i−1]> = W * A'
     266             :         //----------------------------------------------------------------------
     267             : 
     268             :         // pull if W is more than 10% dense and nnz(W)/nnz(S[i-1]) > 1
     269             :         // or if W is more than 1% dense and nnz(W)/nnz(S[i-1]) > 10
     270             :         GrB_Index wsize, ssize ;
     271         146 :         GrB_Matrix_nvals (&wsize, W) ;
     272         146 :         GrB_Matrix_nvals (&ssize, S [i-1]) ;
     273         146 :         double w_density    = ((double) wsize) / ((double) (ns*n)) ;
     274         146 :         double w_to_s_ratio = ((double) wsize) / ((double) ssize) ;
     275         218 :         bool do_pull = (w_density > 0.1  && w_to_s_ratio > 1.) ||
     276          72 :                        (w_density > 0.01 && w_to_s_ratio > 10.) ;
     277             : 
     278         146 :         if (do_pull)
     279             :         {
     280             :             // W<S[i−1]> = W * A'
     281             :             #if LAGRAPH_SUITESPARSE
     282          26 :             GRB_TRY (GxB_set (W, GxB_SPARSITY_CONTROL, GxB_BITMAP)) ;
     283             :             #endif
     284          24 :             GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, A,
     285             :                 GrB_DESC_RST1)) ;
     286             :         }
     287             :         else // push
     288             :         {
     289             :             // W<S[i−1]> = W * AT
     290             :             #if LAGRAPH_SUITESPARSE
     291         130 :             GRB_TRY (GxB_set (W, GxB_SPARSITY_CONTROL, GxB_SPARSE)) ;
     292             :             #endif
     293         225 :             GRB_TRY (GrB_mxm (W, S [i-1], NULL, LAGraph_plus_first_fp64, W, AT,
     294             :                 GrB_DESC_RS)) ;
     295             :         }
     296             : 
     297             :         //----------------------------------------------------------------------
     298             :         // bc_update += W .* paths
     299             :         //----------------------------------------------------------------------
     300             : 
     301         183 :         GRB_TRY (GrB_eWiseMult (bc_update, NULL, GrB_PLUS_FP64, GrB_TIMES_FP64,
     302             :             W, paths, NULL)) ;
     303             :     }
     304             : 
     305             :     // =========================================================================
     306             :     // === finalize the centrality =============================================
     307             :     // =========================================================================
     308             : 
     309             :     // Initialize the centrality array with -ns to avoid counting
     310             :     // zero length paths
     311          19 :     GRB_TRY (GrB_Vector_new (centrality, GrB_FP64, n)) ;
     312          12 :     GRB_TRY (GrB_assign (*centrality, NULL, NULL, -ns, GrB_ALL, n, NULL)) ;
     313             : 
     314             :     // centrality (i) += sum (bc_update (:,i)) for all nodes i
     315          21 :     GRB_TRY (GrB_reduce (*centrality, NULL, GrB_PLUS_FP64, GrB_PLUS_MONOID_FP64,
     316             :         bc_update, GrB_DESC_T0)) ;
     317             : 
     318          18 :     LG_FREE_WORK ;
     319           3 :     return (GrB_SUCCESS) ;
     320             : }

Generated by: LCOV version 1.14