LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGraph_SquareClustering.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: cc56ed4. Current time (UTC): 2024-08-30T17:14:30Z Lines: 39 39 100.0 %
Date: 2024-08-30 17:16:41 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGraph_SquareClustering: vertex square-clustering
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Erik Welch, NVIDIA.
      15             : 
      16             : //------------------------------------------------------------------------------
      17             : 
      18             : // Compute the square clustering coefficient for each node of an undirected
      19             : // graph, which is the fraction of possible squares that exist at each node.
      20             : // It is a clustering coefficient suitable for bipartite graphs and is fully
      21             : // described here:
      22             : //      https://arxiv.org/pdf/0710.0117v1.pdf
      23             : // which uses a different denominator than the original definition:
      24             : //      https://arxiv.org/pdf/cond-mat/0504241.pdf
      25             : // Furthermore, we count squares based on
      26             : //      https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4)
      27             : // which is implemented in LAGraph_FastGraphletTransform.c (thanks Tim Davis
      28             : // for mentioning this to me!).
      29             : 
      30             : // The NetworkX implementation of square clustering was used heavily during
      31             : // development.  I used it to determine the contributions to the denominator
      32             : // and to verify correctness (including on larger graphs).
      33             : //      https://networkx.org/documentation/stable/reference/algorithms/\
      34             : //      generated/networkx.algorithms.cluster.square_clustering.html
      35             : 
      36             : // Pseudocode (doesn't show dropping 0s in the final result):
      37             : //
      38             : //    P2(~degrees.diag().S) = plus_pair(A @ A.T)
      39             : //    tri = first(P2 & A).reduce_rowwise()
      40             : //    squares = (P2 * (P2 - 1)).reduce_rowwise() / 2
      41             : //    uw_count = degrees * (degrees - 1)
      42             : //    uw_degrees = plus_times(A @ degrees) * (degrees - 1)
      43             : //    square_clustering = squares / (uw_degrees - uw_count - tri - squares)
      44             : 
      45             : // The coefficient as described in https://arxiv.org/pdf/0710.0117v1.pdf
      46             : // where m and n are different neighbors of node i.
      47             : // Note that summations over mn are implied in the numerator and denominator:
      48             : //
      49             : //    C_{4,mn}(i) = q_imn / ((k_m - eta_imn) + (k_n - eta_imn) + q_imn)
      50             : //    q_imn = # of common neighbors between m and n (i.e., squares)
      51             : //    k_m = number of neighbors of m (i.e., degrees[m])
      52             : //    eta_imn = 1 + q_imn + theta_mn
      53             : //    theta_mn = 1 if m and n are connected, otherwise 0 (i.e., triangles)
      54             : 
      55             : // Here are the corresponding terms between the equation and pseudocode:
      56             : //    theta_mn          <--> tri
      57             : //    q_imn             <--> squares
      58             : //    eta_imn = 1 + ... <--> uw_count
      59             : //    k_m               <--> uw_degrees
      60             : 
      61             : // I first implemented this in the Python library graphblas-algorithms
      62             : //      https://github.com/python-graphblas/graphblas-algorithms/\
      63             : //      blob/main/graphblas_algorithms/algorithms/cluster.py
      64             : // and I copy/pasted C code generated from the Recorder in Python-graphblas
      65             : //      https://github.com/python-graphblas/python-graphblas
      66             : 
      67             : // This implementation requires that `out_degree` property is already cached.
      68             : // 0 values are omitted from the result (i.e., missing values <--> zero).
      69             : // Also, it computes `P2 = A @ A.T`, which may be very large.  We could modify
      70             : // the algorithm to compute coefficients for a subset of nodes, which would
      71             : // allow expert users to compute in batches.  Also, since this algorithm only
      72             : // operates on undirected or symmetric graphs, we only need to compute the
      73             : // upper (or lower) triangle of P2, which should reduce memory by about half.
      74             : // However, this is not easy to do, and would complicate the implementation.
      75             : 
      76             : //------------------------------------------------------------------------------
      77             : 
      78             : #define LG_FREE_WORK                \
      79             : {                                   \
      80             :     GrB_free (&squares) ;           \
      81             :     GrB_free (&denom) ;             \
      82             :     GrB_free (&neg_denom) ;         \
      83             :     GrB_free (&P2) ;                \
      84             :     GrB_free (&D) ;                 \
      85             : }
      86             : 
      87             : #define LG_FREE_ALL                 \
      88             : {                                   \
      89             :     LG_FREE_WORK ;                  \
      90             :     GrB_free (&r) ;                 \
      91             : }
      92             : 
      93             : #include <LAGraph.h>
      94             : #include <LAGraphX.h>
      95             : #include <LG_internal.h>  // from src/utility
      96             : 
      97           1 : int LAGraph_SquareClustering
      98             : (
      99             :     // outputs:
     100             :     GrB_Vector *square_clustering,
     101             :     // inputs:
     102             :     LAGraph_Graph G,
     103             :     char *msg
     104             : )
     105             : {
     106           1 :     LG_CLEAR_MSG ;
     107             : 
     108             :     // The number of squares each node is part of
     109           1 :     GrB_Vector squares = NULL ;
     110             : 
     111             :     // Thought of as the total number of possible squares for each node
     112           1 :     GrB_Vector denom = NULL ;
     113             : 
     114             :     // Negative contributions to the denominator
     115           1 :     GrB_Vector neg_denom = NULL ;
     116             : 
     117             :     // Final result: the square coefficients for each node (squares / denom)
     118           1 :     GrB_Vector r = NULL ;
     119             : 
     120             :     // out_degrees assigned to diagonal matrix
     121             :     // Then used as triangles: first(P2 & A)
     122           1 :     GrB_Matrix D = NULL ;
     123             : 
     124             :     // P2 = plus_pair(A @ A.T).new(mask=~D.S)
     125             :     // Then used as a temporary workspace matrix (int64)
     126           1 :     GrB_Matrix P2 = NULL ;
     127             : 
     128           1 :     GrB_Vector deg = G->out_degree ;
     129           1 :     GrB_Matrix A = G->A ;
     130           1 :     GrB_Index n = 0 ;
     131             : 
     132             :     //--------------------------------------------------------------------------
     133             :     // check inputs
     134             :     //--------------------------------------------------------------------------
     135             : 
     136           1 :     LG_ASSERT (square_clustering != NULL, GrB_NULL_POINTER) ;
     137           1 :     (*square_clustering) = NULL ;
     138             : 
     139           1 :     LG_ASSERT_MSG (deg != NULL,
     140             :         LAGRAPH_NOT_CACHED, "G->out_degree is required") ;
     141             : 
     142           1 :     LG_TRY (LAGraph_CheckGraph (G, msg)) ;
     143             : 
     144           1 :     LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
     145             :        (G->kind == LAGraph_ADJACENCY_DIRECTED &&
     146             :         G->is_symmetric_structure == LAGraph_TRUE)),
     147             :         LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
     148             :         "G->A must be known to be symmetric") ;
     149             : 
     150             :     // # of nodes
     151           1 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     152             : 
     153             :     // out_degrees as a diagonal matrix.
     154             :     #if LAGRAPH_SUITESPARSE
     155             :         #if GxB_IMPLEMENTATION >= GxB_VERSION (7,0,0)
     156             :         // SuiteSparse 7.x and later:
     157           1 :         GRB_TRY (GrB_Matrix_diag(&D, deg, 0)) ;
     158             :         #else
     159             :         // SuiteSparse 6.x and earlier, which had the incorrect signature:
     160             :         GRB_TRY (GrB_Matrix_new(&D, GrB_INT64, n, n)) ;
     161             :         GRB_TRY (GrB_Matrix_diag(D, deg, 0)) ;
     162             :         #endif
     163             :     #else
     164             :     // standard GrB:
     165             :     GRB_TRY (GrB_Matrix_diag(&D, deg, 0)) ;
     166             :     #endif
     167             : 
     168             :     // We use ~D.S as a mask so P2 won't have values along the diagonal.
     169             :     //    P2(~D.S) = plus_pair(A @ A.T)
     170           1 :     GRB_TRY (GrB_Matrix_new (&P2, GrB_INT64, n, n)) ;
     171           1 :     GRB_TRY (GrB_mxm (P2, D, NULL, LAGraph_plus_one_int64, A, A, GrB_DESC_SCT1)) ;
     172             : 
     173             :     // Denominator is thought of as total number of squares that could exist.
     174             :     // It has four terms (indicated below), and we use the definition from:
     175             :     //      https://arxiv.org/pdf/0710.0117v1.pdf.
     176             :     //
     177             :     // (1) tri = first(P2 & A).reduce_rowwise()
     178             :     // Subtract 1 for each edge where u-w or w-u are connected.
     179             :     // In other words, triangles.  Use P2, since we already have it.
     180             :     //     D = first(P2 & A)
     181             :     //     neg_denom = D.reduce_rowwise()
     182           1 :     GRB_TRY (GrB_Matrix_eWiseMult_BinaryOp (D, NULL, NULL, GrB_FIRST_INT64, P2,
     183             :         A, NULL)) ;
     184           1 :     GRB_TRY (GrB_Vector_new (&neg_denom, GrB_INT64, n)) ;
     185           1 :     GRB_TRY (GrB_Matrix_reduce_Monoid (neg_denom, NULL, NULL,
     186             :         GrB_PLUS_MONOID_INT64, D, NULL)) ;
     187           1 :     GrB_free (&D) ;
     188             : 
     189             :     // squares = (P2 * (P2 - 1)).reduce_rowwise() / 2
     190             :     // Now compute the number of squares (the numerator).  We count squares
     191             :     // based on https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4).
     192             :     //     P2 *= P2 - 1
     193             :     //     squares = P2.reduce_rowwise() / 2  (and drop zeros)
     194           1 :     GRB_TRY (GrB_Matrix_apply_BinaryOp2nd_INT64 (P2, NULL, GrB_TIMES_INT64,
     195             :         GrB_MINUS_INT64, P2, 1, NULL)) ;
     196           1 :     GRB_TRY (GrB_Vector_new (&squares, GrB_INT64, n)) ;
     197           1 :     GRB_TRY (GrB_Matrix_reduce_Monoid (squares, NULL, NULL,
     198             :         GrB_PLUS_MONOID_INT64, P2, NULL)) ;
     199           1 :     GrB_free (&P2) ;
     200             :     // Divide by 2, and use squares as value mask to drop zeros
     201           1 :     GRB_TRY (GrB_Vector_apply_BinaryOp2nd_INT64 (squares, squares, NULL,
     202             :         GrB_DIV_INT64, squares, 2, GrB_DESC_R)) ;
     203             : 
     204             :     // (2) uw_count = degrees * (degrees - 1).
     205             :     // Subtract 1 for each u and 1 for each w for all combos.
     206             :     //    denom(squares.S) = degrees - 1
     207           1 :     GRB_TRY (GrB_Vector_new (&denom, GrB_INT64, n)) ;
     208           1 :     GRB_TRY (GrB_Vector_apply_BinaryOp2nd_INT64(denom, squares, NULL,
     209             :         GrB_MINUS_INT64, deg, 1, GrB_DESC_S)) ;
     210             :     // neg_denom += degrees * (degrees - 1)
     211           1 :     GRB_TRY (GrB_Vector_eWiseMult_BinaryOp(neg_denom, NULL, GrB_PLUS_INT64,
     212             :         GrB_TIMES_INT64, deg, denom, NULL)) ;
     213             : 
     214             :     // (3) uw_degrees = plus_times(A @ degrees) * (degrees - 1).
     215             :     // The main contribution to (and only positive term of) the denominator:
     216             :     // degrees[u] + degrees[w] for each u-w combo.
     217             :     // Recall that `denom = degrees - 1` from above.
     218             :     //    denom(denom.S) *= plus_times(A @ deg)
     219           1 :     GRB_TRY (GrB_mxv(denom, denom, GrB_TIMES_INT64,
     220             :         GrB_PLUS_TIMES_SEMIRING_INT64, A, deg, GrB_DESC_S)) ;
     221             : 
     222             :     // (4) squares.  Subtract the number of squares
     223             :     //    denom -= neg_denom + squares
     224           1 :     GRB_TRY (GrB_Vector_eWiseMult_BinaryOp(denom, NULL, GrB_MINUS_INT64,
     225             :         GrB_PLUS_INT64, neg_denom, squares, NULL)) ;
     226             : 
     227             :     // square_clustering = squares / (uw_degrees - uw_count - tri - squares)
     228             :     // Almost done!  Now compute the final result:
     229             :     //    square_clustering = r = squares / denom
     230           1 :     GRB_TRY (GrB_Vector_new (&r, GrB_FP64, n)) ;
     231           1 :     GRB_TRY (GrB_Vector_eWiseMult_BinaryOp (r, NULL, NULL, GrB_DIV_FP64,
     232             :         squares, denom, NULL)) ;
     233             : 
     234           1 :     (*square_clustering) = r ;
     235           1 :     LG_FREE_WORK ;
     236           1 :     return (GrB_SUCCESS) ;
     237             : }

Generated by: LCOV version 1.14