LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGraph_SquareClustering.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 39 39 100.0 %
Date: 2025-06-03 22:02:40 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGraph_SquareClustering: vertex square-clustering
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Erik Welch, NVIDIA.
      15             : 
      16             : //------------------------------------------------------------------------------
      17             : 
      18             : // TODO: ready for src, except this is advanced; need basic method.
      19             : 
      20             : // Compute the square clustering coefficient for each node of an undirected
      21             : // graph, which is the fraction of possible squares that exist at each node.
      22             : // It is a clustering coefficient suitable for bipartite graphs and is fully
      23             : // described here:
      24             : //      https://arxiv.org/pdf/0710.0117v1.pdf
      25             : // which uses a different denominator than the original definition:
      26             : //      https://arxiv.org/pdf/cond-mat/0504241.pdf
      27             : // Furthermore, we count squares based on
      28             : //      https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4)
      29             : // which is implemented in LAGraph_FastGraphletTransform.c (thanks Tim Davis
      30             : // for mentioning this to me!).
      31             : 
      32             : // The NetworkX implementation of square clustering was used heavily during
      33             : // development.  I used it to determine the contributions to the denominator
      34             : // and to verify correctness (including on larger graphs).
      35             : //      https://networkx.org/documentation/stable/reference/algorithms/\
      36             : //      generated/networkx.algorithms.cluster.square_clustering.html
      37             : 
      38             : // Pseudocode (doesn't show dropping 0s in the final result):
      39             : //
      40             : //    P2(~degrees.diag().S) = plus_pair(A @ A.T)
      41             : //    tri = first(P2 & A).reduce_rowwise()
      42             : //    squares = (P2 * (P2 - 1)).reduce_rowwise() / 2
      43             : //    uw_count = degrees * (degrees - 1)
      44             : //    uw_degrees = plus_times(A @ degrees) * (degrees - 1)
      45             : //    square_clustering = squares / (uw_degrees - uw_count - tri - squares)
      46             : 
      47             : // The coefficient as described in https://arxiv.org/pdf/0710.0117v1.pdf
      48             : // where m and n are different neighbors of node i.
      49             : // Note that summations over mn are implied in the numerator and denominator:
      50             : //
      51             : //    C_{4,mn}(i) = q_imn / ((k_m - eta_imn) + (k_n - eta_imn) + q_imn)
      52             : //    q_imn = # of common neighbors between m and n (i.e., squares)
      53             : //    k_m = number of neighbors of m (i.e., degrees[m])
      54             : //    eta_imn = 1 + q_imn + theta_mn
      55             : //    theta_mn = 1 if m and n are connected, otherwise 0 (i.e., triangles)
      56             : 
      57             : // Here are the corresponding terms between the equation and pseudocode:
      58             : //    theta_mn          <--> tri
      59             : //    q_imn             <--> squares
      60             : //    eta_imn = 1 + ... <--> uw_count
      61             : //    k_m               <--> uw_degrees
      62             : 
      63             : // I first implemented this in the Python library graphblas-algorithms
      64             : //      https://github.com/python-graphblas/graphblas-algorithms/\
      65             : //      blob/main/graphblas_algorithms/algorithms/cluster.py
      66             : // and I copy/pasted C code generated from the Recorder in Python-graphblas
      67             : //      https://github.com/python-graphblas/python-graphblas
      68             : 
      69             : // This implementation requires that `out_degree` property is already cached.
      70             : // 0 values are omitted from the result (i.e., missing values <--> zero).
      71             : // Also, it computes `P2 = A @ A.T`, which may be very large.  We could modify
      72             : // the algorithm to compute coefficients for a subset of nodes, which would
      73             : // allow expert users to compute in batches.  Also, since this algorithm only
      74             : // operates on undirected or symmetric graphs, we only need to compute the
      75             : // upper (or lower) triangle of P2, which should reduce memory by about half.
      76             : // However, this is not easy to do, and would complicate the implementation.
      77             : 
      78             : //------------------------------------------------------------------------------
      79             : 
      80             : #define LG_FREE_WORK                \
      81             : {                                   \
      82             :     GrB_free (&squares) ;           \
      83             :     GrB_free (&denom) ;             \
      84             :     GrB_free (&neg_denom) ;         \
      85             :     GrB_free (&P2) ;                \
      86             :     GrB_free (&D) ;                 \
      87             : }
      88             : 
      89             : #define LG_FREE_ALL                 \
      90             : {                                   \
      91             :     LG_FREE_WORK ;                  \
      92             :     GrB_free (&r) ;                 \
      93             : }
      94             : 
      95             : #include <LAGraph.h>
      96             : #include <LAGraphX.h>
      97             : #include <LG_internal.h>  // from src/utility
      98             : 
      99           1 : int LAGraph_SquareClustering
     100             : (
     101             :     // outputs:
     102             :     GrB_Vector *square_clustering,
     103             :     // inputs:
     104             :     LAGraph_Graph G,
     105             :     char *msg
     106             : )
     107             : {
     108           1 :     LG_CLEAR_MSG ;
     109             : 
     110             :     // The number of squares each node is part of
     111           1 :     GrB_Vector squares = NULL ;
     112             : 
     113             :     // Thought of as the total number of possible squares for each node
     114           1 :     GrB_Vector denom = NULL ;
     115             : 
     116             :     // Negative contributions to the denominator
     117           1 :     GrB_Vector neg_denom = NULL ;
     118             : 
     119             :     // Final result: the square coefficients for each node (squares / denom)
     120           1 :     GrB_Vector r = NULL ;
     121             : 
     122             :     // out_degrees assigned to diagonal matrix
     123             :     // Then used as triangles: first(P2 & A)
     124           1 :     GrB_Matrix D = NULL ;
     125             : 
     126             :     // P2 = plus_pair(A @ A.T).new(mask=~D.S)
     127             :     // Then used as a temporary workspace matrix (int64)
     128           1 :     GrB_Matrix P2 = NULL ;
     129             : 
     130           1 :     GrB_Vector deg = G->out_degree ;
     131           1 :     GrB_Matrix A = G->A ;
     132           1 :     GrB_Index n = 0 ;
     133             : 
     134             :     //--------------------------------------------------------------------------
     135             :     // check inputs
     136             :     //--------------------------------------------------------------------------
     137             : 
     138           1 :     LG_ASSERT (square_clustering != NULL, GrB_NULL_POINTER) ;
     139           1 :     (*square_clustering) = NULL ;
     140             : 
     141           1 :     LG_ASSERT_MSG (deg != NULL,
     142             :         LAGRAPH_NOT_CACHED, "G->out_degree is required") ;
     143             : 
     144           1 :     LG_TRY (LAGraph_CheckGraph (G, msg)) ;
     145             : 
     146           1 :     LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
     147             :        (G->kind == LAGraph_ADJACENCY_DIRECTED &&
     148             :         G->is_symmetric_structure == LAGraph_TRUE)),
     149             :         LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
     150             :         "G->A must be known to be symmetric") ;
     151             : 
     152             :     // # of nodes
     153           1 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     154             : 
     155             :     // out_degrees as a diagonal matrix.
     156           1 :     GRB_TRY (GrB_Matrix_diag(&D, deg, 0)) ;
     157             : 
     158             :     // We use ~D.S as a mask so P2 won't have values along the diagonal.
     159             :     //    P2(~D.S) = plus_pair(A @ A.T)
     160           1 :     GRB_TRY (GrB_Matrix_new (&P2, GrB_INT64, n, n)) ;
     161           1 :     GRB_TRY (GrB_mxm (P2, D, NULL, LAGraph_plus_one_int64, A, A, GrB_DESC_SCT1)) ;
     162             : 
     163             :     // Denominator is thought of as total number of squares that could exist.
     164             :     // It has four terms (indicated below), and we use the definition from:
     165             :     //      https://arxiv.org/pdf/0710.0117v1.pdf.
     166             :     //
     167             :     // (1) tri = first(P2 & A).reduce_rowwise()
     168             :     // Subtract 1 for each edge where u-w or w-u are connected.
     169             :     // In other words, triangles.  Use P2, since we already have it.
     170             :     //     D = first(P2 & A)
     171             :     //     neg_denom = D.reduce_rowwise()
     172           1 :     GRB_TRY (GrB_Matrix_eWiseMult_BinaryOp (D, NULL, NULL, GrB_FIRST_INT64, P2,
     173             :         A, NULL)) ;
     174           1 :     GRB_TRY (GrB_Vector_new (&neg_denom, GrB_INT64, n)) ;
     175           1 :     GRB_TRY (GrB_Matrix_reduce_Monoid (neg_denom, NULL, NULL,
     176             :         GrB_PLUS_MONOID_INT64, D, NULL)) ;
     177           1 :     GrB_free (&D) ;
     178             : 
     179             :     // squares = (P2 * (P2 - 1)).reduce_rowwise() / 2
     180             :     // Now compute the number of squares (the numerator).  We count squares
     181             :     // based on https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4).
     182             :     //     P2 *= P2 - 1
     183             :     //     squares = P2.reduce_rowwise() / 2  (and drop zeros)
     184           1 :     GRB_TRY (GrB_Matrix_apply_BinaryOp2nd_INT64 (P2, NULL, GrB_TIMES_INT64,
     185             :         GrB_MINUS_INT64, P2, 1, NULL)) ;
     186           1 :     GRB_TRY (GrB_Vector_new (&squares, GrB_INT64, n)) ;
     187           1 :     GRB_TRY (GrB_Matrix_reduce_Monoid (squares, NULL, NULL,
     188             :         GrB_PLUS_MONOID_INT64, P2, NULL)) ;
     189           1 :     GrB_free (&P2) ;
     190             :     // Divide by 2, and use squares as value mask to drop zeros
     191           1 :     GRB_TRY (GrB_Vector_apply_BinaryOp2nd_INT64 (squares, squares, NULL,
     192             :         GrB_DIV_INT64, squares, 2, GrB_DESC_R)) ;
     193             : 
     194             :     // (2) uw_count = degrees * (degrees - 1).
     195             :     // Subtract 1 for each u and 1 for each w for all combos.
     196             :     //    denom(squares.S) = degrees - 1
     197           1 :     GRB_TRY (GrB_Vector_new (&denom, GrB_INT64, n)) ;
     198           1 :     GRB_TRY (GrB_Vector_apply_BinaryOp2nd_INT64(denom, squares, NULL,
     199             :         GrB_MINUS_INT64, deg, 1, GrB_DESC_S)) ;
     200             :     // neg_denom += degrees * (degrees - 1)
     201           1 :     GRB_TRY (GrB_Vector_eWiseMult_BinaryOp(neg_denom, NULL, GrB_PLUS_INT64,
     202             :         GrB_TIMES_INT64, deg, denom, NULL)) ;
     203             : 
     204             :     // (3) uw_degrees = plus_times(A @ degrees) * (degrees - 1).
     205             :     // The main contribution to (and only positive term of) the denominator:
     206             :     // degrees[u] + degrees[w] for each u-w combo.
     207             :     // Recall that `denom = degrees - 1` from above.
     208             :     //    denom(denom.S) *= plus_times(A @ deg)
     209           1 :     GRB_TRY (GrB_mxv(denom, denom, GrB_TIMES_INT64,
     210             :         GrB_PLUS_TIMES_SEMIRING_INT64, A, deg, GrB_DESC_S)) ;
     211             : 
     212             :     // (4) squares.  Subtract the number of squares
     213             :     //    denom -= neg_denom + squares
     214           1 :     GRB_TRY (GrB_Vector_eWiseMult_BinaryOp(denom, NULL, GrB_MINUS_INT64,
     215             :         GrB_PLUS_INT64, neg_denom, squares, NULL)) ;
     216             : 
     217             :     // square_clustering = squares / (uw_degrees - uw_count - tri - squares)
     218             :     // Almost done!  Now compute the final result:
     219             :     //    square_clustering = r = squares / denom
     220           1 :     GRB_TRY (GrB_Vector_new (&r, GrB_FP64, n)) ;
     221           1 :     GRB_TRY (GrB_Vector_eWiseMult_BinaryOp (r, NULL, NULL, GrB_DIV_FP64,
     222             :         squares, denom, NULL)) ;
     223             : 
     224           1 :     (*square_clustering) = r ;
     225           1 :     LG_FREE_WORK ;
     226           1 :     return (GrB_SUCCESS) ;
     227             : }

Generated by: LCOV version 1.14