LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGr_Modularity.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 69 69 100.0 %
Date: 2025-06-03 22:02:40 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGr_Modularity: computes modularity of a graph clustering
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Cameron Quilici, Texas A&M University
      15             : 
      16             : //------------------------------------------------------------------------------
      17             : 
      18             : // TODO: ready to consider for src
      19             : 
      20             : // The modularity (Q) of a graph clustering C is defined as (directed case):
      21             : //
      22             : // Q = \sum{c = 1}^n [\frac{L_c}{m} - \gamma(\frac{k_c^{in} k_c^{out}}{m})^2]
      23             : //
      24             : // where the sum iterates over all communities c = 1,...,n, L_c is the the
      25             : // number of total edges in cluster c, m is the total number of edges in the
      26             : // graph, k_c^{in} and k_c^{out} are the total in and out degrees of cluster c,
      27             : // and \gamma is the resolution parameter which controls the relative importance
      28             : // of the intra-cluster and inter-cluster edges.
      29             : 
      30             : // Modularity works by comparing the intra-cluster density of a particular
      31             : // clustering to that of a random graph with the same degree distribution. The
      32             : // range of Q is [-.5, 1]. When Q ~ 0, this indicates that the clustering is
      33             : // not much better than at random whereas Q ~ 1 indicates a strong community
      34             : // structure.
      35             : 
      36             : // https://arxiv.org/abs/0906.0612 pp. 15-16
      37             : 
      38             : #define LG_FREE_WORK                                                           \
      39             :     {                                                                          \
      40             :         GrB_free(&l);                                                          \
      41             :         GrB_free(&vmask);                                                      \
      42             :         GrB_free(&k_in);                                                       \
      43             :         GrB_free(&k_out);                                                      \
      44             :         GrB_free(&in_degree);                                                  \
      45             :         GrB_free(&out_degree);                                                 \
      46             :         GrB_free(&A);                                                          \
      47             :         GrB_free(&CA);                                                         \
      48             :         GrB_free(&C);                                                          \
      49             :         GrB_free(&ONE_INT64);                                                  \
      50             :         LAGraph_Free((void **)&lX, NULL);                                      \
      51             :         LAGraph_Free((void **)&k_outX, NULL);                                  \
      52             :         LAGraph_Free((void **)&k_inX, NULL);                                   \
      53             :     }
      54             : 
      55             : #define LG_FREE_ALL                                                            \
      56             :     {                                                                          \
      57             :         LG_FREE_WORK;                                                          \
      58             :     }
      59             : 
      60             : #include "LG_internal.h"
      61             : #include <LAGraphX.h>
      62             : 
      63          37 : int LAGr_Modularity(
      64             :     // Outputs
      65             :     double *mod_handle, // output modularity
      66             :     // Inputs
      67             :     double resolution, // resolution parameter
      68             :     GrB_Vector c,      // input cluster vector
      69             :     LAGraph_Graph G,   // original graph from which clustering was obtained
      70             :     char *msg)
      71             : {
      72             : #if LAGRAPH_SUITESPARSE
      73          37 :     GrB_Vector l = NULL;
      74          37 :     GrB_Vector vmask = NULL;
      75          37 :     GrB_Vector k_in = NULL, k_out = NULL;
      76          37 :     GrB_Vector out_degree = NULL, in_degree = NULL;
      77          37 :     GrB_Matrix C = NULL, CA = NULL, A = NULL;
      78          37 :     GrB_Scalar ONE_INT64 = NULL;
      79             : 
      80          37 :     GrB_Index *lX = NULL, *k_outX = NULL, *k_inX = NULL;
      81             : 
      82             :     //--------------------------------------------------------------------------
      83             :     // check inputs
      84             :     //--------------------------------------------------------------------------
      85             : 
      86          37 :     LG_CLEAR_MSG;
      87             : 
      88          37 :     LG_ASSERT_MSG(mod_handle != NULL, GrB_NULL_POINTER, "mod_handle is NULL");
      89          36 :     LG_ASSERT_MSG(resolution >= 0, GrB_INVALID_VALUE,
      90             :                   "resolution parameter must be non-negative");
      91             : 
      92          35 :     LG_TRY(LAGraph_CheckGraph(G, msg));
      93             : 
      94             :     GrB_Index n, nedges;
      95          34 :     GRB_TRY(GrB_Matrix_nrows(&n, G->A));
      96             : 
      97             :     // Do not consider edge weights for modularity
      98             :     // FUTURE: There is a way to consider edge weights in modularity calculations,
      99             :     // so could give users the option to pass in a 'weights' parameter specifying 
     100             :     // that edge weights should be used in the calculations.
     101          34 :     GRB_TRY(GrB_Matrix_new(&A, GrB_INT64, n, n));
     102          34 :     GRB_TRY(GrB_apply(A, NULL, NULL, GxB_ONE_INT64, G->A, NULL));
     103             : 
     104             :     // remove self-edges, not relevant to clustering metrics
     105          34 :     GRB_TRY(GrB_select(A, NULL, NULL, GrB_OFFDIAG, A, 0, NULL));
     106             : 
     107          34 :     GRB_TRY(GrB_Matrix_nvals(&nedges, A));
     108             : 
     109             :     //--------------------------------------------------------------------------
     110             :     // initializations
     111             :     //--------------------------------------------------------------------------
     112             : 
     113          34 :     GRB_TRY(GrB_Matrix_new(&C, GrB_INT64, n, n));
     114          34 :     GRB_TRY(GrB_Matrix_new(&CA, GrB_INT64, n, n));
     115          34 :     GRB_TRY(GrB_Vector_new(&l, GrB_INT64, n));
     116          34 :     GRB_TRY(GrB_Vector_new(&vmask, GrB_INT64, n));
     117          34 :     GRB_TRY(GrB_Vector_new(&k_in, GrB_INT64, n));
     118          34 :     GRB_TRY(GrB_Vector_new(&k_out, GrB_INT64, n));
     119          34 :     GRB_TRY(GrB_Vector_new(&out_degree, GrB_INT64, n));
     120          34 :     GRB_TRY(GrB_Vector_new(&in_degree, GrB_INT64, n));
     121          34 :     GRB_TRY(GrB_Scalar_new(&ONE_INT64, GrB_INT64));
     122             : 
     123          34 :     GRB_TRY(GrB_Scalar_setElement_INT64(ONE_INT64, (int64_t)1));
     124             : 
     125             :     // Convert the cluster vector to a uint64_t matrix C where
     126             :     // C(i, j) = 1 if and only if vertex j is in cluster i
     127             :     GrB_Index *cI, *cX;
     128          34 :     LAGRAPH_TRY(LAGraph_Malloc((void **)&cI, n, sizeof(GrB_Index), msg));
     129          34 :     LAGRAPH_TRY(LAGraph_Malloc((void **)&cX, n, sizeof(GrB_Index), msg));
     130          34 :     GRB_TRY(GrB_Vector_extractTuples_INT64(cI, (int64_t *) cX, &n, c));
     131          34 :     GRB_TRY(GxB_Matrix_build_Scalar(C, cX, cI, ONE_INT64, n));
     132          34 :     GrB_Matrix_wait(C, GrB_MATERIALIZE);
     133          34 :     LAGraph_Free((void **)&cI, NULL);
     134          34 :     LAGraph_Free((void **)&cX, NULL);
     135             : 
     136             :     // Calculate actual number of intra-cluster edges
     137          34 :     GRB_TRY(GrB_mxm(CA, NULL, NULL, LAGraph_plus_one_int64, C, A, NULL));
     138          34 :     GRB_TRY(GrB_mxm(CA, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, CA, C,
     139             :                     GrB_DESC_T1));
     140          34 :     GRB_TRY(GxB_Vector_diag(l, CA, 0, NULL));
     141             : 
     142             :     // Calculate the combined degree for each cluster
     143          34 :     GRB_TRY(GrB_reduce(out_degree, NULL, NULL, GrB_PLUS_MONOID_INT64, A, NULL));
     144          34 :     GRB_TRY(GrB_reduce(in_degree, NULL, NULL, GrB_PLUS_MONOID_INT64, A,
     145             :                        GrB_DESC_T0));
     146          34 :     GRB_TRY(GrB_mxv(k_out, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, C,
     147             :                     out_degree, NULL));
     148          34 :     GRB_TRY(GrB_mxv(k_in, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, C,
     149             :                     in_degree, NULL));
     150             : 
     151             :     // vmask (i) = 0 if cluster i is non-empty (has any vertices)
     152          34 :     GRB_TRY(GrB_reduce(vmask, NULL, NULL, GrB_LOR_MONOID_BOOL, C, NULL));
     153          34 :     GRB_TRY(GrB_apply(vmask, vmask, NULL, GxB_LNOT_BOOL, vmask, NULL));
     154             : 
     155             :     // If any of the above vectors have fewer entries than nclusters, this means
     156             :     // that there are singleton clusters with one vertex/no out-degree/no
     157             :     // in-degree. So we need to add explicit zeros wherever values are missing
     158             :     // for further calculations.
     159             :     GrB_Index nclusters, nl, nk_out, nk_in;
     160          34 :     GRB_TRY(GrB_Vector_nvals(&nclusters, vmask));
     161          34 :     GRB_TRY(GrB_Vector_nvals(&nl, l));
     162          34 :     GRB_TRY(GrB_Vector_nvals(&nk_out, l));
     163          34 :     GRB_TRY(GrB_Vector_nvals(&nk_in, l));
     164             : 
     165          34 :     if (nclusters != nl)
     166             :     {
     167          11 :         GRB_TRY(GrB_assign(l, l, NULL, vmask, GrB_ALL, nclusters, GrB_DESC_SC));
     168             :     }
     169          34 :     if (nclusters != nk_out)
     170             :     {
     171          11 :         GRB_TRY(GrB_assign(k_out, k_out, NULL, vmask, GrB_ALL, nclusters,
     172             :                            GrB_DESC_SC));
     173             :     }
     174          34 :     if (nclusters != nk_in)
     175             :     {
     176          11 :         GRB_TRY(GrB_assign(k_in, k_in, NULL, vmask, GrB_ALL, nclusters,
     177             :                            GrB_DESC_SC));
     178             :     }
     179             : 
     180             :     // Extract actual values of l, k_out, and k_in for modularity calculations
     181          34 :     LAGRAPH_TRY(
     182             :         LAGraph_Malloc((void **)&lX, nclusters, sizeof(GrB_Index), msg));
     183          34 :     LAGRAPH_TRY(
     184             :         LAGraph_Malloc((void **)&k_outX, nclusters, sizeof(GrB_Index), msg));
     185          34 :     LAGRAPH_TRY(
     186             :         LAGraph_Malloc((void **)&k_inX, nclusters, sizeof(GrB_Index), msg));
     187          34 :     GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) lX, &nclusters, l));
     188          34 :     GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) k_outX, &nclusters, k_out));
     189          34 :     GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) k_inX, &nclusters, k_in));
     190             : 
     191             :     GrB_Index m, out_degree_sum, in_degree_sum, L_c;
     192          34 :     GRB_TRY(GrB_reduce(&out_degree_sum, NULL, GrB_PLUS_MONOID_INT64, out_degree,
     193             :                        NULL));
     194             : 
     195          34 :     m = out_degree_sum;
     196          34 :     double norm = 1.0 / (m * m);
     197             : 
     198             :     // compute modularity
     199          34 :     double mod = 0.0;
     200        4037 :     for (int c = 0; c < nclusters; c++)
     201             :     {
     202        4003 :         mod += (1.0 * lX[c] / nedges) -
     203        4003 :                (resolution * ((k_outX[c] * k_inX[c]) * norm));
     204             :     }
     205             : 
     206          34 :     (*mod_handle) = mod;
     207          34 :     LG_FREE_WORK;
     208          34 :     return (GrB_SUCCESS);
     209             : #else
     210             :     return (GrB_NOT_IMPLEMENTED);
     211             : #endif
     212             : }

Generated by: LCOV version 1.14