Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGr_Modularity: computes modularity of a graph clustering
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Cameron Quilici, Texas A&M University
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // TODO: ready to consider for src
19 :
20 : // The modularity (Q) of a graph clustering C is defined as (directed case):
21 : //
22 : // Q = \sum{c = 1}^n [\frac{L_c}{m} - \gamma(\frac{k_c^{in} k_c^{out}}{m})^2]
23 : //
24 : // where the sum iterates over all communities c = 1,...,n, L_c is the the
25 : // number of total edges in cluster c, m is the total number of edges in the
26 : // graph, k_c^{in} and k_c^{out} are the total in and out degrees of cluster c,
27 : // and \gamma is the resolution parameter which controls the relative importance
28 : // of the intra-cluster and inter-cluster edges.
29 :
30 : // Modularity works by comparing the intra-cluster density of a particular
31 : // clustering to that of a random graph with the same degree distribution. The
32 : // range of Q is [-.5, 1]. When Q ~ 0, this indicates that the clustering is
33 : // not much better than at random whereas Q ~ 1 indicates a strong community
34 : // structure.
35 :
36 : // https://arxiv.org/abs/0906.0612 pp. 15-16
37 :
38 : #define LG_FREE_WORK \
39 : { \
40 : GrB_free(&l); \
41 : GrB_free(&vmask); \
42 : GrB_free(&k_in); \
43 : GrB_free(&k_out); \
44 : GrB_free(&in_degree); \
45 : GrB_free(&out_degree); \
46 : GrB_free(&A); \
47 : GrB_free(&CA); \
48 : GrB_free(&C); \
49 : GrB_free(&ONE_INT64); \
50 : LAGraph_Free((void **)&lX, NULL); \
51 : LAGraph_Free((void **)&k_outX, NULL); \
52 : LAGraph_Free((void **)&k_inX, NULL); \
53 : }
54 :
55 : #define LG_FREE_ALL \
56 : { \
57 : LG_FREE_WORK; \
58 : }
59 :
60 : #include "LG_internal.h"
61 : #include <LAGraphX.h>
62 :
63 37 : int LAGr_Modularity(
64 : // Outputs
65 : double *mod_handle, // output modularity
66 : // Inputs
67 : double resolution, // resolution parameter
68 : GrB_Vector c, // input cluster vector
69 : LAGraph_Graph G, // original graph from which clustering was obtained
70 : char *msg)
71 : {
72 : #if LAGRAPH_SUITESPARSE
73 37 : GrB_Vector l = NULL;
74 37 : GrB_Vector vmask = NULL;
75 37 : GrB_Vector k_in = NULL, k_out = NULL;
76 37 : GrB_Vector out_degree = NULL, in_degree = NULL;
77 37 : GrB_Matrix C = NULL, CA = NULL, A = NULL;
78 37 : GrB_Scalar ONE_INT64 = NULL;
79 :
80 37 : GrB_Index *lX = NULL, *k_outX = NULL, *k_inX = NULL;
81 :
82 : //--------------------------------------------------------------------------
83 : // check inputs
84 : //--------------------------------------------------------------------------
85 :
86 37 : LG_CLEAR_MSG;
87 :
88 37 : LG_ASSERT_MSG(mod_handle != NULL, GrB_NULL_POINTER, "mod_handle is NULL");
89 36 : LG_ASSERT_MSG(resolution >= 0, GrB_INVALID_VALUE,
90 : "resolution parameter must be non-negative");
91 :
92 35 : LG_TRY(LAGraph_CheckGraph(G, msg));
93 :
94 : GrB_Index n, nedges;
95 34 : GRB_TRY(GrB_Matrix_nrows(&n, G->A));
96 :
97 : // Do not consider edge weights for modularity
98 : // FUTURE: There is a way to consider edge weights in modularity calculations,
99 : // so could give users the option to pass in a 'weights' parameter specifying
100 : // that edge weights should be used in the calculations.
101 34 : GRB_TRY(GrB_Matrix_new(&A, GrB_INT64, n, n));
102 34 : GRB_TRY(GrB_apply(A, NULL, NULL, GxB_ONE_INT64, G->A, NULL));
103 :
104 : // remove self-edges, not relevant to clustering metrics
105 34 : GRB_TRY(GrB_select(A, NULL, NULL, GrB_OFFDIAG, A, 0, NULL));
106 :
107 34 : GRB_TRY(GrB_Matrix_nvals(&nedges, A));
108 :
109 : //--------------------------------------------------------------------------
110 : // initializations
111 : //--------------------------------------------------------------------------
112 :
113 34 : GRB_TRY(GrB_Matrix_new(&C, GrB_INT64, n, n));
114 34 : GRB_TRY(GrB_Matrix_new(&CA, GrB_INT64, n, n));
115 34 : GRB_TRY(GrB_Vector_new(&l, GrB_INT64, n));
116 34 : GRB_TRY(GrB_Vector_new(&vmask, GrB_INT64, n));
117 34 : GRB_TRY(GrB_Vector_new(&k_in, GrB_INT64, n));
118 34 : GRB_TRY(GrB_Vector_new(&k_out, GrB_INT64, n));
119 34 : GRB_TRY(GrB_Vector_new(&out_degree, GrB_INT64, n));
120 34 : GRB_TRY(GrB_Vector_new(&in_degree, GrB_INT64, n));
121 34 : GRB_TRY(GrB_Scalar_new(&ONE_INT64, GrB_INT64));
122 :
123 34 : GRB_TRY(GrB_Scalar_setElement_INT64(ONE_INT64, (int64_t)1));
124 :
125 : // Convert the cluster vector to a uint64_t matrix C where
126 : // C(i, j) = 1 if and only if vertex j is in cluster i
127 : GrB_Index *cI, *cX;
128 34 : LAGRAPH_TRY(LAGraph_Malloc((void **)&cI, n, sizeof(GrB_Index), msg));
129 34 : LAGRAPH_TRY(LAGraph_Malloc((void **)&cX, n, sizeof(GrB_Index), msg));
130 34 : GRB_TRY(GrB_Vector_extractTuples_INT64(cI, (int64_t *) cX, &n, c));
131 34 : GRB_TRY(GxB_Matrix_build_Scalar(C, cX, cI, ONE_INT64, n));
132 34 : GrB_Matrix_wait(C, GrB_MATERIALIZE);
133 34 : LAGraph_Free((void **)&cI, NULL);
134 34 : LAGraph_Free((void **)&cX, NULL);
135 :
136 : // Calculate actual number of intra-cluster edges
137 34 : GRB_TRY(GrB_mxm(CA, NULL, NULL, LAGraph_plus_one_int64, C, A, NULL));
138 34 : GRB_TRY(GrB_mxm(CA, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, CA, C,
139 : GrB_DESC_T1));
140 34 : GRB_TRY(GxB_Vector_diag(l, CA, 0, NULL));
141 :
142 : // Calculate the combined degree for each cluster
143 34 : GRB_TRY(GrB_reduce(out_degree, NULL, NULL, GrB_PLUS_MONOID_INT64, A, NULL));
144 34 : GRB_TRY(GrB_reduce(in_degree, NULL, NULL, GrB_PLUS_MONOID_INT64, A,
145 : GrB_DESC_T0));
146 34 : GRB_TRY(GrB_mxv(k_out, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, C,
147 : out_degree, NULL));
148 34 : GRB_TRY(GrB_mxv(k_in, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, C,
149 : in_degree, NULL));
150 :
151 : // vmask (i) = 0 if cluster i is non-empty (has any vertices)
152 34 : GRB_TRY(GrB_reduce(vmask, NULL, NULL, GrB_LOR_MONOID_BOOL, C, NULL));
153 34 : GRB_TRY(GrB_apply(vmask, vmask, NULL, GxB_LNOT_BOOL, vmask, NULL));
154 :
155 : // If any of the above vectors have fewer entries than nclusters, this means
156 : // that there are singleton clusters with one vertex/no out-degree/no
157 : // in-degree. So we need to add explicit zeros wherever values are missing
158 : // for further calculations.
159 : GrB_Index nclusters, nl, nk_out, nk_in;
160 34 : GRB_TRY(GrB_Vector_nvals(&nclusters, vmask));
161 34 : GRB_TRY(GrB_Vector_nvals(&nl, l));
162 34 : GRB_TRY(GrB_Vector_nvals(&nk_out, l));
163 34 : GRB_TRY(GrB_Vector_nvals(&nk_in, l));
164 :
165 34 : if (nclusters != nl)
166 : {
167 11 : GRB_TRY(GrB_assign(l, l, NULL, vmask, GrB_ALL, nclusters, GrB_DESC_SC));
168 : }
169 34 : if (nclusters != nk_out)
170 : {
171 11 : GRB_TRY(GrB_assign(k_out, k_out, NULL, vmask, GrB_ALL, nclusters,
172 : GrB_DESC_SC));
173 : }
174 34 : if (nclusters != nk_in)
175 : {
176 11 : GRB_TRY(GrB_assign(k_in, k_in, NULL, vmask, GrB_ALL, nclusters,
177 : GrB_DESC_SC));
178 : }
179 :
180 : // Extract actual values of l, k_out, and k_in for modularity calculations
181 34 : LAGRAPH_TRY(
182 : LAGraph_Malloc((void **)&lX, nclusters, sizeof(GrB_Index), msg));
183 34 : LAGRAPH_TRY(
184 : LAGraph_Malloc((void **)&k_outX, nclusters, sizeof(GrB_Index), msg));
185 34 : LAGRAPH_TRY(
186 : LAGraph_Malloc((void **)&k_inX, nclusters, sizeof(GrB_Index), msg));
187 34 : GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) lX, &nclusters, l));
188 34 : GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) k_outX, &nclusters, k_out));
189 34 : GRB_TRY(GrB_Vector_extractTuples_INT64(NULL, (int64_t *) k_inX, &nclusters, k_in));
190 :
191 : GrB_Index m, out_degree_sum, in_degree_sum, L_c;
192 34 : GRB_TRY(GrB_reduce(&out_degree_sum, NULL, GrB_PLUS_MONOID_INT64, out_degree,
193 : NULL));
194 :
195 34 : m = out_degree_sum;
196 34 : double norm = 1.0 / (m * m);
197 :
198 : // compute modularity
199 34 : double mod = 0.0;
200 4037 : for (int c = 0; c < nclusters; c++)
201 : {
202 4003 : mod += (1.0 * lX[c] / nedges) -
203 4003 : (resolution * ((k_outX[c] * k_inX[c]) * norm));
204 : }
205 :
206 34 : (*mod_handle) = mod;
207 34 : LG_FREE_WORK;
208 34 : return (GrB_SUCCESS);
209 : #else
210 : return (GrB_NOT_IMPLEMENTED);
211 : #endif
212 : }
|