Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGraph_SquareClustering: vertex square-clustering
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Erik Welch, NVIDIA.
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // Compute the square clustering coefficient for each node of an undirected
19 : // graph, which is the fraction of possible squares that exist at each node.
20 : // It is a clustering coefficient suitable for bipartite graphs and is fully
21 : // described here:
22 : // https://arxiv.org/pdf/0710.0117v1.pdf
23 : // which uses a different denominator than the original definition:
24 : // https://arxiv.org/pdf/cond-mat/0504241.pdf
25 : // Furthermore, we count squares based on
26 : // https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4)
27 : // which is implemented in LAGraph_FastGraphletTransform.c (thanks Tim Davis
28 : // for mentioning this to me!).
29 :
30 : // The NetworkX implementation of square clustering was used heavily during
31 : // development. I used it to determine the contributions to the denominator
32 : // and to verify correctness (including on larger graphs).
33 : // https://networkx.org/documentation/stable/reference/algorithms/\
34 : // generated/networkx.algorithms.cluster.square_clustering.html
35 :
36 : // Pseudocode (doesn't show dropping 0s in the final result):
37 : //
38 : // P2(~degrees.diag().S) = plus_pair(A @ A.T)
39 : // tri = first(P2 & A).reduce_rowwise()
40 : // squares = (P2 * (P2 - 1)).reduce_rowwise() / 2
41 : // uw_count = degrees * (degrees - 1)
42 : // uw_degrees = plus_times(A @ degrees) * (degrees - 1)
43 : // square_clustering = squares / (uw_degrees - uw_count - tri - squares)
44 :
45 : // The coefficient as described in https://arxiv.org/pdf/0710.0117v1.pdf
46 : // where m and n are different neighbors of node i.
47 : // Note that summations over mn are implied in the numerator and denominator:
48 : //
49 : // C_{4,mn}(i) = q_imn / ((k_m - eta_imn) + (k_n - eta_imn) + q_imn)
50 : // q_imn = # of common neighbors between m and n (i.e., squares)
51 : // k_m = number of neighbors of m (i.e., degrees[m])
52 : // eta_imn = 1 + q_imn + theta_mn
53 : // theta_mn = 1 if m and n are connected, otherwise 0 (i.e., triangles)
54 :
55 : // Here are the corresponding terms between the equation and pseudocode:
56 : // theta_mn <--> tri
57 : // q_imn <--> squares
58 : // eta_imn = 1 + ... <--> uw_count
59 : // k_m <--> uw_degrees
60 :
61 : // I first implemented this in the Python library graphblas-algorithms
62 : // https://github.com/python-graphblas/graphblas-algorithms/\
63 : // blob/main/graphblas_algorithms/algorithms/cluster.py
64 : // and I copy/pasted C code generated from the Recorder in Python-graphblas
65 : // https://github.com/python-graphblas/python-graphblas
66 :
67 : // This implementation requires that `out_degree` property is already cached.
68 : // 0 values are omitted from the result (i.e., missing values <--> zero).
69 : // Also, it computes `P2 = A @ A.T`, which may be very large. We could modify
70 : // the algorithm to compute coefficients for a subset of nodes, which would
71 : // allow expert users to compute in batches. Also, since this algorithm only
72 : // operates on undirected or symmetric graphs, we only need to compute the
73 : // upper (or lower) triangle of P2, which should reduce memory by about half.
74 : // However, this is not easy to do, and would complicate the implementation.
75 :
76 : //------------------------------------------------------------------------------
77 :
78 : #define LG_FREE_WORK \
79 : { \
80 : GrB_free (&squares) ; \
81 : GrB_free (&denom) ; \
82 : GrB_free (&neg_denom) ; \
83 : GrB_free (&P2) ; \
84 : GrB_free (&D) ; \
85 : }
86 :
87 : #define LG_FREE_ALL \
88 : { \
89 : LG_FREE_WORK ; \
90 : GrB_free (&r) ; \
91 : }
92 :
93 : #include <LAGraph.h>
94 : #include <LAGraphX.h>
95 : #include <LG_internal.h> // from src/utility
96 :
97 1 : int LAGraph_SquareClustering
98 : (
99 : // outputs:
100 : GrB_Vector *square_clustering,
101 : // inputs:
102 : LAGraph_Graph G,
103 : char *msg
104 : )
105 : {
106 1 : LG_CLEAR_MSG ;
107 :
108 : // The number of squares each node is part of
109 1 : GrB_Vector squares = NULL ;
110 :
111 : // Thought of as the total number of possible squares for each node
112 1 : GrB_Vector denom = NULL ;
113 :
114 : // Negative contributions to the denominator
115 1 : GrB_Vector neg_denom = NULL ;
116 :
117 : // Final result: the square coefficients for each node (squares / denom)
118 1 : GrB_Vector r = NULL ;
119 :
120 : // out_degrees assigned to diagonal matrix
121 : // Then used as triangles: first(P2 & A)
122 1 : GrB_Matrix D = NULL ;
123 :
124 : // P2 = plus_pair(A @ A.T).new(mask=~D.S)
125 : // Then used as a temporary workspace matrix (int64)
126 1 : GrB_Matrix P2 = NULL ;
127 :
128 1 : GrB_Vector deg = G->out_degree ;
129 1 : GrB_Matrix A = G->A ;
130 1 : GrB_Index n = 0 ;
131 :
132 : //--------------------------------------------------------------------------
133 : // check inputs
134 : //--------------------------------------------------------------------------
135 :
136 1 : LG_ASSERT (square_clustering != NULL, GrB_NULL_POINTER) ;
137 1 : (*square_clustering) = NULL ;
138 :
139 1 : LG_ASSERT_MSG (deg != NULL,
140 : LAGRAPH_NOT_CACHED, "G->out_degree is required") ;
141 :
142 1 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
143 :
144 1 : LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
145 : (G->kind == LAGraph_ADJACENCY_DIRECTED &&
146 : G->is_symmetric_structure == LAGraph_TRUE)),
147 : LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
148 : "G->A must be known to be symmetric") ;
149 :
150 : // # of nodes
151 1 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
152 :
153 : // out_degrees as a diagonal matrix.
154 : #if LAGRAPH_SUITESPARSE
155 : #if GxB_IMPLEMENTATION >= GxB_VERSION (7,0,0)
156 : // SuiteSparse 7.x and later:
157 1 : GRB_TRY (GrB_Matrix_diag(&D, deg, 0)) ;
158 : #else
159 : // SuiteSparse 6.x and earlier, which had the incorrect signature:
160 : GRB_TRY (GrB_Matrix_new(&D, GrB_INT64, n, n)) ;
161 : GRB_TRY (GrB_Matrix_diag(D, deg, 0)) ;
162 : #endif
163 : #else
164 : // standard GrB:
165 : GRB_TRY (GrB_Matrix_diag(&D, deg, 0)) ;
166 : #endif
167 :
168 : // We use ~D.S as a mask so P2 won't have values along the diagonal.
169 : // P2(~D.S) = plus_pair(A @ A.T)
170 1 : GRB_TRY (GrB_Matrix_new (&P2, GrB_INT64, n, n)) ;
171 1 : GRB_TRY (GrB_mxm (P2, D, NULL, LAGraph_plus_one_int64, A, A, GrB_DESC_SCT1)) ;
172 :
173 : // Denominator is thought of as total number of squares that could exist.
174 : // It has four terms (indicated below), and we use the definition from:
175 : // https://arxiv.org/pdf/0710.0117v1.pdf.
176 : //
177 : // (1) tri = first(P2 & A).reduce_rowwise()
178 : // Subtract 1 for each edge where u-w or w-u are connected.
179 : // In other words, triangles. Use P2, since we already have it.
180 : // D = first(P2 & A)
181 : // neg_denom = D.reduce_rowwise()
182 1 : GRB_TRY (GrB_Matrix_eWiseMult_BinaryOp (D, NULL, NULL, GrB_FIRST_INT64, P2,
183 : A, NULL)) ;
184 1 : GRB_TRY (GrB_Vector_new (&neg_denom, GrB_INT64, n)) ;
185 1 : GRB_TRY (GrB_Matrix_reduce_Monoid (neg_denom, NULL, NULL,
186 : GrB_PLUS_MONOID_INT64, D, NULL)) ;
187 1 : GrB_free (&D) ;
188 :
189 : // squares = (P2 * (P2 - 1)).reduce_rowwise() / 2
190 : // Now compute the number of squares (the numerator). We count squares
191 : // based on https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4).
192 : // P2 *= P2 - 1
193 : // squares = P2.reduce_rowwise() / 2 (and drop zeros)
194 1 : GRB_TRY (GrB_Matrix_apply_BinaryOp2nd_INT64 (P2, NULL, GrB_TIMES_INT64,
195 : GrB_MINUS_INT64, P2, 1, NULL)) ;
196 1 : GRB_TRY (GrB_Vector_new (&squares, GrB_INT64, n)) ;
197 1 : GRB_TRY (GrB_Matrix_reduce_Monoid (squares, NULL, NULL,
198 : GrB_PLUS_MONOID_INT64, P2, NULL)) ;
199 1 : GrB_free (&P2) ;
200 : // Divide by 2, and use squares as value mask to drop zeros
201 1 : GRB_TRY (GrB_Vector_apply_BinaryOp2nd_INT64 (squares, squares, NULL,
202 : GrB_DIV_INT64, squares, 2, GrB_DESC_R)) ;
203 :
204 : // (2) uw_count = degrees * (degrees - 1).
205 : // Subtract 1 for each u and 1 for each w for all combos.
206 : // denom(squares.S) = degrees - 1
207 1 : GRB_TRY (GrB_Vector_new (&denom, GrB_INT64, n)) ;
208 1 : GRB_TRY (GrB_Vector_apply_BinaryOp2nd_INT64(denom, squares, NULL,
209 : GrB_MINUS_INT64, deg, 1, GrB_DESC_S)) ;
210 : // neg_denom += degrees * (degrees - 1)
211 1 : GRB_TRY (GrB_Vector_eWiseMult_BinaryOp(neg_denom, NULL, GrB_PLUS_INT64,
212 : GrB_TIMES_INT64, deg, denom, NULL)) ;
213 :
214 : // (3) uw_degrees = plus_times(A @ degrees) * (degrees - 1).
215 : // The main contribution to (and only positive term of) the denominator:
216 : // degrees[u] + degrees[w] for each u-w combo.
217 : // Recall that `denom = degrees - 1` from above.
218 : // denom(denom.S) *= plus_times(A @ deg)
219 1 : GRB_TRY (GrB_mxv(denom, denom, GrB_TIMES_INT64,
220 : GrB_PLUS_TIMES_SEMIRING_INT64, A, deg, GrB_DESC_S)) ;
221 :
222 : // (4) squares. Subtract the number of squares
223 : // denom -= neg_denom + squares
224 1 : GRB_TRY (GrB_Vector_eWiseMult_BinaryOp(denom, NULL, GrB_MINUS_INT64,
225 : GrB_PLUS_INT64, neg_denom, squares, NULL)) ;
226 :
227 : // square_clustering = squares / (uw_degrees - uw_count - tri - squares)
228 : // Almost done! Now compute the final result:
229 : // square_clustering = r = squares / denom
230 1 : GRB_TRY (GrB_Vector_new (&r, GrB_FP64, n)) ;
231 1 : GRB_TRY (GrB_Vector_eWiseMult_BinaryOp (r, NULL, NULL, GrB_DIV_FP64,
232 : squares, denom, NULL)) ;
233 :
234 1 : (*square_clustering) = r ;
235 1 : LG_FREE_WORK ;
236 1 : return (GrB_SUCCESS) ;
237 : }
|