Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGraph_lcc: local clustering coefficient
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2023 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Gabor Szarnyas and Balint Hegyi, Budapest University of
15 : // Technology and Economics (with accented characters: G\'{a}bor Sz\'{a}rnyas
16 : // and B\'{a}lint Hegyi, using LaTeX syntax).
17 : // https://inf.mit.bme.hu/en/members/szarnyasg .
18 : // Modified by Timothy A. Davis, Texas A&M University
19 : // Modified by Pascal Costanza, Intel, Belgium
20 :
21 : //------------------------------------------------------------------------------
22 :
23 : // This function was originally written for the LDBC Graphalytics benchmark,
24 : // at https://graphalytics.org/ .
25 :
26 : // TODO: ready to add to src
27 : // TODO: rename this method
28 :
29 : // The local clustering coefficient is a measure for each node of a graph.
30 : // Its definition is fully described in the following document:
31 : // https://ldbc.github.io/ldbc_graphalytics_docs/graphalytics_spec.pdf
32 :
33 : // For each node v, the lcc(v) is the ratio between the number of edges between
34 : // neighbors of the node v, and the maximum possible number of edges between
35 : // these neighbors. If a node v has fewer than 2 neighbors, then its
36 : // coefficient is defined as zero, and the vth entry does not appear in the
37 : // sparse vector LCC returned.
38 :
39 : // Let N_in(v) = the set of nodes u such that (u,v) is an edge.
40 : // Let N_out(v) = the set of nodes u such that (v,u) is an edge.
41 : // Let N(v) = union (N_in(v), N_out(v)).
42 : // Then the metric lcc(v) is defined as:
43 :
44 : // lcc(v) = (sum for all u in N(v) of |intersection (N(v), N_out(u))) /
45 : // ( |N(v)| * (|N(v)|-1) )
46 :
47 : // That is, for directed graphs, the set of neighbors N(v) is found without
48 : // taking directions into account, but a node u that has both an edge (u,v) and
49 : // (v,u) is counted just once. However, edge directions are enforced when
50 : // considering two nodes u1 and u2 that are both in N(v), i.e. when counting
51 : // the number of edges between neighbors, (u,v) and (v,u) are counted as two.
52 : // To account for this, the maximum possible number of edges for vertex v is
53 : // determined as the 2-combination of |N(v)| for undirected graphs and as the
54 : // 2-permutation of |N(v)| for directed graphs.
55 :
56 : // The input matrix A must be square. If A is known to be binary (with all
57 : // explicit edge weights equal to 1), then sanitize can be false. This is the
58 : // case for the LDBC benchmark.
59 :
60 : // Otherwise, if sanitize is true, edge weights of A are ignored and only the
61 : // structure of A is used. This step takes extra time and memory to sanitize the
62 : // input matrix A. For a fair comparison in the LDBC benchmark, sanitize
63 : // should be false.
64 :
65 : // Results are undefined if sanitize is false, and the matrix A has any entries
66 : // not equal to 1 (even zero-weight edges are not allowed), or if it has self
67 : // edges.
68 :
69 : #define LG_FREE_ALL \
70 : { \
71 : GrB_free (&CL) ; \
72 : GrB_free (&S) ; \
73 : GrB_free (&U) ; \
74 : GrB_free (&W) ; \
75 : GrB_free (&x) ; \
76 : GrB_free (&LCC) ; \
77 : GrB_free (&LAGraph_COMB_FP64) ; \
78 : }
79 :
80 : #include "LG_internal.h"
81 : #include <LAGraph.h>
82 : #include <LAGraphX.h>
83 :
84 : //------------------------------------------------------------------------------
85 :
86 : #define F_UNARY(f) ((void (*)(void *, const void *)) f)
87 :
88 :
89 : // z = x * (x - 1), used by LAGraph_lcc.
90 : // This operator calculates the 2-permutation of d(v).
91 67 : void LAGraph_comb_dir_fp64
92 : (
93 : void *z,
94 : const void *x
95 : )
96 : {
97 67 : double xd = *(double *) x ;
98 67 : double *zd = (double *) z ;
99 67 : (*zd) = ((xd) * (xd - 1)) ;
100 67 : }
101 :
102 : #define LAGRAPH_COMB_DIR_FP64 \
103 : "void LAGraph_comb_dir_fp64 \n" \
104 : "( \n" \
105 : " void *z, \n" \
106 : " const void *x \n" \
107 : ") \n" \
108 : "{ \n" \
109 : " double xd = *(double *) x ; \n" \
110 : " double *zd = (double *) z ; \n" \
111 : " (*zd) = ((xd) * (xd - 1)); \n" \
112 : "}"
113 :
114 : // z = x * (x - 1) / 2, used by LAGraph_lcc.
115 : // This operator calculates the 2-combination of d(v).
116 3227 : void LAGraph_comb_undir_fp64
117 : (
118 : void *z,
119 : const void *x
120 : )
121 : {
122 3227 : double xd = *(double *) x ;
123 3227 : double *zd = (double *) z ;
124 3227 : (*zd) = ((xd) * (xd - 1)) / 2;
125 3227 : }
126 :
127 : #define LAGRAPH_COMB_UNDIR_FP64 \
128 : "void LAGraph_comb_undir_fp64 \n" \
129 : "( \n" \
130 : " void *z, \n" \
131 : " const void *x \n" \
132 : ") \n" \
133 : "{ \n" \
134 : " double xd = *(double *) x ; \n" \
135 : " double *zd = (double *) z ; \n" \
136 : " (*zd) = ((xd) * (xd - 1)) / 2; \n" \
137 : "}"
138 :
139 : //------------------------------------------------------------------------------
140 :
141 21 : int LAGraph_lcc // compute lcc for all nodes in A
142 : (
143 : GrB_Vector *LCC_handle, // output vector
144 : LAGraph_Graph G, // input graph
145 : char *msg
146 : )
147 : {
148 :
149 : //--------------------------------------------------------------------------
150 : // check inputs
151 : //--------------------------------------------------------------------------
152 :
153 21 : LG_CLEAR_MSG ;
154 21 : if (LCC_handle == NULL)
155 : {
156 1 : return (GrB_NULL_POINTER) ;
157 : }
158 :
159 20 : GrB_Matrix CL = NULL, S = NULL, U = NULL ;
160 20 : GrB_Vector W = NULL, LCC = NULL, x = NULL ;
161 20 : GrB_UnaryOp LAGraph_COMB_FP64 = NULL ;
162 : GrB_Info info ;
163 :
164 20 : LG_ASSERT_MSG (G->is_symmetric_structure != LAGraph_BOOLEAN_UNKNOWN,
165 : LAGRAPH_NOT_CACHED,
166 : "G->is_symmetric_structure is required") ;
167 :
168 20 : LG_ASSERT_MSG (G->nself_edges != LAGRAPH_UNKNOWN,
169 : LAGRAPH_NOT_CACHED,
170 : "G->nself_edges is required") ;
171 :
172 20 : GrB_Matrix A = G->A ;
173 :
174 : // n = size of A (# of nodes in the graph)
175 : GrB_Index n ;
176 20 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
177 :
178 : //--------------------------------------------------------------------------
179 : // ensure input is binary and has no self-edges
180 : //--------------------------------------------------------------------------
181 :
182 20 : GRB_TRY (GrB_Matrix_new (&S, GrB_FP64, n, n));
183 20 : GRB_TRY (GrB_apply (S, GrB_NULL, GrB_NULL, GrB_ONEB_FP64, A, 0, GrB_NULL));
184 20 : if (G->nself_edges != 0) {
185 6 : GRB_TRY (GrB_select (S, GrB_NULL, GrB_NULL, GrB_OFFDIAG, S, 0, GrB_NULL));
186 : }
187 :
188 : //--------------------------------------------------------------------------
189 : // create the operators for LAGraph_lcc
190 : //--------------------------------------------------------------------------
191 :
192 : #if LAGRAPH_SUITESPARSE
193 20 : if (G->is_symmetric_structure == LAGraph_TRUE) {
194 18 : GRB_TRY (GxB_UnaryOp_new(&LAGraph_COMB_FP64,
195 : F_UNARY(LAGraph_comb_undir_fp64),
196 : GrB_FP64, GrB_FP64,
197 : "LAGraph_comb_undir_fp64",
198 : LAGRAPH_COMB_UNDIR_FP64
199 : ));
200 : } else {
201 2 : GRB_TRY (GxB_UnaryOp_new(&LAGraph_COMB_FP64,
202 : F_UNARY(LAGraph_comb_dir_fp64),
203 : GrB_FP64, GrB_FP64,
204 : "LAGraph_comb_dir_fp64",
205 : LAGRAPH_COMB_DIR_FP64
206 : ));
207 : }
208 : #else
209 : if (G->is_symmetric_structure == LAGraph_TRUE) {
210 : GRB_TRY (GrB_UnaryOp_new(&LAGraph_COMB_FP64,
211 : F_UNARY(LAGraph_comb_undir_fp64),
212 : GrB_FP64, GrB_FP64
213 : ));
214 : } else {
215 : GRB_TRY (GrB_UnaryOp_new(&LAGraph_COMB_FP64,
216 : F_UNARY(LAGraph_comb_dir_fp64),
217 : GrB_FP64, GrB_FP64
218 : ));
219 : }
220 : #endif
221 :
222 20 : GRB_TRY (GrB_Matrix_new (&U, GrB_FP64, n, n)) ;
223 :
224 20 : if (G->is_symmetric_structure == LAGraph_FALSE) {
225 :
226 : //----------------------------------------------------------------------
227 : // S = S + S' to create an undirected multigraph D
228 : //----------------------------------------------------------------------
229 :
230 2 : GRB_TRY (GrB_eWiseAdd (S, NULL, NULL, GrB_PLUS_FP64, S, S, GrB_DESC_T1))
231 : }
232 :
233 : //----------------------------------------------------------------------
234 : // U = triu(C)
235 : //----------------------------------------------------------------------
236 :
237 20 : GRB_TRY (GrB_select (U, NULL, NULL, GrB_TRIU, S, 0, NULL)) ;
238 :
239 : //--------------------------------------------------------------------------
240 : // Find wedges of each node
241 : //--------------------------------------------------------------------------
242 :
243 : // W(i) = sum (C (i,:)) = # of entries in C(i,:) because all entries
244 : // present in C are equal to 1.
245 20 : GRB_TRY (GrB_Vector_new (&W, GrB_FP64, n)) ;
246 : // x = zeros (n,1)
247 20 : GRB_TRY (GrB_Vector_new (&x, GrB_INT64, n)) ;
248 20 : GRB_TRY (GrB_assign (x, NULL, NULL, 0, GrB_ALL, n, NULL)) ;
249 : // W = C*x using the plus_one semiring
250 20 : GRB_TRY (GrB_mxv (W, NULL, NULL, LAGraph_plus_one_fp64, S, x, NULL)) ;
251 20 : GrB_free (&x) ;
252 :
253 : // Compute vector W defining the number of wedges per vertex
254 20 : GRB_TRY (GrB_apply(W, NULL, NULL, LAGraph_COMB_FP64, W, NULL));
255 :
256 : //--------------------------------------------------------------------------
257 : // Calculate triangles
258 : //--------------------------------------------------------------------------
259 :
260 : // CL<C> = C*L = C*U' using a masked dot product
261 20 : GRB_TRY (GrB_Matrix_new (&CL, GrB_FP64, n, n)) ;
262 20 : GRB_TRY (GrB_mxm (CL, S, NULL, LAGraph_plus_second_fp64, S, U, GrB_DESC_ST1));
263 20 : GRB_TRY (GrB_free (&S)) ;
264 20 : GRB_TRY (GrB_free (&U)) ;
265 :
266 : //--------------------------------------------------------------------------
267 : // Calculate LCC
268 : //--------------------------------------------------------------------------
269 :
270 : // LCC(i) = sum (CL (i,:)) = # of triangles at each node
271 20 : GRB_TRY (GrB_Vector_new (&LCC, GrB_FP64, n)) ;
272 20 : GRB_TRY (GrB_reduce (LCC, NULL, NULL, GrB_PLUS_FP64, CL, NULL)) ;
273 20 : GRB_TRY (GrB_free (&CL)) ;
274 :
275 : // LCC = LCC ./ W
276 20 : GRB_TRY (GrB_eWiseMult (LCC, NULL, NULL, GrB_DIV_FP64, LCC, W, NULL)) ;
277 :
278 : //--------------------------------------------------------------------------
279 : // free workspace and return result
280 : //--------------------------------------------------------------------------
281 :
282 20 : (*LCC_handle) = LCC ; LCC = NULL ;
283 :
284 20 : LG_FREE_ALL ;
285 20 : return (GrB_SUCCESS) ;
286 : }
|