Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGraph_lcc: local clustering coefficient
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Gabor Szarnyas and Balint Hegyi, Budapest University of
15 : // Technology and Economics (with accented characters: G\'{a}bor Sz\'{a}rnyas
16 : // and B\'{a}lint Hegyi, using LaTeX syntax).
17 : // https://inf.mit.bme.hu/en/members/szarnyasg .
18 : // Modified by Timothy A. Davis, Texas A&M University
19 :
20 : //------------------------------------------------------------------------------
21 :
22 : // This function was originally written for the LDBC Graphalytics benchmark,
23 : // at https://graphalytics.org/ .
24 :
25 : // The local clustering coefficient is a measure for each node of a graph.
26 : // Its definition is fully described in the following document:
27 : // https://ldbc.github.io/ldbc_graphalytics_docs/graphalytics_spec.pdf
28 :
29 : // For each node v, the lcc(v) is the ratio between the number of edges between
30 : // neighbors of the node v, and the maximum possible number of edges between
31 : // these neighbors. If a node v has fewer than 2 neighbors, then its
32 : // coefficient is defined as zero, and the vth entry does not appear in the
33 : // sparse vector LCC returned.
34 :
35 : // Let N_in(v) = the set of nodes u such that (u,v) is an edge.
36 : // Let N_out(v) = the set of nodes u such that (v,u) is an edge.
37 : // Let N(v) = union (N_in(v), N_out(v)).
38 : // Then the metric lcc(v) is defined as:
39 :
40 : // lcc(v) = (sum for all u in N(v) of |intersection (N(v), N_out(u))) /
41 : // ( |N(v)| * (|N(v)|-1) )
42 :
43 : // That is, for directed graphs, the set of neighbors N(v) is found without
44 : // taking directions into account, but a node u that has both an edge (u,v) and
45 : // (v,u) is counted just once. However, edge directions are enforced when
46 : // considering two nodes u1 and u2 that are both in N(v), i.e. when counting
47 : // the number of edges between neighbors, (u,v) and (v,u) are counted as two.
48 : // To account for this, the maximum possible number of edges for vertex v is
49 : // determined as the 2-combination of |N(v)| for undirected graphs and as the
50 : // 2-permutation of |N(v)| for directed graphs.
51 :
52 : // The input matrix A must be square. If A is known to be binary (with all
53 : // explicit edge weights equal to 1), then sanitize can be false. This is the
54 : // case for the LDBC benchmark.
55 :
56 : // Otherwise, if sanitize is true, edge weights of A are ignored and only the
57 : // structure of A is used. This step takes extra time and memory to sanitize the
58 : // input matrix A. For a fair comparison in the LDBC benchmark, sanitize
59 : // should be false.
60 :
61 : // Results are undefined if sanitize is false, and the matrix A has any entries
62 : // not equal to 1 (even zero-weight edges are not allowed), or if it has self
63 : // edges.
64 :
65 : #define LG_FREE_ALL \
66 : { \
67 : GrB_free (&A2) ; \
68 : GrB_free (&C2) ; \
69 : GrB_free (&CL) ; \
70 : GrB_free (&U) ; \
71 : GrB_free (&W) ; \
72 : GrB_free (&LCC) ; \
73 : GrB_free (&LAGraph_COMB_DIR_FP64) ; \
74 : GrB_free (&LAGraph_COMB_UNDIR_FP64) ; \
75 : }
76 :
77 : #include "LG_internal.h"
78 : #include <LAGraph.h>
79 : #include <LAGraphX.h>
80 :
81 : //------------------------------------------------------------------------------
82 :
83 : #define F_UNARY(f) ((void (*)(void *, const void *)) f)
84 :
85 : // z = x * (x - 1), used by LAGraph_lcc.
86 : // This operator calculates the 2-permutation of d(v).
87 67 : void LAGraph_comb_dir_fp64
88 : (
89 : void *z,
90 : const void *x
91 : )
92 : {
93 67 : double xd = *(double *) x ;
94 67 : double *zd = (double *) z ;
95 67 : (*zd) = ((xd) * (xd - 1)) ;
96 67 : }
97 :
98 : // z = x * (x - 1) / 2, used by LAGraph_lcc.
99 : // This operator calculates the 2-combination of d(v).
100 3227 : void LAGraph_comb_undir_fp64
101 : (
102 : void *z,
103 : const void *x
104 : )
105 : {
106 3227 : double xd = *(double *) x ;
107 3227 : double *zd = (double *) z ;
108 3227 : (*zd) = ((xd) * (xd - 1)) / 2;
109 3227 : }
110 :
111 : //------------------------------------------------------------------------------
112 :
113 12 : int LAGraph_lcc // compute lcc for all nodes in A
114 : (
115 : GrB_Vector *LCC_handle, // output vector
116 : const GrB_Matrix A, // input matrix
117 : bool symmetric, // if true, the matrix is symmetric
118 : bool sanitize, // if true, ensure A is binary
119 : double t [2], // t [0] = sanitize time, t [1] = lcc time,
120 : // in seconds
121 : char *msg
122 : )
123 : {
124 :
125 : //--------------------------------------------------------------------------
126 : // check inputs
127 : //--------------------------------------------------------------------------
128 :
129 12 : LG_CLEAR_MSG ;
130 12 : if (LCC_handle == NULL)
131 : {
132 1 : return (GrB_NULL_POINTER) ;
133 : }
134 :
135 11 : GrB_Matrix C = NULL, CL = NULL, S = NULL, U = NULL ;
136 11 : GrB_Vector W = NULL, LCC = NULL ;
137 11 : GrB_Matrix A2 = NULL, C2 = NULL ;
138 11 : GrB_UnaryOp LAGraph_COMB_DIR_FP64 = NULL ;
139 11 : GrB_UnaryOp LAGraph_COMB_UNDIR_FP64 = NULL ;
140 : GrB_Info info ;
141 :
142 : #if !LAGRAPH_SUITESPARSE
143 : LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
144 : #else
145 :
146 : // n = size of A (# of nodes in the graph)
147 : GrB_Index n ;
148 11 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
149 : #if LAGRAPH_SUITESPARSE
150 : GxB_Format_Value fmt ;
151 11 : GRB_TRY (GxB_get (A, GxB_FORMAT, &fmt)) ;
152 11 : if (fmt != GxB_BY_ROW)
153 : {
154 1 : return (GrB_INVALID_VALUE) ;
155 : }
156 : #endif
157 :
158 : //--------------------------------------------------------------------------
159 : // ensure input is binary and has no self-edges
160 : //--------------------------------------------------------------------------
161 :
162 10 : t [0] = 0 ; // sanitize time
163 10 : t [1] = 0 ; // LCC time
164 :
165 : // fixme: use operators that ignore the values of A
166 10 : if (sanitize)
167 : {
168 3 : t [0] = LAGraph_WallClockTime ( ) ;
169 :
170 : // S = binary structure of A
171 3 : GRB_TRY (GrB_Matrix_new (&A2, GrB_FP64, n, n)) ;
172 3 : GRB_TRY (GrB_apply (A2, NULL, NULL, GrB_ONEB_FP64, A, 0, NULL)) ;
173 :
174 : // remove all self edges
175 3 : GRB_TRY (GrB_select (A2, NULL, NULL, GrB_OFFDIAG, A2, 0, NULL)) ;
176 3 : t [0] = LAGraph_WallClockTime ( ) - t [0] ;
177 3 : S = A2 ;
178 : }
179 : else
180 : {
181 : // Use the input as-is, and assume it is binary with no self edges.
182 : // Results are undefined if this condition does not hold.
183 7 : S = A ;
184 : }
185 :
186 10 : t [1] = LAGraph_WallClockTime ( ) ;
187 :
188 : //--------------------------------------------------------------------------
189 : // create the operators for LAGraph_lcc
190 : //--------------------------------------------------------------------------
191 :
192 10 : GRB_TRY (GrB_UnaryOp_new (&LAGraph_COMB_DIR_FP64,
193 : F_UNARY (LAGraph_comb_dir_fp64),
194 : GrB_FP64, GrB_FP64)) ;
195 :
196 10 : GRB_TRY (GrB_UnaryOp_new (&LAGraph_COMB_UNDIR_FP64,
197 : F_UNARY (LAGraph_comb_undir_fp64),
198 : GrB_FP64, GrB_FP64)) ;
199 :
200 10 : GRB_TRY (GrB_Matrix_new (&U, GrB_UINT32, n, n)) ;
201 :
202 10 : if (symmetric)
203 : {
204 9 : C = S ;
205 :
206 : //----------------------------------------------------------------------
207 : // U = triu(C)
208 : //----------------------------------------------------------------------
209 :
210 9 : GRB_TRY (GxB_select (U, NULL, NULL, GxB_TRIU, C, NULL, NULL)) ;
211 :
212 : }
213 : else
214 : {
215 1 : GrB_Matrix AT = NULL, D = NULL ;
216 :
217 1 : GRB_TRY (GrB_Matrix_new (&AT, GrB_FP64, n, n)) ;
218 1 : GRB_TRY (GrB_transpose (AT, NULL, NULL, S, NULL)) ;
219 :
220 : //----------------------------------------------------------------------
221 : // C = A \/ A' to create an undirected graph C
222 : //----------------------------------------------------------------------
223 :
224 1 : GRB_TRY (GrB_Matrix_new (&C2, GrB_FP64, n, n)) ;
225 1 : C = C2 ;
226 1 : GRB_TRY (GrB_eWiseAdd (C, NULL, NULL, GrB_LOR, S, AT, NULL)) ;
227 :
228 : //----------------------------------------------------------------------
229 : // D = A + A' to create an undirected multigraph D
230 : //----------------------------------------------------------------------
231 :
232 1 : GRB_TRY (GrB_Matrix_new (&D, GrB_FP64, n, n)) ;
233 1 : GRB_TRY (GrB_eWiseAdd (D, NULL, NULL, GrB_PLUS_FP64, S, AT, NULL)) ;
234 :
235 1 : GrB_free (&AT) ;
236 :
237 : //----------------------------------------------------------------------
238 : // U = triu(D)
239 : //----------------------------------------------------------------------
240 :
241 : // note that L=U' since D is symmetric
242 1 : GRB_TRY (GxB_select (U, NULL, NULL, GxB_TRIU, D, NULL, NULL)) ;
243 1 : GrB_free (&D) ;
244 : }
245 :
246 : //--------------------------------------------------------------------------
247 : // Find wedges of each node
248 : //--------------------------------------------------------------------------
249 :
250 : // W(i) = sum (C (i,:))
251 10 : GRB_TRY (GrB_Vector_new (&W, GrB_FP64, n)) ;
252 10 : GRB_TRY (GrB_reduce (W, NULL, NULL, GrB_PLUS_FP64, C, NULL)) ;
253 :
254 : // Compute vector W defining the number of wedges per vertex
255 10 : if (symmetric)
256 : {
257 : // the graph is undirected
258 9 : GRB_TRY (GrB_apply(W, NULL, NULL, LAGraph_COMB_UNDIR_FP64, W, NULL));
259 : }
260 : else
261 : {
262 : // the graph is directed
263 1 : GRB_TRY (GrB_apply(W, NULL, NULL, LAGraph_COMB_DIR_FP64, W, NULL)) ;
264 : }
265 :
266 : //--------------------------------------------------------------------------
267 : // Calculate triangles
268 : //--------------------------------------------------------------------------
269 :
270 : // CL<C> = C*L = C*U' using a masked dot product
271 10 : GRB_TRY (GrB_Matrix_new (&CL, GrB_FP64, n, n)) ;
272 10 : GRB_TRY (GrB_mxm (CL, C, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, C, U,
273 : GrB_DESC_T1));
274 10 : GrB_free (&U) ; U = NULL;
275 :
276 : //--------------------------------------------------------------------------
277 : // Calculate LCC
278 : //--------------------------------------------------------------------------
279 :
280 : // LCC(i) = sum (CL (i,:)) = # of triangles at each node
281 10 : GRB_TRY (GrB_Vector_new (&LCC, GrB_FP64, n)) ;
282 10 : GRB_TRY (GrB_reduce (LCC, NULL, NULL, GrB_PLUS_FP64, CL, NULL)) ;
283 10 : GrB_free (&CL) ; CL = NULL;
284 :
285 : // LCC = LCC ./ W
286 10 : GRB_TRY (GrB_eWiseMult (LCC, NULL, NULL, GrB_DIV_FP64, LCC, W, NULL)) ;
287 :
288 : //--------------------------------------------------------------------------
289 : // free workspace and return result
290 : //--------------------------------------------------------------------------
291 :
292 10 : (*LCC_handle) = LCC ; LCC = NULL ;
293 :
294 10 : LG_FREE_ALL ;
295 10 : t [1] = LAGraph_WallClockTime ( ) - t [1] ;
296 10 : return (GrB_SUCCESS) ;
297 : #endif
298 : }
|