Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGr_TriangleCount: Triangle counting using various methods
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Timothy A. Davis, Texas A&M University
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // Count the number of triangles in a graph,
19 :
20 : // This is an Advanced algorithm (G->nself_edges, G->out_degree,
21 : // G->is_symmetric_structure are required).
22 :
23 : // Given a symmetric graph A with no-self edges, LAGr_TriangleCount counts the
24 : // number of triangles in the graph. A triangle is a clique of size three,
25 : // that is, 3 nodes that are all pairwise connected.
26 :
27 : // One of 6 methods are used, defined below where L and U are the strictly
28 : // lower and strictly upper triangular parts of the symmetrix matrix A,
29 : // respectively. Each method computes the same result, ntri:
30 :
31 : // 0: default: use the default method (currently method Sandia_LUT)
32 : // 1: Burkhardt: ntri = sum (sum ((A^2) .* A)) / 6
33 : // 2: Cohen: ntri = sum (sum ((L * U) .* A)) / 2
34 : // 3: Sandia_LL: ntri = sum (sum ((L * L) .* L))
35 : // 4: Sandia_UU: ntri = sum (sum ((U * U) .* U))
36 : // 5: Sandia_LUT: ntri = sum (sum ((L * U') .* L)). Note that L=U'.
37 : // 6: Sandia_ULT: ntri = sum (sum ((U * L') .* U)). Note that U=L'.
38 :
39 : // A is a square symmetric matrix, of any type. Its values are ignored.
40 : // Results are undefined for methods 1 and 2 if self-edges exist in A. Results
41 : // are undefined for all methods if A is unsymmetric.
42 :
43 : // The Sandia_* methods all tend to be faster than the Burkhardt or Cohen
44 : // methods. For the largest graphs, Sandia_LUT tends to be fastest, except for
45 : // the GAP-urand matrix, where the saxpy-based Sandia_LL method (L*L.*L) is
46 : // fastest. For many small graphs, the saxpy-based Sandia_LL and Sandia_UU
47 : // methods are often faster that the dot-product-based methods.
48 :
49 : // Reference for the Burkhardt method: Burkhardt, Paul. "Graphing Trillions of
50 : // Triangles." Information Visualization 16, no. 3 (July 2017): 157–66.
51 : // https://doi.org/10.1177/1473871616666393.
52 :
53 : // Reference for the Cohen method: J. Cohen, "Graph twiddling in a mapreduce
54 : // world," Computing in Science & Engineering, vol. 11, no. 4, pp. 29–41, 2009.
55 : // https://doi.org/10.1109/MCSE.2009.120
56 :
57 : // Reference for the "Sandia_*" methods: Wolf, Deveci, Berry, Hammond,
58 : // Rajamanickam, "Fast linear algebra- based triangle counting with
59 : // KokkosKernels", IEEE HPEC'17, https://dx.doi.org/10.1109/HPEC.2017.8091043
60 :
61 : #define LG_FREE_ALL \
62 : { \
63 : GrB_free (L) ; \
64 : GrB_free (U) ; \
65 : }
66 :
67 : #include "LG_internal.h"
68 :
69 : //------------------------------------------------------------------------------
70 : // tricount_prep: construct L and U for LAGr_TriangleCount
71 : //------------------------------------------------------------------------------
72 :
73 3074 : static int tricount_prep
74 : (
75 : GrB_Matrix *L, // if present, compute L = tril (A,-1)
76 : GrB_Matrix *U, // if present, compute U = triu (A, 1)
77 : GrB_Matrix A, // input matrix
78 : char *msg
79 : )
80 : {
81 : GrB_Index n ;
82 3074 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
83 :
84 3074 : if (L != NULL)
85 : {
86 : // L = tril (A,-1)
87 2714 : GRB_TRY (GrB_Matrix_new (L, GrB_BOOL, n, n)) ;
88 2282 : GRB_TRY (GrB_select (*L, NULL, NULL, GrB_TRIL, A, (int64_t) (-1),
89 : NULL)) ;
90 1823 : GRB_TRY (GrB_Matrix_wait (*L, GrB_MATERIALIZE)) ;
91 : }
92 :
93 2183 : if (U != NULL)
94 : {
95 : // U = triu (A,1)
96 1986 : GRB_TRY (GrB_Matrix_new (U, GrB_BOOL, n, n)) ;
97 1527 : GRB_TRY (GrB_select (*U, NULL, NULL, GrB_TRIU, A, (int64_t) 1, NULL)) ;
98 1054 : GRB_TRY (GrB_Matrix_wait (*U, GrB_MATERIALIZE)) ;
99 : }
100 1251 : return (GrB_SUCCESS) ;
101 : }
102 :
103 : //------------------------------------------------------------------------------
104 : // LAGraph_tricount: count the number of triangles in a graph
105 : //------------------------------------------------------------------------------
106 :
107 : #undef LG_FREE_ALL
108 : #define LG_FREE_ALL \
109 : { \
110 : GrB_free (&C) ; \
111 : GrB_free (&L) ; \
112 : GrB_free (&T) ; \
113 : GrB_free (&U) ; \
114 : LAGraph_Free ((void **) &P, NULL) ; \
115 : }
116 :
117 4401 : int LAGr_TriangleCount
118 : (
119 : // output:
120 : uint64_t *ntriangles,
121 : // input:
122 : const LAGraph_Graph G,
123 : LAGr_TriangleCount_Method *p_method,
124 : LAGr_TriangleCount_Presort *p_presort,
125 : char *msg
126 : )
127 : {
128 :
129 : //--------------------------------------------------------------------------
130 : // check inputs
131 : //--------------------------------------------------------------------------
132 :
133 4401 : LG_CLEAR_MSG ;
134 4401 : GrB_Matrix C = NULL, L = NULL, U = NULL, T = NULL ;
135 4401 : int64_t *P = NULL ;
136 :
137 : // get the method
138 : LAGr_TriangleCount_Method method ;
139 4401 : method = (p_method == NULL) ? LAGr_TriangleCount_AutoMethod : (*p_method) ;
140 4401 : LG_ASSERT_MSG (
141 : method == LAGr_TriangleCount_AutoMethod || // 0: use auto method
142 : method == LAGr_TriangleCount_Burkhardt || // 1: sum (sum ((A^2) .* A))/6
143 : method == LAGr_TriangleCount_Cohen || // 2: sum (sum ((L * U) .*A))/2
144 : method == LAGr_TriangleCount_Sandia_LL || // 3: sum (sum ((L * L) .* L))
145 : method == LAGr_TriangleCount_Sandia_UU || // 4: sum (sum ((U * U) .* U))
146 : method == LAGr_TriangleCount_Sandia_LUT || // 5: sum (sum ((L * U') .* L))
147 : method == LAGr_TriangleCount_Sandia_ULT, // 6: sum (sum ((U * L') .* U))
148 : GrB_INVALID_VALUE, "method is invalid") ;
149 :
150 : // get the presort
151 : LAGr_TriangleCount_Presort presort ;
152 4392 : presort = (p_presort == NULL) ? LAGr_TriangleCount_AutoSort : (*p_presort) ;
153 4392 : LG_ASSERT_MSG (
154 : presort == LAGr_TriangleCount_NoSort ||
155 : presort == LAGr_TriangleCount_Ascending ||
156 : presort == LAGr_TriangleCount_Descending ||
157 : presort == LAGr_TriangleCount_AutoSort,
158 : GrB_INVALID_VALUE, "presort is invalid") ;
159 :
160 4383 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
161 4383 : LG_ASSERT (ntriangles != NULL, GrB_NULL_POINTER) ;
162 4383 : LG_ASSERT (G->nself_edges == 0, LAGRAPH_NO_SELF_EDGES_ALLOWED) ;
163 :
164 4383 : LG_ASSERT_MSG ((G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
165 : (G->kind == LAGraph_ADJACENCY_DIRECTED &&
166 : G->is_symmetric_structure == LAGraph_TRUE)),
167 : LAGRAPH_SYMMETRIC_STRUCTURE_REQUIRED,
168 : "G->A must be known to be symmetric") ;
169 :
170 4383 : if (method == LAGr_TriangleCount_AutoMethod)
171 : {
172 : // AutoMethod: use default, Sandia_LUT: sum (sum ((L * U') .* L))
173 1040 : method = LAGr_TriangleCount_Sandia_LUT ;
174 : }
175 :
176 : // only the Sandia_* methods can benefit from the presort
177 4383 : bool method_can_use_presort =
178 3836 : method == LAGr_TriangleCount_Sandia_LL || // sum (sum ((L * L) .* L))
179 3289 : method == LAGr_TriangleCount_Sandia_UU || // sum (sum ((U * U) .* U))
180 8219 : method == LAGr_TriangleCount_Sandia_LUT || // sum (sum ((L * U') .* L))
181 : method == LAGr_TriangleCount_Sandia_ULT ; // sum (sum ((U * L') .* U))
182 :
183 4383 : GrB_Matrix A = G->A ;
184 4383 : GrB_Vector Degree = G->out_degree ;
185 :
186 4383 : bool auto_sort = (presort == LAGr_TriangleCount_AutoSort) ;
187 4383 : if (auto_sort && method_can_use_presort)
188 : {
189 1258 : LG_ASSERT_MSG (Degree != NULL,
190 : LAGRAPH_NOT_CACHED, "G->out_degree is required") ;
191 : }
192 :
193 : //--------------------------------------------------------------------------
194 : // initializations
195 : //--------------------------------------------------------------------------
196 :
197 : GrB_Index n ;
198 4379 : GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
199 4379 : GRB_TRY (GrB_Matrix_new (&C, GrB_INT64, n, n)) ;
200 : #if LAGRAPH_SUITESPARSE
201 3776 : GrB_Semiring semiring = GxB_PLUS_PAIR_INT64 ;
202 : #else
203 : GrB_Semiring semiring = LAGraph_plus_one_int64 ;
204 : #endif
205 3776 : GrB_Monoid monoid = GrB_PLUS_MONOID_INT64 ;
206 :
207 : //--------------------------------------------------------------------------
208 : // heuristic sort rule
209 : //--------------------------------------------------------------------------
210 :
211 3776 : if (!method_can_use_presort)
212 : {
213 : // no sorting for the Burkhardt and Cohen methods: presort parameter
214 : // is ignored.
215 691 : presort = LAGr_TriangleCount_NoSort ;
216 : }
217 3085 : else if (auto_sort)
218 : {
219 : // auto selection of sorting method for Sandia_* methods
220 1083 : presort = LAGr_TriangleCount_NoSort ; // default is not to sort
221 :
222 1083 : if (method_can_use_presort)
223 : {
224 : // This rule is very similar to Scott Beamer's rule in the GAP TC
225 : // benchmark, except that it is extended to handle the ascending
226 : // sort needed by methods 3 and 5. It also uses a stricter rule,
227 : // since the performance of triangle counting in SuiteSparse:
228 : // GraphBLAS is less sensitive to the sorting as compared to the
229 : // GAP algorithm. This is because the dot products in SuiteSparse:
230 : // GraphBLAS use binary search if one vector is very sparse
231 : // compared to the other. As a result, SuiteSparse:GraphBLAS needs
232 : // the sort for fewer matrices, as compared to the GAP algorithm.
233 :
234 : // With this rule, the GAP-kron and GAP-twitter matrices are
235 : // sorted, and the others remain unsorted. With the rule in the
236 : // GAP tc.cc benchmark, GAP-kron and GAP-twitter are sorted, and so
237 : // is GAP-web, but GAP-web is not sorted here.
238 :
239 : #define NSAMPLES 1000
240 : GrB_Index nvals ;
241 1089 : GRB_TRY (GrB_Matrix_nvals (&nvals, A)) ;
242 1083 : if (n > NSAMPLES && ((double) nvals / ((double) n)) >= 10)
243 : {
244 : // estimate the mean and median degrees
245 : double mean, median ;
246 156 : LG_TRY (LAGr_SampleDegree (&mean, &median,
247 : G, true, NSAMPLES, n, msg)) ;
248 : // sort if the average degree is very high vs the median
249 150 : if (mean > 4 * median)
250 : {
251 6 : switch (method)
252 : {
253 1 : case LAGr_TriangleCount_Sandia_LL:
254 : // 3:sum (sum ((L * L) .* L))
255 1 : presort = LAGr_TriangleCount_Ascending ;
256 1 : break ;
257 1 : case LAGr_TriangleCount_Sandia_UU:
258 : // 4: sum (sum ((U * U) .* U))
259 1 : presort = LAGr_TriangleCount_Descending ;
260 1 : break ;
261 3 : default:
262 : case LAGr_TriangleCount_Sandia_LUT:
263 : // 5: sum (sum ((L * U') .* L))
264 3 : presort = LAGr_TriangleCount_Ascending ;
265 3 : break ;
266 1 : case LAGr_TriangleCount_Sandia_ULT:
267 : // 6: sum (sum ((U * L') .* U))
268 1 : presort = LAGr_TriangleCount_Descending ;
269 1 : break ;
270 : }
271 : }
272 : }
273 : }
274 : }
275 :
276 : //--------------------------------------------------------------------------
277 : // sort the input matrix, if requested
278 : //--------------------------------------------------------------------------
279 :
280 3770 : if (presort != LAGr_TriangleCount_NoSort)
281 : {
282 : // P = permutation that sorts the rows by their degree
283 1247 : LG_TRY (LAGr_SortByDegree (&P, G, true,
284 : presort == LAGr_TriangleCount_Ascending, msg)) ;
285 :
286 : // T = A (P,P) and typecast to boolean
287 1112 : GRB_TRY (GrB_Matrix_new (&T, GrB_BOOL, n, n)) ;
288 977 : GRB_TRY (GrB_extract (T, NULL, NULL, A, (GrB_Index *) P, n,
289 : (GrB_Index *) P, n, NULL)) ;
290 727 : A = T ;
291 :
292 : // free workspace
293 727 : LG_TRY (LAGraph_Free ((void **) &P, NULL)) ;
294 : }
295 :
296 : //--------------------------------------------------------------------------
297 : // count triangles
298 : //--------------------------------------------------------------------------
299 :
300 : int64_t ntri ;
301 :
302 3250 : switch (method)
303 : {
304 :
305 176 : case LAGr_TriangleCount_Burkhardt: // 1: sum (sum ((A^2) .* A)) / 6
306 :
307 176 : GRB_TRY (GrB_mxm (C, A, NULL, semiring, A, A, GrB_DESC_S)) ;
308 56 : GRB_TRY (GrB_reduce (&ntri, NULL, monoid, C, NULL)) ;
309 56 : ntri /= 6 ;
310 56 : break ;
311 :
312 515 : case LAGr_TriangleCount_Cohen: // 2: sum (sum ((L * U) .* A)) / 2
313 :
314 515 : LG_TRY (tricount_prep (&L, &U, A, msg)) ;
315 179 : GRB_TRY (GrB_mxm (C, A, NULL, semiring, L, U, GrB_DESC_S)) ;
316 56 : GRB_TRY (GrB_reduce (&ntri, NULL, monoid, C, NULL)) ;
317 56 : ntri /= 2 ;
318 56 : break ;
319 :
320 360 : case LAGr_TriangleCount_Sandia_LL: // 3: sum (sum ((L * L) .* L))
321 :
322 : // using the masked saxpy3 method
323 360 : LG_TRY (tricount_prep (&L, NULL, A, msg)) ;
324 197 : GRB_TRY (GrB_mxm (C, L, NULL, semiring, L, L, GrB_DESC_S)) ;
325 56 : GRB_TRY (GrB_reduce (&ntri, NULL, monoid, C, NULL)) ;
326 56 : break ;
327 :
328 360 : case LAGr_TriangleCount_Sandia_UU: // 4: sum (sum ((U * U) .* U))
329 :
330 : // using the masked saxpy3 method
331 360 : LG_TRY (tricount_prep (NULL, &U, A, msg)) ;
332 197 : GRB_TRY (GrB_mxm (C, U, NULL, semiring, U, U, GrB_DESC_S)) ;
333 56 : GRB_TRY (GrB_reduce (&ntri, NULL, monoid, C, NULL)) ;
334 56 : break ;
335 :
336 1328 : default:
337 : case LAGr_TriangleCount_Sandia_LUT: // 5: sum (sum ((L * U') .* L))
338 :
339 : // This tends to be the fastest method for most large matrices, but
340 : // the Sandia_ULT method is also very fast.
341 :
342 : // using the masked dot product
343 1328 : LG_TRY (tricount_prep (&L, &U, A, msg)) ;
344 493 : GRB_TRY (GrB_mxm (C, L, NULL, semiring, L, U, GrB_DESC_ST1)) ;
345 149 : GRB_TRY (GrB_reduce (&ntri, NULL, monoid, C, NULL)) ;
346 149 : break ;
347 :
348 511 : case LAGr_TriangleCount_Sandia_ULT: // 6: sum (sum ((U * L') .* U))
349 :
350 : // using the masked dot product
351 511 : LG_TRY (tricount_prep (&L, &U, A, msg)) ;
352 185 : GRB_TRY (GrB_mxm (C, U, NULL, semiring, U, L, GrB_DESC_ST1)) ;
353 56 : GRB_TRY (GrB_reduce (&ntri, NULL, monoid, C, NULL)) ;
354 56 : break ;
355 : }
356 :
357 : //--------------------------------------------------------------------------
358 : // return result
359 : //--------------------------------------------------------------------------
360 :
361 429 : LG_FREE_ALL ;
362 429 : if (p_method != NULL) (*p_method) = method ;
363 429 : if (p_presort != NULL) (*p_presort) = presort ;
364 429 : (*ntriangles) = (uint64_t) ntri ;
365 429 : return (GrB_SUCCESS) ;
366 : }
|