Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGraph_cdlp: community detection using label propagation
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Gabor Szarnyas and Balint Hegyi, Budapest University of
15 : // Technology and Economics (with accented characters: G\'{a}bor Sz\'{a}rnyas
16 : // and B\'{a}lint Hegyi, using LaTeX syntax).
17 : // https://inf.mit.bme.hu/en/members/szarnyasg .
18 :
19 : // Modified by Pascal Costanza, Intel, Belgium
20 :
21 : // NOTE: the calloc/free below must be thread-safe,
22 : // so it cannot use LAGraph_Malloc/LAGraph_Free (which can use
23 : // the Rapids Memory Manager methods when using CUDA, and those
24 : // methods are not yet thread-safe).
25 :
26 : //------------------------------------------------------------------------------
27 :
28 : // ## Background
29 : //
30 : // This function was originally written for the LDBC Graphalytics benchmark.
31 : //
32 : // The community detection using label propagation (CDLP) algorithm is
33 : // defined both for directed and undirected graphs.
34 : //
35 : // The definition implemented here is described in the following document:
36 : // https://ldbc.github.io/ldbc_graphalytics_docs/graphalytics_spec.pdf
37 : //
38 : // The algorithm is based on the one given in the following paper:
39 : //
40 : // Usha Raghavan, Reka Albert, and Soundar Kumara. "Near linear time algorithm
41 : // to detect community structures in large-scale networks". In: Physical
42 : // Review E 76.3 (2007), p. 036106, https://arxiv.org/abs/0709.2938
43 : //
44 : // The key idea of the algorithm is that each vertex is assigned the label
45 : // that is most frequent among its neighbors. To allow reproducible
46 : // experiments, the algorithm is modified to guarantee deterministic behavior:
47 : // it always picks the smallest label in case of a tie:
48 : //
49 : // min ( argmax_{l} (#neighbors with label l) )
50 : //
51 : // In other words, we need to compute the *minimum mode value* (minmode) for
52 : // the labels among the neighbors.
53 : //
54 : // For directed graphs, a label on a neighbor that is connected through both
55 : // an outgoing and on an incoming edge counts twice:
56 : //
57 : // min ( argmax_{l} (#incoming neighbors with l + #outgoing neighbors with l) )
58 :
59 : #define LG_FREE_ALL \
60 : { \
61 : GrB_free (&S) ; \
62 : GrB_free (&T) ; \
63 : LAGraph_Free ((void **) &Sp, NULL) ; \
64 : LAGraph_Free ((void **) &Si, NULL) ; \
65 : LAGraph_Free ((void **) &Tp, NULL) ; \
66 : LAGraph_Free ((void **) &Ti, NULL) ; \
67 : LAGraph_Free ((void **) &L, NULL) ; \
68 : LAGraph_Free ((void **) &L_next, NULL) ; \
69 : ptable_pool_free (counts_pool, max_threads) ; \
70 : counts_pool = NULL ; \
71 : GrB_free (&CDLP) ; \
72 : }
73 :
74 : #include <LAGraph.h>
75 : #include <LAGraphX.h>
76 : #include <stdalign.h>
77 : #include "LG_internal.h"
78 :
79 : // A Go-style slice / Lisp-style property list
80 : typedef struct {
81 : GrB_Index* entries;
82 : size_t len, cap;
83 : } plist;
84 :
85 5120 : void plist_free(plist *list) {
86 5120 : free(list->entries); // NOTE: cannot be LAGraph_Free
87 5120 : }
88 :
89 161728512 : void plist_clear(plist *list) {
90 161728512 : list->len = 0;
91 161728512 : }
92 :
93 727057 : void plist_append(plist* list, GrB_Index key, GrB_Index value) {
94 727057 : if (list->len == list->cap) {
95 1177 : size_t new_size = list->cap == 0 ? 16 : 2*list->cap;
96 1177 : list->entries = (GrB_Index*)realloc(list->entries, new_size * sizeof(GrB_Index));
97 1177 : list->cap = new_size;
98 : }
99 727057 : list->entries[list->len] = key;
100 727057 : list->entries[list->len+1] = value;
101 727057 : list->len += 2;
102 727057 : }
103 :
104 17652992 : GrB_Index plist_add(plist* list, GrB_Index entry) {
105 17655030 : for (size_t i = 0; i < list->len; i += 2) {
106 16927973 : if (list->entries[i] == entry) {
107 16925935 : return ++list->entries[i+1];
108 : }
109 : }
110 727057 : plist_append(list, entry, 1);
111 727057 : return 1;
112 : }
113 :
114 : typedef void (*plist_reducer) (GrB_Index* entry1, GrB_Index* count1, GrB_Index entry2, GrB_Index count2);
115 :
116 161728512 : void plist_reduce(plist* list, GrB_Index* entry, GrB_Index* count, plist_reducer reducer) {
117 162455569 : for (size_t i = 0; i < list->len; i += 2) {
118 727057 : reducer(entry, count, list->entries[i], list->entries[i+1]);
119 : }
120 161728512 : }
121 :
122 727057 : void counts_reducer(GrB_Index* e1, GrB_Index* c1, GrB_Index e2, GrB_Index c2) {
123 727057 : if (*c1 > c2) {
124 187542 : return;
125 : }
126 539515 : if (c2 > *c1) {
127 428889 : *e1 = e2;
128 428889 : *c1 = c2;
129 428889 : return;
130 : }
131 110626 : if (*e1 < e2) {
132 93071 : return;
133 : }
134 17555 : *e1 = e2;
135 17555 : *c1 = c2;
136 : }
137 :
138 :
139 : #define bucket_bits 9llu
140 : #define nof_buckets (1llu << bucket_bits)
141 : #define bucket_shift (64llu - bucket_bits)
142 :
143 : typedef struct {
144 : alignas(64) plist buckets[nof_buckets];
145 : } ptable;
146 :
147 10 : void ptable_free(ptable* table) {
148 5130 : for (size_t i = 0; i < nof_buckets; i++) {
149 5120 : plist_free(&table->buckets[i]);
150 : }
151 10 : }
152 :
153 20 : void ptable_pool_free(ptable* table, size_t n) {
154 20 : if (table == NULL) {
155 10 : return;
156 : }
157 20 : for (size_t i = 0; i < n; i++) {
158 10 : ptable_free(&table[i]);
159 : }
160 10 : free(table); // NOTE: cannot be LAGraph_Free
161 : }
162 :
163 315876 : void ptable_clear(ptable* table) {
164 162044388 : for (size_t i = 0; i < nof_buckets; i++) {
165 161728512 : plist_clear(&table->buckets[i]);
166 : }
167 315876 : }
168 :
169 17652992 : GrB_Index fib_reduce(GrB_Index x)
170 : {
171 : // 2^64 / golden ratio = 11400714819323198485
172 17652992 : GrB_Index fibhash = x * 11400714819323198485llu;
173 : // fast reduce
174 17652992 : return fibhash >> bucket_shift;
175 : }
176 :
177 17652992 : void ptable_add(ptable* table, GrB_Index entry) {
178 17652992 : plist_add(&table->buckets[fib_reduce(entry)], entry);
179 17652992 : }
180 :
181 315876 : void ptable_reduce(ptable* table, GrB_Index* entry, GrB_Index* count, plist_reducer reducer) {
182 315876 : *entry = GrB_INDEX_MAX + 1;
183 315876 : *count = 0;
184 162044388 : for (GrB_Index i = 0; i < nof_buckets; i++) {
185 161728512 : plist_reduce(&table->buckets[i], entry, count, reducer);
186 : }
187 315876 : }
188 :
189 : //****************************************************************************
190 11 : int LAGraph_cdlp
191 : (
192 : GrB_Vector *CDLP_handle, // output vector
193 : LAGraph_Graph G, // input graph
194 : int itermax, // max number of iterations
195 : char *msg
196 : )
197 : {
198 : GrB_Info info;
199 11 : LG_CLEAR_MSG ;
200 :
201 11 : GrB_Matrix S = NULL, T = NULL ;
202 : GrB_Vector CDLP ;
203 11 : GrB_Index *Sp = NULL, *Si = NULL, *Tp = NULL, *Ti = NULL, *L = NULL, *L_next = NULL ;
204 11 : ptable *counts_pool = NULL ;
205 :
206 : #ifdef _OPENMP
207 : size_t max_threads = omp_get_max_threads();
208 : #else
209 11 : size_t max_threads = 1 ;
210 : #endif
211 :
212 : //--------------------------------------------------------------------------
213 : // check inputs
214 : //--------------------------------------------------------------------------
215 :
216 11 : if (CDLP_handle == NULL)
217 : {
218 1 : return GrB_NULL_POINTER;
219 : }
220 :
221 10 : GrB_Matrix A = G->A ;
222 :
223 : //--------------------------------------------------------------------------
224 : // ensure input is binary and has no self-edges
225 : //--------------------------------------------------------------------------
226 :
227 : GrB_Index n;
228 10 : GRB_TRY (GrB_Matrix_nrows(&n, A)) ;
229 :
230 10 : GRB_TRY (GrB_Matrix_new (&S, GrB_UINT64, n, n)) ;
231 10 : GRB_TRY (GrB_apply (S, GrB_NULL, GrB_NULL, GrB_ONEB_UINT64, A, 0, GrB_NULL)) ;
232 :
233 10 : if (G->kind == LAGraph_ADJACENCY_DIRECTED) {
234 10 : GRB_TRY (GrB_Matrix_new (&T, GrB_UINT64, n, n)) ;
235 10 : GRB_TRY (GrB_transpose (T, GrB_NULL, GrB_NULL, S, GrB_NULL)) ;
236 10 : void * Tx = NULL ;
237 : GrB_Index Tps, Tis, Txs ;
238 : #if LAGRAPH_SUITESPARSE
239 : bool Tiso, Tjumbled ;
240 10 : GRB_TRY (GxB_Matrix_unpack_CSR (T, &Tp, &Ti, &Tx, &Tps, &Tis, &Txs, &Tiso, &Tjumbled, GrB_NULL)) ;
241 : #else
242 : GRB_TRY (GrB_Matrix_exportSize (&Tps, &Tis, &Txs, GrB_CSR_FORMAT, T)) ;
243 : LAGRAPH_TRY (LAGraph_Malloc ((void *)&Tp, Tps, sizeof(GrB_Index), NULL)) ;
244 : LAGRAPH_TRY (LAGraph_Malloc ((void *)&Ti, Tis, sizeof(GrB_Index), NULL)) ;
245 : LAGRAPH_TRY (LAGraph_Malloc ((void *)&Tx, Txs, sizeof(GrB_UINT64), NULL)) ;
246 : GRB_TRY (GrB_Matrix_export (Tp, Ti, (uint64_t *)Tx, &Tps, &Tis, &Txs, GrB_CSR_FORMAT, T)) ;
247 : #endif
248 10 : LAGRAPH_TRY (LAGraph_Free ((void *)&Tx, NULL)) ;
249 10 : GRB_TRY (GrB_free (&T)) ;
250 : }
251 :
252 : {
253 10 : void * Sx = NULL ;
254 : GrB_Index Sps, Sis, Sxs ;
255 : #if LAGRAPH_SUITESPARSE
256 : bool Siso, Sjumbled ;
257 10 : GRB_TRY (GxB_Matrix_unpack_CSR (S, &Sp, &Si, &Sx, &Sps, &Sis, &Sxs, &Siso, &Sjumbled, GrB_NULL)) ;
258 : #else
259 : GRB_TRY (GrB_Matrix_exportSize (&Sps, &Sis, &Sxs, GrB_CSR_FORMAT, S)) ;
260 : LAGRAPH_TRY (LAGraph_Malloc ((void *)&Sp, Sps, sizeof(GrB_Index), NULL)) ;
261 : LAGRAPH_TRY (LAGraph_Malloc ((void *)&Si, Sis, sizeof(GrB_Index), NULL)) ;
262 : LAGRAPH_TRY (LAGraph_Malloc ((void *)&Sx, Sxs, sizeof(GrB_UINT64), NULL)) ;
263 : GRB_TRY (GrB_Matrix_export (Sp, Si, (uint64_t *)Sx, &Sps, &Sis, &Sxs, GrB_CSR_FORMAT, S)) ;
264 : #endif
265 10 : LAGRAPH_TRY (LAGraph_Free((void *)&Sx, NULL)) ;
266 10 : GRB_TRY (GrB_free (&S)) ;
267 : }
268 :
269 10 : LG_TRY (LAGraph_Malloc ((void **) &L, n, sizeof (GrB_Index), msg)) ;
270 :
271 3304 : for (GrB_Index i = 0; i < n; i++) {
272 3294 : L[i] = i ;
273 : }
274 10 : LG_TRY (LAGraph_Malloc ((void **) &L_next, n, sizeof (GrB_Index), msg)) ;
275 :
276 10 : counts_pool = calloc(max_threads, sizeof(ptable));
277 :
278 353 : for (int iteration = 0; iteration < itermax; iteration++) {
279 :
280 : int64_t i;
281 : #pragma omp parallel for schedule(dynamic)
282 316226 : for (i = 0; i < n; i++) {
283 : #ifdef _OPENMP
284 : int thread_id = omp_get_thread_num() ;
285 : #else
286 315876 : int thread_id = 0 ;
287 : #endif
288 315876 : ptable *counts = &counts_pool [thread_id] ;
289 315876 : GrB_Index* neighbors = Si + Sp[i] ;
290 315876 : GrB_Index sz = Sp[i+1] - Sp[i] ;
291 9142372 : for (GrB_Index j = 0; j < sz; j++) {
292 8826496 : ptable_add (counts, L[neighbors[j]]) ;
293 : }
294 315876 : if (G->kind == LAGraph_ADJACENCY_DIRECTED) {
295 315876 : neighbors = Ti + Tp[i] ;
296 315876 : sz = Tp[i+1] - Tp[i] ;
297 9142372 : for (GrB_Index j = 0; j < sz; j++) {
298 8826496 : ptable_add (counts, L[neighbors[j]]) ;
299 : }
300 : }
301 : GrB_Index best_label, best_count ;
302 315876 : ptable_reduce (counts, &best_label, &best_count, counts_reducer) ;
303 315876 : L_next[i] = best_label ;
304 315876 : ptable_clear (counts) ;
305 : }
306 :
307 350 : GrB_Index* tmp = L ; L = L_next ; L_next = tmp ;
308 350 : bool changed = false ;
309 47967 : for (GrB_Index i = 0; i < n; i++) {
310 47960 : if (L[i] != L_next[i]) {
311 343 : changed = true ;
312 343 : break ;
313 : }
314 : }
315 350 : if (!changed) {
316 7 : break ;
317 : }
318 : }
319 :
320 10 : ptable_pool_free(counts_pool, max_threads); counts_pool = NULL;
321 :
322 : //--------------------------------------------------------------------------
323 : // extract final labels to the result vector
324 : //--------------------------------------------------------------------------
325 :
326 10 : GRB_TRY (GrB_Vector_new(&CDLP, GrB_UINT64, n))
327 3304 : for (GrB_Index i = 0; i < n; i++)
328 : {
329 3294 : GrB_Index l = L[i];
330 : // if (l == GrB_INDEX_MAX + 1) { l = i ; }
331 3294 : l = (l == GrB_INDEX_MAX + 1) ? i : l ;
332 3294 : GRB_TRY (GrB_Vector_setElement(CDLP, l, i))
333 : }
334 :
335 : //--------------------------------------------------------------------------
336 : // free workspace and return result
337 : //--------------------------------------------------------------------------
338 :
339 10 : (*CDLP_handle) = CDLP;
340 10 : CDLP = NULL; // set to NULL so LG_FREE_ALL doesn't free it
341 10 : LG_FREE_ALL;
342 :
343 10 : return (GrB_SUCCESS);
344 : }
|