LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGraph_cdlp.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 124 124 100.0 %
Date: 2025-06-03 22:02:40 Functions: 13 13 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGraph_cdlp: community detection using label propagation
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Gabor Szarnyas and Balint Hegyi, Budapest University of
      15             : // Technology and Economics (with accented characters: G\'{a}bor Sz\'{a}rnyas
      16             : // and B\'{a}lint Hegyi, using LaTeX syntax).
      17             : // https://inf.mit.bme.hu/en/members/szarnyasg .
      18             : 
      19             : // Modified by Pascal Costanza, Intel, Belgium
      20             : 
      21             : // NOTE: the calloc/free below must be thread-safe,
      22             : // so it cannot use LAGraph_Malloc/LAGraph_Free (which can use
      23             : // the Rapids Memory Manager methods when using CUDA, and those
      24             : // methods are not yet thread-safe).
      25             : 
      26             : //------------------------------------------------------------------------------
      27             : 
      28             : // ## Background
      29             : //
      30             : // This function was originally written for the LDBC Graphalytics benchmark.
      31             : //
      32             : // The community detection using label propagation (CDLP) algorithm is
      33             : // defined both for directed and undirected graphs.
      34             : //
      35             : // The definition implemented here is described in the following document:
      36             : // https://ldbc.github.io/ldbc_graphalytics_docs/graphalytics_spec.pdf
      37             : //
      38             : // The algorithm is based on the one given in the following paper:
      39             : //
      40             : // Usha Raghavan, Reka Albert, and Soundar Kumara. "Near linear time algorithm
      41             : // to detect community structures in large-scale networks". In: Physical
      42             : // Review E 76.3 (2007), p. 036106, https://arxiv.org/abs/0709.2938
      43             : //
      44             : // The key idea of the algorithm is that each vertex is assigned the label
      45             : // that is most frequent among its neighbors. To allow reproducible
      46             : // experiments, the algorithm is modified to guarantee deterministic behavior:
      47             : // it always picks the smallest label in case of a tie:
      48             : //
      49             : // min ( argmax_{l} (#neighbors with label l) )
      50             : //
      51             : // In other words, we need to compute the *minimum mode value* (minmode) for
      52             : // the labels among the neighbors.
      53             : //
      54             : // For directed graphs, a label on a neighbor that is connected through both
      55             : // an outgoing and on an incoming edge counts twice:
      56             : //
      57             : // min ( argmax_{l} (#incoming neighbors with l + #outgoing neighbors with l) )
      58             : 
      59             : #define LG_FREE_ALL                                                     \
      60             : {                                                                       \
      61             :     GrB_free (&S) ;                                                     \
      62             :     GrB_free (&T) ;                                                     \
      63             :     LAGraph_Free ((void **) &Sp, NULL) ;                                \
      64             :     LAGraph_Free ((void **) &Si, NULL) ;                                \
      65             :     LAGraph_Free ((void **) &Tp, NULL) ;                                \
      66             :     LAGraph_Free ((void **) &Ti, NULL) ;                                \
      67             :     LAGraph_Free ((void **) &L, NULL) ;                                 \
      68             :     LAGraph_Free ((void **) &L_next, NULL) ;                            \
      69             :     ptable_pool_free (counts_pool, max_threads) ;                       \
      70             :     counts_pool = NULL ;                                                \
      71             :     GrB_free (&CDLP) ;                                                  \
      72             : }
      73             : 
      74             : #include <LAGraph.h>
      75             : #include <LAGraphX.h>
      76             : #include <stdalign.h>
      77             : #include "LG_internal.h"
      78             : 
      79             : // A Go-style slice / Lisp-style property list
      80             : typedef struct {
      81             :     GrB_Index* entries;
      82             :     size_t len, cap;
      83             : } plist;
      84             : 
      85        5120 : void plist_free(plist *list) {
      86        5120 :     free(list->entries);        // NOTE: cannot be LAGraph_Free
      87        5120 : }
      88             : 
      89   161728512 : void plist_clear(plist *list) {
      90   161728512 :     list->len = 0;
      91   161728512 : }
      92             : 
      93      727057 : void plist_append(plist* list, GrB_Index key, GrB_Index value) {
      94      727057 :     if (list->len == list->cap) {
      95        1177 :         size_t new_size = list->cap == 0 ? 16 : 2*list->cap;
      96        1177 :         list->entries = (GrB_Index*)realloc(list->entries, new_size * sizeof(GrB_Index));
      97        1177 :         list->cap = new_size;
      98             :     }
      99      727057 :     list->entries[list->len] = key;
     100      727057 :     list->entries[list->len+1] = value;
     101      727057 :     list->len += 2;
     102      727057 : }
     103             : 
     104    17652992 : GrB_Index plist_add(plist* list, GrB_Index entry) {
     105    17655030 :     for (size_t i = 0; i < list->len; i += 2) {
     106    16927973 :         if (list->entries[i] == entry) {
     107    16925935 :             return ++list->entries[i+1];
     108             :         }
     109             :     }
     110      727057 :     plist_append(list, entry, 1);
     111      727057 :     return 1;
     112             : }
     113             : 
     114             : typedef void (*plist_reducer) (GrB_Index* entry1, GrB_Index* count1, GrB_Index entry2, GrB_Index count2);
     115             : 
     116   161728512 : void plist_reduce(plist* list, GrB_Index* entry, GrB_Index* count, plist_reducer reducer) {
     117   162455569 :     for (size_t i = 0; i < list->len; i += 2) {
     118      727057 :         reducer(entry, count, list->entries[i], list->entries[i+1]);
     119             :     }
     120   161728512 : }
     121             : 
     122      727057 : void counts_reducer(GrB_Index* e1, GrB_Index* c1, GrB_Index e2, GrB_Index c2) {
     123      727057 :     if (*c1 > c2) {
     124      187542 :         return;
     125             :     }
     126      539515 :     if (c2 > *c1) {
     127      428889 :         *e1 = e2;
     128      428889 :         *c1 = c2;
     129      428889 :         return;
     130             :     }
     131      110626 :     if (*e1 < e2) {
     132       93071 :         return;
     133             :     }
     134       17555 :     *e1 = e2;
     135       17555 :     *c1 = c2;
     136             : }
     137             : 
     138             : 
     139             : #define bucket_bits 9llu
     140             : #define nof_buckets (1llu << bucket_bits)
     141             : #define bucket_shift (64llu - bucket_bits)
     142             : 
     143             : typedef struct {
     144             :     alignas(64) plist buckets[nof_buckets];
     145             : } ptable;
     146             : 
     147          10 : void ptable_free(ptable* table) {
     148        5130 :     for (size_t i = 0; i < nof_buckets; i++) {
     149        5120 :         plist_free(&table->buckets[i]);
     150             :     }
     151          10 : }
     152             : 
     153          20 : void ptable_pool_free(ptable* table, size_t n) {
     154          20 :     if (table == NULL) {
     155          10 :         return;
     156             :     }
     157          20 :     for (size_t i = 0; i < n; i++) {
     158          10 :         ptable_free(&table[i]);
     159             :     }
     160          10 :     free(table);        // NOTE: cannot be LAGraph_Free
     161             : }
     162             : 
     163      315876 : void ptable_clear(ptable* table) {
     164   162044388 :     for (size_t i = 0; i < nof_buckets; i++) {
     165   161728512 :         plist_clear(&table->buckets[i]);
     166             :     }
     167      315876 : }
     168             : 
     169    17652992 : GrB_Index fib_reduce(GrB_Index x)
     170             : {
     171             :     // 2^64 / golden ratio = 11400714819323198485
     172    17652992 :     GrB_Index fibhash = x * 11400714819323198485llu;
     173             :     // fast reduce
     174    17652992 :     return fibhash >> bucket_shift;
     175             : }
     176             : 
     177    17652992 : void ptable_add(ptable* table, GrB_Index entry) {
     178    17652992 :     plist_add(&table->buckets[fib_reduce(entry)], entry);
     179    17652992 : }
     180             : 
     181      315876 : void ptable_reduce(ptable* table, GrB_Index* entry, GrB_Index* count, plist_reducer reducer) {
     182      315876 :     *entry = GrB_INDEX_MAX + 1;
     183      315876 :     *count = 0;
     184   162044388 :     for (GrB_Index i = 0; i < nof_buckets; i++) {
     185   161728512 :         plist_reduce(&table->buckets[i], entry, count, reducer);
     186             :     }
     187      315876 : }
     188             : 
     189             : //****************************************************************************
     190          11 : int LAGraph_cdlp
     191             :         (
     192             :                 GrB_Vector *CDLP_handle,    // output vector
     193             :                 LAGraph_Graph G,            // input graph
     194             :                 int itermax,                // max number of iterations
     195             :                 char *msg
     196             :         )
     197             : {
     198             :     GrB_Info info;
     199          11 :     LG_CLEAR_MSG ;
     200             : 
     201          11 :     GrB_Matrix S = NULL, T = NULL ;
     202             :     GrB_Vector CDLP ;
     203          11 :     GrB_Index *Sp = NULL, *Si = NULL, *Tp = NULL, *Ti = NULL, *L = NULL, *L_next = NULL ;
     204          11 :     ptable *counts_pool = NULL ;
     205             : 
     206             :     #ifdef _OPENMP
     207             :     size_t max_threads = omp_get_max_threads();
     208             :     #else
     209          11 :     size_t max_threads = 1 ;
     210             :     #endif
     211             : 
     212             :     //--------------------------------------------------------------------------
     213             :     // check inputs
     214             :     //--------------------------------------------------------------------------
     215             : 
     216          11 :     if (CDLP_handle == NULL)
     217             :     {
     218           1 :         return GrB_NULL_POINTER;
     219             :     }
     220             : 
     221          10 :     GrB_Matrix A = G->A ;
     222             : 
     223             :     //--------------------------------------------------------------------------
     224             :     // ensure input is binary and has no self-edges
     225             :     //--------------------------------------------------------------------------
     226             : 
     227             :     GrB_Index n;
     228          10 :     GRB_TRY (GrB_Matrix_nrows(&n, A)) ;
     229             : 
     230          10 :     GRB_TRY (GrB_Matrix_new (&S, GrB_UINT64, n, n)) ;
     231          10 :     GRB_TRY (GrB_apply (S, GrB_NULL, GrB_NULL, GrB_ONEB_UINT64, A, 0, GrB_NULL)) ;
     232             : 
     233          10 :     if (G->kind == LAGraph_ADJACENCY_DIRECTED) {
     234          10 :         GRB_TRY (GrB_Matrix_new (&T, GrB_UINT64, n, n)) ;
     235          10 :         GRB_TRY (GrB_transpose (T, GrB_NULL, GrB_NULL, S, GrB_NULL)) ;
     236          10 :         void * Tx = NULL ;
     237             :         GrB_Index Tps, Tis, Txs ;
     238             : #if LAGRAPH_SUITESPARSE
     239             :         bool Tiso, Tjumbled ;
     240          10 :         GRB_TRY (GxB_Matrix_unpack_CSR (T, &Tp, &Ti, &Tx, &Tps, &Tis, &Txs, &Tiso, &Tjumbled, GrB_NULL)) ;
     241             : #else
     242             :         GRB_TRY (GrB_Matrix_exportSize (&Tps, &Tis, &Txs, GrB_CSR_FORMAT, T)) ;
     243             :         LAGRAPH_TRY (LAGraph_Malloc ((void *)&Tp, Tps, sizeof(GrB_Index), NULL)) ;
     244             :         LAGRAPH_TRY (LAGraph_Malloc ((void *)&Ti, Tis, sizeof(GrB_Index), NULL)) ;
     245             :         LAGRAPH_TRY (LAGraph_Malloc ((void *)&Tx, Txs, sizeof(GrB_UINT64), NULL)) ;
     246             :         GRB_TRY (GrB_Matrix_export (Tp, Ti, (uint64_t *)Tx, &Tps, &Tis, &Txs, GrB_CSR_FORMAT, T)) ;
     247             : #endif
     248          10 :         LAGRAPH_TRY (LAGraph_Free ((void *)&Tx, NULL)) ;
     249          10 :         GRB_TRY (GrB_free (&T)) ;
     250             :     }
     251             : 
     252             :     {
     253          10 :         void * Sx = NULL ;
     254             :         GrB_Index Sps, Sis, Sxs ;
     255             : #if LAGRAPH_SUITESPARSE
     256             :         bool Siso, Sjumbled ;
     257          10 :         GRB_TRY (GxB_Matrix_unpack_CSR (S, &Sp, &Si, &Sx, &Sps, &Sis, &Sxs, &Siso, &Sjumbled, GrB_NULL)) ;
     258             : #else
     259             :         GRB_TRY (GrB_Matrix_exportSize (&Sps, &Sis, &Sxs, GrB_CSR_FORMAT, S)) ;
     260             :         LAGRAPH_TRY (LAGraph_Malloc ((void *)&Sp, Sps, sizeof(GrB_Index), NULL)) ;
     261             :         LAGRAPH_TRY (LAGraph_Malloc ((void *)&Si, Sis, sizeof(GrB_Index), NULL)) ;
     262             :         LAGRAPH_TRY (LAGraph_Malloc ((void *)&Sx, Sxs, sizeof(GrB_UINT64), NULL)) ;
     263             :         GRB_TRY (GrB_Matrix_export (Sp, Si, (uint64_t *)Sx, &Sps, &Sis, &Sxs, GrB_CSR_FORMAT, S)) ;
     264             : #endif
     265          10 :         LAGRAPH_TRY (LAGraph_Free((void *)&Sx, NULL)) ;
     266          10 :         GRB_TRY (GrB_free (&S)) ;
     267             :     }
     268             : 
     269          10 :     LG_TRY (LAGraph_Malloc ((void **) &L, n, sizeof (GrB_Index), msg)) ;
     270             : 
     271        3304 :     for (GrB_Index i = 0; i < n; i++) {
     272        3294 :         L[i] = i ;
     273             :     }
     274          10 :     LG_TRY (LAGraph_Malloc ((void **) &L_next, n, sizeof (GrB_Index), msg)) ;
     275             : 
     276          10 :     counts_pool = calloc(max_threads, sizeof(ptable));
     277             : 
     278         353 :     for (int iteration = 0; iteration < itermax; iteration++) {
     279             : 
     280             :         int64_t i;
     281             : #pragma omp parallel for schedule(dynamic)
     282      316226 :         for (i = 0; i < n; i++) {
     283             :             #ifdef _OPENMP
     284             :             int thread_id = omp_get_thread_num() ;
     285             :             #else
     286      315876 :             int thread_id = 0 ;
     287             :             #endif
     288      315876 :             ptable *counts = &counts_pool [thread_id] ;
     289      315876 :             GrB_Index* neighbors = Si + Sp[i] ;
     290      315876 :             GrB_Index sz = Sp[i+1] - Sp[i] ;
     291     9142372 :             for (GrB_Index j = 0; j < sz; j++) {
     292     8826496 :                 ptable_add (counts, L[neighbors[j]]) ;
     293             :             }
     294      315876 :             if (G->kind == LAGraph_ADJACENCY_DIRECTED) {
     295      315876 :                 neighbors = Ti + Tp[i] ;
     296      315876 :                 sz = Tp[i+1] - Tp[i] ;
     297     9142372 :                 for (GrB_Index j = 0; j < sz; j++) {
     298     8826496 :                     ptable_add (counts, L[neighbors[j]]) ;
     299             :                 }
     300             :             }
     301             :             GrB_Index best_label, best_count ;
     302      315876 :             ptable_reduce (counts, &best_label, &best_count, counts_reducer) ;
     303      315876 :             L_next[i] = best_label ;
     304      315876 :             ptable_clear (counts) ;
     305             :         }
     306             : 
     307         350 :         GrB_Index* tmp = L ; L = L_next ; L_next = tmp ;
     308         350 :         bool changed = false ;
     309       47967 :         for (GrB_Index i = 0; i < n; i++) {
     310       47960 :             if (L[i] != L_next[i]) {
     311         343 :                 changed = true ;
     312         343 :                 break ;
     313             :             }
     314             :         }
     315         350 :         if (!changed) {
     316           7 :              break ;
     317             :         }
     318             :     }
     319             : 
     320          10 :     ptable_pool_free(counts_pool, max_threads); counts_pool = NULL;
     321             : 
     322             :     //--------------------------------------------------------------------------
     323             :     // extract final labels to the result vector
     324             :     //--------------------------------------------------------------------------
     325             : 
     326          10 :     GRB_TRY (GrB_Vector_new(&CDLP, GrB_UINT64, n))
     327        3304 :     for (GrB_Index i = 0; i < n; i++)
     328             :     {
     329        3294 :         GrB_Index l = L[i];
     330             : //      if (l == GrB_INDEX_MAX + 1) { l = i ; }
     331        3294 :         l = (l == GrB_INDEX_MAX + 1) ? i : l ;
     332        3294 :         GRB_TRY (GrB_Vector_setElement(CDLP, l, i))
     333             :     }
     334             : 
     335             :     //--------------------------------------------------------------------------
     336             :     // free workspace and return result
     337             :     //--------------------------------------------------------------------------
     338             : 
     339          10 :     (*CDLP_handle) = CDLP;
     340          10 :     CDLP = NULL;            // set to NULL so LG_FREE_ALL doesn't free it
     341          10 :     LG_FREE_ALL;
     342             : 
     343          10 :     return (GrB_SUCCESS);
     344             : }

Generated by: LCOV version 1.14