LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGraph_cdlp_withsort.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 75 75 100.0 %
Date: 2025-06-03 22:02:40 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGraph_cdlp_withsort: community detection using label propagation
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Gabor Szarnyas and Balint Hegyi, Budapest University of
      15             : // Technology and Economics (with accented characters: G\'{a}bor Sz\'{a}rnyas
      16             : // and B\'{a}lint Hegyi, using LaTeX syntax).
      17             : // https://inf.mit.bme.hu/en/members/szarnyasg .
      18             : 
      19             : //------------------------------------------------------------------------------
      20             : 
      21             : // ## Background
      22             : //
      23             : // This function was originally written for the LDBC Graphalytics benchmark.
      24             : //
      25             : // The community detection using label propagation (CDLP) algorithm is
      26             : // defined both for directed and undirected graphs.
      27             : //
      28             : // The definition implemented here is described in the following document:
      29             : // https://ldbc.github.io/ldbc_graphalytics_docs/graphalytics_spec.pdf
      30             : //
      31             : // The algorithm is based on the one given in the following paper:
      32             : //
      33             : // Usha Raghavan, Reka Albert, and Soundar Kumara. "Near linear time algorithm
      34             : // to detect community structures in large-scale networks". In: Physical
      35             : // Review E 76.3 (2007), p. 036106, https://arxiv.org/abs/0709.2938
      36             : //
      37             : // The key idea of the algorithm is that each vertex is assigned the label
      38             : // that is most frequent among its neighbors. To allow reproducible
      39             : // experiments, the algorithm is modified to guarantee deterministic behavior:
      40             : // it always picks the smallest label in case of a tie:
      41             : //
      42             : // min ( argmax_{l} (#neighbors with label l) )
      43             : //
      44             : // In other words, we need to compute the *minimum mode value* (minmode) for
      45             : // the labels among the neighbors.
      46             : //
      47             : // For directed graphs, a label on a neighbor that is connected through both
      48             : // an outgoing and on an incoming edge counts twice:
      49             : //
      50             : // min ( argmax_{l} (#incoming neighbors with l + #outgoing neighbors with l) )
      51             : //
      52             : // ## Example (undirected)
      53             : //
      54             : // For an example, let's assume an undirected graph where vertex 1 has four
      55             : // neighbors {2, 3, 4, 5}, and the current labels in the graph are
      56             : // L = [3, 5, 4, 5, 4].
      57             : //
      58             : // In this example, the distribution of labels among the neighbors of vertex 1
      59             : // is {4 => 2, 5 => 2}, therefore, the minimum mode value is 4.
      60             : //
      61             : // Next, we capture this operation using GraphBLAS operations and
      62             : // data structures. Notice that the neighbors of vertex 1 are encoded
      63             : // as a sparse vector in the adjacency matrix:
      64             : //
      65             : // A = | 0 1 1 1 1 |
      66             : //     | 1 . . .   |
      67             : //     | 1 .       |
      68             : //     | 1 .       |
      69             : //     | 1         |
      70             : //
      71             : // To allow propagating the labels along edges, we use a diagonal matrix
      72             : // with the elements of the diagonal set to the values of L:
      73             : //
      74             : // diag(L) = | 3 0 0 0 0 |
      75             : //           | 0 5 0 0 0 |
      76             : //           | 0 0 4 0 0 |
      77             : //           | 0 0 0 5 0 |
      78             : //           | 0 0 0 0 4 |
      79             : //
      80             : // If we multiply adjacency matrix with diag(L), we get a matrix
      81             : // containing the labels of the neighbor nodes. We use the 'sel2nd' operator
      82             : // for multiplication to avoid having to lookup the value on the left.
      83             : // The conventional plus.times semiring would also work: 1 * y = sel2nd(1, y).
      84             : // Note that we multiply with a diagonal matrix so the addition operator
      85             : // is not used. In the implementation, we use "min" so the semiring is
      86             : // "min.sel2nd" on uint64 values.
      87             : //
      88             : // In the example, this gives the following:
      89             : //
      90             : // AL = A min.sel2nd diag(L) = | 0 5 4 5 4 |
      91             : //                             | 3 . . . . |
      92             : //
      93             : // ## Selecting the minimum mode value
      94             : //
      95             : // Next, we need to compute the minimum mode value for each row. As it is
      96             : // difficult to capture this operation as a monoid, we use a sort operation
      97             : // on each row. In the undirected case, we extract tuples <I, _, X> from the
      98             : // matrix, then use <I, X> for sorting. In the directed case, we extract
      99             : // tuples <I1, _, X1> and <I2, _, X2>, then use <I1+I2, X1+X2>,
     100             : // where '+' denotes concatenation. Column indices (J) are not used.
     101             : //
     102             : // The resulting two-tuples are sorted using a parallel merge sort.
     103             : // Finally, we use the sorted arrays compute the minimum mode value for each
     104             : // row.
     105             : //
     106             : // ## Fixed point
     107             : //
     108             : // At the end of each iteration, we check whether L[i-1] == L[i] and
     109             : // terminate if we reached a fixed point.
     110             : //
     111             : // ## Further optimizations
     112             : //
     113             : // A possible optimization is that the first iteration is rather trivial:
     114             : //
     115             : // * in the undirected case, each vertex gets the minimal initial label (=id)
     116             : //   of its neighbors.
     117             : // * in the directed case, each vertex gets the minimal initial label (=id)
     118             : //   of its neighbors which are doubly-linked (on an incoming and on an
     119             : //   outgoing edge). In the absence of such a neighbor, it picks the minimal
     120             : //   label of its neighbors (connected through either an incoming or through
     121             : //   an outgoing edge).
     122             : 
     123             : #define LG_FREE_ALL                                                     \
     124             : {                                                                       \
     125             :     LAGraph_Free ((void *) &I, NULL) ;                                  \
     126             :     LAGraph_Free ((void *) &X, NULL) ;                                  \
     127             :     LAGraph_Free ((void *) &LP, NULL) ;                                 \
     128             :     LAGraph_Free ((void *) &LI, NULL) ;                                 \
     129             :     LAGraph_Free ((void *) &LX, NULL) ;                                 \
     130             :     GrB_free (&L) ;                                                     \
     131             :     GrB_free (&L_prev) ;                                                \
     132             :     GrB_free (&S) ;                                                     \
     133             :     GrB_free (&AT) ;                                                    \
     134             : }
     135             : 
     136             : #include <LAGraph.h>
     137             : #include <LAGraphX.h>
     138             : #include "LG_internal.h"
     139             : 
     140             : //****************************************************************************
     141          10 : int LAGraph_cdlp_withsort
     142             : (
     143             :     GrB_Vector *CDLP_handle, // output vector
     144             :     LAGraph_Graph G,      // input graph
     145             :     int itermax,             // max number of iterations,
     146             :     char *msg
     147             : )
     148             : {
     149             :     GrB_Info info;
     150          10 :     LG_CLEAR_MSG ;
     151             : 
     152          10 :     GrB_Matrix A = G->A ;
     153          20 :     bool symmetric = (G->kind == LAGraph_ADJACENCY_UNDIRECTED) ||
     154          10 :             ((G->kind == LAGraph_ADJACENCY_DIRECTED) &&
     155          10 :                     (G->is_symmetric_structure == LAGraph_TRUE)) ;
     156             : 
     157             :     // Diagonal label matrix
     158          10 :     GrB_Matrix L = NULL;
     159          10 :     GrB_Matrix L_prev = NULL;
     160             :     // Source adjacency matrix
     161          10 :     GrB_Matrix S = NULL;
     162             :     // Transposed matrix for the unsymmetric case
     163          10 :     GrB_Matrix AT = NULL;
     164             :     // Result CDLP vector
     165          10 :     GrB_Vector CDLP = NULL;
     166             : 
     167             :     // Arrays for constructing initial labels
     168          10 :     GrB_Index *LP = NULL, *LI = NULL, *LX = NULL ;
     169             : 
     170             :     // Arrays holding extracted tuples during the algorithm
     171          10 :     GrB_Index *I = NULL;
     172          10 :     GrB_Index *X = NULL;
     173             : 
     174             :     //--------------------------------------------------------------------------
     175             :     // check inputs
     176             :     //--------------------------------------------------------------------------
     177             : 
     178          10 :     LG_ASSERT (CDLP_handle != NULL, GrB_NULL_POINTER) ;
     179             : 
     180             :     //--------------------------------------------------------------------------
     181             :     // ensure input is binary and has no self-edges
     182             :     //--------------------------------------------------------------------------
     183             : 
     184             :     // n = size of A (# of nodes in the graph)
     185             :     // nz = # of non-zero elements in the matrix
     186             :     // nnz = # of non-zero elements used in the computations
     187             :     //   (twice as many for directed graphs)
     188             :     GrB_Index n, nz, nnz;
     189          10 :     GRB_TRY (GrB_Matrix_nrows(&n, A))
     190          10 :     GRB_TRY (GrB_Matrix_nvals(&nz, A))
     191          10 :     if (!symmetric)
     192             :     {
     193           1 :         nnz = 2 * nz;
     194             :     }
     195             :     else
     196             :     {
     197           9 :         nnz = nz;
     198             :     }
     199             : 
     200          10 :     GRB_TRY (GrB_Matrix_new (&S, GrB_UINT64, n, n)) ;
     201          10 :     GRB_TRY (GrB_apply (S, GrB_NULL, GrB_NULL, GrB_ONEB_UINT64, A, 0, GrB_NULL)) ;
     202             : 
     203             :     // Initialize L with diagonal elements 1..n
     204          10 :     LAGRAPH_TRY (LAGraph_Malloc ((void **) &LP, n+1, sizeof (GrB_Index), msg)) ;
     205        3314 :     for (GrB_Index i = 0; i <= n; i++) {
     206        3304 :         LP[i] = i ;
     207             :     }
     208          10 :     LAGRAPH_TRY (LAGraph_Malloc((void **) &LI, n, sizeof (GrB_Index), msg)) ;
     209          10 :     LAGRAPH_TRY (LAGraph_Malloc((void **) &LX, n, sizeof (GrB_Index), msg)) ;
     210        3304 :     for (GrB_Index i = 0; i < n; i++) {
     211        3294 :         LI[i] = i ;
     212        3294 :         LX[i] = i ;
     213             :     }
     214             : #if LAGRAPH_SUITESPARSE
     215          10 :     GRB_TRY (GrB_Matrix_new (&L, GrB_UINT64, n, n)) ;
     216          10 :     GRB_TRY (GxB_Matrix_pack_CSC (L, &LP, &LI, (void **) &LX, (n+1)*sizeof(GrB_Index), n*sizeof(GrB_Index), n*sizeof(GrB_Index), false, false, GrB_NULL)) ;
     217             : #else
     218             :     GRB_TRY (GrB_Matrix_import (&L, GrB_UINT64, n, n, LP, LI, LX, n+1, n, n, GrB_CSC_FORMAT)) ;
     219             :     LAGRAPH_TRY (LAGraph_Free ((void **) &LP, NULL)) ;
     220             :     LAGRAPH_TRY (LAGraph_Free ((void **) &LI, NULL)) ;
     221             :     LAGRAPH_TRY (LAGraph_Free ((void **) &LX, NULL)) ;
     222             : #endif
     223             : 
     224             :     // Initialize matrix for storing previous labels
     225          10 :     GRB_TRY (GrB_Matrix_new(&L_prev, GrB_UINT64, n, n))
     226             : 
     227          10 :     if (!symmetric)
     228             :     {
     229             :         // compute AT for the unsymmetric case as it will be used
     230             :         // to compute A' = A' min.2nd L in each iteration
     231           1 :         GRB_TRY (GrB_Matrix_new (&AT, GrB_UINT64, n, n)) ;
     232           1 :         GRB_TRY (GrB_transpose (AT, NULL, NULL, S, NULL)) ;
     233             :     }
     234             : 
     235             :     // Initialize data structures for extraction from 'AL_in' and (for directed graphs) 'AL_out'
     236          10 :     LAGRAPH_TRY (LAGraph_Malloc((void **) &I, nnz, sizeof(GrB_Index), msg));
     237          10 :     LAGRAPH_TRY (LAGraph_Malloc((void **) &X, nnz, sizeof(GrB_Index), msg));
     238             : 
     239         353 :     for (int iteration = 0; iteration < itermax; iteration++)
     240             :     {
     241             :         // A = A min.2nd L
     242             :         // (using the "push" (saxpy) method)
     243         350 :         GRB_TRY (GrB_mxm(S, GrB_NULL, GrB_NULL,
     244             :                            GrB_MIN_SECOND_SEMIRING_UINT64, S, L, NULL));
     245         350 :         GRB_TRY (GrB_Matrix_extractTuples_UINT64(I, GrB_NULL, X, &nz, S));
     246             : 
     247         350 :         if (!symmetric)
     248             :         {
     249             :             // A' = A' min.2nd L
     250             :             // (using the "push" (saxpy) method)
     251           6 :             GRB_TRY (GrB_mxm(AT, GrB_NULL, GrB_NULL,
     252             :                                GrB_MIN_SECOND_SEMIRING_UINT64, AT, L, NULL));
     253           6 :             GRB_TRY (GrB_Matrix_extractTuples_UINT64(&I[nz],
     254             :                                                        GrB_NULL, &X[nz], &nz, AT));
     255             :         }
     256             : 
     257         350 :         LG_msort2((int64_t *) I, (int64_t *) X, nnz, NULL);
     258             : 
     259             :         // save current labels for comparison by swapping L and L_prev
     260         350 :         GrB_Matrix L_swap = L;
     261         350 :         L = L_prev;
     262         350 :         L_prev = L_swap;
     263             : 
     264         350 :         GrB_Index mode_value = -1;
     265         350 :         GrB_Index mode_length = 0;
     266         350 :         GrB_Index run_length = 1;
     267             : 
     268             :         // I[k] is the current row index
     269             :         // X[k] is the current value
     270             :         // we iterate in range 1..nnz and use the last index (nnz) to process the last row of the matrix
     271     8828598 :         for (GrB_Index k = 1; k <= nnz; k++)
     272             :         {
     273             :             // check if we have a reason to recompute the mode value
     274     8828248 :             if (k == nnz           // we surpassed the last element
     275     8827898 :                 || I[k-1] != I[k]  // the row index has changed
     276     8512372 :                 || X[k-1] != X[k]) // the run value has changed
     277             :             {
     278      727057 :                 if (run_length > mode_length)
     279             :                 {
     280      431414 :                     mode_value = X[k-1];
     281      431414 :                     mode_length = run_length;
     282             :                 }
     283      727057 :                 run_length = 0;
     284             :             }
     285     8828248 :             run_length++;
     286             : 
     287             :             // check if we passed a row
     288     8828248 :             if (k == nnz           // we surpassed the last element
     289     8827898 :                 || I[k-1] != I[k]) // or the row index has changed
     290             :             {
     291      315876 :                 GrB_Matrix_setElement(L, mode_value, I[k-1], I[k-1]);
     292      315876 :                 mode_length = 0;
     293             :             }
     294             :         }
     295             : 
     296             :         bool isequal;
     297         350 :         LAGraph_Matrix_IsEqual (&isequal, L_prev, L, NULL);
     298         350 :         if (isequal) {
     299           7 :             break;
     300             :         }
     301             :     }
     302             : 
     303          10 :     LAGraph_Free ((void **) &I, NULL) ;
     304          10 :     LAGraph_Free ((void **) &X, NULL) ;
     305             : 
     306             :     //--------------------------------------------------------------------------
     307             :     // extract final labels to the result vector
     308             :     //--------------------------------------------------------------------------
     309             : 
     310          10 :     GRB_TRY (GrB_Vector_new(&CDLP, GrB_UINT64, n)) ;
     311             : #if LAGRAPH_SUITESPARSE
     312          10 :     GRB_TRY (GxB_Vector_diag (CDLP, L, 0, GrB_NULL)) ;
     313             : #else
     314             :     for (GrB_Index i = 0; i < n; i++)
     315             :     {
     316             :         uint64_t x;
     317             :         GRB_TRY (GrB_Matrix_extractElement(&x, L, i, i))
     318             :         GRB_TRY (GrB_Vector_setElement(CDLP, x, i))
     319             :     }
     320             : #endif
     321             : 
     322             :     //--------------------------------------------------------------------------
     323             :     // free workspace and return result
     324             :     //--------------------------------------------------------------------------
     325             : 
     326          10 :     (*CDLP_handle) = CDLP;
     327          10 :     CDLP = NULL;            // set to NULL so LG_FREE_ALL doesn't free it
     328          10 :     LG_FREE_ALL;
     329             : 
     330          10 :     return (GrB_SUCCESS);
     331             : }

Generated by: LCOV version 1.14