LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGraph_lcc.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: cc56ed4. Current time (UTC): 2024-08-30T17:14:30Z Lines: 67 67 100.0 %
Date: 2024-08-30 17:16:41 Functions: 3 3 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGraph_lcc: local clustering coefficient
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Gabor Szarnyas and Balint Hegyi, Budapest University of
      15             : // Technology and Economics (with accented characters: G\'{a}bor Sz\'{a}rnyas
      16             : // and B\'{a}lint Hegyi, using LaTeX syntax).
      17             : // https://inf.mit.bme.hu/en/members/szarnyasg .
      18             : // Modified by Timothy A. Davis, Texas A&M University
      19             : 
      20             : //------------------------------------------------------------------------------
      21             : 
      22             : // This function was originally written for the LDBC Graphalytics benchmark,
      23             : // at https://graphalytics.org/ .
      24             : 
      25             : // The local clustering coefficient is a measure for each node of a graph.
      26             : // Its definition is fully described in the following document:
      27             : // https://ldbc.github.io/ldbc_graphalytics_docs/graphalytics_spec.pdf
      28             : 
      29             : // For each node v, the lcc(v) is the ratio between the number of edges between
      30             : // neighbors of the node v, and the maximum possible number of edges between
      31             : // these neighbors.  If a node v has fewer than 2 neighbors, then its
      32             : // coefficient is defined as zero, and the vth entry does not appear in the
      33             : // sparse vector LCC returned.
      34             : 
      35             : // Let N_in(v)  = the set of nodes u such that (u,v) is an edge.
      36             : // Let N_out(v) = the set of nodes u such that (v,u) is an edge.
      37             : // Let N(v) = union (N_in(v), N_out(v)).
      38             : // Then the metric lcc(v) is defined as:
      39             : 
      40             : // lcc(v) = (sum for all u in N(v) of |intersection (N(v), N_out(u))) /
      41             : //          ( |N(v)| * (|N(v)|-1) )
      42             : 
      43             : // That is, for directed graphs, the set of neighbors N(v) is found without
      44             : // taking directions into account, but a node u that has both an edge (u,v) and
      45             : // (v,u) is counted just once.  However, edge directions are enforced when
      46             : // considering two nodes u1 and u2 that are both in N(v), i.e. when counting
      47             : // the number of edges between neighbors, (u,v) and (v,u) are counted as two.
      48             : // To account for this, the maximum possible number of edges for vertex v is
      49             : // determined as the 2-combination of |N(v)| for undirected graphs and as the
      50             : // 2-permutation of |N(v)| for directed graphs.
      51             : 
      52             : // The input matrix A must be square.  If A is known to be binary (with all
      53             : // explicit edge weights equal to 1), then sanitize can be false.  This is the
      54             : // case for the LDBC benchmark.
      55             : 
      56             : // Otherwise, if sanitize is true, edge weights of A are ignored and only the
      57             : // structure of A is used.  This step takes extra time and memory to sanitize the
      58             : // input matrix A.  For a fair comparison in the LDBC benchmark, sanitize
      59             : // should be false.
      60             : 
      61             : // Results are undefined if sanitize is false, and the matrix A has any entries
      62             : // not equal to 1 (even zero-weight edges are not allowed), or if it has self
      63             : // edges.
      64             : 
      65             : #define LG_FREE_ALL                 \
      66             : {                                   \
      67             :     GrB_free (&A2) ;                \
      68             :     GrB_free (&C2) ;                \
      69             :     GrB_free (&CL) ;                \
      70             :     GrB_free (&U) ;                 \
      71             :     GrB_free (&W) ;                 \
      72             :     GrB_free (&LCC) ;               \
      73             :     GrB_free (&LAGraph_COMB_DIR_FP64) ;                 \
      74             :     GrB_free (&LAGraph_COMB_UNDIR_FP64) ;                 \
      75             : }
      76             : 
      77             : #include "LG_internal.h"
      78             : #include <LAGraph.h>
      79             : #include <LAGraphX.h>
      80             : 
      81             : //------------------------------------------------------------------------------
      82             : 
      83             : #define F_UNARY(f)  ((void (*)(void *, const void *)) f)
      84             : 
      85             : // z = x * (x - 1), used by LAGraph_lcc.
      86             : // This operator calculates the 2-permutation of d(v).
      87          67 : void LAGraph_comb_dir_fp64
      88             : (
      89             :     void *z,
      90             :     const void *x
      91             : )
      92             : {
      93          67 :     double xd = *(double *) x ;
      94          67 :     double *zd = (double *) z ;
      95          67 :     (*zd) = ((xd) * (xd - 1)) ;
      96          67 : }
      97             : 
      98             : // z = x * (x - 1) / 2, used by LAGraph_lcc.
      99             : // This operator calculates the 2-combination of d(v).
     100        3227 : void LAGraph_comb_undir_fp64
     101             : (
     102             :     void *z,
     103             :     const void *x
     104             : )
     105             : {
     106        3227 :     double xd = *(double *) x ;
     107        3227 :     double *zd = (double *) z ;
     108        3227 :     (*zd) = ((xd) * (xd - 1)) / 2;
     109        3227 : }
     110             : 
     111             : //------------------------------------------------------------------------------
     112             : 
     113          12 : int LAGraph_lcc            // compute lcc for all nodes in A
     114             : (
     115             :     GrB_Vector *LCC_handle,     // output vector
     116             :     const GrB_Matrix A,         // input matrix
     117             :     bool symmetric,             // if true, the matrix is symmetric
     118             :     bool sanitize,              // if true, ensure A is binary
     119             :     double t [2],               // t [0] = sanitize time, t [1] = lcc time,
     120             :                                 // in seconds
     121             :     char *msg
     122             : )
     123             : {
     124             : 
     125             :     //--------------------------------------------------------------------------
     126             :     // check inputs
     127             :     //--------------------------------------------------------------------------
     128             : 
     129          12 :     LG_CLEAR_MSG ;
     130          12 :     if (LCC_handle == NULL)
     131             :     {
     132           1 :         return (GrB_NULL_POINTER) ;
     133             :     }
     134             : 
     135          11 :     GrB_Matrix C = NULL, CL = NULL, S = NULL, U = NULL ;
     136          11 :     GrB_Vector W = NULL, LCC = NULL ; 
     137          11 :     GrB_Matrix A2 = NULL, C2 = NULL ;
     138          11 :     GrB_UnaryOp LAGraph_COMB_DIR_FP64 = NULL ;
     139          11 :     GrB_UnaryOp LAGraph_COMB_UNDIR_FP64 = NULL ;
     140             :     GrB_Info info ;
     141             : 
     142             : #if !LAGRAPH_SUITESPARSE
     143             :     LG_ASSERT (false, GrB_NOT_IMPLEMENTED) ;
     144             : #else
     145             : 
     146             :     // n = size of A (# of nodes in the graph)
     147             :     GrB_Index n ;
     148          11 :     GRB_TRY (GrB_Matrix_nrows (&n, A)) ;
     149             : #if LAGRAPH_SUITESPARSE
     150             :     GxB_Format_Value fmt ;
     151          11 :     GRB_TRY (GxB_get (A, GxB_FORMAT, &fmt)) ;
     152          11 :     if (fmt != GxB_BY_ROW)
     153             :     {
     154           1 :         return (GrB_INVALID_VALUE) ;
     155             :     }
     156             : #endif
     157             : 
     158             :     //--------------------------------------------------------------------------
     159             :     // ensure input is binary and has no self-edges
     160             :     //--------------------------------------------------------------------------
     161             : 
     162          10 :     t [0] = 0 ;         // sanitize time
     163          10 :     t [1] = 0 ;         // LCC time
     164             : 
     165             :     // fixme: use operators that ignore the values of A
     166          10 :     if (sanitize)
     167             :     {
     168           3 :         t [0] = LAGraph_WallClockTime ( ) ;
     169             : 
     170             :         // S = binary structure of A
     171           3 :         GRB_TRY (GrB_Matrix_new (&A2, GrB_FP64, n, n)) ;
     172           3 :         GRB_TRY (GrB_apply (A2, NULL, NULL, GrB_ONEB_FP64, A, 0, NULL)) ;
     173             : 
     174             :         // remove all self edges
     175           3 :         GRB_TRY (GrB_select (A2, NULL, NULL, GrB_OFFDIAG, A2, 0, NULL)) ;
     176           3 :         t [0] = LAGraph_WallClockTime ( ) - t [0] ;
     177           3 :         S = A2 ;
     178             :     }
     179             :     else
     180             :     {
     181             :         // Use the input as-is, and assume it is binary with no self edges.
     182             :         // Results are undefined if this condition does not hold.
     183           7 :         S = A ;
     184             :     }
     185             : 
     186          10 :     t [1] = LAGraph_WallClockTime ( ) ;
     187             : 
     188             :     //--------------------------------------------------------------------------
     189             :     // create the operators for LAGraph_lcc
     190             :     //--------------------------------------------------------------------------
     191             : 
     192          10 :     GRB_TRY (GrB_UnaryOp_new (&LAGraph_COMB_DIR_FP64,
     193             :                                  F_UNARY (LAGraph_comb_dir_fp64),
     194             :                                  GrB_FP64, GrB_FP64)) ;
     195             : 
     196          10 :     GRB_TRY (GrB_UnaryOp_new (&LAGraph_COMB_UNDIR_FP64,
     197             :                                  F_UNARY (LAGraph_comb_undir_fp64),
     198             :                                  GrB_FP64, GrB_FP64)) ;
     199             : 
     200          10 :     GRB_TRY (GrB_Matrix_new (&U, GrB_UINT32, n, n)) ;
     201             : 
     202          10 :     if (symmetric)
     203             :     {
     204           9 :         C = S ;
     205             : 
     206             :         //----------------------------------------------------------------------
     207             :         // U = triu(C)
     208             :         //----------------------------------------------------------------------
     209             : 
     210           9 :         GRB_TRY (GxB_select (U, NULL, NULL, GxB_TRIU, C, NULL, NULL)) ;
     211             : 
     212             :     }
     213             :     else
     214             :     {
     215           1 :         GrB_Matrix AT = NULL, D = NULL ;
     216             : 
     217           1 :         GRB_TRY (GrB_Matrix_new (&AT, GrB_FP64, n, n)) ;
     218           1 :         GRB_TRY (GrB_transpose (AT, NULL, NULL, S, NULL)) ;
     219             : 
     220             :         //----------------------------------------------------------------------
     221             :         // C = A \/ A' to create an undirected graph C
     222             :         //----------------------------------------------------------------------
     223             : 
     224           1 :         GRB_TRY (GrB_Matrix_new (&C2, GrB_FP64, n, n)) ;
     225           1 :         C = C2 ;
     226           1 :         GRB_TRY (GrB_eWiseAdd (C, NULL, NULL, GrB_LOR, S, AT, NULL)) ;
     227             : 
     228             :         //----------------------------------------------------------------------
     229             :         // D = A + A' to create an undirected multigraph D
     230             :         //----------------------------------------------------------------------
     231             : 
     232           1 :         GRB_TRY (GrB_Matrix_new (&D, GrB_FP64, n, n)) ;
     233           1 :         GRB_TRY (GrB_eWiseAdd (D, NULL, NULL, GrB_PLUS_FP64, S, AT, NULL)) ;
     234             : 
     235           1 :         GrB_free (&AT) ;
     236             : 
     237             :         //----------------------------------------------------------------------
     238             :         // U = triu(D)
     239             :         //----------------------------------------------------------------------
     240             : 
     241             :         // note that L=U' since D is symmetric
     242           1 :         GRB_TRY (GxB_select (U, NULL, NULL, GxB_TRIU, D, NULL, NULL)) ;
     243           1 :         GrB_free (&D) ;
     244             :     }
     245             : 
     246             :     //--------------------------------------------------------------------------
     247             :     // Find wedges of each node
     248             :     //--------------------------------------------------------------------------
     249             : 
     250             :     // W(i) = sum (C (i,:))
     251          10 :     GRB_TRY (GrB_Vector_new (&W, GrB_FP64, n)) ;
     252          10 :     GRB_TRY (GrB_reduce (W, NULL, NULL, GrB_PLUS_FP64, C, NULL)) ;
     253             : 
     254             :     // Compute vector W defining the number of wedges per vertex
     255          10 :     if (symmetric)
     256             :     {
     257             :         // the graph is undirected
     258           9 :         GRB_TRY (GrB_apply(W, NULL, NULL, LAGraph_COMB_UNDIR_FP64, W, NULL));
     259             :     }
     260             :     else
     261             :     {
     262             :         // the graph is directed
     263           1 :         GRB_TRY (GrB_apply(W, NULL, NULL, LAGraph_COMB_DIR_FP64, W, NULL)) ;
     264             :     }
     265             : 
     266             :     //--------------------------------------------------------------------------
     267             :     // Calculate triangles
     268             :     //--------------------------------------------------------------------------
     269             : 
     270             :     // CL<C> = C*L = C*U' using a masked dot product
     271          10 :     GRB_TRY (GrB_Matrix_new (&CL, GrB_FP64, n, n)) ;
     272          10 :     GRB_TRY (GrB_mxm (CL, C, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, C, U,
     273             :                          GrB_DESC_T1));
     274          10 :     GrB_free (&U) ; U = NULL;
     275             : 
     276             :     //--------------------------------------------------------------------------
     277             :     // Calculate LCC
     278             :     //--------------------------------------------------------------------------
     279             : 
     280             :     // LCC(i) = sum (CL (i,:)) = # of triangles at each node
     281          10 :     GRB_TRY (GrB_Vector_new (&LCC, GrB_FP64, n)) ;
     282          10 :     GRB_TRY (GrB_reduce (LCC, NULL, NULL, GrB_PLUS_FP64, CL, NULL)) ;
     283          10 :     GrB_free (&CL) ; CL = NULL;
     284             : 
     285             :     // LCC = LCC ./ W
     286          10 :     GRB_TRY (GrB_eWiseMult (LCC, NULL, NULL, GrB_DIV_FP64, LCC, W, NULL)) ;
     287             : 
     288             :     //--------------------------------------------------------------------------
     289             :     // free workspace and return result
     290             :     //--------------------------------------------------------------------------
     291             : 
     292          10 :     (*LCC_handle) = LCC ; LCC = NULL ;
     293             : 
     294          10 :     LG_FREE_ALL ;
     295          10 :     t [1] = LAGraph_WallClockTime ( ) - t [1] ;
     296          10 :     return (GrB_SUCCESS) ;
     297             : #endif
     298             : }

Generated by: LCOV version 1.14