LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGr_PartitionQuality.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 51 51 100.0 %
Date: 2025-06-03 22:02:40 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGr_PartitionQuality: computes coverage and performance of a clustering
       3             : //------------------------------------------------------------------------------
       4             : 
       5             : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
       6             : // SPDX-License-Identifier: BSD-2-Clause
       7             : //
       8             : // For additional details (including references to third party source code and
       9             : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
      10             : // Contributors.txt for a full list of contributors. Created, in part, with
      11             : // funding and support from the U.S. Government (see Acknowledgments.txt file).
      12             : // DM22-0790
      13             : 
      14             : // Contributed by Cameron Quilici, Texas A&M University
      15             : 
      16             : //------------------------------------------------------------------------------
      17             : 
      18             : // TODO: ready to consider for src
      19             : // TODO: define the input vector c that defines the cluster assignment
      20             : 
      21             : // The coverage of a graph clustering C ( Cov(C) ) is defined as the ratio of
      22             : // intra-cluster edges to the total edges in a graph. The performance of a graph
      23             : // clustering C ( Perf(C) ) is defined as the ratio of intra-cluster edges and
      24             : // inter-cluster non-edges to the total number of edges in a graph. These are
      25             : // very simple cluster quality metrics which can be used to evaluate the quality
      26             : // of a clustering, primarily based on the idea of intra-cluster density.
      27             : 
      28             : // Note that coverage and performance are counting problems. Therefore, if a
      29             : // weighted graph is passed to this function, edge weights are ignored.
      30             : 
      31             : // https://arxiv.org/abs/0906.0612 pp. 15
      32             : 
      33             : #define LG_FREE_WORK                                                           \
      34             :     {                                                                          \
      35             :         GrB_free(&trace);                                                      \
      36             :         GrB_free(&k);                                                          \
      37             :         GrB_free(&C);                                                          \
      38             :         GrB_free(&CA);                                                         \
      39             :         GrB_free(&ONE_INT64);                                                  \
      40             :     }
      41             : 
      42             : #define LG_FREE_ALL                                                            \
      43             :     {                                                                          \
      44             :         LG_FREE_WORK;                                                          \
      45             :     }
      46             : 
      47             : #include "LG_internal.h"
      48             : #include <LAGraphX.h>
      49             : 
      50          43 : int LAGr_PartitionQuality(
      51             :     // Outputs
      52             :     double *cov,  // coverage output, can be NULL
      53             :     double *perf, // performance output, can be NULL
      54             :     // Inputs
      55             :     GrB_Vector c,    // input cluster vector
      56             :     LAGraph_Graph G, // original graph from which the clustering was obtained
      57             :     char *msg)
      58             : {
      59             : #if LAGRAPH_SUITESPARSE
      60          43 :     GrB_Vector trace = NULL;
      61          43 :     GrB_Vector k = NULL;
      62          43 :     GrB_Matrix C = NULL;
      63          43 :     GrB_Matrix CA = NULL;
      64             : 
      65          43 :     GrB_Scalar ONE_INT64 = NULL;
      66             : 
      67             :     GrB_Index n, nedges;
      68             :     GrB_Index n_intraEdges, n_interEdges, n_interNonEdges, sum_k2;
      69             : 
      70             :     //--------------------------------------------------------------------------
      71             :     // check inputs
      72             :     //--------------------------------------------------------------------------
      73             : 
      74          43 :     LG_CLEAR_MSG;
      75             : 
      76          43 :     LG_ASSERT_MSG(cov != NULL || perf != NULL, GrB_NULL_POINTER,
      77             :                   "cov and perf "
      78             :                   "cannot both be NULL");
      79             : 
      80          42 :     LG_TRY(LAGraph_CheckGraph(G, msg));
      81             : 
      82          41 :     LG_ASSERT_MSG (G->is_symmetric_structure != LAGRAPH_UNKNOWN,
      83             :             LAGRAPH_NOT_CACHED,
      84             :             "G->is_symmetric_structure is required") ;
      85             : 
      86          40 :     GrB_Matrix A = G->A;
      87             : 
      88             :     // Delete self-edges, not relevant to these clustering metrics
      89          40 :     GRB_TRY(GrB_select(A, NULL, NULL, GrB_OFFDIAG, A, 0, NULL));
      90             : 
      91             : #if 0
      92             :     FILE *f = fopen("./data/pp_sanitized_data.mtx", "w");
      93             :     LAGRAPH_TRY(LAGraph_MMWrite(A, f, NULL, msg));
      94             :     fclose(f);
      95             : #endif
      96             : 
      97          40 :     GRB_TRY(GrB_Matrix_nrows(&n, A));
      98          40 :     GRB_TRY(GrB_Matrix_nvals(&nedges, A));
      99             : 
     100          40 :     GRB_TRY(GrB_Matrix_new(&C, GrB_INT64, n, n));
     101          40 :     GRB_TRY(GrB_Matrix_new(&CA, GrB_INT64, n, n));
     102          40 :     GRB_TRY(GrB_Vector_new(&trace, GrB_INT64, n));
     103          40 :     GRB_TRY(GrB_Vector_new(&k, GrB_INT64, n));
     104          40 :     GRB_TRY(GrB_Scalar_new(&ONE_INT64, GrB_INT64));
     105             : 
     106          40 :     GRB_TRY(GrB_Scalar_setElement_BOOL(ONE_INT64, (uint64_t)1));
     107             : 
     108             :     // convert the cluster vector to a boolean matrix C where
     109             :     // C(i, j) = 1 if and only if vertex j is in cluster i
     110             :     GrB_Index *cI, *cX;
     111          40 :     LAGRAPH_TRY(LAGraph_Malloc((void **)&cI, n, sizeof(GrB_Index), msg));
     112          40 :     LAGRAPH_TRY(LAGraph_Malloc((void **)&cX, n, sizeof(GrB_Index), msg));
     113          40 :     GRB_TRY(GrB_Vector_extractTuples_INT64(cI, (int64_t *) cX, &n, c));
     114          40 :     GRB_TRY(GxB_Matrix_build_Scalar(C, cX, cI, ONE_INT64, n));
     115          40 :     GrB_Matrix_wait(C, GrB_MATERIALIZE);
     116          40 :     LAGraph_Free((void **)&cI, NULL);
     117          40 :     LAGraph_Free((void **)&cX, NULL);
     118             : 
     119          40 :     bool is_undirected = (G->is_symmetric_structure == LAGraph_TRUE);
     120             : 
     121             :     // k = sum(C) .^ 2
     122          40 :     GRB_TRY(GrB_reduce(k, NULL, NULL, GrB_PLUS_MONOID_INT64, C, NULL));
     123          40 :     GRB_TRY(GrB_Vector_apply_BinaryOp2nd_INT64(k, NULL, NULL, GxB_POW_INT64, k,
     124             :                                                2, NULL));
     125             :     // sum_k2 = total number of possible intra-cluster edges
     126          40 :     GRB_TRY(GrB_reduce(&sum_k2, NULL, GrB_PLUS_MONOID_INT64, k, NULL));
     127             : 
     128             :     // Calculate actual number of intra-cluster edges, if A is weighted
     129             :     // then we ignore the weights and just count the number of edges as
     130             :     // performance and coverage are inherently counting problems
     131          40 :     GRB_TRY(GrB_mxm(CA, NULL, NULL, LAGraph_plus_one_int64, C, A, NULL));
     132          40 :     GRB_TRY(GrB_mxm(CA, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_INT64, CA, C,
     133             :                     GrB_DESC_T1));
     134          40 :     GRB_TRY(GxB_Vector_diag(trace, CA, 0, NULL));
     135             : 
     136          40 :     GRB_TRY(
     137             :         GrB_reduce(&n_intraEdges, NULL, GrB_PLUS_MONOID_INT64, trace, NULL));
     138             : 
     139          40 :     if (perf)
     140             :     {
     141             :         // undirected case
     142          34 :         if (is_undirected)
     143             :         {
     144          22 :             nedges /= 2;
     145          22 :             n_intraEdges /= 2;
     146          22 :             n_interEdges = nedges - n_intraEdges;
     147             :             // All possible edges - intra cluster non-edges gives space of
     148             :             // possible inter-cluster edges. Then subtract the actual
     149             :             // inter-cluster edges to get the number of inter-cluster non-edges.
     150          22 :             n_interNonEdges =
     151          22 :                 (n * (n - 1) / 2) - ((sum_k2 - n) / 2) - n_interEdges;
     152          22 :             (*perf) =
     153          22 :                 (n_intraEdges + n_interNonEdges) * 1.0 / (n * (n - 1) / 2);
     154             :         }
     155             :         // directed case
     156             :         else
     157             :         {
     158          12 :             n_interEdges = nedges - n_intraEdges;
     159          12 :             n_interNonEdges = n * (n - 1) - (sum_k2 - n) - n_interEdges;
     160          12 :             (*perf) = (n_intraEdges + n_interNonEdges) * 1.0 / (n * (n - 1));
     161             :         }
     162             :     }
     163             : 
     164          40 :     if (cov)
     165             :     {
     166          34 :         (*cov) = n_intraEdges * 1.0 / nedges;
     167             :     }
     168             : 
     169          40 :     LG_FREE_WORK;
     170          40 :     return (GrB_SUCCESS);
     171             : #else
     172             :     return (GrB_NOT_IMPLEMENTED);
     173             : #endif
     174             : }

Generated by: LCOV version 1.14