LCOV - code coverage report
Current view: top level - experimental/algorithm - LAGraph_CFL_reachability.c (source / functions) Hit Total Coverage
Test: LAGraph code coverage report. Commit id: 7b9d2ee. Current time (UTC): 2025-06-03T21:57:17Z Lines: 112 112 100.0 %
Date: 2025-06-03 22:02:40 Functions: 1 1 100.0 %

          Line data    Source code
       1             : //------------------------------------------------------------------------------
       2             : // LAGraph_CFL_reachability.c: Context-Free Language Reachability Matrix-Based
       3             : // Algorithm
       4             : // ------------------------------------------------------------------------------
       5             : //
       6             : // LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved.
       7             : // SPDX-License-Identifier: BSD-2-Clause
       8             : 
       9             : // Contributed by Ilhom Kombaev, Semyon Grigoriev, St. Petersburg State University.
      10             : 
      11             : //------------------------------------------------------------------------------
      12             : 
      13             : // Code is based on the "A matrix-based CFPQ algorithm" described in the
      14             : // following paper: * Rustam Azimov, Semyon Grigorev, "Context-Free Path
      15             : // Querying Using Linear Algebra", URL:
      16             : // https://disser.spbu.ru/files/2022/disser_azimov.pdf
      17             : 
      18             : #define LG_FREE_WORK                                                        \
      19             :     {                                                                       \
      20             :         LAGraph_Free ((void **) &nnzs, msg) ;                               \
      21             :         GrB_free(&true_scalar);                                             \
      22             :         GrB_free(&identity_matrix);                                         \
      23             :         LAGraph_Free ((void **) &T, msg);                                   \
      24             :         LAGraph_Free ((void **) &indexes, msg);                             \
      25             :         LAGraph_Free((void**) &t_empty_flags, NULL);                        \
      26             :         LAGraph_Free((void**) &eps_rules, NULL);                            \
      27             :         LAGraph_Free((void**) &term_rules, NULL);                           \
      28             :         LAGraph_Free((void**) &bin_rules, NULL);                            \
      29             :     }
      30             : 
      31             : #define LG_FREE_ALL                                                         \
      32             :     {                                                                       \
      33             :         for (int64_t i = 0; i < nonterms_count; i++) {                      \
      34             :             GrB_free(&T[i]);                                                \
      35             :         }                                                                   \
      36             :                                                                             \
      37             :         LG_FREE_WORK;                                                       \
      38             :     }
      39             : 
      40             : #include "LG_internal.h"
      41             : #include <LAGraphX.h>
      42             : 
      43             : #define ERROR_RULE(msg,i)                                                   \
      44             :     {                                                                       \
      45             :         LG_ASSERT_MSGF(false, GrB_INVALID_VALUE,                            \
      46             :             "Rule with index %" PRId64 " is invalid. ", msg, i);            \
      47             :     }
      48             : 
      49             : #define ADD_TO_MSG(...)                                                     \
      50             :     {                                                                       \
      51             :         if (msg_len == 0) {                                                 \
      52             :             msg_len +=                                                      \
      53             :                 snprintf(msg, LAGRAPH_MSG_LEN,                              \
      54             :                          "LAGraph failure (file %s, line %d): ",            \
      55             :                         __FILE__, __LINE__);                                \
      56             :         }                                                                   \
      57             :         if (msg_len < LAGRAPH_MSG_LEN) {                                    \
      58             :             msg_len += snprintf(msg + msg_len, LAGRAPH_MSG_LEN - msg_len,   \
      59             :                 __VA_ARGS__);                                               \
      60             :         }                                                                   \
      61             :     }
      62             : 
      63             : #define ADD_INDEX_TO_ERROR_RULE(rule, i)                                    \
      64             :     {                                                                       \
      65             :         rule.len_indexes_str += snprintf(                                   \
      66             :             rule.indexes_str + rule.len_indexes_str,                        \
      67             :             LAGRAPH_MSG_LEN - rule.len_indexes_str,                         \
      68             :             rule.count == 0 ? "%" PRId64 : ", %" PRId64, i);                \
      69             :         rule.count++;                                                       \
      70             :     }
      71             : 
      72             : 
      73             : 
      74             : // LAGraph_CFL_reachability: Context-Free Language Reachability Matrix-Based Algorithm
      75             : //
      76             : // This function determines the set of vertex pairs (u, v) in a graph (represented by
      77             : // adjacency matrices) such that there is a path from u to v, where the edge labels form a
      78             : // word from the language generated by the context-free grammar (represented by `rules`).
      79             : //
      80             : // Terminals and non-terminals are enumerated by integers starting from zero.
      81             : // The start non-terminal is the non-terminal with index 0.
      82             : //
      83             : // Example:
      84             : //
      85             : // Graph:
      86             : // ┌───┐   ┌───┐   ┌───┐   ┌───┐   ┌───┐
      87             : // │ 0 ├───► 1 ├───► 2 ├───► 3 ├───► 4 │
      88             : // └───┘ a └─┬─┘ a └─▲─┘ b └───┘ b └───┘
      89             : //           │       │
      90             : //           │ ┌───┐ │
      91             : //          a└─► 5 ├─┘b
      92             : //             └───┘
      93             : //
      94             : // Grammar: S -> aSb | ab
      95             : //
      96             : // There are paths from node [1] to node [3] and from node [1] to node [2] that form the
      97             : // word "ab" ([1]-a->[2]-b->[3] and [1]-a->[5]-b->[2]). The word "ab" is in the language
      98             : // generated by our context-free grammar, so the pairs (1, 3) and (1, 2) will be included
      99             : // in the result.
     100             : //
     101             : // Note: It doesn't matter how many paths exist from node [A] to node [B] that form a word
     102             : // in the language. If at least one path exists, the pair ([A], [B]) will be included in
     103             : // the result.
     104             : //
     105             : // In contrast, the path from node [1] to node [4] forms the word "abb"
     106             : // ([1]-a->[2]-b->[3]-b->[4]) and the word "abbb" ([1]-a->[5]-b->[2]-b->[3]-b->[4]).
     107             : // The words "aab" and "abbb" are not in the language, so the pair (1, 4) will not be
     108             : // included in the result.
     109             : //
     110             : // With this graph and grammar, we obtain the following results:
     111             : // (0, 4) - because there exists a path (0-1-2-3-4) that forms the word "aabb"
     112             : // (1, 3) - because there exists a path (1-2-3) that forms "ab"
     113             : // (1, 2) - because there exists a path (1-5-2) that forms the word "ab"
     114             : // (0, 3) - because there exists a path (0-1-5-2-3) that forms the word "aabb"
     115             : 
     116          17 : GrB_Info LAGraph_CFL_reachability
     117             : (
     118             :     // Output
     119             :     GrB_Matrix *outputs, // Array of matrices containing results.
     120             :                          // The size of the array must be equal to nonterms_count.
     121             :                          //
     122             :                          // outputs[k]: (i, j) = true if and only if there is a path
     123             :                          // from node i to node j whose edge labels form a word
     124             :                          // derivable from the non-terminal 'k' of the specified CFG.
     125             :     // Input
     126             :     const GrB_Matrix *adj_matrices, // Array of adjacency matrices representing the graph.
     127             :                                     // The length of this array is equal to the count of
     128             :                                     // terminals (terms_count).
     129             :                                     //
     130             :                                     // adj_matrices[t]: (i, j) == 1 if and only if there
     131             :                                     // is an edge between nodes i and j with the label of
     132             :                                     // the terminal corresponding to index 't' (where t is
     133             :                                     // in the range [0, terms_count - 1]).
     134             :     int64_t terms_count,            // The total number of terminal symbols in the CFG.
     135             :     int64_t nonterms_count,         // The total number of non-terminal symbols in the CFG.
     136             :     const LAGraph_rule_WCNF *rules, // The rules of the CFG.
     137             :     int64_t rules_count,            // The total number of rules in the CFG.
     138             :     char *msg                       // Message string for error reporting.
     139             : )
     140             : {
     141             : 
     142             : #if LAGRAPH_SUITESPARSE
     143             :     // Declare workspace and clear the msg string, if not NULL
     144             :     GrB_Matrix *T;
     145          17 :     bool *t_empty_flags = NULL ; // t_empty_flags[i] == true <=> T[i] is empty
     146          17 :     GrB_Matrix identity_matrix = NULL;
     147          17 :     uint64_t *nnzs = NULL;
     148          17 :     LG_CLEAR_MSG;
     149          17 :     size_t msg_len = 0; // For error formatting
     150          17 :     bool iso_flag = false;
     151          17 :     GrB_Index *indexes = NULL;
     152             : 
     153             :     // Arrays for processing rules
     154          17 :     size_t *eps_rules = NULL, eps_rules_count = 0;   // [Variable -> eps]
     155          17 :     size_t *term_rules = NULL, term_rules_count = 0; // [Variable -> term]
     156          17 :     size_t *bin_rules = NULL, bin_rules_count = 0;   // [Variable -> AB]
     157             : 
     158             :     GrB_Scalar true_scalar;
     159          17 :     GrB_Scalar_new(&true_scalar, GrB_BOOL);
     160          17 :     GrB_Scalar_setElement_BOOL(true_scalar, true);
     161             : 
     162          17 :     LG_TRY(LAGraph_Calloc((void **) &T, nonterms_count, sizeof(GrB_Matrix), msg));
     163          17 :     LG_TRY(LAGraph_Calloc((void **) &t_empty_flags, nonterms_count, sizeof(bool), msg)) ;
     164             : 
     165          17 :     LG_ASSERT_MSG(terms_count > 0, GrB_INVALID_VALUE,
     166             :                   "The number of terminals must be greater than zero.");
     167          17 :     LG_ASSERT_MSG(nonterms_count > 0, GrB_INVALID_VALUE,
     168             :                   "The number of non-terminals must be greater than zero.");
     169          17 :     LG_ASSERT_MSG(rules_count > 0, GrB_INVALID_VALUE,
     170             :                   "The number of rules must be greater than zero.");
     171          21 :     LG_ASSERT_MSG(outputs != NULL, GrB_NULL_POINTER, "The outputs array cannot be null.");
     172          20 :     LG_ASSERT_MSG(rules != NULL, GrB_NULL_POINTER, "The rules array cannot be null.");
     173          19 :     LG_ASSERT_MSG(adj_matrices != NULL, GrB_NULL_POINTER,
     174             :                   "The adjacency matrices array cannot be null.");
     175             : 
     176             :     // Find null adjacency matrices
     177          14 :     bool found_null = false;
     178          40 :     for (int64_t i = 0; i < terms_count; i++) {
     179          26 :         if (adj_matrices[i] != NULL)
     180          24 :             continue;
     181             : 
     182           2 :         if (!found_null) {
     183           1 :             ADD_TO_MSG("Adjacency matrices with these indexes are null: ");
     184           1 :             ADD_TO_MSG("%" PRId64, i);
     185             :         } else {
     186           1 :             ADD_TO_MSG("%" PRId64, i);
     187             :         }
     188             : 
     189           2 :         found_null = true;
     190             :     }
     191             : 
     192          14 :     if (found_null) {
     193           5 :         LG_FREE_ALL;
     194           1 :         return GrB_NULL_POINTER;
     195             :     }
     196             : 
     197             :     GrB_Index n;
     198          13 :     GRB_TRY(GrB_Matrix_ncols(&n, adj_matrices[0]));
     199             : 
     200             :     // Create nonterms matrices
     201          80 :     for (int64_t i = 0; i < nonterms_count; i++) {
     202          67 :         GRB_TRY(GrB_Matrix_new(&T[i], GrB_BOOL, n, n));
     203          67 :         t_empty_flags[i] = true;
     204             :     }
     205             : 
     206          13 :     LG_TRY(LAGraph_Calloc((void **) &eps_rules, rules_count, sizeof(size_t), msg)) ;
     207          13 :     LG_TRY(LAGraph_Calloc((void **) &term_rules, rules_count, sizeof(size_t), msg)) ;
     208          13 :     LG_TRY(LAGraph_Calloc((void **) &bin_rules, rules_count, sizeof(size_t), msg)) ;
     209             : 
     210             :     // Process rules
     211             :     typedef struct {
     212             :         size_t count;
     213             :         size_t len_indexes_str;
     214             :         char indexes_str[LAGRAPH_MSG_LEN];
     215             :     } rule_error_s;
     216          13 :     rule_error_s term_err = {0};
     217          13 :     rule_error_s nonterm_err = {0};
     218          13 :     rule_error_s invalid_err = {0};
     219          95 :     for (int64_t i = 0; i < rules_count; i++) {
     220          82 :         LAGraph_rule_WCNF rule = rules[i];
     221             : 
     222          82 :         bool is_rule_eps = rule.prod_A == -1 && rule.prod_B == -1;
     223          82 :         bool is_rule_term = rule.prod_A != -1 && rule.prod_B == -1;
     224          82 :         bool is_rule_bin = rule.prod_A != -1 && rule.prod_B != -1;
     225             : 
     226             :         // Check that all rules are well-formed
     227          82 :         if (rule.nonterm < 0 || rule.nonterm >= nonterms_count) {
     228           2 :             ADD_INDEX_TO_ERROR_RULE(nonterm_err, i);
     229             :         }
     230             : 
     231             :         // [Variable -> eps]
     232          82 :         if (is_rule_eps) {
     233           2 :             eps_rules[eps_rules_count++] = i;
     234             : 
     235          81 :             continue;
     236             :         }
     237             : 
     238             :         // [Variable -> term]
     239          80 :         if (is_rule_term) {
     240          37 :             term_rules[term_rules_count++] = i;
     241             : 
     242          37 :             if (rule.prod_A < -1 || rule.prod_A >= terms_count) {
     243           1 :                 ADD_INDEX_TO_ERROR_RULE(term_err, i);
     244             :             }
     245             : 
     246          37 :             continue;
     247             :         }
     248             : 
     249             :         // [Variable -> A B]
     250          43 :         if (is_rule_bin) {
     251          42 :             bin_rules[bin_rules_count++] = i;
     252             : 
     253          42 :             if (rule.prod_A < -1 || rule.prod_A >= nonterms_count || rule.prod_B < -1 ||
     254          41 :                 rule.prod_B >= nonterms_count) {
     255           1 :                 ADD_INDEX_TO_ERROR_RULE(nonterm_err, i);
     256             :             }
     257             : 
     258          42 :             continue;
     259             :         }
     260             : 
     261             :         // [Variable -> _ B]
     262           1 :         ADD_INDEX_TO_ERROR_RULE(invalid_err, i);
     263             :     }
     264             : 
     265          13 :     if (term_err.count + nonterm_err.count + invalid_err.count > 0) {
     266           5 :         ADD_TO_MSG("Count of invalid rules: %" PRId64 ".\n",
     267             :             (int64_t) (term_err.count + nonterm_err.count + invalid_err.count));
     268             : 
     269           5 :         if (nonterm_err.count > 0) {
     270           3 :             ADD_TO_MSG("Non-terminals must be in range [0, nonterms_count). ");
     271           3 :             ADD_TO_MSG("Indexes of invalid rules: %s\n", nonterm_err.indexes_str)
     272             :         }
     273           5 :         if (term_err.count > 0) {
     274           1 :             ADD_TO_MSG("Terminals must be in range [-1, nonterms_count). ");
     275           1 :             ADD_TO_MSG("Indexes of invalid rules: %s\n", term_err.indexes_str)
     276             :         }
     277           5 :         if (invalid_err.count > 0) {
     278           1 :             ADD_TO_MSG("[Variable -> _ B] type of rule is not acceptable. ");
     279           1 :             ADD_TO_MSG("Indexes of invalid rules: %.120s\n", invalid_err.indexes_str)
     280             :         }
     281             : 
     282          25 :         LG_FREE_ALL;
     283           5 :         return GrB_INVALID_VALUE;
     284             :     }
     285             : 
     286             :     // Rule [Variable -> term]
     287          34 :     for (int64_t i = 0; i < term_rules_count; i++) {
     288          26 :         LAGraph_rule_WCNF term_rule = rules[term_rules[i]];
     289          26 :         GrB_Index adj_matrix_nnz = 0;
     290          26 :         GRB_TRY(GrB_Matrix_nvals(&adj_matrix_nnz, adj_matrices[term_rule.prod_A]));
     291             : 
     292          26 :         if (adj_matrix_nnz == 0) {
     293           1 :             continue;
     294             :         }
     295             : 
     296          25 :         GxB_eWiseUnion(
     297             :             T[term_rule.nonterm], GrB_NULL, GrB_NULL, GxB_PAIR_BOOL,
     298             :             T[term_rule.nonterm], true_scalar, adj_matrices[term_rule.prod_A], true_scalar, GrB_NULL
     299             :         );
     300             : 
     301          25 :         t_empty_flags[term_rule.nonterm] = false;
     302             : 
     303             :         #ifdef DEBUG_CFL_REACHBILITY
     304             :         GxB_Matrix_iso(&iso_flag, T[term_rule.nonterm]);
     305             :         printf("[TERM] eWiseUnion: NONTERM: %d (ISO: %d)\n", term_rule.nonterm, iso_flag);
     306             :         #endif
     307             :     }
     308             : 
     309             :     GrB_Vector v_diag;
     310           8 :     GRB_TRY(GrB_Vector_new(&v_diag, GrB_BOOL, n));
     311           8 :     GRB_TRY(GrB_Vector_assign_BOOL(v_diag, GrB_NULL, GrB_NULL, true, GrB_ALL, n, NULL));
     312           8 :     GRB_TRY(GrB_Matrix_diag(&identity_matrix, v_diag, 0));
     313           8 :     GRB_TRY(GrB_free(&v_diag));
     314             : 
     315             :     // Rule [Variable -> eps]
     316          10 :     for (int64_t i = 0; i < eps_rules_count; i++) {
     317           2 :         LAGraph_rule_WCNF eps_rule = rules[eps_rules[i]];
     318             : 
     319           2 :         GxB_eWiseUnion (
     320             :             T[eps_rule.nonterm],GrB_NULL,GxB_PAIR_BOOL,GxB_PAIR_BOOL,
     321             :             T[eps_rule.nonterm],true_scalar,identity_matrix,true_scalar,GrB_NULL
     322             :         );
     323             :         
     324           2 :         t_empty_flags[eps_rule.nonterm] = false;
     325             : 
     326             :         #ifdef DEBUG_CFL_REACHBILITY
     327             :         GxB_Matrix_iso(&iso_flag, T[eps_rule.nonterm]);
     328             :         printf("[EPS] eWiseUnion: NONTERM: %d (ISO: %d)\n",
     329             :                 eps_rule.nonterm, iso_flag);
     330             :         #endif
     331             :     }
     332             : 
     333             :     // Rule [Variable -> Variable1 Variable2]
     334           8 :     LG_TRY(LAGraph_Calloc((void **) &nnzs, nonterms_count, sizeof(uint64_t), msg));
     335           8 :     bool changed = true;
     336          35 :     while (changed) {
     337          27 :         changed = false;
     338         145 :         for (int64_t i = 0; i < bin_rules_count; i++) {
     339         118 :             LAGraph_rule_WCNF bin_rule = rules[bin_rules[i]];
     340             : 
     341             :             // If one of matrices is empty then their product will be empty
     342         118 :             if (t_empty_flags[bin_rule.prod_A] || t_empty_flags[bin_rule.prod_B]) {
     343          16 :                 continue;
     344             :             }
     345             : 
     346         102 :             GrB_BinaryOp acc_op = t_empty_flags[bin_rule.nonterm] ? GrB_NULL : GxB_ANY_BOOL;
     347         102 :             GRB_TRY(GrB_mxm(T[bin_rule.nonterm], GrB_NULL, acc_op,
     348             :                         GxB_ANY_PAIR_BOOL, T[bin_rule.prod_A], T[bin_rule.prod_B],
     349             :                         GrB_NULL))
     350             : 
     351             :             GrB_Index new_nnz;
     352         102 :             GRB_TRY(GrB_Matrix_nvals(&new_nnz, T[bin_rule.nonterm]));
     353         102 :             if (new_nnz != 0) t_empty_flags[bin_rule.nonterm] = false;
     354             : 
     355         102 :             changed = changed || (nnzs[bin_rule.nonterm] != new_nnz);
     356         102 :             nnzs[bin_rule.nonterm] = new_nnz;
     357             : 
     358             :             #ifdef DEBUG_CFL_REACHBILITY
     359             :             GxB_Matrix_iso(&iso_flag, T[bin_rule.nonterm]);
     360             :             printf("[TERM1 TERM2] MULTIPLY, S: %d, A: %d, B: %d, "
     361             :                    "I: %" PRId64 " (ISO: %d)\n",
     362             :                    bin_rule.nonterm, bin_rule.prod_A, bin_rule.prod_B, i, iso_flag);
     363             :             #endif
     364             :             
     365             :         }
     366             :     }
     367             : 
     368             :     #ifdef DEBUG_CFL_REACHBILITY
     369             :         for (int64_t i = 0; i < nonterms_count; i++) {
     370             :             printf("MATRIX WITH INDEX %" PRId64 ":\n", i);
     371             :             GxB_print(T[i], GxB_SUMMARY);
     372             :         }
     373             :     #endif
     374             : 
     375          55 :     for (int64_t i = 0; i < nonterms_count; i++) {
     376          47 :         outputs[i] = T[i];
     377             :     }
     378             : 
     379           8 :     LG_FREE_WORK;
     380           8 :     return GrB_SUCCESS;
     381             : #else
     382             :     return (GrB_NOT_IMPLEMENTED) ;
     383             : #endif
     384             : }

Generated by: LCOV version 1.14