Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGraph_CFL_reachability.c: Context-Free Language Reachability Matrix-Based
3 : // Algorithm
4 : // ------------------------------------------------------------------------------
5 : //
6 : // LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved.
7 : // SPDX-License-Identifier: BSD-2-Clause
8 :
9 : // Contributed by Ilhom Kombaev, Semyon Grigoriev, St. Petersburg State University.
10 :
11 : //------------------------------------------------------------------------------
12 :
13 : // Code is based on the "A matrix-based CFPQ algorithm" described in the
14 : // following paper: * Rustam Azimov, Semyon Grigorev, "Context-Free Path
15 : // Querying Using Linear Algebra", URL:
16 : // https://disser.spbu.ru/files/2022/disser_azimov.pdf
17 :
18 : #define LG_FREE_WORK \
19 : { \
20 : LAGraph_Free ((void **) &nnzs, msg) ; \
21 : GrB_free(&true_scalar); \
22 : GrB_free(&identity_matrix); \
23 : LAGraph_Free ((void **) &T, msg); \
24 : LAGraph_Free ((void **) &indexes, msg); \
25 : LAGraph_Free((void**) &t_empty_flags, NULL); \
26 : LAGraph_Free((void**) &eps_rules, NULL); \
27 : LAGraph_Free((void**) &term_rules, NULL); \
28 : LAGraph_Free((void**) &bin_rules, NULL); \
29 : }
30 :
31 : #define LG_FREE_ALL \
32 : { \
33 : for (int64_t i = 0; i < nonterms_count; i++) { \
34 : GrB_free(&T[i]); \
35 : } \
36 : \
37 : LG_FREE_WORK; \
38 : }
39 :
40 : #include "LG_internal.h"
41 : #include <LAGraphX.h>
42 :
43 : #define ERROR_RULE(msg,i) \
44 : { \
45 : LG_ASSERT_MSGF(false, GrB_INVALID_VALUE, \
46 : "Rule with index %" PRId64 " is invalid. ", msg, i); \
47 : }
48 :
49 : #define ADD_TO_MSG(...) \
50 : { \
51 : if (msg_len == 0) { \
52 : msg_len += \
53 : snprintf(msg, LAGRAPH_MSG_LEN, \
54 : "LAGraph failure (file %s, line %d): ", \
55 : __FILE__, __LINE__); \
56 : } \
57 : if (msg_len < LAGRAPH_MSG_LEN) { \
58 : msg_len += snprintf(msg + msg_len, LAGRAPH_MSG_LEN - msg_len, \
59 : __VA_ARGS__); \
60 : } \
61 : }
62 :
63 : #define ADD_INDEX_TO_ERROR_RULE(rule, i) \
64 : { \
65 : rule.len_indexes_str += snprintf( \
66 : rule.indexes_str + rule.len_indexes_str, \
67 : LAGRAPH_MSG_LEN - rule.len_indexes_str, \
68 : rule.count == 0 ? "%" PRId64 : ", %" PRId64, i); \
69 : rule.count++; \
70 : }
71 :
72 :
73 :
74 : // LAGraph_CFL_reachability: Context-Free Language Reachability Matrix-Based Algorithm
75 : //
76 : // This function determines the set of vertex pairs (u, v) in a graph (represented by
77 : // adjacency matrices) such that there is a path from u to v, where the edge labels form a
78 : // word from the language generated by the context-free grammar (represented by `rules`).
79 : //
80 : // Terminals and non-terminals are enumerated by integers starting from zero.
81 : // The start non-terminal is the non-terminal with index 0.
82 : //
83 : // Example:
84 : //
85 : // Graph:
86 : // ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐
87 : // │ 0 ├───► 1 ├───► 2 ├───► 3 ├───► 4 │
88 : // └───┘ a └─┬─┘ a └─▲─┘ b └───┘ b └───┘
89 : // │ │
90 : // │ ┌───┐ │
91 : // a└─► 5 ├─┘b
92 : // └───┘
93 : //
94 : // Grammar: S -> aSb | ab
95 : //
96 : // There are paths from node [1] to node [3] and from node [1] to node [2] that form the
97 : // word "ab" ([1]-a->[2]-b->[3] and [1]-a->[5]-b->[2]). The word "ab" is in the language
98 : // generated by our context-free grammar, so the pairs (1, 3) and (1, 2) will be included
99 : // in the result.
100 : //
101 : // Note: It doesn't matter how many paths exist from node [A] to node [B] that form a word
102 : // in the language. If at least one path exists, the pair ([A], [B]) will be included in
103 : // the result.
104 : //
105 : // In contrast, the path from node [1] to node [4] forms the word "abb"
106 : // ([1]-a->[2]-b->[3]-b->[4]) and the word "abbb" ([1]-a->[5]-b->[2]-b->[3]-b->[4]).
107 : // The words "aab" and "abbb" are not in the language, so the pair (1, 4) will not be
108 : // included in the result.
109 : //
110 : // With this graph and grammar, we obtain the following results:
111 : // (0, 4) - because there exists a path (0-1-2-3-4) that forms the word "aabb"
112 : // (1, 3) - because there exists a path (1-2-3) that forms "ab"
113 : // (1, 2) - because there exists a path (1-5-2) that forms the word "ab"
114 : // (0, 3) - because there exists a path (0-1-5-2-3) that forms the word "aabb"
115 :
116 17 : GrB_Info LAGraph_CFL_reachability
117 : (
118 : // Output
119 : GrB_Matrix *outputs, // Array of matrices containing results.
120 : // The size of the array must be equal to nonterms_count.
121 : //
122 : // outputs[k]: (i, j) = true if and only if there is a path
123 : // from node i to node j whose edge labels form a word
124 : // derivable from the non-terminal 'k' of the specified CFG.
125 : // Input
126 : const GrB_Matrix *adj_matrices, // Array of adjacency matrices representing the graph.
127 : // The length of this array is equal to the count of
128 : // terminals (terms_count).
129 : //
130 : // adj_matrices[t]: (i, j) == 1 if and only if there
131 : // is an edge between nodes i and j with the label of
132 : // the terminal corresponding to index 't' (where t is
133 : // in the range [0, terms_count - 1]).
134 : int64_t terms_count, // The total number of terminal symbols in the CFG.
135 : int64_t nonterms_count, // The total number of non-terminal symbols in the CFG.
136 : const LAGraph_rule_WCNF *rules, // The rules of the CFG.
137 : int64_t rules_count, // The total number of rules in the CFG.
138 : char *msg // Message string for error reporting.
139 : )
140 : {
141 :
142 : #if LAGRAPH_SUITESPARSE
143 : // Declare workspace and clear the msg string, if not NULL
144 : GrB_Matrix *T;
145 17 : bool *t_empty_flags = NULL ; // t_empty_flags[i] == true <=> T[i] is empty
146 17 : GrB_Matrix identity_matrix = NULL;
147 17 : uint64_t *nnzs = NULL;
148 17 : LG_CLEAR_MSG;
149 17 : size_t msg_len = 0; // For error formatting
150 17 : bool iso_flag = false;
151 17 : GrB_Index *indexes = NULL;
152 :
153 : // Arrays for processing rules
154 17 : size_t *eps_rules = NULL, eps_rules_count = 0; // [Variable -> eps]
155 17 : size_t *term_rules = NULL, term_rules_count = 0; // [Variable -> term]
156 17 : size_t *bin_rules = NULL, bin_rules_count = 0; // [Variable -> AB]
157 :
158 : GrB_Scalar true_scalar;
159 17 : GrB_Scalar_new(&true_scalar, GrB_BOOL);
160 17 : GrB_Scalar_setElement_BOOL(true_scalar, true);
161 :
162 17 : LG_TRY(LAGraph_Calloc((void **) &T, nonterms_count, sizeof(GrB_Matrix), msg));
163 17 : LG_TRY(LAGraph_Calloc((void **) &t_empty_flags, nonterms_count, sizeof(bool), msg)) ;
164 :
165 17 : LG_ASSERT_MSG(terms_count > 0, GrB_INVALID_VALUE,
166 : "The number of terminals must be greater than zero.");
167 17 : LG_ASSERT_MSG(nonterms_count > 0, GrB_INVALID_VALUE,
168 : "The number of non-terminals must be greater than zero.");
169 17 : LG_ASSERT_MSG(rules_count > 0, GrB_INVALID_VALUE,
170 : "The number of rules must be greater than zero.");
171 21 : LG_ASSERT_MSG(outputs != NULL, GrB_NULL_POINTER, "The outputs array cannot be null.");
172 20 : LG_ASSERT_MSG(rules != NULL, GrB_NULL_POINTER, "The rules array cannot be null.");
173 19 : LG_ASSERT_MSG(adj_matrices != NULL, GrB_NULL_POINTER,
174 : "The adjacency matrices array cannot be null.");
175 :
176 : // Find null adjacency matrices
177 14 : bool found_null = false;
178 40 : for (int64_t i = 0; i < terms_count; i++) {
179 26 : if (adj_matrices[i] != NULL)
180 24 : continue;
181 :
182 2 : if (!found_null) {
183 1 : ADD_TO_MSG("Adjacency matrices with these indexes are null: ");
184 1 : ADD_TO_MSG("%" PRId64, i);
185 : } else {
186 1 : ADD_TO_MSG("%" PRId64, i);
187 : }
188 :
189 2 : found_null = true;
190 : }
191 :
192 14 : if (found_null) {
193 5 : LG_FREE_ALL;
194 1 : return GrB_NULL_POINTER;
195 : }
196 :
197 : GrB_Index n;
198 13 : GRB_TRY(GrB_Matrix_ncols(&n, adj_matrices[0]));
199 :
200 : // Create nonterms matrices
201 80 : for (int64_t i = 0; i < nonterms_count; i++) {
202 67 : GRB_TRY(GrB_Matrix_new(&T[i], GrB_BOOL, n, n));
203 67 : t_empty_flags[i] = true;
204 : }
205 :
206 13 : LG_TRY(LAGraph_Calloc((void **) &eps_rules, rules_count, sizeof(size_t), msg)) ;
207 13 : LG_TRY(LAGraph_Calloc((void **) &term_rules, rules_count, sizeof(size_t), msg)) ;
208 13 : LG_TRY(LAGraph_Calloc((void **) &bin_rules, rules_count, sizeof(size_t), msg)) ;
209 :
210 : // Process rules
211 : typedef struct {
212 : size_t count;
213 : size_t len_indexes_str;
214 : char indexes_str[LAGRAPH_MSG_LEN];
215 : } rule_error_s;
216 13 : rule_error_s term_err = {0};
217 13 : rule_error_s nonterm_err = {0};
218 13 : rule_error_s invalid_err = {0};
219 95 : for (int64_t i = 0; i < rules_count; i++) {
220 82 : LAGraph_rule_WCNF rule = rules[i];
221 :
222 82 : bool is_rule_eps = rule.prod_A == -1 && rule.prod_B == -1;
223 82 : bool is_rule_term = rule.prod_A != -1 && rule.prod_B == -1;
224 82 : bool is_rule_bin = rule.prod_A != -1 && rule.prod_B != -1;
225 :
226 : // Check that all rules are well-formed
227 82 : if (rule.nonterm < 0 || rule.nonterm >= nonterms_count) {
228 2 : ADD_INDEX_TO_ERROR_RULE(nonterm_err, i);
229 : }
230 :
231 : // [Variable -> eps]
232 82 : if (is_rule_eps) {
233 2 : eps_rules[eps_rules_count++] = i;
234 :
235 81 : continue;
236 : }
237 :
238 : // [Variable -> term]
239 80 : if (is_rule_term) {
240 37 : term_rules[term_rules_count++] = i;
241 :
242 37 : if (rule.prod_A < -1 || rule.prod_A >= terms_count) {
243 1 : ADD_INDEX_TO_ERROR_RULE(term_err, i);
244 : }
245 :
246 37 : continue;
247 : }
248 :
249 : // [Variable -> A B]
250 43 : if (is_rule_bin) {
251 42 : bin_rules[bin_rules_count++] = i;
252 :
253 42 : if (rule.prod_A < -1 || rule.prod_A >= nonterms_count || rule.prod_B < -1 ||
254 41 : rule.prod_B >= nonterms_count) {
255 1 : ADD_INDEX_TO_ERROR_RULE(nonterm_err, i);
256 : }
257 :
258 42 : continue;
259 : }
260 :
261 : // [Variable -> _ B]
262 1 : ADD_INDEX_TO_ERROR_RULE(invalid_err, i);
263 : }
264 :
265 13 : if (term_err.count + nonterm_err.count + invalid_err.count > 0) {
266 5 : ADD_TO_MSG("Count of invalid rules: %" PRId64 ".\n",
267 : (int64_t) (term_err.count + nonterm_err.count + invalid_err.count));
268 :
269 5 : if (nonterm_err.count > 0) {
270 3 : ADD_TO_MSG("Non-terminals must be in range [0, nonterms_count). ");
271 3 : ADD_TO_MSG("Indexes of invalid rules: %s\n", nonterm_err.indexes_str)
272 : }
273 5 : if (term_err.count > 0) {
274 1 : ADD_TO_MSG("Terminals must be in range [-1, nonterms_count). ");
275 1 : ADD_TO_MSG("Indexes of invalid rules: %s\n", term_err.indexes_str)
276 : }
277 5 : if (invalid_err.count > 0) {
278 1 : ADD_TO_MSG("[Variable -> _ B] type of rule is not acceptable. ");
279 1 : ADD_TO_MSG("Indexes of invalid rules: %.120s\n", invalid_err.indexes_str)
280 : }
281 :
282 25 : LG_FREE_ALL;
283 5 : return GrB_INVALID_VALUE;
284 : }
285 :
286 : // Rule [Variable -> term]
287 34 : for (int64_t i = 0; i < term_rules_count; i++) {
288 26 : LAGraph_rule_WCNF term_rule = rules[term_rules[i]];
289 26 : GrB_Index adj_matrix_nnz = 0;
290 26 : GRB_TRY(GrB_Matrix_nvals(&adj_matrix_nnz, adj_matrices[term_rule.prod_A]));
291 :
292 26 : if (adj_matrix_nnz == 0) {
293 1 : continue;
294 : }
295 :
296 25 : GxB_eWiseUnion(
297 : T[term_rule.nonterm], GrB_NULL, GrB_NULL, GxB_PAIR_BOOL,
298 : T[term_rule.nonterm], true_scalar, adj_matrices[term_rule.prod_A], true_scalar, GrB_NULL
299 : );
300 :
301 25 : t_empty_flags[term_rule.nonterm] = false;
302 :
303 : #ifdef DEBUG_CFL_REACHBILITY
304 : GxB_Matrix_iso(&iso_flag, T[term_rule.nonterm]);
305 : printf("[TERM] eWiseUnion: NONTERM: %d (ISO: %d)\n", term_rule.nonterm, iso_flag);
306 : #endif
307 : }
308 :
309 : GrB_Vector v_diag;
310 8 : GRB_TRY(GrB_Vector_new(&v_diag, GrB_BOOL, n));
311 8 : GRB_TRY(GrB_Vector_assign_BOOL(v_diag, GrB_NULL, GrB_NULL, true, GrB_ALL, n, NULL));
312 8 : GRB_TRY(GrB_Matrix_diag(&identity_matrix, v_diag, 0));
313 8 : GRB_TRY(GrB_free(&v_diag));
314 :
315 : // Rule [Variable -> eps]
316 10 : for (int64_t i = 0; i < eps_rules_count; i++) {
317 2 : LAGraph_rule_WCNF eps_rule = rules[eps_rules[i]];
318 :
319 2 : GxB_eWiseUnion (
320 : T[eps_rule.nonterm],GrB_NULL,GxB_PAIR_BOOL,GxB_PAIR_BOOL,
321 : T[eps_rule.nonterm],true_scalar,identity_matrix,true_scalar,GrB_NULL
322 : );
323 :
324 2 : t_empty_flags[eps_rule.nonterm] = false;
325 :
326 : #ifdef DEBUG_CFL_REACHBILITY
327 : GxB_Matrix_iso(&iso_flag, T[eps_rule.nonterm]);
328 : printf("[EPS] eWiseUnion: NONTERM: %d (ISO: %d)\n",
329 : eps_rule.nonterm, iso_flag);
330 : #endif
331 : }
332 :
333 : // Rule [Variable -> Variable1 Variable2]
334 8 : LG_TRY(LAGraph_Calloc((void **) &nnzs, nonterms_count, sizeof(uint64_t), msg));
335 8 : bool changed = true;
336 35 : while (changed) {
337 27 : changed = false;
338 145 : for (int64_t i = 0; i < bin_rules_count; i++) {
339 118 : LAGraph_rule_WCNF bin_rule = rules[bin_rules[i]];
340 :
341 : // If one of matrices is empty then their product will be empty
342 118 : if (t_empty_flags[bin_rule.prod_A] || t_empty_flags[bin_rule.prod_B]) {
343 16 : continue;
344 : }
345 :
346 102 : GrB_BinaryOp acc_op = t_empty_flags[bin_rule.nonterm] ? GrB_NULL : GxB_ANY_BOOL;
347 102 : GRB_TRY(GrB_mxm(T[bin_rule.nonterm], GrB_NULL, acc_op,
348 : GxB_ANY_PAIR_BOOL, T[bin_rule.prod_A], T[bin_rule.prod_B],
349 : GrB_NULL))
350 :
351 : GrB_Index new_nnz;
352 102 : GRB_TRY(GrB_Matrix_nvals(&new_nnz, T[bin_rule.nonterm]));
353 102 : if (new_nnz != 0) t_empty_flags[bin_rule.nonterm] = false;
354 :
355 102 : changed = changed || (nnzs[bin_rule.nonterm] != new_nnz);
356 102 : nnzs[bin_rule.nonterm] = new_nnz;
357 :
358 : #ifdef DEBUG_CFL_REACHBILITY
359 : GxB_Matrix_iso(&iso_flag, T[bin_rule.nonterm]);
360 : printf("[TERM1 TERM2] MULTIPLY, S: %d, A: %d, B: %d, "
361 : "I: %" PRId64 " (ISO: %d)\n",
362 : bin_rule.nonterm, bin_rule.prod_A, bin_rule.prod_B, i, iso_flag);
363 : #endif
364 :
365 : }
366 : }
367 :
368 : #ifdef DEBUG_CFL_REACHBILITY
369 : for (int64_t i = 0; i < nonterms_count; i++) {
370 : printf("MATRIX WITH INDEX %" PRId64 ":\n", i);
371 : GxB_print(T[i], GxB_SUMMARY);
372 : }
373 : #endif
374 :
375 55 : for (int64_t i = 0; i < nonterms_count; i++) {
376 47 : outputs[i] = T[i];
377 : }
378 :
379 8 : LG_FREE_WORK;
380 8 : return GrB_SUCCESS;
381 : #else
382 : return (GrB_NOT_IMPLEMENTED) ;
383 : #endif
384 : }
|