Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGraph_RegularPathQuery.c: regular path query
3 : //------------------------------------------------------------------------------
4 : //
5 : // LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 :
8 : // Contributed by Georgiy Belyanin, Semyon Grigoriev, St. Petersburg State
9 : // University.
10 :
11 : //------------------------------------------------------------------------------
12 :
13 : // For an edge-labelled directed graph the algorithm computes the set of nodes
14 : // for which these conditions are held:
15 : // * The node is reachable by a path from one of the source nodes.
16 : // * The concatenation of the labels over this path's edges is a word from the
17 : // specified regular language.
18 : //
19 : // The regular constraints are specified by a non-deterministic finite
20 : // automaton (NFA) over a subset of the graph edge labels. The algorithm assumes
21 : // the labels are enumerated from 0 to some nl-1. The input graph and the NFA
22 : // are defined by adjacency matrix decomposition. They are represented by arrays
23 : // of graphs G and R both of length nl in which G[i]/R[i] represents the
24 : // adjacency matrix of the i-th label for the graph/NFA correspondingly.
25 : //
26 : // Example of adjacency matrix decomposition:
27 : //
28 : // Graph:
29 : // (0) --[a]-> (1)
30 : // | ^
31 : // [b] [a]--/
32 : // | --/
33 : // v /
34 : // (2) --[b]-> (3)
35 : //
36 : // Adjacency matrix decomposition of this graph consists of:
37 : // * Adjacency matrix for the label a:
38 : // 0 1 2 3
39 : // 0 | | t | | |
40 : // 1 | | | | |
41 : // 2 | | t | | |
42 : // 3 | | | | |
43 : // * Adjacency matrix for the label b:
44 : // 0 1 2 3
45 : // 0 | | | t | |
46 : // 1 | | | | |
47 : // 2 | | | | t |
48 : // 3 | | | | |
49 : //
50 : // The algorithm is based on the idea of considering the graph as
51 : // non-deterministic finite automaton having the specified set of stating nodes
52 : // as starting states and evaluating two modified breadth-first traversals of
53 : // the graph and the input NFA at the same time considering the matching labels.
54 : //
55 : // The intuition behind this process is similar to building a direct product
56 : // of the graph automaton and the input automaton. This construction results
57 : // with an intersection of two regular languages. The first one is defined by
58 : // the set of all words that can be obtained through edge label concatenation
59 : // of paths starting in one of the source nodes. And the second one is the set
60 : // of the paths accepted by the NFA thus matching the desired constraints.
61 : //
62 : // On algorithm step n the relation between the NFA edges and the graph nodes
63 : // is built. These conditions are held for the pairs in this relation:
64 : // * The node is reachable by a path of length n from one of the source nodes
65 : // in the graph.
66 : // * The state is reachable by a path of length n from one of the starting
67 : // states in the NFA.
68 : // * These paths have the same length and the same labels.
69 : // The algorithm accumulates these relations. Then it extracts the nodes that
70 : // are in relation with one of the final states.
71 : //
72 : // Full description is available at:
73 : // https://arxiv.org/abs/2412.10287
74 : //
75 : // Performance considerations: the best performance is shown when the algorithm
76 : // receives a minimal deterministic finite automaton as an input.
77 :
78 : #define LG_FREE_WORK \
79 : { \
80 : GrB_free (&frontier) ; \
81 : GrB_free (&next_frontier) ; \
82 : GrB_free (&symbol_frontier) ; \
83 : GrB_free (&visited) ; \
84 : GrB_free (&final_reducer) ; \
85 : LAGraph_Free ((void **) &A, NULL) ; \
86 : LAGraph_Free ((void **) &B, NULL) ; \
87 : LAGraph_Free ((void **) &BT, NULL) ; \
88 : }
89 :
90 : #define LG_FREE_ALL \
91 : { \
92 : LG_FREE_WORK ; \
93 : GrB_free (reachable) ; \
94 : }
95 :
96 : #include "LG_internal.h"
97 : #include "LAGraphX.h"
98 :
99 8 : int LAGraph_RegularPathQuery
100 : (
101 : // output:
102 : GrB_Vector *reachable, // reachable(i) = true if node i is reachable
103 : // from one of the starting nodes by a path
104 : // satisfying regular constraints
105 : // input:
106 : LAGraph_Graph *R, // input non-deterministic finite automaton
107 : // adjacency matrix decomposition
108 : size_t nl, // total label count, # of matrices graph and
109 : // NFA adjacency matrix decomposition
110 : const GrB_Index *QS, // starting states in NFA
111 : size_t nqs, // number of starting states in NFA
112 : const GrB_Index *QF, // final states in NFA
113 : size_t nqf, // number of final states in NFA
114 : LAGraph_Graph *G, // input graph adjacency matrix decomposition
115 : const GrB_Index *S, // source vertices to start searching paths
116 : size_t ns, // number of source vertices
117 : char *msg // LAGraph output message
118 : )
119 : {
120 :
121 : //--------------------------------------------------------------------------
122 : // check inputs
123 : //--------------------------------------------------------------------------
124 :
125 8 : LG_CLEAR_MSG ;
126 :
127 8 : GrB_Matrix frontier = NULL ; // traversal frontier representing
128 : // correspondence between NFA states
129 : // and graph vertices
130 8 : GrB_Matrix symbol_frontier = NULL ; // part of the new frontier for the
131 : // specific label
132 8 : GrB_Matrix next_frontier = NULL ; // frontier value on the next
133 : // traversal step
134 8 : GrB_Matrix visited = NULL ; // visited pairs (state, vertex)
135 8 : GrB_Vector final_reducer = NULL ; // auxiliary vector for reducing the
136 : // visited matrix to an answer
137 :
138 8 : GrB_Index ng = 0 ; // # nodes in the graph
139 8 : GrB_Index nr = 0 ; // # states in the NFA
140 8 : GrB_Index states = ns ; // # pairs in the current
141 : // correspondence between the graph and
142 : // the NFA
143 :
144 8 : GrB_Index rows = 0 ; // utility matrix row count
145 8 : GrB_Index cols = 0 ; // utility matrix column count
146 8 : GrB_Index vals = 0 ; // utility matrix value count
147 :
148 8 : GrB_Matrix *A = NULL ;
149 8 : GrB_Matrix *B = NULL ;
150 8 : GrB_Matrix *BT = NULL ;
151 :
152 8 : LG_ASSERT (reachable != NULL, GrB_NULL_POINTER) ;
153 8 : LG_ASSERT (G != NULL, GrB_NULL_POINTER) ;
154 8 : LG_ASSERT (R != NULL, GrB_NULL_POINTER) ;
155 8 : LG_ASSERT (S != NULL, GrB_NULL_POINTER) ;
156 :
157 8 : (*reachable) = NULL ;
158 :
159 32 : for (size_t i = 0 ; i < nl ; i++)
160 : {
161 24 : if (G[i] == NULL) continue ;
162 16 : LG_TRY (LAGraph_CheckGraph (G[i], msg)) ;
163 : }
164 :
165 32 : for (size_t i = 0 ; i < nl ; i++)
166 : {
167 24 : if (R[i] == NULL) continue ;
168 12 : LG_TRY (LAGraph_CheckGraph (R[i], msg)) ;
169 : }
170 :
171 8 : LG_TRY (LAGraph_Malloc ((void **) &A, nl, sizeof (GrB_Matrix), msg)) ;
172 :
173 32 : for (size_t i = 0 ; i < nl ; i++)
174 : {
175 24 : if (G[i] == NULL)
176 : {
177 8 : A[i] = NULL ;
178 8 : continue ;
179 : }
180 :
181 16 : A[i] = G[i]->A ;
182 : }
183 :
184 8 : LG_TRY (LAGraph_Malloc ((void **) &B, nl, sizeof (GrB_Matrix), msg)) ;
185 8 : LG_TRY (LAGraph_Malloc ((void **) &BT, nl, sizeof (GrB_Matrix), msg)) ;
186 :
187 32 : for (size_t i = 0 ; i < nl ; i++)
188 : {
189 24 : BT[i] = NULL ;
190 :
191 24 : if (R[i] == NULL)
192 : {
193 12 : B[i] = NULL ;
194 12 : continue ;
195 : }
196 :
197 12 : B[i] = R[i]->A ;
198 12 : if (R[i]->is_symmetric_structure == LAGraph_TRUE)
199 : {
200 2 : BT[i] = B[i] ;
201 : }
202 : else
203 : {
204 : // BT[i] could be NULL and the matrix will be transposed by a
205 : // descriptor
206 10 : BT[i] = R[i]->AT ;
207 : }
208 : }
209 :
210 8 : for (size_t i = 0 ; i < nl ; i++)
211 : {
212 8 : if (A[i] == NULL) continue ;
213 :
214 8 : GRB_TRY (GrB_Matrix_nrows (&ng, A[i])) ;
215 8 : break ;
216 : }
217 :
218 10 : for (size_t i = 0 ; i < nl ; i++)
219 : {
220 10 : if (B[i] == NULL) continue ;
221 :
222 8 : GRB_TRY (GrB_Matrix_nrows (&nr, B[i])) ;
223 8 : break ;
224 : }
225 :
226 : // Check all the matrices in graph adjacency matrix decomposition are
227 : // square and of the same dimensions
228 32 : for (size_t i = 0 ; i < nl ; i++)
229 : {
230 24 : if (A[i] == NULL) continue ;
231 :
232 16 : GRB_TRY (GrB_Matrix_nrows (&rows, A[i])) ;
233 16 : GRB_TRY (GrB_Matrix_ncols (&cols, A[i])) ;
234 :
235 16 : LG_ASSERT_MSG (rows == ng && cols == ng, LAGRAPH_NOT_CACHED,
236 : "all the matrices in the graph adjacency matrix decomposition "
237 : "should have the same dimensions and be square") ;
238 : }
239 :
240 : // Check all the matrices in NFA adjacency matrix decomposition are
241 : // square and of the same dimensions
242 32 : for (size_t i = 0 ; i < nl ; i++)
243 : {
244 24 : if (B[i] == NULL) continue ;
245 :
246 12 : GrB_Index rows = 0 ;
247 12 : GrB_Index cols = 0 ;
248 :
249 12 : GRB_TRY (GrB_Matrix_nrows (&rows, B[i])) ;
250 12 : GRB_TRY (GrB_Matrix_ncols (&cols, B[i])) ;
251 :
252 12 : LG_ASSERT_MSG (rows == nr && cols == nr, LAGRAPH_NOT_CACHED,
253 : "all the matrices in the NFA adjacency matrix decomposition "
254 : "should have the same dimensions and be square") ;
255 : }
256 :
257 : // Check source nodes in the graph
258 20 : for (size_t i = 0 ; i < ns ; i++)
259 : {
260 12 : GrB_Index s = S [i] ;
261 12 : LG_ASSERT_MSG (s < ng, GrB_INVALID_INDEX, "invalid graph source node") ;
262 : }
263 :
264 : // Check starting states of the NFA
265 18 : for (size_t i = 0 ; i < nqs ; i++)
266 : {
267 10 : GrB_Index qs = QS [i] ;
268 10 : LG_ASSERT_MSG (qs < nr, GrB_INVALID_INDEX,
269 : "invalid NFA starting state") ;
270 : }
271 :
272 : // Check final states of the NFA
273 16 : for (size_t i = 0 ; i < nqf ; i++)
274 : {
275 8 : GrB_Index qf = QF [i] ;
276 8 : LG_ASSERT_MSG (qf < nr, GrB_INVALID_INDEX, "invalid NFA final state") ;
277 : }
278 :
279 : // -------------------------------------------------------------------------
280 : // initialization
281 : // -------------------------------------------------------------------------
282 :
283 8 : GRB_TRY (GrB_Vector_new (reachable, GrB_BOOL, ng)) ;
284 :
285 8 : GRB_TRY (GrB_Vector_new (&final_reducer, GrB_BOOL, nr)) ;
286 :
287 : // Initialize matrix for reducing the result
288 8 : GrB_assign (final_reducer, NULL, NULL, true, QF, nqf, NULL) ;
289 :
290 8 : GRB_TRY (GrB_Matrix_new (&next_frontier, GrB_BOOL, nr, ng)) ;
291 8 : GRB_TRY (GrB_Matrix_new (&visited, GrB_BOOL, nr, ng)) ;
292 :
293 : // Initialize frontier with the source nodes
294 8 : GrB_assign (next_frontier, NULL, NULL, true, QS, nqs, S, ns, NULL) ;
295 8 : GrB_assign (visited, NULL, NULL, true, QS, nqs, S, ns, NULL) ;
296 :
297 : // Initialize a few utility matrices
298 8 : GRB_TRY (GrB_Matrix_new (&frontier, GrB_BOOL, nr, ng)) ;
299 8 : GRB_TRY (GrB_Matrix_new (&symbol_frontier, GrB_BOOL, nr, ng)) ;
300 :
301 : // Main loop
302 32 : while (states != 0)
303 : {
304 24 : GrB_Matrix old_frontier = frontier ;
305 24 : frontier = next_frontier ;
306 24 : next_frontier = old_frontier ;
307 :
308 24 : GRB_TRY (GrB_Matrix_clear(next_frontier)) ;
309 :
310 : // Obtain a new relation between the NFA states and the graph nodes
311 96 : for (size_t i = 0 ; i < nl ; i++)
312 : {
313 72 : if (A[i] == NULL || B[i] == NULL) continue ;
314 :
315 : // Traverse the NFA
316 : // Try to use a provided transposed matrix or use the descriptor
317 34 : if (BT[i] != NULL)
318 : {
319 17 : GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL,
320 : GrB_LOR_LAND_SEMIRING_BOOL, BT[i], frontier, GrB_DESC_R)) ;
321 : }
322 : else
323 : {
324 17 : GRB_TRY (GrB_mxm (symbol_frontier, GrB_NULL, GrB_NULL,
325 : GrB_LOR_LAND_SEMIRING_BOOL, B[i], frontier, GrB_DESC_RT0)) ;
326 : }
327 :
328 : // Traverse the graph
329 34 : GRB_TRY (GrB_mxm (next_frontier, visited, GrB_LOR,
330 : GrB_LOR_LAND_SEMIRING_BOOL, symbol_frontier, A[i], GrB_DESC_SC)) ;
331 : }
332 :
333 : // Accumulate the new state <-> node correspondence
334 24 : GRB_TRY (GrB_assign (visited, visited, GrB_NULL, next_frontier,
335 : GrB_ALL, nr, GrB_ALL, ng, GrB_DESC_SC)) ;
336 :
337 24 : GRB_TRY (GrB_Matrix_nvals (&states, next_frontier)) ;
338 : }
339 :
340 : // Extract the nodes matching the final NFA states
341 8 : GRB_TRY (GrB_vxm (*reachable, GrB_NULL, GrB_NULL,
342 : GrB_LOR_LAND_SEMIRING_BOOL, final_reducer, visited, GrB_NULL)) ;
343 :
344 8 : LG_FREE_WORK ;
345 8 : return (GrB_SUCCESS) ;
346 : }
|