Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGr_MarkovClustering: Graph clustering using the Markov cluster algorithm
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Cameron Quilici, Texas A&M University
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // TODO: ready to consider for src
19 :
20 : #define LG_FREE_WORK \
21 : { \
22 : GrB_free(&T_prev); \
23 : GrB_free(&T); \
24 : GrB_free(&w); \
25 : GrB_free(&D); \
26 : GrB_free(&CC); \
27 : GrB_free(&ones); \
28 : GrB_free(&MSE); \
29 : GrB_free(&argmax_v); \
30 : GrB_free(&argmax_p); \
31 : GrB_free(&zero_FP32); \
32 : }
33 :
34 : #define LG_FREE_ALL \
35 : { \
36 : LG_FREE_WORK; \
37 : GrB_free(c_f); \
38 : }
39 :
40 : #include "LG_internal.h"
41 : #include <LAGraphX.h>
42 :
43 19 : int LAGr_MarkovClustering(
44 : // output:
45 : GrB_Vector *c_f, // output cluster vector
46 : // input
47 : int e, // expansion coefficient
48 : int i, // inflation coefficient
49 : double pruning_threshold, // threshold for pruning values
50 : double convergence_threshold, // MSE threshold for convergence
51 : int max_iter, // maximum iterations
52 : LAGraph_Graph G, // input graph
53 : char *msg)
54 : {
55 : #if LAGRAPH_SUITESPARSE
56 19 : GrB_Matrix T_prev = NULL; // previous iteration transfer matrix
57 19 : GrB_Matrix T = NULL; // current iteration transfer matrix
58 19 : GrB_Matrix CC = NULL;
59 :
60 19 : GrB_Vector w = NULL; // weight vector to normalize T_* matrix
61 :
62 19 : GrB_Matrix D = NULL; // diagonal workspace matrix
63 19 : GrB_Vector ones = NULL; // vector of all 1
64 :
65 19 : GrB_Matrix MSE = NULL; // Mean squared error between T_prev and T (between
66 : // subsequent iterations)
67 :
68 19 : GrB_Vector argmax_v = NULL;
69 19 : GrB_Vector argmax_p = NULL;
70 :
71 19 : GrB_Scalar zero_FP32 = NULL;
72 :
73 : //--------------------------------------------------------------------------
74 : // check inputs
75 : //--------------------------------------------------------------------------
76 :
77 19 : LG_CLEAR_MSG;
78 :
79 19 : LG_ASSERT(c_f != NULL, GrB_NULL_POINTER);
80 18 : (*c_f) = NULL;
81 18 : LG_TRY(LAGraph_CheckGraph(G, msg));
82 :
83 : GrB_Index n, nrows, ncols;
84 17 : GRB_TRY(GrB_Matrix_nrows(&nrows, G->A));
85 17 : GRB_TRY(GrB_Matrix_nrows(&ncols, G->A));
86 :
87 17 : LG_ASSERT_MSG(nrows == ncols, LAGRAPH_INVALID_GRAPH,
88 : "Input matrix must be square");
89 17 : n = nrows;
90 17 : LG_ASSERT_MSG(e >= 2, GrB_INVALID_VALUE, "e must be >= 2");
91 :
92 : //--------------------------------------------------------------------------
93 : // initializations
94 : //--------------------------------------------------------------------------
95 :
96 16 : GRB_TRY(GrB_Matrix_new(&CC, GrB_BOOL, n, n));
97 16 : GRB_TRY(GrB_Matrix_new(&MSE, GrB_FP32, n, n));
98 16 : GRB_TRY(GrB_Vector_new(&w, GrB_FP32, n));
99 16 : GRB_TRY(GrB_Vector_new(&ones, GrB_FP32, n));
100 16 : GRB_TRY(GrB_Vector_new(&argmax_v, GrB_FP32, n));
101 16 : GRB_TRY(GrB_Vector_new(&argmax_p, GrB_INT64, n));
102 16 : GRB_TRY(GrB_Scalar_new(&zero_FP32, GrB_FP32));
103 16 : GRB_TRY(GrB_Scalar_setElement(zero_FP32, 0));
104 :
105 : // Create identity
106 16 : GRB_TRY(GrB_assign(ones, NULL, NULL, 1, GrB_ALL, n, NULL));
107 16 : GRB_TRY(GrB_Matrix_diag(&D, ones, 0));
108 :
109 : // All types of input matrices get cast to type FP32 for this algorithm
110 : // and ensure all vertices have a self-edge
111 16 : GRB_TRY(GrB_Matrix_new(&T, GrB_FP32, n, n));
112 16 : GRB_TRY(GrB_eWiseAdd(T, NULL, NULL, GrB_FIRST_FP32, G->A, D, NULL));
113 :
114 16 : GrB_Index iter = 0;
115 : GrB_Index nvals;
116 :
117 : while (true)
118 : {
119 : // Normalization step: Scale each column in T to add up to 1
120 : // w = 1 ./ sum(T(:j))
121 : // D = diag(w)
122 172 : GRB_TRY(GrB_reduce(w, NULL, NULL, GrB_PLUS_MONOID_FP32, T,
123 : GrB_DESC_T0));
124 172 : GRB_TRY(GrB_apply(w, NULL, NULL, GrB_MINV_FP32, w, NULL));
125 172 : GrB_free(&D);
126 172 : GRB_TRY(GrB_Matrix_diag(&D, w, 0));
127 172 : GRB_TRY(GrB_mxm(T, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32,
128 : T, D, NULL));
129 :
130 172 : if (iter > 0)
131 : {
132 : // Compute mean squared error between subsequent iterations
133 156 : double mse = 0;
134 156 : GRB_TRY(GxB_Matrix_eWiseUnion(MSE, NULL, NULL, GrB_MINUS_FP32, T,
135 : zero_FP32, T_prev, zero_FP32, NULL));
136 156 : GRB_TRY(GrB_apply(MSE, NULL, NULL, GxB_POW_FP32, MSE, (double) 2, NULL));
137 :
138 156 : GRB_TRY(GrB_reduce(&mse, NULL, GrB_PLUS_MONOID_FP32, MSE, NULL));
139 156 : GRB_TRY(GrB_Matrix_nvals(&nvals, MSE));
140 156 : mse /= nvals;
141 156 : if (iter > max_iter || mse < convergence_threshold)
142 : break;
143 : }
144 :
145 : // Set T_prev to the previous iteration
146 156 : GrB_free(&T_prev);
147 156 : T_prev = T ;
148 156 : T = NULL ;
149 :
150 : // compute the next T matrix
151 : // first expansion step for i = 0
152 : // T = T_prev^2
153 156 : GRB_TRY (GrB_Matrix_new (&T, GrB_FP32, n, n)) ;
154 156 : GRB_TRY(GrB_mxm(T, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32,
155 : T_prev, T_prev, NULL));
156 :
157 : // subsequent expansion steps
158 282 : for (int i = 1; i < e - 1; i++)
159 : {
160 : // T = T^2
161 126 : GRB_TRY(GrB_mxm(T, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP32,
162 : T, T, NULL));
163 : }
164 :
165 : // Inflation step
166 156 : GRB_TRY(GrB_Matrix_apply_BinaryOp2nd_FP32(
167 : T, NULL, NULL, GxB_POW_FP32, T, (double)i, NULL));
168 :
169 : // Prune values less than some small threshold
170 156 : GRB_TRY(GrB_select(T, NULL, NULL, GrB_VALUEGT_FP32, T,
171 : pruning_threshold, NULL));
172 :
173 156 : iter++;
174 : }
175 :
176 : // In order to interpret the final iteration of the transfer matrix
177 : // (T), let an attractor vertex be a vertex with at least one positive
178 : // value within their corresponding row. Each attractor is attracting the
179 : // vertices (columns) which have positive values within its row. To compute
180 : // the output cluster vector, compute the argmax across columns of the
181 : // steady-state T matrix. Then argmax_p(i) = k means vertex i is in
182 : // cluster k.
183 :
184 : // argmax_v = max (T) where argmax_v(j) = max (T (:,j))
185 16 : GRB_TRY(GrB_mxv(argmax_v, NULL, NULL, GrB_MAX_FIRST_SEMIRING_FP32, T,
186 : ones, GrB_DESC_T0));
187 16 : if (D != NULL) GrB_free(&D);
188 16 : GRB_TRY(GrB_Matrix_diag(&D, argmax_v, 0));
189 16 : GRB_TRY(GrB_mxm(CC, NULL, NULL, GxB_ANY_EQ_FP32, T, D, NULL));
190 16 : GRB_TRY(GrB_select(CC, NULL, NULL, GrB_VALUENE_BOOL, CC, 0, NULL));
191 16 : GRB_TRY(GrB_mxv(argmax_p, NULL, NULL, GxB_MIN_SECONDI_INT64, CC, ones,
192 : GrB_DESC_T0));
193 :
194 : // Sometimes (particularly, when the pruning threshold is high), some
195 : // columns in the steady-state T have no values, i.e., they are not
196 : // attracted to any vertex. In this case, fill in the missing values with
197 : // the index of the vertex (these vertices will be arbitrarily put in the
198 : // cluster of their index).
199 : GrB_Index p_nvals;
200 16 : GRB_TRY(GrB_Vector_nvals(&p_nvals, argmax_p));
201 16 : if (p_nvals < n)
202 : {
203 : // argmax_p <!argmax_p> = 0:n-1
204 2 : GRB_TRY (GrB_apply (argmax_p, argmax_p, NULL, GrB_ROWINDEX_INT64,
205 : ones, 0, GrB_DESC_SC)) ;
206 : }
207 :
208 16 : (*c_f) = argmax_p ; // Set output vector
209 16 : argmax_p = NULL ;
210 :
211 16 : LG_FREE_WORK;
212 :
213 16 : return (GrB_SUCCESS);
214 : #else
215 : return (GrB_NOT_IMPLEMENTED);
216 : #endif
217 : }
|