Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGr_PeerPressureClustering: Graph clustering using the peer pressure method
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Cameron Quilici, Texas A&M University
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // TODO: ready to consider for src
19 : // TODO: define the output vector c_f that defines the cluster assignment
20 :
21 : #define LG_FREE_WORK \
22 : { \
23 : GrB_free(&A); \
24 : GrB_free(&S); \
25 : GrB_free(&T); \
26 : GrB_free(&C); \
27 : GrB_free(&C_temp); \
28 : GrB_free(&CD); \
29 : GrB_free(&W); \
30 : GrB_free(&w_temp); \
31 : GrB_free(&out_degree); \
32 : GrB_free(&m); \
33 : GrB_free(&m_index); \
34 : GrB_free(&D); \
35 : GrB_free(&E); \
36 : GrB_free(&I); \
37 : GrB_free(&ones); \
38 : LAGraph_Free((void **)&m_index_values, NULL); \
39 : LAGraph_Free((void **)&CfI, NULL); \
40 : LAGraph_Free((void **)&CfJ, NULL); \
41 : }
42 :
43 : #define LG_FREE_ALL \
44 : { \
45 : LG_FREE_WORK; \
46 : GrB_free(c_f); \
47 : }
48 :
49 : #include "LG_internal.h"
50 : #include <LAGraphX.h>
51 :
52 15 : int LAGr_PeerPressureClustering(
53 : // output:
54 : GrB_Vector *c_f, // output cluster vector
55 : // input:
56 : bool normalize, // if true, normalize the input graph via out-degree
57 : bool make_undirected, // if true, make G undirected which generally leads to
58 : // a coarser partitioning
59 : double thresh, // Threshold for convergence (percent of vertices that
60 : // changed clusters)
61 : int max_iter, // Maximum number of iterations
62 : LAGraph_Graph G, // input graph
63 : char *msg)
64 : {
65 : #if LAGRAPH_SUITESPARSE
66 :
67 15 : GrB_Matrix A = NULL;
68 15 : GrB_Matrix S = NULL; // symmetrized matrix, if needed
69 15 : GrB_Matrix T = NULL; // Tally matrix
70 15 : GrB_Matrix C = NULL; // Cluster workspace matrix
71 15 : GrB_Matrix C_temp = NULL; // Subsequent iteration cluster matrix
72 15 : GrB_Matrix CD = NULL;
73 :
74 : // Workspaces for weights (normalization and scaling)
75 15 : GrB_Matrix W = NULL;
76 15 : GrB_Vector w_temp = NULL;
77 15 : GrB_Vector out_degree = NULL;
78 :
79 : // Objects used for the argmax functionality
80 15 : GrB_Vector m = NULL;
81 15 : GrB_Vector m_index = NULL;
82 15 : GrB_Matrix D = NULL;
83 15 : GrB_Matrix E = NULL;
84 :
85 : // Identity matrix
86 15 : GrB_Matrix I = NULL;
87 15 : GrB_Vector ones = NULL;
88 :
89 15 : GrB_Index *m_index_values = NULL;
90 15 : GrB_Index *CfI = NULL, *CfJ = NULL;
91 :
92 : //--------------------------------------------------------------------------
93 : // check inputs
94 : //--------------------------------------------------------------------------
95 :
96 15 : LG_CLEAR_MSG;
97 :
98 15 : LG_ASSERT(c_f != NULL, GrB_NULL_POINTER);
99 14 : (*c_f) = NULL;
100 14 : LG_TRY(LAGraph_CheckGraph(G, msg));
101 :
102 : GrB_Index n;
103 14 : GRB_TRY(GrB_Matrix_nrows(&n, G->A));
104 :
105 : GrB_Matrix A2;
106 14 : if (make_undirected && (G->kind == LAGraph_ADJACENCY_DIRECTED ||
107 1 : G->is_symmetric_structure == LAGraph_FALSE))
108 : {
109 : // A and A' differ so set A = A + A'
110 8 : LG_ASSERT_MSG(G->AT != NULL, LAGRAPH_NOT_CACHED, "G->AT is required");
111 6 : GRB_TRY(GrB_Matrix_new(&S, GrB_FP64, n, n));
112 6 : GRB_TRY(GrB_eWiseAdd(S, NULL, NULL, GrB_ONEB_FP64, G->A, G->AT, NULL));
113 6 : A2 = S;
114 : }
115 : else
116 : {
117 6 : A2 = G->A;
118 : }
119 :
120 : // If the threshold is negative, set it to 0
121 12 : thresh = fmax(thresh, 0);
122 :
123 : // All types of input matrices get cast to type FP64 for this algorithm
124 12 : GRB_TRY(GrB_Matrix_new(&A, GrB_FP64, n, n));
125 12 : GRB_TRY(GrB_apply(A, NULL, NULL, GrB_IDENTITY_FP64, A2, NULL));
126 :
127 : //--------------------------------------------------------------------------
128 : // initializations
129 : //--------------------------------------------------------------------------
130 :
131 12 : GRB_TRY(GrB_Matrix_new(&T, GrB_FP64, n, n));
132 12 : GRB_TRY(GrB_Matrix_new(&CD, GrB_BOOL, n, n));
133 12 : GRB_TRY(GrB_Matrix_new(&E, GrB_BOOL, n, n));
134 12 : GRB_TRY(GrB_Vector_new(&m, GrB_FP64, n));
135 12 : GRB_TRY(GrB_Vector_new(&m_index, GrB_INT64, n));
136 12 : GRB_TRY(GrB_Vector_new(&out_degree, GrB_INT64, n));
137 12 : GRB_TRY(GrB_Vector_new(&ones, GrB_FP64, n));
138 :
139 12 : GRB_TRY(GrB_assign(ones, NULL, NULL, 1, GrB_ALL, n, NULL));
140 :
141 : // Identity matrix of all 1 (cast throughout to float, bool, int)
142 12 : GRB_TRY(GrB_Matrix_diag(&I, ones, 0));
143 :
144 : // Ensure that all vertices have self-edges
145 12 : GRB_TRY(GrB_eWiseAdd(A, NULL, NULL, GrB_ONEB_FP64, A, I, NULL));
146 :
147 : //--------------------------------------------------------------------------
148 : // assuring vertices have equal votes by normalizing weights via out-degrees
149 : //--------------------------------------------------------------------------
150 :
151 12 : if (normalize)
152 : {
153 6 : GRB_TRY(
154 : GrB_reduce(out_degree, NULL, NULL, GrB_PLUS_MONOID_INT64, A, NULL));
155 6 : GRB_TRY(GrB_Vector_new(&w_temp, GrB_FP64, n));
156 6 : GRB_TRY(GrB_apply(w_temp, NULL, NULL, GrB_MINV_FP64, out_degree, NULL));
157 6 : GRB_TRY(GrB_Matrix_diag(&W, w_temp, 0));
158 6 : GrB_free(&w_temp);
159 6 : GRB_TRY(
160 : GrB_mxm(A, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, W, A, NULL));
161 : }
162 :
163 : // Initial cluster vector (each vertex is in its own cluster)
164 12 : GRB_TRY(GrB_Matrix_diag(&C, ones, 0));
165 :
166 12 : GrB_Index last_num_changed = n;
167 : GrB_Index num_changed;
168 :
169 12 : LG_TRY(LAGraph_Malloc((void **)&m_index_values, n, sizeof(GrB_Index), msg));
170 :
171 : //--------------------------------------------------------------------------
172 : // main algorithm logic
173 : //--------------------------------------------------------------------------
174 :
175 12 : GrB_Index iter = 0;
176 : while (true)
177 239 : {
178 : // Voting phase (T = A (plus,second) C)
179 : // T (i, j) = k <==> k votes for vertex j to be in cluster i
180 251 : GRB_TRY(GrB_mxm(T, NULL, NULL, GxB_PLUS_SECOND_FP64, C, A, NULL));
181 :
182 : // m = max (T (:, j))
183 251 : GRB_TRY(GrB_vxm(m, NULL, NULL, GrB_MAX_SECOND_SEMIRING_FP64, ones, T,
184 : NULL));
185 :
186 : //------------------------------------------------------------------------
187 : // argmax across columns of T (T. Davis SS User Guide p. 286)
188 : //------------------------------------------------------------------------
189 :
190 251 : if (D != NULL)
191 239 : GrB_free(&D);
192 251 : GRB_TRY(GrB_Matrix_diag(&D, m, 0));
193 251 : GRB_TRY(GrB_mxm(E, NULL, NULL, GxB_ANY_EQ_FP64, T, D, NULL));
194 : // E = G in the pseudocode
195 251 : GRB_TRY(GrB_select(E, NULL, NULL, GrB_VALUENE_BOOL, E, 0, NULL));
196 : // Ties broken by minimum row index
197 251 : GRB_TRY(
198 : GrB_vxm(m_index, NULL, NULL, GxB_MIN_SECONDI_INT64, ones, E, NULL));
199 : // m_index_values(i) = argmax(T(:, i))
200 251 : GRB_TRY(
201 : GrB_Vector_extractTuples_INT64(NULL, (int64_t *) m_index_values, &n, m_index));
202 251 : GRB_TRY(GrB_Matrix_new(&C_temp, GrB_BOOL, n, n));
203 251 : GRB_TRY(GrB_extract(C_temp, NULL, NULL, I, GrB_ALL, n, m_index_values,
204 : n, NULL));
205 :
206 : // Calculate change in cluster matrix between iterations
207 251 : GRB_TRY(GrB_eWiseMult(CD, NULL, NULL, GrB_ONEB_BOOL, C, C_temp, NULL));
208 251 : GRB_TRY(
209 : GrB_reduce(&num_changed, NULL, GrB_PLUS_MONOID_INT64, CD, NULL));
210 251 : num_changed = n - num_changed;
211 251 : double percent_updated = num_changed * 1.0 / n;
212 :
213 : // Terminate when cluster matrix reaches a steady-state
214 251 : if (percent_updated <= thresh || iter > max_iter)
215 : {
216 : // Convert cluster matrix to cluster vector
217 12 : LG_TRY(LAGraph_Malloc((void **)&CfI, n, sizeof(GrB_Index), msg));
218 12 : LG_TRY(LAGraph_Malloc((void **)&CfJ, n, sizeof(GrB_Index), msg));
219 12 : GRB_TRY(GrB_Matrix_extractTuples_BOOL(CfI, CfJ, NULL, &n, C_temp));
220 :
221 12 : GRB_TRY(GrB_Vector_new(c_f, GrB_INT64, n));
222 12 : GRB_TRY(GrB_Vector_build(*c_f, CfJ, CfI, n, NULL));
223 :
224 12 : GrB_Vector_wait(*c_f, GrB_MATERIALIZE);
225 12 : break;
226 : }
227 :
228 239 : GrB_free(&C);
229 239 : C = C_temp;
230 239 : C_temp = NULL;
231 :
232 239 : iter++;
233 : }
234 :
235 12 : LG_FREE_WORK;
236 :
237 12 : return (GrB_SUCCESS);
238 : #else
239 : return (GrB_NOT_IMPLEMENTED) ;
240 : #endif
241 : }
|