Line data Source code
1 : //------------------------------------------------------------------------------
2 : // LAGr_PageRank: pagerank (for use in production, not for the GAP benchmark)
3 : //------------------------------------------------------------------------------
4 :
5 : // LAGraph, (c) 2019-2022 by The LAGraph Contributors, All Rights Reserved.
6 : // SPDX-License-Identifier: BSD-2-Clause
7 : //
8 : // For additional details (including references to third party source code and
9 : // other files) see the LICENSE file or contact permission@sei.cmu.edu. See
10 : // Contributors.txt for a full list of contributors. Created, in part, with
11 : // funding and support from the U.S. Government (see Acknowledgments.txt file).
12 : // DM22-0790
13 :
14 : // Contributed by Timothy A. Davis and Mohsen Aznaveh, Texas A&M University
15 :
16 : //------------------------------------------------------------------------------
17 :
18 : // This is an Advanced algorithm (G->AT and G->out_degree are required).
19 :
20 : // PageRank (not for the GAP benchmark, but for production use). Do not use
21 : // this method for the GAP benchmark. Use LAGr_PageRankGAP instead.
22 :
23 : // Unlike LAGr_PageRankGAP, this algorithm handles sinks correctly (nodes with
24 : // no outgoing edges). The method in the GAP specification ignores sinks, and
25 : // thus sum(centrality) is not maintained as 1. This method handles sinks
26 : // properly, and thus keeps sum(centrality) equal to 1.
27 :
28 : // The G->AT and G->out_degree cached properties must be defined for this
29 : // method. If G is undirected or G->A is known to have a symmetric structure,
30 : // then G->A is used instead of G->AT, however. G->out_degree must be computed
31 : // so that it contains no explicit zeros; as done by LAGraph_Cached_OutDegree.
32 :
33 : #define LG_FREE_WORK \
34 : { \
35 : GrB_free (&d1) ; \
36 : GrB_free (&d) ; \
37 : GrB_free (&t) ; \
38 : GrB_free (&w) ; \
39 : GrB_free (&sink) ; \
40 : GrB_free (&rsink) ; \
41 : }
42 :
43 : #define LG_FREE_ALL \
44 : { \
45 : LG_FREE_WORK ; \
46 : GrB_free (&r) ; \
47 : }
48 :
49 : #include "LG_internal.h"
50 :
51 4 : int LAGr_PageRank
52 : (
53 : // output:
54 : GrB_Vector *centrality, // centrality(i): pagerank of node i
55 : int *iters, // number of iterations taken
56 : // input:
57 : const LAGraph_Graph G, // input graph
58 : float damping, // damping factor (typically 0.85)
59 : float tol, // stopping tolerance (typically 1e-4) ;
60 : int itermax, // maximum number of iterations (typically 100)
61 : char *msg
62 : )
63 : {
64 :
65 : //--------------------------------------------------------------------------
66 : // check inputs
67 : //--------------------------------------------------------------------------
68 :
69 4 : LG_CLEAR_MSG ;
70 4 : GrB_Vector r = NULL, d = NULL, t = NULL, w = NULL, d1 = NULL ;
71 4 : GrB_Vector sink = NULL, rsink = NULL ;
72 4 : LG_ASSERT (centrality != NULL && iters != NULL, GrB_NULL_POINTER) ;
73 4 : LG_TRY (LAGraph_CheckGraph (G, msg)) ;
74 : GrB_Matrix AT ;
75 4 : if (G->kind == LAGraph_ADJACENCY_UNDIRECTED ||
76 2 : G->is_symmetric_structure == LAGraph_TRUE)
77 : {
78 : // A and A' have the same structure
79 2 : AT = G->A ;
80 : }
81 : else
82 : {
83 : // A and A' differ
84 2 : AT = G->AT ;
85 2 : LG_ASSERT_MSG (AT != NULL, LAGRAPH_NOT_CACHED, "G->AT is required") ;
86 : }
87 4 : GrB_Vector d_out = G->out_degree ;
88 4 : LG_ASSERT_MSG (d_out != NULL,
89 : LAGRAPH_NOT_CACHED, "G->out_degree is required") ;
90 :
91 : //--------------------------------------------------------------------------
92 : // initializations
93 : //--------------------------------------------------------------------------
94 :
95 : GrB_Index n ;
96 4 : (*centrality) = NULL ;
97 4 : GRB_TRY (GrB_Matrix_nrows (&n, AT)) ;
98 :
99 4 : const float damping_over_n = damping / n ;
100 4 : const float scaled_damping = (1 - damping) / n ;
101 4 : float rdiff = 1 ; // first iteration is always done
102 :
103 : // r = 1 / n
104 4 : GRB_TRY (GrB_Vector_new (&t, GrB_FP32, n)) ;
105 4 : GRB_TRY (GrB_Vector_new (&r, GrB_FP32, n)) ;
106 4 : GRB_TRY (GrB_Vector_new (&w, GrB_FP32, n)) ;
107 4 : GRB_TRY (GrB_assign (r, NULL, NULL, (float) (1.0 / n), GrB_ALL, n, NULL)) ;
108 :
109 : // find all sinks, where sink(i) = true if node i has d_out(i)=0, or with
110 : // d_out(i) not present. LAGraph_Cached_OutDegree computes d_out =
111 : // G->out_degree so that it has no explicit zeros, so a structural mask can
112 : // be used here.
113 : GrB_Index nsinks, nvals ;
114 4 : GRB_TRY (GrB_Vector_nvals (&nvals, d_out)) ;
115 4 : nsinks = n - nvals ;
116 4 : if (nsinks > 0)
117 : {
118 : // sink<!struct(d_out)> = true
119 1 : GRB_TRY (GrB_Vector_new (&sink, GrB_BOOL, n)) ;
120 1 : GRB_TRY (GrB_assign (sink, d_out, NULL, (bool) true, GrB_ALL, n,
121 : GrB_DESC_SC)) ;
122 1 : GRB_TRY (GrB_Vector_new (&rsink, GrB_FP32, n)) ;
123 : }
124 :
125 : // prescale with damping factor, so it isn't done each iteration
126 : // d = d_out / damping ;
127 4 : GRB_TRY (GrB_Vector_new (&d, GrB_FP32, n)) ;
128 4 : GRB_TRY (GrB_apply (d, NULL, NULL, GrB_DIV_FP32, d_out, damping, NULL)) ;
129 :
130 : // d1 = 1 / damping
131 4 : float dmin = 1.0 / damping ;
132 4 : GRB_TRY (GrB_Vector_new (&d1, GrB_FP32, n)) ;
133 4 : GRB_TRY (GrB_assign (d1, NULL, NULL, dmin, GrB_ALL, n, NULL)) ;
134 : // d = max (d1, d)
135 4 : GRB_TRY (GrB_eWiseAdd (d, NULL, NULL, GrB_MAX_FP32, d1, d, NULL)) ;
136 4 : GrB_free (&d1) ;
137 :
138 : //--------------------------------------------------------------------------
139 : // pagerank iterations
140 : //--------------------------------------------------------------------------
141 :
142 48 : for ((*iters) = 0 ; rdiff > tol ; (*iters)++)
143 : {
144 : // check for convergence
145 45 : LG_ASSERT_MSGF ((*iters) < itermax, LAGRAPH_CONVERGENCE_FAILURE,
146 : "pagerank failed to converge in %d iterations", itermax) ;
147 : // determine teleport and handle any sinks
148 44 : float teleport = scaled_damping ; // teleport = (1 - damping) / n
149 44 : if (nsinks > 0)
150 : {
151 : // handle the sinks: teleport += (damping/n) * sum (r (sink))
152 : // rsink<struct(sink)> = r
153 12 : GRB_TRY (GrB_Vector_clear (rsink)) ;
154 12 : GRB_TRY (GrB_assign (rsink, sink, NULL, r, GrB_ALL, n, GrB_DESC_S));
155 : // sum_rsink = sum (rsink)
156 12 : float sum_rsink = 0 ;
157 12 : GRB_TRY (GrB_reduce (&sum_rsink, NULL, GrB_PLUS_MONOID_FP32,
158 : rsink, NULL)) ;
159 12 : teleport += damping_over_n * sum_rsink ;
160 : }
161 : // swap t and r ; now t is the old score
162 44 : GrB_Vector temp = t ; t = r ; r = temp ;
163 : // w = t ./ d
164 44 : GRB_TRY (GrB_eWiseMult (w, NULL, NULL, GrB_DIV_FP32, t, d, NULL)) ;
165 : // r = teleport
166 44 : GRB_TRY (GrB_assign (r, NULL, NULL, teleport, GrB_ALL, n, NULL)) ;
167 : // r += A'*w
168 44 : GRB_TRY (GrB_mxv (r, NULL, GrB_PLUS_FP32, LAGraph_plus_second_fp32,
169 : AT, w, NULL)) ;
170 : // t -= r
171 44 : GRB_TRY (GrB_assign (t, NULL, GrB_MINUS_FP32, r, GrB_ALL, n, NULL)) ;
172 : // t = abs (t)
173 44 : GRB_TRY (GrB_apply (t, NULL, NULL, GrB_ABS_FP32, t, NULL)) ;
174 : // rdiff = sum (t)
175 44 : GRB_TRY (GrB_reduce (&rdiff, NULL, GrB_PLUS_MONOID_FP32, t, NULL)) ;
176 : }
177 :
178 : //--------------------------------------------------------------------------
179 : // free workspace and return result
180 : //--------------------------------------------------------------------------
181 :
182 3 : (*centrality) = r ;
183 3 : LG_FREE_WORK ;
184 3 : return (GrB_SUCCESS) ;
185 : }
|