-
Notifications
You must be signed in to change notification settings - Fork 1
/
Sto_IHT.cpp
executable file
·210 lines (172 loc) · 7 KB
/
Sto_IHT.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#include <unistd.h> // sleep
#include <armadillo>
#include "Functions.h"
using namespace arma;
using namespace std;
/* Description: Parallel Stochastic Iterative Hard Thresholding (StoIHT) algorithm
to approximate the vector x from measurements u = A*x.
publication: Linear Convergence of Stochastic Iterative Greedy Algorithms with Sparse Constraints
https://arxiv.org/abs/1407.0088
*/
vec parallel_Sto_IHT(const mat A, const vec y, const int sparsity, const vec prob_vec,
const unsigned int max_iter, const double gamma, const double tol,
unsigned int &num_iters, const simulation_parameters simulation_params){
// signal parameters
const unsigned int sig_dim = A.n_cols;
const unsigned int meas_num = A.n_rows;
const unsigned int num_block = prob_vec.n_elem;
const unsigned int block_size = meas_num/num_block;
// initialization of variables that are shared among cores
vec x_hat(sig_dim,fill::zeros); // estimation of the signal
bool done = false; // flag to check the convergence criteria
unsigned int i = 0; // total number of iterations
// parallel section of the code starts here
#pragma omp parallel num_threads(simulation_params.num_cores)
{
// initializaiotn of variables that are local to each core
vec x_hat_local(sig_dim,fill::zeros);
unsigned int selected_block,first_ind_block,last_ind_block;
mat A_block; // submatrix of A
vec y_block,gradient,b;
uvec sorted_ind,est_supp;
// iterations to find the solutions
while(!done){
// master thread uses the tally vector to check the convergence criteria
if (omp_get_thread_num() == 0 ){
// check exit criteria
if (norm (y - A*x_hat) < tol || i >= max_iter){
done = true; // the flag 'done' is shared among the cores
}
if (omp_get_num_threads() > 1){
continue;
}
}
i++;
// randomize
selected_block = floor(randu()*num_block);
first_ind_block = block_size*selected_block;
last_ind_block = block_size*(selected_block+1)-1;
// Proxy
A_block = A.rows(first_ind_block,last_ind_block);
y_block = y.subvec(first_ind_block,last_ind_block);
#pragma omp critical
{x_hat_local = x_hat; } // read the global estimate while memory is locked
gradient = -2 * A_block.t() * (y_block - A_block*x_hat_local);
b = x_hat_local - gradient * gamma/(num_block*prob_vec(selected_block));
// Identify
sorted_ind = sort_index(abs(b),"descend");
est_supp = sorted_ind(span(0,sparsity - 1));
// Estimate
x_hat_local.zeros();
x_hat_local(est_supp) = b(est_supp);
#pragma omp critical
{x_hat = x_hat_local;} //write the global estimate while memory is locked
}
}
// parallel section of the code ends here
num_iters = i;
return x_hat;
}
/* Asynchronous StoIHT Iteration
Algorithm 2 in An Asynchronous Parallel Approach to Sparse Recovery
https://arxiv.org/abs/1701.03458*/
uvec Sto_IHT_async_iteration(vec &x_hat,const vec &tally,const mat &A,const vec &y,
const int sparsity, const vec prob_vec, const double gamma){
// randomize
const unsigned int num_block = prob_vec.n_elem;
const unsigned int block_size = y.n_elem / num_block;
const unsigned int selected_block = floor(randu()*num_block);
const unsigned int first_ind_block = block_size*selected_block;
const unsigned int last_ind_block = block_size*(selected_block+1)-1;
// Proxy
const mat A_block = A.rows(first_ind_block,last_ind_block);
const vec y_block = y.subvec(first_ind_block,last_ind_block);
const vec gradient = -2 * A_block.t() * (y_block - A_block*x_hat);
const vec b = x_hat - gradient * gamma/(num_block*prob_vec(selected_block));
x_hat.zeros(); // this variable is local to each core. NOTE: passed by reference
// Identify using b (local)
uvec sorted_ind = sort_index(abs(b),"descend");
const uvec est_supp_local = sorted_ind(span(0,sparsity - 1));
// Estimate using b (local)
x_hat(est_supp_local) = b(est_supp_local);
// Identify using tally (collective)
sorted_ind = sort_index(tally,"descend");
const uvec est_supp_collective = sorted_ind(span(0,sparsity - 1));
// Estimate using tally (collective)
x_hat(est_supp_collective) = b(est_supp_collective);
return est_supp_local;
}
void update_tally(vec &tally,const uvec est_supp_local,const uvec prev_est_supp,const unsigned int iter_local){
/* update the tally score according the rules in:
An Asynchronous Parallel Approach to Sparse Recovery
https://arxiv.org/abs/1701.03458*/
tally(est_supp_local) += iter_local;
if (iter_local >= 2){
tally(prev_est_supp) -= iter_local-1;
}
return;
}
/* Description: Parallel Stochastic Iterative Hard Thresholding (StoIHT) algorithm with tally score to approximate the vector x from measurements u = A*x.
Publication: An Asynchronous Parallel Approach to Sparse Recovery
https://arxiv.org/abs/1701.03458*/
vec tally_Sto_IHT(const mat &A, const vec &y, const int sparsity, const vec prob_vec,
const unsigned int max_iter, const double gamma,const double tol,
unsigned int &num_iters, const simulation_parameters simulation_params){
uvec slow_cores;
set_slow_cores(slow_cores, simulation_params);
const unsigned int sig_dim = A.n_cols;
// initialization of variables that are shared among cores
vec tally(sig_dim,fill::zeros); // vector of tally scores
vec x_hat_total(sig_dim,fill::zeros); // estimation of the signal
bool done = false; // flag to check the convergence criteria
unsigned int i = 0; // total number of iterations
// parallel section of the code starts here
#pragma omp parallel num_threads(simulation_params.num_cores)
{
//#pragma omp single // a single core executes the following line
// initializaiotn of variables that are local to each core
uvec prev_est_supp; // estimated support in previous iteration
vec x_hat_local(sig_dim,fill::zeros); // this is local to each core
unsigned int iter_local = 0; // number of iteration for this core
// iterations to find the solutions
while(!done){
// master thread uses the tally vector to check the convergence criteria
if (omp_get_thread_num() == 0){
const uvec sorted_ind = sort_index(abs(tally),"descend");
const uvec est_supp = sorted_ind(span(0,sparsity - 1));
const mat A_supp = A.cols(est_supp);
x_hat_local.zeros();
x_hat_local(est_supp) = solve(A_supp,y);
if (norm (y - A*x_hat_local) < tol || i >= max_iter){
x_hat_total = x_hat_local;
done = true;
}
}
//slow cores sleep for simulation_params.sleep_slow_cores microseconds
if (any( slow_cores == omp_get_thread_num()) ){
usleep(simulation_params.sleep_slow_cores);
}
i++;
iter_local++;
// update the local estimate of the support
uvec est_supp_local;
try{
est_supp_local = Sto_IHT_async_iteration(x_hat_local, tally, A, y,
sparsity, prob_vec, gamma);
}
catch(std::logic_error)
{
// algorithm did not converge
i = max_iter;
done = true;
}
// Update Tally
update_tally(tally,est_supp_local,prev_est_supp,iter_local);
prev_est_supp = est_supp_local;
}
}
// parallel section of the code ends here
//cout << endl;
num_iters = i;
return x_hat_total;
}