-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathparallel.h
77 lines (74 loc) · 2.42 KB
/
parallel.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#pragma once
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <thread>
#include <utility>
#include <vector>
/// @brief Applies `func` to every element of [first, last) on worker threads and
///        gathers the per-thread output into `result`.
///
/// The range is split into contiguous chunks, one per worker thread. Each worker
/// calls `func(element, chunk_output)` for the elements of its chunk, where
/// `chunk_output` is a default-constructed `T` owned by that worker alone. After
/// all workers have been joined, the chunk outputs are appended to `result` in
/// chunk order, so if `func` appends in element order the final order matches a
/// sequential pass.
///
/// @param first  Start of the range. `Iter` must be multi-pass (forward iterator
///               or better): the range is walked once to partition it and again
///               to process it.
/// @param last   End of the range.
/// @param result Container that receives the collected output (appended at the
///               end). `T` must be default-constructible and provide `reserve`,
///               `size`, `begin`, `end` and range `insert`.
/// @param func   Callable invoked as `func(element, T&)`. It is called
///               concurrently from several threads, so anything it touches besides
///               its two arguments must be thread-safe. Because it runs on a
///               `std::thread`, an exception escaping it ends in `std::terminate`.
template <typename F, typename Iter, typename T>
void parallel_for_each(const Iter first, const Iter last, T &result, const F &func)
{
    using Diff = typename std::iterator_traits<Iter>::difference_type;

    const std::size_t total = static_cast<std::size_t>(std::distance(first, last));
    if (total == 0)
    {
        return; // nothing to process: do not start any thread
    }

    // hardware_concurrency() may report 0 when the value is unknown; also never
    // start more workers than there are elements to hand out.
    const std::size_t hardware = std::max<std::size_t>(1, std::thread::hardware_concurrency());
    const std::size_t nthreads = std::min(hardware, total);

    // std::ref keeps the original invocation semantics (std::invoke-style call).
    auto f = std::ref(func);

    // Chunk boundaries: the first `remainder` chunks get one extra element.
    const std::size_t base = total / nthreads;
    const std::size_t remainder = total % nthreads;
    std::vector<Iter> bounds;
    bounds.reserve(nthreads + 1);
    bounds.push_back(first);
    Iter cursor = first;
    for (std::size_t i = 0; i + 1 < nthreads; ++i)
    {
        std::advance(cursor, static_cast<Diff>(i < remainder ? base + 1 : base));
        bounds.push_back(cursor);
    }
    bounds.push_back(last);

    // One private output container per worker, so no locking is needed.
    std::vector<T> partial(nthreads);
    for (auto &chunk : partial)
    {
        chunk.reserve(base + 1);
    }
    // Sized for one output per input, on top of what `result` already holds.
    result.reserve(result.size() + total);

    {
        std::vector<std::thread> workers;
        workers.reserve(nthreads);

        // Joins every started worker when the scope is left, including when a
        // later std::thread constructor throws; destroying a joinable
        // std::thread would otherwise call std::terminate.
        struct JoinOnExit
        {
            std::vector<std::thread> &pool;
            ~JoinOnExit()
            {
                for (auto &worker : pool)
                {
                    if (worker.joinable())
                    {
                        worker.join();
                    }
                }
            }
        } joiner{workers};

        for (std::size_t t = 0; t < nthreads; ++t)
        {
            workers.emplace_back([&, t]
                                 {
                                     T &out = partial[t];
                                     // auto&& also accepts proxy references (e.g. std::vector<bool>).
                                     std::for_each(bounds[t], bounds[t + 1], [&](auto &&item)
                                                   { f(std::forward<decltype(item)>(item), out); });
                                 });
        }
    } // all workers are joined here

    // Append the chunk outputs in chunk order, moving the elements out.
    for (auto &chunk : partial)
    {
        result.insert(result.end(),
                      std::make_move_iterator(chunk.begin()),
                      std::make_move_iterator(chunk.end()));
    }
}
/// @brief Applies `func` to every element of [first, last) on worker threads.
///
/// The range is split into contiguous chunks, one per worker thread, and each
/// worker calls `func(element)` for the elements of its chunk. The call returns
/// once every worker has been joined.
///
/// @param first Start of the range. `Iter` must be multi-pass (forward iterator
///              or better): the range is walked once to partition it and again
///              to process it.
/// @param last  End of the range.
/// @param func  Callable invoked as `func(element)`. It is called concurrently
///              from several threads, so anything it touches besides its argument
///              must be thread-safe. Because it runs on a `std::thread`, an
///              exception escaping it ends in `std::terminate`.
template <typename F, typename Iter>
void parallel_for_each(const Iter first, const Iter last, const F &func)
{
    using Diff = typename std::iterator_traits<Iter>::difference_type;

    const std::size_t total = static_cast<std::size_t>(std::distance(first, last));
    if (total == 0)
    {
        return; // nothing to process: do not start any thread
    }

    // hardware_concurrency() may report 0 when the value is unknown; also never
    // start more workers than there are elements to hand out.
    const std::size_t hardware = std::max<std::size_t>(1, std::thread::hardware_concurrency());
    const std::size_t nthreads = std::min(hardware, total);

    // std::ref keeps the original invocation semantics (std::invoke-style call).
    auto f = std::ref(func);

    // Chunk boundaries: the first `remainder` chunks get one extra element.
    const std::size_t base = total / nthreads;
    const std::size_t remainder = total % nthreads;
    std::vector<Iter> bounds;
    bounds.reserve(nthreads + 1);
    bounds.push_back(first);
    Iter cursor = first;
    for (std::size_t i = 0; i + 1 < nthreads; ++i)
    {
        std::advance(cursor, static_cast<Diff>(i < remainder ? base + 1 : base));
        bounds.push_back(cursor);
    }
    bounds.push_back(last);

    std::vector<std::thread> workers;
    workers.reserve(nthreads);

    // Joins every started worker when the function is left, including when a
    // later std::thread constructor throws; destroying a joinable std::thread
    // would otherwise call std::terminate.
    struct JoinOnExit
    {
        std::vector<std::thread> &pool;
        ~JoinOnExit()
        {
            for (auto &worker : pool)
            {
                if (worker.joinable())
                {
                    worker.join();
                }
            }
        }
    } joiner{workers};

    for (std::size_t t = 0; t < nthreads; ++t)
    {
        workers.emplace_back([&, t]
                             {
                                 // auto&& also accepts proxy references (e.g. std::vector<bool>).
                                 std::for_each(bounds[t], bounds[t + 1], [&](auto &&item)
                                               { f(std::forward<decltype(item)>(item)); });
                             });
    }
    // `joiner` joins all workers on return.
}