#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <thread>
#include <vector>
using namespace std;
/// Function object that sums one sub-range of a larger sequence.
///
/// @tparam Iterator  iterator type of the range to sum
/// @tparam T         type of the running total
template <typename Iterator, typename T>
struct accumulate_block
{
    /// Folds every element of [first, last) onto the current value of
    /// `result` and stores the total back into `result`.
    ///
    /// @param first   start of the sub-range
    /// @param last    one past the end of the sub-range
    /// @param result  in/out: used as the initial value, overwritten with the sum
    void operator()(Iterator first, Iterator last, T &result) const
    {
        // Explicitly qualified so the call does not depend on a
        // using-directive or on argument-dependent lookup (which finds
        // nothing for raw-pointer iterators).
        result = std::accumulate(first, last, result);
    }
};
/// Sums the range [first, last) onto `init`, splitting the work across threads.
///
/// The range is cut into `num_threads` blocks of equal size; the last block
/// additionally takes whatever remainder is left. Every block except the last
/// is summed on its own std::thread, the last block is summed on the calling
/// thread, and the per-block sums are finally folded onto `init` in block
/// order.
///
/// @param first  start of the range; the range is traversed more than once
///               (std::distance, then the per-block sums), so the iterator
///               must be multi-pass
/// @param last   one past the end of the range
/// @param init   value the per-block sums are added to
/// @return       `init` plus all elements of the range; `init` unchanged for
///               an empty range
///
/// @note Each per-block sum starts from a value-initialized `T`, so `T` must
///       be default constructible.
/// @note If starting a thread or summing the calling thread's block throws,
///       all threads started so far are joined before the exception leaves
///       this function. An exception escaping a worker thread itself still
///       ends the program via std::terminate (std::thread behaviour).
template <typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init)
{
    unsigned long const length =
        static_cast<unsigned long>(std::distance(first, last));
    // Nothing to sum: hand back the initial value untouched.
    if (length == 0) {
        return init;
    }

    // Do not start a thread for fewer than this many elements.
    unsigned long const min_per_thread = 25;
    // Upper bound on useful threads: ceil(length / min_per_thread).
    unsigned long const max_threads =
        (length + min_per_thread - 1) / min_per_thread;
    unsigned long const hardware_threads =
        std::thread::hardware_concurrency();
    // Never use more threads than the hardware reports. A report of 0 means
    // the value is unknown, in which case 2 is assumed.
    unsigned long const num_threads =
        std::min(hardware_threads != 0 ? hardware_threads : 2UL, max_threads);
    // Elements per block; the remainder ends up in the last block.
    unsigned long const block_size = length / num_threads;

    // Declared before the threads so it outlives every worker writing to it.
    std::vector<T> results(num_threads);
    std::vector<std::thread> threads;
    threads.reserve(num_threads - 1);

    // Joins every still-joinable worker on scope exit. Without it, an
    // exception thrown below would destroy joinable std::thread objects,
    // which calls std::terminate.
    struct thread_joiner
    {
        std::vector<std::thread> &workers;
        explicit thread_joiner(std::vector<std::thread> &w) : workers(w) {}
        ~thread_joiner()
        {
            for (std::thread &worker : workers) {
                if (worker.joinable()) {
                    worker.join();
                }
            }
        }
    };
    thread_joiner joiner(threads);

    // Sums one block onto the current value of its result slot.
    auto const sum_block = [](Iterator block_first, Iterator block_last,
                              T &result) {
        result = std::accumulate(block_first, block_last, result);
    };

    Iterator block_start = first;
    for (unsigned long i = 0; i < (num_threads - 1); ++i) {
        Iterator block_end = block_start;
        // Move block_end to one past the last element of the current block.
        std::advance(block_end, block_size);
        // std::ref: the worker must write into results[i], not into a copy.
        threads.emplace_back(sum_block, block_start, block_end,
                             std::ref(results[i]));
        block_start = block_end;
    }

    // The calling thread handles the final block (including the remainder).
    sum_block(block_start, last, results[num_threads - 1]);

    // Wait for all workers before reading their results.
    for (std::thread &worker : threads) {
        worker.join();
    }

    return std::accumulate(results.begin(), results.end(), init);
}
/// Demo entry point: sums a small fixed list of integers with
/// parallel_accumulate, starting from 0, and prints the total on its own line.
int main()
{
    const std::vector<int> numbers{1, 2, 3, 4, 5, 5, 6, 7, 8, 9};
    const int seed = 0;
    const int total =
        parallel_accumulate(numbers.cbegin(), numbers.cend(), seed);
    std::cout << total << std::endl;
    return 0;
}
// A multithreaded version of accumulate, implemented by wrapping std::accumulate.
// Original article: http://www.cnblogs.com/wuOverflow/p/4295566.html