inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/par_loop.h * @brief Parallelization of embarrassingly parallel execution by * means of equal splitting. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Felix Putze. #ifndef _GLIBCXX_PARALLEL_PAR_LOOP_H #define _GLIBCXX_PARALLEL_PAR_LOOP_H 1 #include #include #include #include namespace __gnu_parallel { /** @brief Embarrassingly parallel algorithm for random access * iterators, using hand-crafted parallelization by equal splitting * the work. * * @param begin Begin iterator of element sequence. * @param end End iterator of element sequence. * @param o User-supplied functor (comparator, predicate, adding * functor, ...) * @param f Functor to "process" an element with op (depends on * desired functionality, e. g. for std::for_each(), ...). * @param r Functor to "add" a single result to the already * processed elements (depends on functionality). * @param base Base value for reduction. * @param output Pointer to position where final result is written to * @param bound Maximum number of elements processed (e. g. for * std::count_n()). * @return User-supplied functor (that may contain a part of the result). */ template Op for_each_template_random_access_ed(RandomAccessIterator begin, RandomAccessIterator end, Op o, Fu& f, Red r, Result base, Result& output, typename std::iterator_traits :: difference_type bound) { typedef std::iterator_traits traits_type; typedef typename traits_type::difference_type difference_type; const difference_type length = end - begin; Result *thread_results; bool* constructed; thread_index_t num_threads = __gnu_parallel::min(get_max_threads(), length); # pragma omp parallel num_threads(num_threads) { # pragma omp single { num_threads = omp_get_num_threads(); thread_results = static_cast( ::operator new(num_threads * sizeof(Result))); constructed = new bool[num_threads]; } thread_index_t iam = omp_get_thread_num(); // Neutral element. Result* reduct = static_cast(::operator new(sizeof(Result))); difference_type start = equally_split_point(length, num_threads, iam), stop = equally_split_point(length, num_threads, iam + 1); if (start < stop) { new(reduct) Result(f(o, begin + start)); ++start; constructed[iam] = true; } else constructed[iam] = false; for (; start < stop; ++start) *reduct = r(*reduct, f(o, begin + start)); thread_results[iam] = *reduct; } //parallel for (thread_index_t i = 0; i < num_threads; ++i) if (constructed[i]) output = r(output, thread_results[i]); // Points to last element processed (needed as return value for // some algorithms like transform). f.finish_iterator = begin + length; delete[] thread_results; delete[] constructed; return o; } } // end namespace #endif // -*- C++ -*- // Copyright (C) 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/list_partition.h * @brief Functionality to split sequence referenced by only input * iterators. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Leonor Frias Moya and Johannes Singler. #ifndef _GLIBCXX_PARALLEL_LIST_PARTITION_H #define _GLIBCXX_PARALLEL_LIST_PARTITION_H 1 #include #include namespace __gnu_parallel { /** @brief Shrinks and doubles the ranges. * @param os_starts Start positions worked on (oversampled). * @param count_to_two Counts up to 2. * @param range_length Current length of a chunk. * @param make_twice Whether the @c os_starts is allowed to be * grown or not */ template void shrink_and_double(std::vector& os_starts, size_t& count_to_two, size_t& range_length, const bool make_twice) { ++count_to_two; if (not make_twice or count_to_two < 2) shrink(os_starts, count_to_two, range_length); else { os_starts.resize((os_starts.size() - 1) * 2 + 1); count_to_two = 0; } } /** @brief Combines two ranges into one and thus halves the number of ranges. * @param os_starts Start positions worked on (oversampled). * @param count_to_two Counts up to 2. * @param range_length Current length of a chunk. */ template void shrink(std::vector& os_starts, size_t& count_to_two, size_t& range_length) { for (typename std::vector::size_type i = 0; i <= (os_starts.size() / 2); ++i) os_starts[i] = os_starts[i * 2]; range_length *= 2; } /** @brief Splits a sequence given by input iterators into parts of * almost equal size * * The function needs only one pass over the sequence. * @param begin Begin iterator of input sequence. * @param end End iterator of input sequence. * @param starts Start iterators for the resulting parts, dimension * @c num_parts+1. For convenience, @c starts @c [num_parts] * contains the end iterator of the sequence. * @param lengths Length of the resulting parts. * @param num_parts Number of parts to split the sequence into. * @param f Functor to be applied to each element by traversing it * @param oversampling Oversampling factor. If 0, then the * partitions will differ in at most @f$ \sqrt{\mathrm{end} - * \mathrm{begin}} @f$ elements. Otherwise, the ratio between the * longest and the shortest part is bounded by @f$ * 1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$. * @return Length of the whole sequence. */ template size_t list_partition(const InputIterator begin, const InputIterator end, InputIterator* starts, size_t* lengths, const int num_parts, FunctorType& f, int oversampling = 0) { bool make_twice = false; // The resizing algorithm is chosen according to the oversampling factor. if (oversampling == 0) { make_twice = true; oversampling = 1; } std::vector os_starts(2 * oversampling * num_parts + 1); os_starts[0]= begin; InputIterator prev = begin, it = begin; size_t dist_limit = 0, dist = 0; size_t cur = 1, next = 1; size_t range_length = 1; size_t count_to_two = 0; while (it != end) { cur = next; for (; cur < os_starts.size() and it != end; ++cur) { for (dist_limit += range_length; dist < dist_limit and it != end; ++dist) { f(it); ++it; } os_starts[cur] = it; } // Must compare for end and not cur < os_starts.size() , because // cur could be == os_starts.size() as well if (it == end) break; shrink_and_double(os_starts, count_to_two, range_length, make_twice); next = os_starts.size() / 2 + 1; } // Calculation of the parts (one must be extracted from current // because the partition beginning at end, consists only of // itself). size_t size_part = (cur - 1) / num_parts; int size_greater = static_cast((cur - 1) % num_parts); starts[0] = os_starts[0]; size_t index = 0; // Smallest partitions. for (int i = 1; i < (num_parts + 1 - size_greater); ++i) { lengths[i - 1] = size_part * range_length; index += size_part; starts[i] = os_starts[index]; } // Biggest partitions. for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i) { lengths[i - 1] = (size_part+1) * range_length; index += (size_part+1); starts[i] = os_starts[index]; } // Correction of the end size (the end iteration has not finished). lengths[num_parts - 1] -= (dist_limit - dist); return dist; } } #endif // -*- C++ -*- // Copyright (C) 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/base.h * @brief Sequential helper functions. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Johannes Singler. #ifndef _GLIBCXX_PARALLEL_BASE_H #define _GLIBCXX_PARALLEL_BASE_H 1 #include #include #include #include #include #include // Parallel mode namespaces. /** * @namespace std::__parallel * @brief GNU parallel code, replaces standard behavior with parallel behavior. */ namespace std { namespace __parallel { } } /** * @namespace __gnu_parallel * @brief GNU parallel code for public use. */ namespace __gnu_parallel { // Import all the parallel versions of components in namespace std. using namespace std::__parallel; } /** * @namespace __gnu_sequential * @brief GNU sequential classes for public use. */ namespace __gnu_sequential { // Import whatever is the serial version. #ifdef _GLIBCXX_PARALLEL using namespace std::__norm; #else using namespace std; #endif } namespace __gnu_parallel { // NB: Including this file cannot produce (unresolved) symbols from // the OpenMP runtime unless the parallel mode is actually invoked // and active, which imples that the OpenMP runtime is actually // going to be linked in. inline int get_max_threads() { int __i = omp_get_max_threads(); return __i > 1 ? __i : 1; } inline bool is_parallel(const _Parallelism __p) { return __p != sequential; } // XXX remove std::duplicates from here if possible, // XXX but keep minimal dependencies. /** @brief Calculates the rounded-down logarithm of @c n for base 2. * @param n Argument. * @return Returns 0 for argument 0. */ template inline Size log2(Size n) { Size k; for (k = 0; n != 1; n >>= 1) ++k; return k; } /** @brief Encode two integers into one __gnu_parallel::lcas_t. * @param a First integer, to be encoded in the most-significant @c * lcas_t_bits/2 bits. * @param b Second integer, to be encoded in the least-significant * @c lcas_t_bits/2 bits. * @return __gnu_parallel::lcas_t value encoding @c a and @c b. * @see decode2 */ inline lcas_t encode2(int a, int b) //must all be non-negative, actually { return (((lcas_t)a) << (lcas_t_bits / 2)) | (((lcas_t)b) << 0); } /** @brief Decode two integers from one __gnu_parallel::lcas_t. * @param x __gnu_parallel::lcas_t to decode integers from. * @param a First integer, to be decoded from the most-significant * @c lcas_t_bits/2 bits of @c x. * @param b Second integer, to be encoded in the least-significant * @c lcas_t_bits/2 bits of @c x. * @see encode2 */ inline void decode2(lcas_t x, int& a, int& b) { a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask); b = (int)((x >> 0 ) & lcas_t_mask); } /** @brief Equivalent to std::min. */ template const T& min(const T& a, const T& b) { return (a < b) ? a : b; } /** @brief Equivalent to std::max. */ template const T& max(const T& a, const T& b) { return (a > b) ? a : b; } /** @brief Constructs predicate for equality from strict weak * ordering predicate */ // XXX comparator at the end, as per others template class equal_from_less : public std::binary_function { private: Comparator& comp; public: equal_from_less(Comparator& _comp) : comp(_comp) { } bool operator()(const T1& a, const T2& b) { return !comp(a, b) && !comp(b, a); } }; /** @brief Similar to std::binder1st, * but giving the argument types explicitly. */ template class unary_negate : public std::unary_function { protected: _Predicate _M_pred; public: explicit unary_negate(const _Predicate& __x) : _M_pred(__x) { } bool operator()(const argument_type& __x) { return !_M_pred(__x); } }; /** @brief Similar to std::binder1st, * but giving the argument types explicitly. */ template class binder1st : public std::unary_function { protected: _Operation op; first_argument_type value; public: binder1st(const _Operation& __x, const first_argument_type& __y) : op(__x), value(__y) { } result_type operator()(const second_argument_type& __x) { return op(value, __x); } // _GLIBCXX_RESOLVE_LIB_DEFECTS // 109. Missing binders for non-const sequence elements result_type operator()(second_argument_type& __x) const { return op(value, __x); } }; /** * @brief Similar to std::binder2nd, but giving the argument types * explicitly. */ template class binder2nd : public std::unary_function { protected: _Operation op; second_argument_type value; public: binder2nd(const _Operation& __x, const second_argument_type& __y) : op(__x), value(__y) { } result_type operator()(const first_argument_type& __x) const { return op(__x, value); } // _GLIBCXX_RESOLVE_LIB_DEFECTS // 109. Missing binders for non-const sequence elements result_type operator()(first_argument_type& __x) { return op(__x, value); } }; /** @brief Similar to std::equal_to, but allows two different types. */ template struct equal_to : std::binary_function { bool operator()(const T1& t1, const T2& t2) const { return t1 == t2; } }; /** @brief Similar to std::less, but allows two different types. */ template struct less : std::binary_function { bool operator()(const T1& t1, const T2& t2) const { return t1 < t2; } bool operator()(const T2& t2, const T1& t1) const { return t2 < t1; } }; // Partial specialization for one type. Same as std::less. template struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool> { bool operator()(const _Tp& __x, const _Tp& __y) const { return __x < __y; } }; /** @brief Similar to std::plus, but allows two different types. */ template struct plus : public std::binary_function<_Tp1, _Tp2, _Tp1> { typedef __typeof__(*static_cast<_Tp1*>(NULL) + *static_cast<_Tp2*>(NULL)) result; result operator()(const _Tp1& __x, const _Tp2& __y) const { return __x + __y; } }; // Partial specialization for one type. Same as std::plus. template struct plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> { typedef __typeof__(*static_cast<_Tp*>(NULL) + *static_cast<_Tp*>(NULL)) result; result operator()(const _Tp& __x, const _Tp& __y) const { return __x + __y; } }; /** @brief Similar to std::multiplies, but allows two different types. */ template struct multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1> { typedef __typeof__(*static_cast<_Tp1*>(NULL) * *static_cast<_Tp2*>(NULL)) result; result operator()(const _Tp1& __x, const _Tp2& __y) const { return __x * __y; } }; // Partial specialization for one type. Same as std::multiplies. template struct multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> { typedef __typeof__(*static_cast<_Tp*>(NULL) * *static_cast<_Tp*>(NULL)) result; result operator()(const _Tp& __x, const _Tp& __y) const { return __x * __y; } }; template class pseudo_sequence; /** @brief Iterator associated with __gnu_parallel::pseudo_sequence. * If features the usual random-access iterator functionality. * @param T Sequence value type. * @param difference_type Sequence difference type. */ template class pseudo_sequence_iterator { public: typedef _DifferenceTp difference_type; private: typedef pseudo_sequence_iterator type; const T& val; difference_type pos; public: pseudo_sequence_iterator(const T& val, difference_type pos) : val(val), pos(pos) { } // Pre-increment operator. type& operator++() { ++pos; return *this; } // Post-increment operator. const type operator++(int) { return type(pos++); } const T& operator*() const { return val; } const T& operator[](difference_type) const { return val; } bool operator==(const type& i2) { return pos == i2.pos; } difference_type operator!=(const type& i2) { return pos != i2.pos; } difference_type operator-(const type& i2) { return pos - i2.pos; } }; /** @brief Sequence that conceptually consists of multiple copies of the same element. * The copies are not stored explicitly, of course. * @param T Sequence value type. * @param difference_type Sequence difference type. */ template class pseudo_sequence { typedef pseudo_sequence type; public: typedef _DifferenceTp difference_type; // Better case down to uint64, than up to _DifferenceTp. typedef pseudo_sequence_iterator iterator; /** @brief Constructor. * @param val Element of the sequence. * @param count Number of (virtual) copies. */ pseudo_sequence(const T& val, difference_type count) : val(val), count(count) { } /** @brief Begin iterator. */ iterator begin() const { return iterator(val, 0); } /** @brief End iterator. */ iterator end() const { return iterator(val, count); } private: const T& val; difference_type count; }; /** @brief Functor that does nothing */ template class void_functor { inline void operator()(const _ValueTp& v) const { } }; /** @brief Compute the median of three referenced elements, according to @c comp. * @param a First iterator. * @param b Second iterator. * @param c Third iterator. * @param comp Comparator. */ template RandomAccessIterator median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b, RandomAccessIterator c, Comparator& comp) { if (comp(*a, *b)) if (comp(*b, *c)) return b; else if (comp(*a, *c)) return c; X% else return a; else { // Just swap a and b. if (comp(*a, *c)) return a; else if (comp(*b, *c)) return c; else return b; } } // Avoid the use of assert, because we're trying to keep the // include out of the mix. (Same as debug mode). inline void __replacement_assert(const char* __file, int __line, const char* __function, const char* __condition) { std::printf("%s:%d: %s: Assertion '%s' failed.\n", __file, __line, __function, __condition); __builtin_abort(); } #define _GLIBCXX_PARALLEL_ASSERT(_Condition) \ do \ { \ if (!(_Condition)) \ __gnu_parallel::__replacement_assert(__FILE__, __LINE__, \ __PRETTY_FUNCTION__, #_Condition); \ } while (false) } //namespace __gnu_parallel #endif // -*- C++ -*- // Copyright (C) 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/omp_loop_static.h * @brief Parallelization of embarrassingly parallel execution by * means of an OpenMP for loop with static scheduling. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Felix Putze. #ifndef _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H #define _GLIBCXX_PARALLEL_OMP_LOOP_STATIC_H 1 #include #include #include namespace __gnu_parallel { /** @brief Embarrassingly parallel algorithm for random access * iterators, using an OpenMP for loop with static scheduling. * * @param begin Begin iterator of element sequence. * @param end End iterator of element sequence. * @param o User-supplied functor (comparator, predicate, adding * functor, ...). * @param f Functor to "process" an element with op (depends on * desired functionality, e. g. for std::for_each(), ...). * @param r Functor to "add" a single result to the already processed * elements (depends on functionality). * @param base Base value for reduction. * @param output Pointer to position where final result is written to * @param bound Maximum number of elements processed (e. g. for * std::count_n()). * @return User-supplied functor (that may contain a part of the result). */ template Op for_each_template_random_access_omp_loop_static(RandomAccessIterator begin, RandomAccessIterator end, Op o, Fu& f, Red r, Result base, Result& output, typename std::iterator_traits :: difference_type bound) { typedef typename std::iterator_traits::difference_type difference_type; difference_type length = end - begin; thread_index_t num_threads = std::min(get_max_threads(), length); Result *thread_results; # pragma omp parallel num_threads(num_threads) { # pragma omp single { num_threads = omp_get_num_threads(); thread_results = new Result[num_threads]; for (thread_index_t i = 0; i < num_threads; ++i) thread_results[i] = Result(); } thread_index_t iam = omp_get_thread_num(); # pragma omp for schedule(static, _Settings::get().workstealing_chunk_size) for (difference_type pos = 0; pos < length; ++pos) thread_results[iam] = r(thread_results[iam], f(o, begin+pos)); } //parallel for (thread_index_t i = 0; i < num_threads; ++i) output = r(output, thread_results[i]); delete [] thread_results; // Points to last element processed (needed as return value for // some algorithms like transform). f.finish_iterator = begin + length; return o; } } // end namespace #endif // -*- C++ -*- // Copyright (C) 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/for_each.h * @brief Main interface for embarrassingly parallel functions. * * The explicit implementation are in other header files, like * workstealing.h, par_loop.h, omp_loop.h, and omp_loop_static.h. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Felix Putze. #ifndef _GLIBCXX_PARALLEL_FOR_EACH_H #define _GLIBCXX_PARALLEL_FOR_EACH_H 1 #include #include #include #include namespace __gnu_parallel { /** @brief Chose the desired algorithm by evaluating @c parallelism_tag. * @param begin Begin iterator of input sequence. * @param end End iterator of input sequence. * @param user_op A user-specified functor (comparator, predicate, * associative operator,...) * @param functionality functor to "process" an element with * user_op (depends on desired functionality, e. g. accumulate, * for_each,... * @param reduction Reduction functor. * @param reduction_start Initial value for reduction. * @param output Output iterator. * @param bound Maximum number of elements processed. * @param parallelism_tag Parallelization method */ template UserOp for_each_template_random_access(InputIterator begin, InputIterator end, UserOp user_op, Functionality& functionality, Red reduction, Result reduction_start, Result& output, typename std::iterator_traits:: difference_type bound, _Parallelism parallelism_tag) { if (parallelism_tag == parallel_unbalanced) return for_each_template_random_access_ed(begin, end, user_op, functionality, reduction, reduction_start, output, bound); else if (parallelism_tag == parallel_omp_loop) return for_each_template_random_access_omp_loop(begin, end, user_op, functionality, reduction, reduction_start, output, bound); else if (parallelism_tag == parallel_omp_loop_static) return for_each_template_random_access_omp_loop(begin, end, user_op, functionality, reduction, reduction_start, output, bound); else //e. g. parallel_balanced return for_each_template_random_access_workstealing(begin, end, user_op, functionality, reduction, reduction_start, output, bound); } } #endif // -*- C++ -*- // Copyright (C) 2007 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/parallel.h * @brief End-user include file. Provides advanced settings and * tuning options. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Felix Putze and Johannes Singler. #ifndef _GLIBCXX_PARALLEL_PARALLEL_H #define _GLIBCXX_PARALLEL_PARALLEL_H 1 #include #include #include #include #include #endif // -*- C++ -*- // Copyright (C) 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or inline functions from this // file, or you compile this file and link it with other files to // produce an executable, this file does not by itself cause the // resulting executable to be covered by the GNU General Public // License. This exception does not however invalidate any other // reasons why the executable file might be covered by the GNU General // Public License. /** @file parallel/losertree.h * @brief Many generic loser tree variants. * This file is a GNU parallel extension to the Standard C++ Library. */ // Written by Johannes Singler. #ifndef _GLIBCXX_PARALLEL_LOSERTREE_H #define _GLIBCXX_PARALLEL_LOSERTREE_H 1 #include #include #include #include namespace __gnu_parallel { /** * @brief Guarded loser/tournament tree. * * The smallest element is at the top. * * Guarding is done explicitly through one flag sup per element, * inf is not needed due to a better initialization routine. This * is a well-performing variant. * * @param T the element type * @param Comparator the comparator to use, defaults to std::less */ template class LoserTreeBase { protected: /** @brief Internal representation of a LoserTree element. */ struct Loser { /** @brief flag, true iff this is a "maximum" sentinel. */ bool sup; /** @brief index of the source sequence. */ int source; /** @brief key of the element in the LoserTree. */ T key; }; unsigned int ik, k, offset; /** log_2{k} */ unsigned int _M_log_k; /** @brief LoserTree elements. */ Loser* losers; /** @brief Comparator to use. */ Comparator comp; /** * @brief State flag that determines whether the LoserTree is empty. * * Only used for building the LoserTree. */ bool first_insert; public: /** * @brief The constructor. * * @param _k The number of sequences to merge. * @param _comp The comparator to use. */ LoserTreeBase(unsigned int _k, Comparator _comp) : comp(_comp) { ik = _k; // Compute log_2{k} for the Loser Tree _M_log_k = log2(ik - 1) + 1; // Next greater power of 2. k = 1 << _M_log_k; offset = k; // Avoid default-constructing losers[].key losers = static_cast(::operator new(2 * k * sizeof(Loser))); for (unsigned int i = ik - 1; i < k; ++i) losers[i + k].sup = true; first_insert = true; } /** * @brief The destructor. */ ~LoserTreeBase() { ::operator delete(losers); } /** * @brief Initializes the sequence "source" with the element "key". * * @param key the element to insert * @param source index of the source sequence * @param sup flag that determines whether the value to insert is an * explicit supremum. */ inline void insert_start(const T& key, int source, bool sup) { unsigned int pos = k + source; if(first_insert) { // Construct all keys, so we can easily deconstruct them. for (unsigned int i = 0; i < (2 * k); ++i) new(&(losers[i].key)) T(key); first_insert = false; } else new(&(losers[pos].key)) T(key); losers[pos].sup = sup; losers[pos].source = source; } /** * @return the index of the sequence with the smallest element. */ int get_min_source() { return losers[0].source; } }; /** * @brief Stable LoserTree variant. * * Provides the stable implementations of insert_start, init_winner, * init and delete_min_insert. * * Unstable variant is done using partial specialisation below. */ template class LoserTree : public LoserTreeBase { typedef LoserTreeBase Base; using Base::k; using Base::losers; using Base::first_insert; public: LoserTree(unsigned int _k, Comparator _comp) : Base::LoserTreeBase(_k, _comp) {} unsigned int init_winner(unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); if (losers[right].sup || (!losers[left].sup && !comp(losers[right].key, losers[left].key))) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } void init() { losers[0] = losers[init_winner(1)]; } /** * @brief Delete the smallest element and insert a new element from * the previously smallest element's sequence. * * This implementation is stable. */ // Do not pass a const reference since key will be used as local variable. void delete_min_insert(T key, bool sup) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted, ties are broken by source. if ((sup && (!losers[pos].sup || losers[pos].source < source)) || (!sup && !losers[pos].sup && ((comp(losers[pos].key, key)) || (!comp(key, losers[pos].key) && losers[pos].source < source)))) { // The other one is smaller. std::swap(losers[pos].sup, sup); std::swap(losers[pos].source, source); std::swap(losers[pos].key, key); } } losers[0].sup = sup; losers[0].source = source; losers[0].key = key; } }; /** * @brief Unstable LoserTree variant. * * Stability (non-stable here) is selected with partial specialization. */ template class LoserTree : public LoserTreeBase { typedef LoserTreeBase Base; using Base::_M_log_k; using Base::k; using Base::losers; using Base::first_insert; public: LoserTree(unsigned int _k, Comparator _comp) : Base::LoserTreeBase(_k, _comp) {} /** * Computes the winner of the competition at position "root". * * Called recursively (starting at 0) to build the initial tree. * * @param root index of the "game" to start. */ unsigned int init_winner (unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); if (losers[right].sup || (!losers[left].sup && !comp(losers[right].key, losers[left].key))) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } inline void init() { losers[0] = losers[init_winner(1)]; } /** * Delete the key smallest element and insert the element key instead. * * @param key the key to insert * @param sup true iff key is an explicitly marked supremum */ // Do not pass a const reference since key will be used as local variable. inline void delete_min_insert(T key, bool sup) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted. if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) { // The other one is smaller. std::swap(losers[pos].sup, sup); std::swap(losers[pos].source, source); std::swap(losers[pos].key, key); } } losers[0].sup = sup; losers[0].source = source; losers[0].key = key; } }; /** * @brief Base class of Loser Tree implementation using pointers. */ template class LoserTreePointerBase { protected: /** @brief Internal representation of LoserTree elements. */ struct Loser { bool sup; int source; const T* keyp; }; unsigned int ik, k, offset; Loser* losers; Comparator comp; public: LoserTreePointerBase(unsigned int _k, Comparator _comp = std::less()) : comp(_comp) { ik = _k; // Next greater power of 2. k = 1 << (log2(ik - 1) + 1); offset = k; losers = new Loser[k * 2]; for (unsigned int i = ik - 1; i < k; i++) losers[i + k].sup = true; } ~LoserTreePointerBase() { ::operator delete[](losers); } int get_min_source() { return losers[0].source; } void insert_start(const T& key, int source, bool sup) { unsigned int pos = k + source; losers[pos].sup = sup; losers[pos].source = source; losers[pos].keyp = &key; } }; /** * @brief Stable LoserTree implementation. * * The unstable variant is implemented using partial instantiation below. */ template class LoserTreePointer : public LoserTreePointerBase { typedef LoserTreePointerBase Base; using Base::k; using Base::losers; public: LoserTreePointer(unsigned int _k, Comparator _comp = std::less()) : Base::LoserTreePointerBase(_k, _comp) {} unsigned int init_winner(unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); if (losers[right].sup || (!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp))) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } void init() { losers[0] = losers[init_winner(1)]; } void delete_min_insert(const T& key, bool sup) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif const T* keyp = &key; int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted, ties are broken by source. if ((sup && (!losers[pos].sup || losers[pos].source < source)) || (!sup && !losers[pos].sup && ((comp(*losers[pos].keyp, *keyp)) || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)))) { // The other one is smaller. std::swap(losers[pos].sup, sup); std::swap(losers[pos].source, source); std::swap(losers[pos].keyp, keyp); q%r%s%t%u%v%w%x%y%z%{%|%}%~% } } losers[0].sup = sup; losers[0].source = source; losers[0].keyp = keyp; } }; /** * @brief Unstable LoserTree implementation. * * The stable variant is above. */ template class LoserTreePointer : public LoserTreePointerBase { typedef LoserTreePointerBase Base; using Base::k; using Base::losers; public: LoserTreePointer(unsigned int _k, Comparator _comp = std::less()) : Base::LoserTreePointerBase(_k, _comp) {} unsigned int init_winner(unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); if (losers[right].sup || (!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp))) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } void init() { losers[0] = losers[init_winner(1)]; } void delete_min_insert(const T& key, bool sup) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif const T* keyp = &key; int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted. if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp))) { // The other one is smaller. std::swap(losers[pos].sup, sup); std::swap(losers[pos].source, source); std::swap(losers[pos].keyp, keyp); } } losers[0].sup = sup; losers[0].source = source; losers[0].keyp = keyp; } }; /** @brief Base class for unguarded LoserTree implementation. * * The whole element is copied into the tree structure. * * No guarding is done, therefore not a single input sequence must * run empty. Unused sequence heads are marked with a sentinel which * is > all elements that are to be merged. * * This is a very fast variant. */ template class LoserTreeUnguardedBase { protected: struct Loser { int source; T key; }; unsigned int ik, k, offset; Loser* losers; Comparator comp; public: inline LoserTreeUnguardedBase(unsigned int _k, const T _sentinel, Comparator _comp = std::less()) : comp(_comp) { ik = _k; // Next greater power of 2. k = 1 << (log2(ik - 1) + 1); offset = k; // Avoid default-constructing losers[].key losers = static_cast(::operator new(2 * k * sizeof(Loser))); for (unsigned int i = k + ik - 1; i < (2 * k); ++i) { losers[i].key = _sentinel; losers[i].source = -1; } } inline ~LoserTreeUnguardedBase() { ::operator delete(losers); } inline int get_min_source() { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif return losers[0].source; } inline void insert_start(const T& key, int source, bool) { unsigned int pos = k + source; new(&(losers[pos].key)) T(key); losers[pos].source = source; } }; /** * @brief Stable implementation of unguarded LoserTree. * * Unstable variant is selected below with partial specialization. */ template class LoserTreeUnguarded : public LoserTreeUnguardedBase { typedef LoserTreeUnguardedBase Base; using Base::k; using Base::losers; public: LoserTreeUnguarded(unsigned int _k, const T _sentinel, Comparator _comp = std::less()) : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) {} unsigned int init_winner(unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); if (!comp(losers[right].key, losers[left].key)) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } inline void init() { losers[0] = losers[init_winner(1)]; #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top at the beginning (0 sequences!) _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif } // Do not pass a const reference since key will be used as local variable. inline void delete_min_insert(T key, bool) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted, ties are broken by source. if (comp(losers[pos].key, key) || (!comp(key, losers[pos].key) && losers[pos].source < source)) { // The other one is smaller. std::swap(losers[pos].source, source); std::swap(losers[pos].key, key); } } losers[0].source = source; losers[0].key = key; } }; /** * @brief Non-Stable implementation of unguarded LoserTree. * * Stable implementation is above. */ template class LoserTreeUnguarded : public LoserTreeUnguardedBase { typedef LoserTreeUnguardedBase Base; using Base::k; using Base::losers; public: LoserTreeUnguarded(unsigned int _k, const T _sentinel, Comparator _comp = std::less()) : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp) {} unsigned int init_winner (unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); #if _GLIBCXX_ASSERTIONS // If left one is sentinel then right one must be, too. if (losers[left].source == -1) _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); #endif if (!comp(losers[right].key, losers[left].key)) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } inline void init() { losers[0] = losers[init_winner(1)]; #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top at the beginning (0 sequences!) _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif } // Do not pass a const reference since key will be used as local variable. inline void delete_min_insert(T key, bool) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted. if (comp(losers[pos].key, key)) { // The other one is smaller. std::swap(losers[pos].source, source); std::swap(losers[pos].key, key); } } losers[0].source = source; losers[0].key = key; } }; /** @brief Unguarded loser tree, keeping only pointers to the * elements in the tree structure. * * No guarding is done, therefore not a single input sequence must * run empty. This is a very fast variant. */ template class LoserTreePointerUnguardedBase { protected: struct Loser { int source; const T* keyp; }; unsigned int ik, k, offset; Loser* losers; Comparator comp; public: inline LoserTreePointerUnguardedBase(unsigned int _k, const T& _sentinel, Comparator _comp = std::less()) : comp(_comp) { ik = _k; // Next greater power of 2. k = 1 << (log2(ik - 1) + 1); offset = k; // Avoid default-constructing losers[].key losers = new Loser[2 * k]; for (unsigned int i = k + ik - 1; i < (2 * k); ++i) { losers[i].keyp = &_sentinel; losers[i].source = -1; } } inline ~LoserTreePointerUnguardedBase() { delete[] losers; } inline int get_min_source() { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif return losers[0].source; } inline void insert_start(const T& key, int source, bool) { unsigned int pos = k + source; losers[pos].keyp = &key; losers[pos].source = source; } }; /** * @brief Stable unguarded LoserTree variant storing pointers. * * Unstable variant is implemented below using partial specialization. */ template class LoserTreePointerUnguarded : public LoserTreePointerUnguardedBase { typedef LoserTreePointerUnguardedBase Base; using Base::k; using Base::losers; public: LoserTreePointerUnguarded(unsigned int _k, const T& _sentinel, Comparator _comp = std::less()) : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) {} unsigned int init_winner(unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); if (!comp(*losers[right].keyp, *losers[left].keyp)) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } inline void init() { losers[0] = losers[init_winner(1)]; #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top at the beginning (0 sequences!) _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif } inline void delete_min_insert(const T& key, bool sup) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif const T* keyp = &key; int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted, ties are broken by source. if (comp(*losers[pos].keyp, *keyp) || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)) { // The other one is smaller. std::swap(losers[pos].source, source); std::swap(losers[pos].keyp, keyp); } } losers[0].source = source; losers[0].keyp = keyp; } }; /** * @brief Unstable unguarded LoserTree variant storing pointers. * * Stable variant is above. */ template class LoserTreePointerUnguarded : public LoserTreePointerUnguardedBase { typedef LoserTreePointerUnguardedBase Base; using Base::k; using Base::losers; public: LoserTreePointerUnguarded(unsigned int _k, const T& _sentinel, Comparator _comp = std::less()) : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp) {} unsigned int init_winner(unsigned int root) { if (root >= k) { return root; } else { unsigned int left = init_winner (2 * root); unsigned int right = init_winner (2 * root + 1); #if _GLIBCXX_ASSERTIONS // If left one is sentinel then right one must be, too. if (losers[left].source == -1) _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1); #endif if (!comp(*losers[right].keyp, *losers[left].keyp)) { // Left one is less or equal. losers[root] = losers[right]; return left; } else { // Right one is less. losers[root] = losers[left]; return right; } } } inline void init() { losers[0] = losers[init_winner(1)]; #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top at the beginning (0 sequences!) _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif } inline void delete_min_insert(const T& key, bool sup) { #if _GLIBCXX_ASSERTIONS // no dummy sequence can ever be at the top! _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1); #endif const T* keyp = &key; int source = losers[0].source; for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) { // The smaller one gets promoted. if (comp(*(losers[pos].keyp), *keyp)) { // The other one is smaller. std::swap(losers[pos].source, source); std::swap(losers[pos].keyp, keyp); } } losers[0].source = source; losers[0].keyp = keyp; } }; } // namespace __gnu_parallel #endif // -*- C++ -*- // Copyright (C) 2007, 2008 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 2, or (at your option) any later // version. // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // You should have received a copy of the GNU General Public License // along with this library; see the file COPYING. If not, write to // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, // MA 02111-1307, USA. // As a special exception, you may use this file as part of a free // software library without restriction. Specifically, if other files // instantiate templates or use macros or